/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "exec/exec-all.h"
#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"
#include "cpregs.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Helpers for extracting complex instruction fields
 */

/*
 * For load/store with an unsigned 12 bit immediate scaled by the element
 * size. The input has the immediate field in bits [14:3] and the element
 * size in [2:0].
 */
static int uimm_scaled(DisasContext *s, int x)
{
    unsigned imm = x >> 3;
    unsigned scale = extract32(x, 0, 3);
    return imm << scale;
}
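
/*
 * Illustrative example (the specific encoding here is for exposition,
 * not taken from the decode file): for a 64-bit LDR the element size
 * field is 3, so the packed argument is x = (imm12 << 3) | 3, and an
 * encoded imm12 of 2 yields a byte offset of 2 << 3 = 16.
 */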

/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
static int scale_by_log2_tag_granule(DisasContext *s, int x)
{
    return x << LOG2_TAG_GRANULE;
}
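
/*
 * Example, assuming LOG2_TAG_GRANULE is 4 (i.e. 16-byte MTE tag
 * granules): an encoded offset of 3 becomes a byte offset of
 * 3 << 4 = 48, three whole granules.
 */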

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(tcg_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}
/*
 * Return the core mmu_idx to use for A64 load/store insns which
 * have an "unprivileged load/store" variant. Those insns access
 * EL0 if executed from an EL which has control over EL0 (usually
 * EL1) but behave like normal loads and stores if executed from
 * elsewhere (eg EL3).
 *
 * @unpriv : true for the unprivileged encoding; false for the
 *           normal encoding (in which case we will return the same
 *           thing as get_mem_index()).
 */
static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (unpriv && s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}
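
/*
 * Worked example of the and/or trick above (illustrative): with tbi == 1
 * the extension applies only to "positive" addresses (bit 55 == 0).  The
 * sextract leaves bits [63:56] all-zero in that case, so ANDing with src
 * is a no-op on the already-cleared byte; for bit 55 == 1 the sextract
 * yields all-ones in [63:56] and the AND restores the original tag byte,
 * passing the address through unmodified.  The tbi == 2 case is the
 * mirror image, using OR against an all-ones extension instead.
 */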

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register.  But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(tcg_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access.  This probes a single
 * address, the exact one specified.  The size and alignment of the access
 * are not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      MemOp memop, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, MemOp memop)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
                                 false, get_mem_index(s));
}
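
/*
 * A sketch of what the 32-bit MTEDESC descriptor built above carries
 * (the field layout itself comes from the MTEDESC FIELD definitions
 * elsewhere in the target): the mmu index, the TBI and TCMA state,
 * whether the access is a write, the required alignment, and the access
 * size minus one.  The mte_check helper decodes this at run time and
 * returns the (possibly tag-cleaned) address for the actual access.
 */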

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int total_size, MemOp single_mop)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

/*
 * Generate the special alignment check that applies to AccType_ATOMIC
 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
 * naturally aligned, but it must not cross a 16-byte boundary.
 * See AArch64.CheckAlignment().
 */
static void check_lse2_align(DisasContext *s, int rn, int imm,
                             bool is_write, MemOp mop)
{
    TCGv_i32 tmp;
    TCGv_i64 addr;
    TCGLabel *over_label;
    MMUAccessType type;
    int mmu_idx;

    tmp = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
    tcg_gen_addi_i32(tmp, tmp, imm & 15);
    tcg_gen_andi_i32(tmp, tmp, 15);
    tcg_gen_addi_i32(tmp, tmp, memop_size(mop));

    over_label = gen_new_label();
    tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);

    type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
    mmu_idx = get_mem_index(s);
    gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
                                tcg_constant_i32(mmu_idx));

    gen_set_label(over_label);
}
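
/*
 * Worked example (illustrative): the branch above skips the fault helper
 * whenever ((addr + imm) & 15) + access_size <= 16.  An 8-byte access
 * whose effective address has low nibble 12 computes 12 + 8 = 20 > 16,
 * so it crosses a 16-byte boundary and falls through to
 * gen_helper_unaligned_access.
 */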

/* Handle the alignment check for AccType_ATOMIC instructions. */
static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }

    /*
     * If size == MO_128, this is a LDXP, and the operation is single-copy
     * atomic for each doubleword, not the entire quadword; it still must
     * be quadword aligned.
     */
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (dc_isar_feature(aa64_lse2, s)) {
        check_lse2_align(s, rn, 0, true, mop);
    } else {
        mop |= MO_ALIGN;
    }
    return finalize_memop(s, mop);
}

/* Handle the alignment check for AccType_ORDERED instructions. */
static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
                                 bool is_write, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (!dc_isar_feature(aa64_lse2, s)) {
        mop |= MO_ALIGN;
    } else if (!s->naa) {
        check_lse2_align(s, rn, imm, is_write, mop);
    }
    return finalize_memop(s, mop);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments.  For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path.  A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
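
/*
 * Usage note (illustrative): for an instruction like ADD x0, sp, #16 the
 * base register must be read via cpu_reg_sp(), so that encoding 31 names
 * SP, whereas for ADD x0, x1, xzr the operand is read via cpu_reg() and
 * encoding 31 yields a fresh zero temporary instead of cpu_X[31].
 */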

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the read functions
 * always return a fresh temporary holding a copy of the register.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, tcg_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper.  */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 4-operand operation using an out-of-line helper.  */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand operation using an out-of-line helper that takes
 * a pointer to the CPU env.
 */
static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data,
                             gen_helper_gvec_4_ptr *fn)
{
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       tcg_env,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
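
/*
 * Why this works (a sketch): the target stores Z inverted, i.e. cpu_ZF
 * holds a value that is zero exactly when the Z flag is set, and cpu_NF
 * holds the sign in bit 31.  Splitting the 64-bit result into halves
 * puts the high half (whose bit 31 is the result's sign bit 63) into
 * cpu_NF, and ORing the two halves into cpu_ZF makes it nonzero iff any
 * bit of the 64-bit result was set.
 */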

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 result, flag, tmp;
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    tcg_gen_movi_i64(tmp, 0);
    tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    gen_set_NZ64(result);

    tcg_gen_xor_i64(flag, result, t0);
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_andc_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);

    tcg_gen_mov_i64(dest, result);
}

static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp = tcg_temp_new_i32();

    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_add64_CC(dest, t0, t1);
    } else {
        gen_add32_CC(dest, t0, t1);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 64 bit arithmetic */
    TCGv_i64 result, flag, tmp;

    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tcg_gen_sub_i64(result, t0, t1);

    gen_set_NZ64(result);

    tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    tcg_gen_xor_i64(flag, result, t0);
    tmp = tcg_temp_new_i64();
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_and_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);
    tcg_gen_mov_i64(dest, result);
}

static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 32 bit arithmetic */
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp;

    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_sub64_CC(dest, t0, t1);
    } else {
        gen_sub32_CC(dest, t0, t1);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
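
/*
 * A note on the V-flag computation used throughout the helpers above
 * (the standard two's-complement identity, nothing QEMU-specific):
 * signed overflow for an addition occurs iff both operands have the
 * same sign and the result's sign differs, i.e.
 * (result ^ t0) & ~(t0 ^ t1) has its sign bit set; for subtraction the
 * operand mask is inverted, giving (result ^ t0) & (t0 ^ t1).  Only the
 * top bit of cpu_VF is architecturally meaningful.
 */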

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();

    tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i64 tmphi = tcg_temp_new_i64();
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);

        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);

        tmphi = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
    }

    tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = true;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        assert(dc_isar_feature(aa64_sme, s));
        if (!sme_sm_enabled_check(s)) {
            goto fail_exit;
        }
    } else if (s->sve_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        goto fail_exit;
    }
    s->sve_access_checked = true;
    return fp_access_check(s);

 fail_exit:
    /* Assert that we only raise one exception per instruction. */
    assert(!s->sve_access_checked);
    s->sve_access_checked = true;
    return false;
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority.  This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        s->fp_access_checked = true;
        return sme_access_check(s);
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * Expanders for AdvSIMD translation functions.
 */

static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
                            gen_helper_gvec_2 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
    }
    return true;
}

static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
    }
    return true;
}

static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_8) {
        return false;
    }
    return do_gvec_fn3_no64(s, a, fn);
}

static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
    tcg_gen_shli_i64(tcg_out, tcg_out, shift);
}
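
/*
 * Example (illustrative): for LDR x0, [x1, w2, UXTW #3] the extend
 * option is 0b010 (UXTW), so extsize is MO_32 with MO_SIGN clear; the
 * index register is zero-extended from 32 bits and then shifted left
 * by 3 before being added to the base.
 */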

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table based lookup decoder. It is intended
 * to be used when the relevant bits for decode are too awkwardly
 * placed and switch/if based logic would be confusing and deeply
 * nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}
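
/*
 * Hypothetical usage sketch (the pattern/mask values and handler name
 * here are invented for illustration, not a real decode entry):
 *
 *     static const AArch64DecodeTable example_table[] = {
 *         { 0x0e200400, 0x9f200400, disas_some_simd_group },
 *         { 0x00000000, 0x00000000, NULL }   -- zero mask terminates
 *     };
 *     AArch64DecodeFn *fn = lookup_disas_fn(&example_table[0], insn);
 *     if (fn) {
 *         fn(s, insn);
 *     }
 */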

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

static bool trans_B(DisasContext *s, arg_i *a)
{
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_BL(DisasContext *s, arg_i *a)
{
    gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_CBZ(DisasContext *s, arg_cbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_TBZ(DisasContext *s, arg_tbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
{
    /* BC.cond is only present with FEAT_HBC */
    if (a->c && !dc_isar_feature(aa64_hbc, s)) {
        return false;
    }
    reset_btype(s);
    if (a->cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(a->cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, a->imm);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, a->imm);
    }
    return true;
}

static void set_btype_for_br(DisasContext *s, int rn)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BR to {x16,x17} or !guard -> 1, else 3.  */
        if (rn == 16 || rn == 17) {
            set_btype(s, 1);
        } else {
            TCGv_i64 pc = tcg_temp_new_i64();
            gen_pc_plus_diff(s, pc, 0);
            gen_helper_guarded_page_br(tcg_env, pc);
            s->btype = -1;
        }
    }
}

static void set_btype_for_blr(DisasContext *s)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BLR sets BTYPE to 2, regardless of source guarded page.  */
        set_btype(s, 2);
    }
}

static bool trans_BR(DisasContext *s, arg_r *a)
{
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLR(DisasContext *s, arg_r *a)
{
    TCGv_i64 dst = cpu_reg(s, a->rn);
    TCGv_i64 lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RET(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}
1575 
1576 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1577                                    TCGv_i64 modifier, bool use_key_a)
1578 {
1579     TCGv_i64 truedst;
1580     /*
1581      * Return the branch target for a BRAA/RETA/etc, which is either
1582      * just the destination dst, or that value with the pauth check
1583      * done and the code removed from the high bits.
1584      */
1585     if (!s->pauth_active) {
1586         return dst;
1587     }
1588 
1589     truedst = tcg_temp_new_i64();
1590     if (use_key_a) {
1591         gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
1592     } else {
1593         gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
1594     }
1595     return truedst;
1596 }
1597 
1598 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1599 {
1600     TCGv_i64 dst;
1601 
1602     if (!dc_isar_feature(aa64_pauth, s)) {
1603         return false;
1604     }
1605 
1606     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1607     set_btype_for_br(s, a->rn);
1608     gen_a64_set_pc(s, dst);
1609     s->base.is_jmp = DISAS_JUMP;
1610     return true;
1611 }
1612 
1613 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1614 {
1615     TCGv_i64 dst, lr;
1616 
1617     if (!dc_isar_feature(aa64_pauth, s)) {
1618         return false;
1619     }
1620 
1621     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1622     lr = cpu_reg(s, 30);
1623     if (dst == lr) {
1624         TCGv_i64 tmp = tcg_temp_new_i64();
1625         tcg_gen_mov_i64(tmp, dst);
1626         dst = tmp;
1627     }
1628     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1629     gen_a64_set_pc(s, dst);
1630     set_btype_for_blr(s);
1631     s->base.is_jmp = DISAS_JUMP;
1632     return true;
1633 }
1634 
1635 static bool trans_RETA(DisasContext *s, arg_reta *a)
1636 {
1637     TCGv_i64 dst;
1638 
1639     dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1640     gen_a64_set_pc(s, dst);
1641     s->base.is_jmp = DISAS_JUMP;
1642     return true;
1643 }
1644 
1645 static bool trans_BRA(DisasContext *s, arg_bra *a)
1646 {
1647     TCGv_i64 dst;
1648 
1649     if (!dc_isar_feature(aa64_pauth, s)) {
1650         return false;
1651     }
1652     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1653     gen_a64_set_pc(s, dst);
1654     set_btype_for_br(s, a->rn);
1655     s->base.is_jmp = DISAS_JUMP;
1656     return true;
1657 }
1658 
1659 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1660 {
1661     TCGv_i64 dst, lr;
1662 
1663     if (!dc_isar_feature(aa64_pauth, s)) {
1664         return false;
1665     }
1666     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1667     lr = cpu_reg(s, 30);
1668     if (dst == lr) {
1669         TCGv_i64 tmp = tcg_temp_new_i64();
1670         tcg_gen_mov_i64(tmp, dst);
1671         dst = tmp;
1672     }
1673     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1674     gen_a64_set_pc(s, dst);
1675     set_btype_for_blr(s);
1676     s->base.is_jmp = DISAS_JUMP;
1677     return true;
1678 }
1679 
1680 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1681 {
1682     TCGv_i64 dst;
1683 
1684     if (s->current_el == 0) {
1685         return false;
1686     }
1687     if (s->trap_eret) {
1688         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1689         return true;
1690     }
1691     dst = tcg_temp_new_i64();
1692     tcg_gen_ld_i64(dst, tcg_env,
1693                    offsetof(CPUARMState, elr_el[s->current_el]));
1694 
1695     translator_io_start(&s->base);
1696 
1697     gen_helper_exception_return(tcg_env, dst);
1698     /* Must exit loop to check unmasked IRQs */
1699     s->base.is_jmp = DISAS_EXIT;
1700     return true;
1701 }
1702 
1703 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1704 {
1705     TCGv_i64 dst;
1706 
1707     if (!dc_isar_feature(aa64_pauth, s)) {
1708         return false;
1709     }
1710     if (s->current_el == 0) {
1711         return false;
1712     }
1713     /* The FGT trap takes precedence over an auth trap. */
1714     if (s->trap_eret) {
1715         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1716         return true;
1717     }
1718     dst = tcg_temp_new_i64();
1719     tcg_gen_ld_i64(dst, tcg_env,
1720                    offsetof(CPUARMState, elr_el[s->current_el]));
1721 
1722     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1723 
1724     translator_io_start(&s->base);
1725 
1726     gen_helper_exception_return(tcg_env, dst);
1727     /* Must exit loop to check unmasked IRQs */
1728     s->base.is_jmp = DISAS_EXIT;
1729     return true;
1730 }
1731 
1732 static bool trans_NOP(DisasContext *s, arg_NOP *a)
1733 {
1734     return true;
1735 }
1736 
1737 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1738 {
1739     /*
1740      * When running in MTTCG we don't generate jumps to the yield and
1741      * WFE helpers as it won't affect the scheduling of other vCPUs.
1742      * If we wanted to more completely model WFE/SEV so we don't busy
1743      * spin unnecessarily we would need to do something more involved.
1744      */
1745     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1746         s->base.is_jmp = DISAS_YIELD;
1747     }
1748     return true;
1749 }
1750 
1751 static bool trans_WFI(DisasContext *s, arg_WFI *a)
1752 {
1753     s->base.is_jmp = DISAS_WFI;
1754     return true;
1755 }
1756 
1757 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1758 {
1759     /*
1760      * When running in MTTCG we don't generate jumps to the yield and
1761      * WFE helpers as it won't affect the scheduling of other vCPUs.
1762      * If we wanted to more completely model WFE/SEV so we don't busy
1763      * spin unnecessarily we would need to do something more involved.
1764      */
1765     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1766         s->base.is_jmp = DISAS_WFE;
1767     }
1768     return true;
1769 }
1770 
1771 static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
1772 {
1773     if (!dc_isar_feature(aa64_wfxt, s)) {
1774         return false;
1775     }
1776 
1777     /*
1778      * Because we need to pass the register value to the helper,
1779      * it's easier to emit the code now, unlike trans_WFI which
1780      * defers it to aarch64_tr_tb_stop(). That means we need to
1781      * check ss_active so that single-stepping a WFIT doesn't halt.
1782      */
1783     if (s->ss_active) {
1784         /* Act like a NOP under architectural singlestep */
1785         return true;
1786     }
1787 
1788     gen_a64_update_pc(s, 4);
1789     gen_helper_wfit(tcg_env, cpu_reg(s, a->rd));
1790     /* Go back to the main loop to check for interrupts */
1791     s->base.is_jmp = DISAS_EXIT;
1792     return true;
1793 }
1794 
1795 static bool trans_WFET(DisasContext *s, arg_WFET *a)
1796 {
1797     if (!dc_isar_feature(aa64_wfxt, s)) {
1798         return false;
1799     }
1800 
1801     /*
1802      * We rely here on our WFE implementation being a NOP, so we
1803      * don't need to do anything different to handle the WFET timeout
1804      * from what trans_WFE does.
1805      */
1806     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1807         s->base.is_jmp = DISAS_WFE;
1808     }
1809     return true;
1810 }
1811 
1812 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1813 {
1814     if (s->pauth_active) {
1815         gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
1816     }
1817     return true;
1818 }
1819 
1820 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
1821 {
1822     if (s->pauth_active) {
1823         gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1824     }
1825     return true;
1826 }
1827 
1828 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
1829 {
1830     if (s->pauth_active) {
1831         gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1832     }
1833     return true;
1834 }
1835 
1836 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
1837 {
1838     if (s->pauth_active) {
1839         gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1840     }
1841     return true;
1842 }
1843 
1844 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
1845 {
1846     if (s->pauth_active) {
1847         gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1848     }
1849     return true;
1850 }
1851 
1852 static bool trans_ESB(DisasContext *s, arg_ESB *a)
1853 {
1854     /* Without RAS, we must implement this as NOP. */
1855     if (dc_isar_feature(aa64_ras, s)) {
1856         /*
1857          * QEMU does not have a source of physical SErrors,
1858          * so we are only concerned with virtual SErrors.
1859          * The pseudocode in the Arm ARM for this case is
1860          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
1861          *      AArch64.vESBOperation();
1862          * Most of the condition can be evaluated at translation time.
1863          * Test for EL2 present, and defer test for SEL2 to runtime.
1864          */
1865         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
1866             gen_helper_vesb(tcg_env);
1867         }
1868     }
1869     return true;
1870 }
1871 
1872 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
1873 {
1874     if (s->pauth_active) {
1875         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1876     }
1877     return true;
1878 }
1879 
1880 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
1881 {
1882     if (s->pauth_active) {
1883         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1884     }
1885     return true;
1886 }
1887 
1888 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
1889 {
1890     if (s->pauth_active) {
1891         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1892     }
1893     return true;
1894 }
1895 
1896 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
1897 {
1898     if (s->pauth_active) {
1899         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1900     }
1901     return true;
1902 }
1903 
1904 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
1905 {
1906     if (s->pauth_active) {
1907         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1908     }
1909     return true;
1910 }
1911 
1912 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
1913 {
1914     if (s->pauth_active) {
1915         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1916     }
1917     return true;
1918 }
1919 
1920 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
1921 {
1922     if (s->pauth_active) {
1923         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1924     }
1925     return true;
1926 }
1927 
1928 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
1929 {
1930     if (s->pauth_active) {
1931         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1932     }
1933     return true;
1934 }
1935 
1936 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
1937 {
1938     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1939     return true;
1940 }
1941 
1942 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
1943 {
1944     /* We handle DSB and DMB the same way */
1945     TCGBar bar;
1946 
1947     switch (a->types) {
1948     case 1: /* MBReqTypes_Reads */
1949         bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1950         break;
1951     case 2: /* MBReqTypes_Writes */
1952         bar = TCG_BAR_SC | TCG_MO_ST_ST;
1953         break;
1954     default: /* MBReqTypes_All */
1955         bar = TCG_BAR_SC | TCG_MO_ALL;
1956         break;
1957     }
1958     tcg_gen_mb(bar);
1959     return true;
1960 }
1961 
1962 static bool trans_ISB(DisasContext *s, arg_ISB *a)
1963 {
1964     /*
1965      * We need to break the TB after this insn to execute
1966      * self-modifying code correctly and also to take
1967      * any pending interrupts immediately.
1968      */
1969     reset_btype(s);
1970     gen_goto_tb(s, 0, 4);
1971     return true;
1972 }
1973 
1974 static bool trans_SB(DisasContext *s, arg_SB *a)
1975 {
1976     if (!dc_isar_feature(aa64_sb, s)) {
1977         return false;
1978     }
1979     /*
1980      * TODO: There is no speculation barrier opcode for TCG;
1981      * MB and end the TB instead.
1982      */
1983     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1984     gen_goto_tb(s, 0, 4);
1985     return true;
1986 }
1987 
1988 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
1989 {
1990     if (!dc_isar_feature(aa64_condm_4, s)) {
1991         return false;
1992     }
1993     tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1994     return true;
1995 }
1996 
1997 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
1998 {
1999     TCGv_i32 z;
2000 
2001     if (!dc_isar_feature(aa64_condm_5, s)) {
2002         return false;
2003     }
2004 
2005     z = tcg_temp_new_i32();
2006 
2007     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
2008 
2009     /*
2010      * (!C & !Z) << 31
2011      * (!(C | Z)) << 31
2012      * ~((C | Z) << 31)
2013      * ~-(C | Z)
2014      * (C | Z) - 1
2015      */
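         /*
          * Spot-checking that identity on bit 31 (the only bit of NF that
          * matters for N); C and Z are each 0 or 1 here:
          *
          *   C Z | C|Z | (C|Z) - 1  | bit 31 | desired (!C & !Z)
          *   0 0 |  0  | 0xffffffff |   1    |   1
          *   0 1 |  1  | 0x00000000 |   0    |   0
          *   1 0 |  1  | 0x00000000 |   0    |   0
          *   1 1 |  1  | 0x00000000 |   0    |   0
          */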
2016     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
2017     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
2018 
2019     /* !(Z & C) */
2020     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
2021     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
2022 
2023     /* (!C & Z) << 31 -> -(Z & ~C) */
2024     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
2025     tcg_gen_neg_i32(cpu_VF, cpu_VF);
2026 
2027     /* C | Z */
2028     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
2029 
2030     return true;
2031 }
2032 
2033 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
2034 {
2035     if (!dc_isar_feature(aa64_condm_5, s)) {
2036         return false;
2037     }
2038 
2039     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
2040     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
2041 
2042     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
2043     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
2044 
2045     tcg_gen_movi_i32(cpu_NF, 0);
2046     tcg_gen_movi_i32(cpu_VF, 0);
2047 
2048     return true;
2049 }
2050 
2051 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
2052 {
2053     if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
2054         return false;
2055     }
2056     if (a->imm & 1) {
2057         set_pstate_bits(PSTATE_UAO);
2058     } else {
2059         clear_pstate_bits(PSTATE_UAO);
2060     }
2061     gen_rebuild_hflags(s);
2062     s->base.is_jmp = DISAS_TOO_MANY;
2063     return true;
2064 }
2065 
2066 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
2067 {
2068     if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
2069         return false;
2070     }
2071     if (a->imm & 1) {
2072         set_pstate_bits(PSTATE_PAN);
2073     } else {
2074         clear_pstate_bits(PSTATE_PAN);
2075     }
2076     gen_rebuild_hflags(s);
2077     s->base.is_jmp = DISAS_TOO_MANY;
2078     return true;
2079 }
2080 
2081 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
2082 {
2083     if (s->current_el == 0) {
2084         return false;
2085     }
2086     gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
2087     s->base.is_jmp = DISAS_TOO_MANY;
2088     return true;
2089 }
2090 
2091 static bool trans_MSR_i_SSBS(DisasContext *s, arg_i *a)
2092 {
2093     if (!dc_isar_feature(aa64_ssbs, s)) {
2094         return false;
2095     }
2096     if (a->imm & 1) {
2097         set_pstate_bits(PSTATE_SSBS);
2098     } else {
2099         clear_pstate_bits(PSTATE_SSBS);
2100     }
2101     /* Don't need to rebuild hflags since SSBS is a nop */
2102     s->base.is_jmp = DISAS_TOO_MANY;
2103     return true;
2104 }
2105 
2106 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
2107 {
2108     if (!dc_isar_feature(aa64_dit, s)) {
2109         return false;
2110     }
2111     if (a->imm & 1) {
2112         set_pstate_bits(PSTATE_DIT);
2113     } else {
2114         clear_pstate_bits(PSTATE_DIT);
2115     }
2116     /* There's no need to rebuild hflags because DIT is a nop */
2117     s->base.is_jmp = DISAS_TOO_MANY;
2118     return true;
2119 }
2120 
2121 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2122 {
2123     if (dc_isar_feature(aa64_mte, s)) {
2124         /* Full MTE is enabled -- set the TCO bit as directed. */
2125         if (a->imm & 1) {
2126             set_pstate_bits(PSTATE_TCO);
2127         } else {
2128             clear_pstate_bits(PSTATE_TCO);
2129         }
2130         gen_rebuild_hflags(s);
2131         /* Many factors, including TCO, go into MTE_ACTIVE. */
2132         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2133         return true;
2134     } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2135         /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
2136         return true;
2137     } else {
2138         /* Insn not present */
2139         return false;
2140     }
2141 }
2142 
2143 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2144 {
2145     gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2146     s->base.is_jmp = DISAS_TOO_MANY;
2147     return true;
2148 }
2149 
2150 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2151 {
2152     gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2153     /* Exit the cpu loop to re-evaluate pending IRQs. */
2154     s->base.is_jmp = DISAS_UPDATE_EXIT;
2155     return true;
2156 }
2157 
2158 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a)
2159 {
2160     if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) {
2161         return false;
2162     }
2163 
2164     if (a->imm == 0) {
2165         clear_pstate_bits(PSTATE_ALLINT);
2166     } else if (s->current_el > 1) {
2167         set_pstate_bits(PSTATE_ALLINT);
2168     } else {
2169         gen_helper_msr_set_allint_el1(tcg_env);
2170     }
2171 
2172     /* Exit the cpu loop to re-evaluate pending IRQs. */
2173     s->base.is_jmp = DISAS_UPDATE_EXIT;
2174     return true;
2175 }
2176 
2177 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2178 {
2179     if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2180         return false;
2181     }
2182     if (sme_access_check(s)) {
2183         int old = s->pstate_sm | (s->pstate_za << 1);
2184         int new = a->imm * 3;
2185 
2186         if ((old ^ new) & a->mask) {
2187             /* At least one bit changes. */
2188             gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2189                                 tcg_constant_i32(a->mask));
2190             s->base.is_jmp = DISAS_TOO_MANY;
2191         }
2192     }
2193     return true;
2194 }
2195 
2196 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2197 {
2198     TCGv_i32 tmp = tcg_temp_new_i32();
2199     TCGv_i32 nzcv = tcg_temp_new_i32();
2200 
2201     /* build bit 31, N */
2202     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2203     /* build bit 30, Z */
2204     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2205     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2206     /* build bit 29, C */
2207     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2208     /* build bit 28, V */
2209     tcg_gen_shri_i32(tmp, cpu_VF, 31);
2210     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2211     /* generate result */
2212     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2213 }
2214 
2215 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2216 {
2217     TCGv_i32 nzcv = tcg_temp_new_i32();
2218 
2219     /* take NZCV from R[t] */
2220     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2221 
2222     /* bit 31, N */
2223     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2224     /* bit 30, Z */
2225     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2226     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2227     /* bit 29, C */
2228     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2229     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2230     /* bit 28, V */
2231     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2232     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2233 }
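
     /*
      * Worked example for the two helpers above, assuming the architectural
      * NZCV value 0x60000000 (Z and C set, N and V clear): gen_set_nzcv()
      * leaves cpu_NF with bit 31 clear, cpu_ZF == 0 (QEMU encodes Z-set as
      * "ZF is zero"), cpu_CF == 1 and cpu_VF with bit 31 clear;
      * gen_get_nzcv() maps that state back to 0x60000000 in the low
      * 32 bits of Rt.
      */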
2234 
2235 static void gen_sysreg_undef(DisasContext *s, bool isread,
2236                              uint8_t op0, uint8_t op1, uint8_t op2,
2237                              uint8_t crn, uint8_t crm, uint8_t rt)
2238 {
2239     /*
2240      * Generate code to emit an UNDEF with correct syndrome
2241      * information for a failed system register access.
2242      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2243      * but if FEAT_IDST is implemented then read accesses to registers
2244      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2245      * syndrome.
2246      */
2247     uint32_t syndrome;
2248 
2249     if (isread && dc_isar_feature(aa64_ids, s) &&
2250         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2251         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2252     } else {
2253         syndrome = syn_uncategorized();
2254     }
2255     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2256 }
2257 
2258 /* MRS - move from system register
2259  * MSR (register) - move to system register
2260  * SYS
2261  * SYSL
2262  * These are all essentially the same insn in 'read' and 'write'
2263  * versions, with varying op0 fields.
2264  */
2265 static void handle_sys(DisasContext *s, bool isread,
2266                        unsigned int op0, unsigned int op1, unsigned int op2,
2267                        unsigned int crn, unsigned int crm, unsigned int rt)
2268 {
2269     uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2270                                       crn, crm, op0, op1, op2);
2271     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2272     bool need_exit_tb = false;
2273     bool nv_trap_to_el2 = false;
2274     bool nv_redirect_reg = false;
2275     bool skip_fp_access_checks = false;
2276     bool nv2_mem_redirect = false;
2277     TCGv_ptr tcg_ri = NULL;
2278     TCGv_i64 tcg_rt;
2279     uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2280 
2281     if (crn == 11 || crn == 15) {
2282         /*
2283          * Check for TIDCP trap, which must take precedence over
2284          * the UNDEF for "no such register" etc.
2285          */
2286         switch (s->current_el) {
2287         case 0:
2288             if (dc_isar_feature(aa64_tidcp1, s)) {
2289                 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
2290             }
2291             break;
2292         case 1:
2293             gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
2294             break;
2295         }
2296     }
2297 
2298     if (!ri) {
2299         /* Unknown register; this might be a guest error or a QEMU
2300          * unimplemented feature.
2301          */
2302         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2303                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2304                       isread ? "read" : "write", op0, op1, crn, crm, op2);
2305         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2306         return;
2307     }
2308 
2309     if (s->nv2 && ri->nv2_redirect_offset) {
2310         /*
2311          * Some registers always redirect to memory; some only do so if
2312          * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
2313          * pairs which share an offset; see the table in R_CSRPQ).
2314          */
2315         if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
2316             nv2_mem_redirect = s->nv1;
2317         } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
2318             nv2_mem_redirect = !s->nv1;
2319         } else {
2320             nv2_mem_redirect = true;
2321         }
2322     }
2323 
2324     /* Check access permissions */
2325     if (!cp_access_ok(s->current_el, ri, isread)) {
2326         /*
2327          * FEAT_NV/NV2 handling does not do the usual FP access checks
2328          * for registers only accessible at EL2 (though it *does* do them
2329          * for registers accessible at EL1).
2330          */
2331         skip_fp_access_checks = true;
2332         if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
2333             /*
2334              * This is one of the few EL2 registers which should redirect
2335              * to the equivalent EL1 register. We do that after running
2336              * the EL2 register's accessfn.
2337              */
2338             nv_redirect_reg = true;
2339             assert(!nv2_mem_redirect);
2340         } else if (nv2_mem_redirect) {
2341             /*
2342              * NV2 redirect-to-memory takes precedence over trap to EL2 or
2343              * UNDEF to EL1.
2344              */
2345         } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
2346             /*
2347              * This register / instruction exists and is an EL2 register, so
2348              * we must trap to EL2 if accessed in nested virtualization EL1
2349              * instead of UNDEFing. We'll do that after the usual access checks.
2350              * (This makes a difference only for a couple of registers like
2351              * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
2352              * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
2353              * an accessfn which does nothing when called from EL1, because
2354              * the trap-to-EL3 controls which would apply to that register
2355              * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
2356              */
2357             nv_trap_to_el2 = true;
2358         } else {
2359             gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2360             return;
2361         }
2362     }
2363 
2364     if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2365         /* Emit code to perform further access permissions checks at
2366          * runtime; this may result in an exception.
2367          */
2368         gen_a64_update_pc(s, 0);
2369         tcg_ri = tcg_temp_new_ptr();
2370         gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
2371                                        tcg_constant_i32(key),
2372                                        tcg_constant_i32(syndrome),
2373                                        tcg_constant_i32(isread));
2374     } else if (ri->type & ARM_CP_RAISES_EXC) {
2375         /*
2376          * The readfn or writefn might raise an exception;
2377          * synchronize the CPU state in case it does.
2378          */
2379         gen_a64_update_pc(s, 0);
2380     }
2381 
2382     if (!skip_fp_access_checks) {
2383         if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2384             return;
2385         } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2386             return;
2387         } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2388             return;
2389         }
2390     }
2391 
2392     if (nv_trap_to_el2) {
2393         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2394         return;
2395     }
2396 
2397     if (nv_redirect_reg) {
2398         /*
2399          * FEAT_NV2 redirection of an EL2 register to an EL1 register.
2400          * Conveniently in all cases the encoding of the EL1 register is
2401          * identical to the EL2 register except that opc1 is 0.
2402          * Get the reginfo for the EL1 register to use for the actual access.
2403          * We don't use the EL1 register's access function, and
2404          * fine-grained-traps on EL1 also do not apply here.
2405          */
2406         key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2407                                  crn, crm, op0, 0, op2);
2408         ri = get_arm_cp_reginfo(s->cp_regs, key);
2409         assert(ri);
2410         assert(cp_access_ok(s->current_el, ri, isread));
2411         /*
2412          * We might not have done an update_pc earlier, so check we don't
2413          * need it. We could support this in future if necessary.
2414          */
2415         assert(!(ri->type & ARM_CP_RAISES_EXC));
2416     }
2417 
2418     if (nv2_mem_redirect) {
2419         /*
2420          * This system register is being redirected into an EL2 memory access.
2421          * This means it is not an IO operation, doesn't change hflags,
2422          * and need not end the TB, because it has no side effects.
2423          *
2424          * The access is 64-bit single copy atomic, guaranteed aligned because
2425          * of the definition of VNCR_EL2. Its endianness depends on
2426          * SCTLR_EL2.EE, not on the data endianness of EL1.
2427          * It is done under either the EL2 translation regime or the EL2&0
2428          * translation regime, depending on HCR_EL2.E2H. It behaves as if
2429          * PSTATE.PAN is 0.
2430          */
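             /*
              * Concretely (a sketch of the code below, not an extra case):
              * the effective address is VNCR_EL2 plus the register's
              * nv2_redirect_offset with the NV1/NO_NV1 flag bits masked off,
              * and the access uses the EL2 (or EL2&0) MMU index rather than
              * the current EL1 translation regime.
              */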
2431         TCGv_i64 ptr = tcg_temp_new_i64();
2432         MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
2433         ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
2434         int memidx = arm_to_core_mmu_idx(armmemidx);
2435         uint32_t syn;
2436 
2437         mop |= (s->nv2_mem_be ? MO_BE : MO_LE);
2438 
2439         tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
2440         tcg_gen_addi_i64(ptr, ptr,
2441                          (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
2442         tcg_rt = cpu_reg(s, rt);
2443 
2444         syn = syn_data_abort_vncr(0, !isread, 0);
2445         disas_set_insn_syndrome(s, syn);
2446         if (isread) {
2447             tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
2448         } else {
2449             tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
2450         }
2451         return;
2452     }
2453 
2454     /* Handle special cases first */
2455     switch (ri->type & ARM_CP_SPECIAL_MASK) {
2456     case 0:
2457         break;
2458     case ARM_CP_NOP:
2459         return;
2460     case ARM_CP_NZCV:
2461         tcg_rt = cpu_reg(s, rt);
2462         if (isread) {
2463             gen_get_nzcv(tcg_rt);
2464         } else {
2465             gen_set_nzcv(tcg_rt);
2466         }
2467         return;
2468     case ARM_CP_CURRENTEL:
2469     {
2470         /*
2471          * Reads as current EL value from pstate, which is
2472          * guaranteed to be constant by the tb flags.
2473          * For nested virt we should report EL2.
2474          */
2475         int el = s->nv ? 2 : s->current_el;
2476         tcg_rt = cpu_reg(s, rt);
2477         tcg_gen_movi_i64(tcg_rt, el << 2);
2478         return;
2479     }
2480     case ARM_CP_DC_ZVA:
2481         /* Writes clear the aligned block of memory which rt points into. */
2482         if (s->mte_active[0]) {
2483             int desc = 0;
2484 
2485             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2486             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2487             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2488 
2489             tcg_rt = tcg_temp_new_i64();
2490             gen_helper_mte_check_zva(tcg_rt, tcg_env,
2491                                      tcg_constant_i32(desc), cpu_reg(s, rt));
2492         } else {
2493             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2494         }
2495         gen_helper_dc_zva(tcg_env, tcg_rt);
2496         return;
2497     case ARM_CP_DC_GVA:
2498         {
2499             TCGv_i64 clean_addr, tag;
2500 
2501             /*
2502              * DC_GVA, like DC_ZVA, requires that we supply the original
2503              * pointer for an invalid page.  Probe that address first.
2504              */
2505             tcg_rt = cpu_reg(s, rt);
2506             clean_addr = clean_data_tbi(s, tcg_rt);
2507             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2508 
2509             if (s->ata[0]) {
2510                 /* Extract the tag from the register to match STZGM.  */
2511                 tag = tcg_temp_new_i64();
2512                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2513                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2514             }
2515         }
2516         return;
2517     case ARM_CP_DC_GZVA:
2518         {
2519             TCGv_i64 clean_addr, tag;
2520 
2521             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2522             tcg_rt = cpu_reg(s, rt);
2523             clean_addr = clean_data_tbi(s, tcg_rt);
2524             gen_helper_dc_zva(tcg_env, clean_addr);
2525 
2526             if (s->ata[0]) {
2527                 /* Extract the tag from the register to match STZGM.  */
2528                 tag = tcg_temp_new_i64();
2529                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2530                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2531             }
2532         }
2533         return;
2534     default:
2535         g_assert_not_reached();
2536     }
2537 
2538     if (ri->type & ARM_CP_IO) {
2539         /* I/O operations must end the TB here (whether read or write) */
2540         need_exit_tb = translator_io_start(&s->base);
2541     }
2542 
2543     tcg_rt = cpu_reg(s, rt);
2544 
2545     if (isread) {
2546         if (ri->type & ARM_CP_CONST) {
2547             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2548         } else if (ri->readfn) {
2549             if (!tcg_ri) {
2550                 tcg_ri = gen_lookup_cp_reg(key);
2551             }
2552             gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
2553         } else {
2554             tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
2555         }
2556     } else {
2557         if (ri->type & ARM_CP_CONST) {
2558             /* If not forbidden by access permissions, treat as WI */
2559             return;
2560         } else if (ri->writefn) {
2561             if (!tcg_ri) {
2562                 tcg_ri = gen_lookup_cp_reg(key);
2563             }
2564             gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
2565         } else {
2566             tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
2567         }
2568     }
2569 
2570     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2571         /*
2572          * A write to any coprocessor register that ends a TB
2573          * must rebuild the hflags for the next TB.
2574          */
2575         gen_rebuild_hflags(s);
2576         /*
2577          * We default to ending the TB on a coprocessor register write,
2578          * but allow this to be suppressed by the register definition
2579          * (usually only necessary to work around guest bugs).
2580          */
2581         need_exit_tb = true;
2582     }
2583     if (need_exit_tb) {
2584         s->base.is_jmp = DISAS_UPDATE_EXIT;
2585     }
2586 }
2587 
2588 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2589 {
2590     handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2591     return true;
2592 }
2593 
2594 static bool trans_SVC(DisasContext *s, arg_i *a)
2595 {
2596     /*
2597      * For SVC, HVC and SMC we advance the single-step state
2598      * machine before taking the exception. This is architecturally
2599      * mandated, to ensure that single-stepping a system call
2600      * instruction works properly.
2601      */
2602     uint32_t syndrome = syn_aa64_svc(a->imm);
2603     if (s->fgt_svc) {
2604         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2605         return true;
2606     }
2607     gen_ss_advance(s);
2608     gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2609     return true;
2610 }
2611 
2612 static bool trans_HVC(DisasContext *s, arg_i *a)
2613 {
2614     int target_el = s->current_el == 3 ? 3 : 2;
2615 
2616     if (s->current_el == 0) {
2617         unallocated_encoding(s);
2618         return true;
2619     }
2620     /*
2621      * The pre HVC helper handles cases when HVC gets trapped
2622      * as an undefined insn by runtime configuration.
2623      */
2624     gen_a64_update_pc(s, 0);
2625     gen_helper_pre_hvc(tcg_env);
2626     /* Architecture requires ss advance before we do the actual work */
2627     gen_ss_advance(s);
2628     gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2629     return true;
2630 }
2631 
2632 static bool trans_SMC(DisasContext *s, arg_i *a)
2633 {
2634     if (s->current_el == 0) {
2635         unallocated_encoding(s);
2636         return true;
2637     }
2638     gen_a64_update_pc(s, 0);
2639     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2640     /* Architecture requires ss advance before we do the actual work */
2641     gen_ss_advance(s);
2642     gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2643     return true;
2644 }
2645 
2646 static bool trans_BRK(DisasContext *s, arg_i *a)
2647 {
2648     gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2649     return true;
2650 }
2651 
2652 static bool trans_HLT(DisasContext *s, arg_i *a)
2653 {
2654     /*
2655      * HLT. This has two purposes.
2656      * Architecturally, it is an external halting debug instruction.
2657      * Since QEMU doesn't implement external debug, we implement the
2658      * behaviour required when halting debug is disabled: it will UNDEF.
2659      * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2660      */
2661     if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2662         gen_exception_internal_insn(s, EXCP_SEMIHOST);
2663     } else {
2664         unallocated_encoding(s);
2665     }
2666     return true;
2667 }
2668 
2669 /*
2670  * Load/Store exclusive instructions are implemented by remembering
2671  * the value/address loaded, and seeing if these are the same
2672  * when the store is performed. This is not actually the architecturally
2673  * mandated semantics, but it works for typical guest code sequences
2674  * and avoids having to monitor regular stores.
2675  *
2676  * The store exclusive uses the atomic cmpxchg primitives to avoid
2677  * races in multi-threaded linux-user and when MTTCG softmmu is
2678  * enabled.
2679  */
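     /*
      * For illustration, the typical guest pattern this models is a
      * load-exclusive/store-exclusive retry loop (a hypothetical spinlock
      * acquire, not taken from any particular guest):
      *
      *   retry:
      *     ldaxr w0, [x1]      ; gen_load_exclusive: record address and value
      *     cbnz  w0, retry     ; lock already held
      *     stxr  w2, w3, [x1]  ; gen_store_exclusive: cmpxchg old -> new
      *     cbnz  w2, retry     ; w2 == 1 means the monitor check failed
      *
      * gen_load_exclusive() fills cpu_exclusive_addr/val (and _high for
      * pairs); gen_store_exclusive() performs the cmpxchg against those
      * recorded values and sets Rd to 0 on success, 1 on failure.
      */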
2680 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2681                                int size, bool is_pair)
2682 {
2683     int idx = get_mem_index(s);
2684     TCGv_i64 dirty_addr, clean_addr;
2685     MemOp memop = check_atomic_align(s, rn, size + is_pair);
2686 
2687     s->is_ldex = true;
2688     dirty_addr = cpu_reg_sp(s, rn);
2689     clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2690 
2691     g_assert(size <= 3);
2692     if (is_pair) {
2693         g_assert(size >= 2);
2694         if (size == 2) {
2695             tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2696             if (s->be_data == MO_LE) {
2697                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2698                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2699             } else {
2700                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2701                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2702             }
2703         } else {
2704             TCGv_i128 t16 = tcg_temp_new_i128();
2705 
2706             tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2707 
2708             if (s->be_data == MO_LE) {
2709                 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2710                                       cpu_exclusive_high, t16);
2711             } else {
2712                 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2713                                       cpu_exclusive_val, t16);
2714             }
2715             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2716             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2717         }
2718     } else {
2719         tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2720         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2721     }
2722     tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2723 }
2724 
2725 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2726                                 int rn, int size, int is_pair)
2727 {
2728     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2729      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2730      *     [addr] = {Rt};
2731      *     if (is_pair) {
2732      *         [addr + datasize] = {Rt2};
2733      *     }
2734      *     {Rd} = 0;
2735      * } else {
2736      *     {Rd} = 1;
2737      * }
2738      * env->exclusive_addr = -1;
2739      */
2740     TCGLabel *fail_label = gen_new_label();
2741     TCGLabel *done_label = gen_new_label();
2742     TCGv_i64 tmp, clean_addr;
2743     MemOp memop;
2744 
2745     /*
2746      * FIXME: We are out of spec here.  We have recorded only the address
2747      * from load_exclusive, not the entire range, and we assume that the
2748      * size of the access on both sides match.  The architecture allows the
2749      * store to be smaller than the load, so long as the stored bytes are
2750      * within the range recorded by the load.
2751      */
2752 
2753     /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
2754     clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2755     tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2756 
2757     /*
2758      * The write, and any associated faults, only happen if the virtual
2759      * and physical addresses pass the exclusive monitor check.  These
2760      * faults are exceedingly unlikely, because normally the guest uses
2761      * the exact same address register for the load_exclusive, and we
2762      * would have recognized these faults there.
2763      *
2764      * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
2765      * unaligned 4-byte write within the range of an aligned 8-byte load.
2766      * With LSE2, the store would need to cross a 16-byte boundary when the
2767      * load did not, which would mean the store is outside the range
2768      * recorded for the monitor, which would have failed a corrected monitor
2769      * check above.  For now, we assume no size change and retain the
2770      * MO_ALIGN to let tcg know what we checked in the load_exclusive.
2771      *
2772      * It is possible to trigger an MTE fault, by performing the load with
2773      * a virtual address with a valid tag and performing the store with the
2774      * same virtual address and a different invalid tag.
2775      */
2776     memop = size + is_pair;
2777     if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
2778         memop |= MO_ALIGN;
2779     }
2780     memop = finalize_memop(s, memop);
2781     gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2782 
2783     tmp = tcg_temp_new_i64();
2784     if (is_pair) {
2785         if (size == 2) {
2786             if (s->be_data == MO_LE) {
2787                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2788             } else {
2789                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2790             }
2791             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2792                                        cpu_exclusive_val, tmp,
2793                                        get_mem_index(s), memop);
2794             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2795         } else {
2796             TCGv_i128 t16 = tcg_temp_new_i128();
2797             TCGv_i128 c16 = tcg_temp_new_i128();
2798             TCGv_i64 a, b;
2799 
2800             if (s->be_data == MO_LE) {
2801                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2802                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2803                                         cpu_exclusive_high);
2804             } else {
2805                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2806                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2807                                         cpu_exclusive_val);
2808             }
2809 
2810             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
2811                                         get_mem_index(s), memop);
2812 
2813             a = tcg_temp_new_i64();
2814             b = tcg_temp_new_i64();
2815             if (s->be_data == MO_LE) {
2816                 tcg_gen_extr_i128_i64(a, b, t16);
2817             } else {
2818                 tcg_gen_extr_i128_i64(b, a, t16);
2819             }
2820 
2821             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
2822             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
2823             tcg_gen_or_i64(tmp, a, b);
2824 
2825             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
2826         }
2827     } else {
2828         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2829                                    cpu_reg(s, rt), get_mem_index(s), memop);
2830         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2831     }
2832     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2833     tcg_gen_br(done_label);
2834 
2835     gen_set_label(fail_label);
2836     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2837     gen_set_label(done_label);
2838     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2839 }
2840 
2841 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2842                                  int rn, int size)
2843 {
2844     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2845     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2846     int memidx = get_mem_index(s);
2847     TCGv_i64 clean_addr;
2848     MemOp memop;
2849 
2850     if (rn == 31) {
2851         gen_check_sp_alignment(s);
2852     }
2853     memop = check_atomic_align(s, rn, size);
2854     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2855     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
2856                                memidx, memop);
2857 }
2858 
2859 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2860                                       int rn, int size)
2861 {
2862     TCGv_i64 s1 = cpu_reg(s, rs);
2863     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2864     TCGv_i64 t1 = cpu_reg(s, rt);
2865     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2866     TCGv_i64 clean_addr;
2867     int memidx = get_mem_index(s);
2868     MemOp memop;
2869 
2870     if (rn == 31) {
2871         gen_check_sp_alignment(s);
2872     }
2873 
2874     /* This is a single atomic access, despite the "pair". */
2875     memop = check_atomic_align(s, rn, size + 1);
2876     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2877 
2878     if (size == 2) {
2879         TCGv_i64 cmp = tcg_temp_new_i64();
2880         TCGv_i64 val = tcg_temp_new_i64();
2881 
2882         if (s->be_data == MO_LE) {
2883             tcg_gen_concat32_i64(val, t1, t2);
2884             tcg_gen_concat32_i64(cmp, s1, s2);
2885         } else {
2886             tcg_gen_concat32_i64(val, t2, t1);
2887             tcg_gen_concat32_i64(cmp, s2, s1);
2888         }
2889 
2890         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
2891 
2892         if (s->be_data == MO_LE) {
2893             tcg_gen_extr32_i64(s1, s2, cmp);
2894         } else {
2895             tcg_gen_extr32_i64(s2, s1, cmp);
2896         }
2897     } else {
2898         TCGv_i128 cmp = tcg_temp_new_i128();
2899         TCGv_i128 val = tcg_temp_new_i128();
2900 
2901         if (s->be_data == MO_LE) {
2902             tcg_gen_concat_i64_i128(val, t1, t2);
2903             tcg_gen_concat_i64_i128(cmp, s1, s2);
2904         } else {
2905             tcg_gen_concat_i64_i128(val, t2, t1);
2906             tcg_gen_concat_i64_i128(cmp, s2, s1);
2907         }
2908 
2909         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
2910 
2911         if (s->be_data == MO_LE) {
2912             tcg_gen_extr_i128_i64(s1, s2, cmp);
2913         } else {
2914             tcg_gen_extr_i128_i64(s2, s1, cmp);
2915         }
2916     }
2917 }
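
     /*
      * For the pair form, Rs/R(s+1) hold the compare value, Rt/R(t+1) the
      * new value, and Rs/R(s+1) receive the old memory contents afterwards.
      * E.g. (hypothetical operands) "casp x0, x1, x2, x3, [x4]" compares
      * {x0, x1} with the 16 bytes at [x4], stores {x2, x3} on a match, and
      * in either case leaves the old memory contents in {x0, x1}.
      */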
2918 
2919 /*
2920  * Compute the ISS.SF bit for syndrome information if an exception
2921  * is taken on a load or store. This indicates whether the instruction
2922  * is accessing a 32-bit or 64-bit register. This logic is derived
2923  * from the ARMv8 specs for LDR (Shared decode for all encodings).
2924  */
2925 static bool ldst_iss_sf(int size, bool sign, bool ext)
2926 {
2927 
2928     if (sign) {
2929         /*
2930          * Signed loads are 64 bit results if we are not going to
2931          * do a zero-extend from 32 to 64 after the load.
2932          * (For a store, sign and ext are always false.)
2933          */
2934         return !ext;
2935     } else {
2936         /* Unsigned loads/stores work at the specified size */
2937         return size == MO_64;
2938     }
2939 }
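
     /*
      * The resulting truth table, for reference (for stores, sign and ext
      * are always false, so only the first row applies):
      *
      *   sign ext | ISS.SF
      *    0    -  | (size == MO_64)
      *    1    0  | 1   (64-bit result, sign-extending load)
      *    1    1  | 0   (32-bit result; zero-extend follows the load)
      */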
2940 
2941 static bool trans_STXR(DisasContext *s, arg_stxr *a)
2942 {
2943     if (a->rn == 31) {
2944         gen_check_sp_alignment(s);
2945     }
2946     if (a->lasr) {
2947         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2948     }
2949     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
2950     return true;
2951 }
2952 
2953 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
2954 {
2955     if (a->rn == 31) {
2956         gen_check_sp_alignment(s);
2957     }
2958     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
2959     if (a->lasr) {
2960         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2961     }
2962     return true;
2963 }
2964 
2965 static bool trans_STLR(DisasContext *s, arg_stlr *a)
2966 {
2967     TCGv_i64 clean_addr;
2968     MemOp memop;
2969     bool iss_sf = ldst_iss_sf(a->sz, false, false);
2970 
2971     /*
2972      * StoreLORelease is the same as Store-Release for QEMU, but
2973      * needs the feature-test.
2974      */
2975     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
2976         return false;
2977     }
2978     /* Generate ISS for non-exclusive accesses including LASR.  */
2979     if (a->rn == 31) {
2980         gen_check_sp_alignment(s);
2981     }
2982     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2983     memop = check_ordered_align(s, a->rn, 0, true, a->sz);
2984     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
2985                                 true, a->rn != 31, memop);
2986     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
2987               iss_sf, a->lasr);
2988     return true;
2989 }
2990 
2991 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
2992 {
2993     TCGv_i64 clean_addr;
2994     MemOp memop;
2995     bool iss_sf = ldst_iss_sf(a->sz, false, false);
2996 
2997     /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2998     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
2999         return false;
3000     }
3001     /* Generate ISS for non-exclusive accesses including LASR.  */
3002     if (a->rn == 31) {
3003         gen_check_sp_alignment(s);
3004     }
3005     memop = check_ordered_align(s, a->rn, 0, false, a->sz);
3006     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3007                                 false, a->rn != 31, memop);
3008     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
3009               a->rt, iss_sf, a->lasr);
3010     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3011     return true;
3012 }
3013 
3014 static bool trans_STXP(DisasContext *s, arg_stxr *a)
3015 {
3016     if (a->rn == 31) {
3017         gen_check_sp_alignment(s);
3018     }
3019     if (a->lasr) {
3020         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3021     }
3022     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
3023     return true;
3024 }
3025 
3026 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
3027 {
3028     if (a->rn == 31) {
3029         gen_check_sp_alignment(s);
3030     }
3031     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
3032     if (a->lasr) {
3033         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3034     }
3035     return true;
3036 }
3037 
3038 static bool trans_CASP(DisasContext *s, arg_CASP *a)
3039 {
3040     if (!dc_isar_feature(aa64_atomics, s)) {
3041         return false;
3042     }
3043     if (((a->rt | a->rs) & 1) != 0) {
3044         return false;
3045     }
3046 
3047     gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
3048     return true;
3049 }
3050 
3051 static bool trans_CAS(DisasContext *s, arg_CAS *a)
3052 {
3053     if (!dc_isar_feature(aa64_atomics, s)) {
3054         return false;
3055     }
3056     gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
3057     return true;
3058 }
3059 
3060 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
3061 {
3062     bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
3063     TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
3064     TCGv_i64 clean_addr = tcg_temp_new_i64();
3065     MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3066 
3067     gen_pc_plus_diff(s, clean_addr, a->imm);
3068     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3069               false, true, a->rt, iss_sf, false);
3070     return true;
3071 }
3072 
3073 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
3074 {
3075     /* Load register (literal), vector version */
3076     TCGv_i64 clean_addr;
3077     MemOp memop;
3078 
3079     if (!fp_access_check(s)) {
3080         return true;
3081     }
3082     memop = finalize_memop_asimd(s, a->sz);
3083     clean_addr = tcg_temp_new_i64();
3084     gen_pc_plus_diff(s, clean_addr, a->imm);
3085     do_fp_ld(s, a->rt, clean_addr, memop);
3086     return true;
3087 }
3088 
3089 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
3090                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3091                                  uint64_t offset, bool is_store, MemOp mop)
3092 {
3093     if (a->rn == 31) {
3094         gen_check_sp_alignment(s);
3095     }
3096 
3097     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3098     if (!a->p) {
3099         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3100     }
3101 
3102     *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
3103                                  (a->w || a->rn != 31), 2 << a->sz, mop);
3104 }
3105 
3106 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
3107                                   TCGv_i64 dirty_addr, uint64_t offset)
3108 {
3109     if (a->w) {
3110         if (a->p) {
3111             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3112         }
3113         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3114     }
3115 }
3116 
3117 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
3118 {
3119     uint64_t offset = a->imm << a->sz;
3120     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3121     MemOp mop = finalize_memop(s, a->sz);
3122 
3123     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3124     tcg_rt = cpu_reg(s, a->rt);
3125     tcg_rt2 = cpu_reg(s, a->rt2);
3126     /*
3127      * We built mop above for the single logical access -- rebuild it
3128      * now for the paired operation.
3129      *
3130      * With LSE2, non-sign-extending pairs are treated atomically if
3131      * aligned, and if unaligned one of the pair will be completely
3132      * within a 16-byte block and that element will be atomic.
3133      * Otherwise each element is separately atomic.
3134      * In all cases, issue one operation with the correct atomicity.
3135      */
3136     mop = a->sz + 1;
3137     if (s->align_mem) {
3138         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3139     }
3140     mop = finalize_memop_pair(s, mop);
3141     if (a->sz == 2) {
3142         TCGv_i64 tmp = tcg_temp_new_i64();
3143 
3144         if (s->be_data == MO_LE) {
3145             tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3146         } else {
3147             tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3148         }
3149         tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3150     } else {
3151         TCGv_i128 tmp = tcg_temp_new_i128();
3152 
3153         if (s->be_data == MO_LE) {
3154             tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3155         } else {
3156             tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3157         }
3158         tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3159     }
3160     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3161     return true;
3162 }
3163 
3164 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3165 {
3166     uint64_t offset = a->imm << a->sz;
3167     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3168     MemOp mop = finalize_memop(s, a->sz);
3169 
3170     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3171     tcg_rt = cpu_reg(s, a->rt);
3172     tcg_rt2 = cpu_reg(s, a->rt2);
3173 
3174     /*
3175      * We built mop above for the single logical access -- rebuild it
3176      * now for the paired operation.
3177      *
3178      * With LSE2, non-sign-extending pairs are treated atomically if
3179      * aligned, and if unaligned one of the pair will be completely
3180      * within a 16-byte block and that element will be atomic.
3181      * Otherwise each element is separately atomic.
3182      * In all cases, issue one operation with the correct atomicity.
3183      *
3184      * This treats sign-extending loads like zero-extending loads,
3185      * since that reuses the most code below.
3186      */
3187     mop = a->sz + 1;
3188     if (s->align_mem) {
3189         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3190     }
3191     mop = finalize_memop_pair(s, mop);
3192     if (a->sz == 2) {
3193         int o2 = s->be_data == MO_LE ? 32 : 0;
3194         int o1 = o2 ^ 32;
3195 
3196         tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3197         if (a->sign) {
3198             tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3199             tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3200         } else {
3201             tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3202             tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3203         }
3204     } else {
3205         TCGv_i128 tmp = tcg_temp_new_i128();
3206 
3207         tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3208         if (s->be_data == MO_LE) {
3209             tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3210         } else {
3211             tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3212         }
3213     }
3214     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3215     return true;
3216 }
3217 
3218 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3219 {
3220     uint64_t offset = a->imm << a->sz;
3221     TCGv_i64 clean_addr, dirty_addr;
3222     MemOp mop;
3223 
3224     if (!fp_access_check(s)) {
3225         return true;
3226     }
3227 
3228     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3229     mop = finalize_memop_asimd(s, a->sz);
3230     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3231     do_fp_st(s, a->rt, clean_addr, mop);
3232     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3233     do_fp_st(s, a->rt2, clean_addr, mop);
3234     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3235     return true;
3236 }
3237 
3238 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3239 {
3240     uint64_t offset = a->imm << a->sz;
3241     TCGv_i64 clean_addr, dirty_addr;
3242     MemOp mop;
3243 
3244     if (!fp_access_check(s)) {
3245         return true;
3246     }
3247 
3248     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3249     mop = finalize_memop_asimd(s, a->sz);
3250     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3251     do_fp_ld(s, a->rt, clean_addr, mop);
3252     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3253     do_fp_ld(s, a->rt2, clean_addr, mop);
3254     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3255     return true;
3256 }
3257 
3258 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3259 {
3260     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3261     uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3262     MemOp mop;
3263     TCGv_i128 tmp;
3264 
3265     /* STGP only comes in one size. */
3266     tcg_debug_assert(a->sz == MO_64);
3267 
3268     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3269         return false;
3270     }
3271 
3272     if (a->rn == 31) {
3273         gen_check_sp_alignment(s);
3274     }
3275 
3276     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3277     if (!a->p) {
3278         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3279     }
3280 
3281     clean_addr = clean_data_tbi(s, dirty_addr);
3282     tcg_rt = cpu_reg(s, a->rt);
3283     tcg_rt2 = cpu_reg(s, a->rt2);
3284 
3285     /*
3286      * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3287      * and one tag operation.  We implement it as one single aligned 16-byte
3288      * memory operation for convenience.  Note that the alignment ensures
3289      * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3290      */
3291     mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3292 
3293     tmp = tcg_temp_new_i128();
3294     if (s->be_data == MO_LE) {
3295         tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3296     } else {
3297         tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3298     }
3299     tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3300 
3301     /* Perform the tag store, if tag access enabled. */
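    /* STGP's tag comes from Xn itself: dirty_addr is address and tag source. */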
3302     if (s->ata[0]) {
3303         if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3304             gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3305         } else {
3306             gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3307         }
3308     }
3309 
3310     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3311     return true;
3312 }
3313 
3314 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3315                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3316                                  uint64_t offset, bool is_store, MemOp mop)
3317 {
3318     int memidx;
3319 
3320     if (a->rn == 31) {
3321         gen_check_sp_alignment(s);
3322     }
3323 
3324     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3325     if (!a->p) {
3326         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3327     }
3328     memidx = get_a64_user_mem_index(s, a->unpriv);
3329     *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3330                                         a->w || a->rn != 31,
3331                                         mop, a->unpriv, memidx);
3332 }
3333 
3334 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3335                                   TCGv_i64 dirty_addr, uint64_t offset)
3336 {
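    /*
     * Pre-indexed and signed-offset forms (!a->p) already applied the
     * offset in op_addr_ldst_imm_pre; only post-indexed forms add it here.
     */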
3337     if (a->w) {
3338         if (a->p) {
3339             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3340         }
3341         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3342     }
3343 }
3344 
3345 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3346 {
3347     bool iss_sf, iss_valid = !a->w;
3348     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3349     int memidx = get_a64_user_mem_index(s, a->unpriv);
3350     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3351 
3352     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3353 
3354     tcg_rt = cpu_reg(s, a->rt);
3355     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3356 
3357     do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3358                      iss_valid, a->rt, iss_sf, false);
3359     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3360     return true;
3361 }
3362 
3363 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3364 {
3365     bool iss_sf, iss_valid = !a->w;
3366     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3367     int memidx = get_a64_user_mem_index(s, a->unpriv);
3368     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3369 
3370     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3371 
3372     tcg_rt = cpu_reg(s, a->rt);
3373     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3374 
3375     do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3376                      a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3377     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3378     return true;
3379 }
3380 
3381 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3382 {
3383     TCGv_i64 clean_addr, dirty_addr;
3384     MemOp mop;
3385 
3386     if (!fp_access_check(s)) {
3387         return true;
3388     }
3389     mop = finalize_memop_asimd(s, a->sz);
3390     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3391     do_fp_st(s, a->rt, clean_addr, mop);
3392     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3393     return true;
3394 }
3395 
3396 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3397 {
3398     TCGv_i64 clean_addr, dirty_addr;
3399     MemOp mop;
3400 
3401     if (!fp_access_check(s)) {
3402         return true;
3403     }
3404     mop = finalize_memop_asimd(s, a->sz);
3405     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3406     do_fp_ld(s, a->rt, clean_addr, mop);
3407     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3408     return true;
3409 }
3410 
3411 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3412                              TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3413                              bool is_store, MemOp memop)
3414 {
3415     TCGv_i64 tcg_rm;
3416 
3417     if (a->rn == 31) {
3418         gen_check_sp_alignment(s);
3419     }
3420     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3421 
3422     tcg_rm = read_cpu_reg(s, a->rm, 1);
3423     ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3424 
3425     tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3426     *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3427 }
3428 
3429 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3430 {
3431     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3432     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3433     MemOp memop;
3434 
3435     if (extract32(a->opt, 1, 1) == 0) {
3436         return false;
3437     }
3438 
3439     memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3440     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3441     tcg_rt = cpu_reg(s, a->rt);
3442     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3443               a->ext, true, a->rt, iss_sf, false);
3444     return true;
3445 }
3446 
3447 static bool trans_STR(DisasContext *s, arg_ldst *a)
3448 {
3449     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3450     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3451     MemOp memop;
3452 
3453     if (extract32(a->opt, 1, 1) == 0) {
3454         return false;
3455     }
3456 
3457     memop = finalize_memop(s, a->sz);
3458     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3459     tcg_rt = cpu_reg(s, a->rt);
3460     do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3461     return true;
3462 }
3463 
3464 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3465 {
3466     TCGv_i64 clean_addr, dirty_addr;
3467     MemOp memop;
3468 
3469     if (extract32(a->opt, 1, 1) == 0) {
3470         return false;
3471     }
3472 
3473     if (!fp_access_check(s)) {
3474         return true;
3475     }
3476 
3477     memop = finalize_memop_asimd(s, a->sz);
3478     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3479     do_fp_ld(s, a->rt, clean_addr, memop);
3480     return true;
3481 }
3482 
3483 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3484 {
3485     TCGv_i64 clean_addr, dirty_addr;
3486     MemOp memop;
3487 
3488     if (extract32(a->opt, 1, 1) == 0) {
3489         return false;
3490     }
3491 
3492     if (!fp_access_check(s)) {
3493         return true;
3494     }
3495 
3496     memop = finalize_memop_asimd(s, a->sz);
3497     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3498     do_fp_st(s, a->rt, clean_addr, memop);
3499     return true;
3500 }
3501
3503 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3504                          int sign, bool invert)
3505 {
3506     MemOp mop = a->sz | sign;
3507     TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3508 
3509     if (a->rn == 31) {
3510         gen_check_sp_alignment(s);
3511     }
3512     mop = check_atomic_align(s, a->rn, mop);
3513     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3514                                 a->rn != 31, mop);
3515     tcg_rs = read_cpu_reg(s, a->rs, true);
3516     tcg_rt = cpu_reg(s, a->rt);
3517     if (invert) {
3518         tcg_gen_not_i64(tcg_rs, tcg_rs);
3519     }
3520     /*
3521      * The tcg atomic primitives are all full barriers.  Therefore we
3522      * can ignore the Acquire and Release bits of this instruction.
3523      */
3524     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3525 
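    /*
     * MO_SIGN only affects the value seen by the signed min/max ops;
     * the value returned in Rt must be the zero-extended memory value.
     */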
3526     if (mop & MO_SIGN) {
3527         switch (a->sz) {
3528         case MO_8:
3529             tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3530             break;
3531         case MO_16:
3532             tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3533             break;
3534         case MO_32:
3535             tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3536             break;
3537         case MO_64:
3538             break;
3539         default:
3540             g_assert_not_reached();
3541         }
3542     }
3543     return true;
3544 }
3545 
3546 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3547 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3548 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3549 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3550 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3551 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3552 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3553 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3554 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3555 
3556 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3557 {
3558     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3559     TCGv_i64 clean_addr;
3560     MemOp mop;
3561 
3562     if (!dc_isar_feature(aa64_atomics, s) ||
3563         !dc_isar_feature(aa64_rcpc_8_3, s)) {
3564         return false;
3565     }
3566     if (a->rn == 31) {
3567         gen_check_sp_alignment(s);
3568     }
3569     mop = check_ordered_align(s, a->rn, 0, false, a->sz);
3570     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3571                                 a->rn != 31, mop);
3572     /*
3573      * LDAPR* are a special case because they are a simple load, not a
3574      * fetch-and-do-something op.
3575      * The architectural consistency requirements here are weaker than
3576      * full load-acquire (we only need "load-acquire processor consistent"),
3577      * but we choose to implement them as full LDAQ.
3578      */
3579     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3580               true, a->rt, iss_sf, true);
3581     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3582     return true;
3583 }
3584 
3585 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3586 {
3587     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3588     MemOp memop;
3589 
3590     /* Load with pointer authentication */
3591     if (!dc_isar_feature(aa64_pauth, s)) {
3592         return false;
3593     }
3594 
3595     if (a->rn == 31) {
3596         gen_check_sp_alignment(s);
3597     }
3598     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3599 
3600     if (s->pauth_active) {
3601         if (!a->m) {
3602             gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3603                                       tcg_constant_i64(0));
3604         } else {
3605             gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3606                                       tcg_constant_i64(0));
3607         }
3608     }
3609 
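    /* The immediate offset is applied to the authenticated address. */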
3610     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3611 
3612     memop = finalize_memop(s, MO_64);
3613 
3614     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3615     clean_addr = gen_mte_check1(s, dirty_addr, false,
3616                                 a->w || a->rn != 31, memop);
3617 
3618     tcg_rt = cpu_reg(s, a->rt);
3619     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3620               /* extend */ false, /* iss_valid */ !a->w,
3621               /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3622 
3623     if (a->w) {
3624         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3625     }
3626     return true;
3627 }
3628 
3629 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3630 {
3631     TCGv_i64 clean_addr, dirty_addr;
3632     MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3633     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3634 
3635     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3636         return false;
3637     }
3638 
3639     if (a->rn == 31) {
3640         gen_check_sp_alignment(s);
3641     }
3642 
3643     mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3644     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3645     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3646     clean_addr = clean_data_tbi(s, dirty_addr);
3647 
3648     /*
3649      * Load-AcquirePC semantics; we implement as the slightly more
3650      * restrictive Load-Acquire.
3651      */
3652     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3653               a->rt, iss_sf, true);
3654     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3655     return true;
3656 }
3657 
3658 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3659 {
3660     TCGv_i64 clean_addr, dirty_addr;
3661     MemOp mop = a->sz;
3662     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3663 
3664     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3665         return false;
3666     }
3667 
3668     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3669 
3670     if (a->rn == 31) {
3671         gen_check_sp_alignment(s);
3672     }
3673 
3674     mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3675     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3676     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3677     clean_addr = clean_data_tbi(s, dirty_addr);
3678 
3679     /* Store-Release semantics */
3680     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3681     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3682     return true;
3683 }
3684 
3685 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3686 {
3687     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3688     MemOp endian, align, mop;
3689 
3690     int total;    /* total bytes */
3691     int elements; /* elements per vector */
3692     int r;
3693     int size = a->sz;
3694 
3695     if (!a->p && a->rm != 0) {
3696         /* For non-postindexed accesses the Rm field must be 0 */
3697         return false;
3698     }
3699     if (size == 3 && !a->q && a->selem != 1) {
3700         return false;
3701     }
3702     if (!fp_access_check(s)) {
3703         return true;
3704     }
3705 
3706     if (a->rn == 31) {
3707         gen_check_sp_alignment(s);
3708     }
3709 
3710     /* For our purposes, bytes are always little-endian.  */
3711     endian = s->be_data;
3712     if (size == 0) {
3713         endian = MO_LE;
3714     }
3715 
3716     total = a->rpt * a->selem * (a->q ? 16 : 8);
3717     tcg_rn = cpu_reg_sp(s, a->rn);
3718 
3719     /*
3720      * Issue the MTE check vs the logical repeat count, before we
3721      * promote consecutive little-endian elements below.
3722      */
3723     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3724                                 finalize_memop_asimd(s, size));
3725 
3726     /*
3727      * Consecutive little-endian elements from a single register
3728      * can be promoted to a larger little-endian operation.
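     *
     * For example, a little-endian LD1 {v0.16b} (selem == 1, size == 0)
     * is performed as two 8-byte loads, keeping only the alignment
     * required by the original byte elements (i.e. none).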
3729      */
3730     align = MO_ALIGN;
3731     if (a->selem == 1 && endian == MO_LE) {
3732         align = pow2_align(size);
3733         size = 3;
3734     }
3735     if (!s->align_mem) {
3736         align = 0;
3737     }
3738     mop = endian | size | align;
3739 
3740     elements = (a->q ? 16 : 8) >> size;
3741     tcg_ebytes = tcg_constant_i64(1 << size);
3742     for (r = 0; r < a->rpt; r++) {
3743         int e;
3744         for (e = 0; e < elements; e++) {
3745             int xs;
3746             for (xs = 0; xs < a->selem; xs++) {
3747                 int tt = (a->rt + r + xs) % 32;
3748                 do_vec_ld(s, tt, e, clean_addr, mop);
3749                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3750             }
3751         }
3752     }
3753 
3754     /*
3755      * For non-quad operations, setting a slice of the low 64 bits of
3756      * the register clears the high 64 bits (in the ARM ARM pseudocode
3757      * this is implicit in the fact that 'rval' is a 64 bit wide
3758      * variable).  For quad operations, we might still need to zero
3759      * the high bits of SVE.
3760      */
3761     for (r = 0; r < a->rpt * a->selem; r++) {
3762         int tt = (a->rt + r) % 32;
3763         clear_vec_high(s, a->q, tt);
3764     }
3765 
3766     if (a->p) {
3767         if (a->rm == 31) {
3768             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3769         } else {
3770             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3771         }
3772     }
3773     return true;
3774 }
3775 
3776 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3777 {
3778     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3779     MemOp endian, align, mop;
3780 
3781     int total;    /* total bytes */
3782     int elements; /* elements per vector */
3783     int r;
3784     int size = a->sz;
3785 
3786     if (!a->p && a->rm != 0) {
3787         /* For non-postindexed accesses the Rm field must be 0 */
3788         return false;
3789     }
3790     if (size == 3 && !a->q && a->selem != 1) {
3791         return false;
3792     }
3793     if (!fp_access_check(s)) {
3794         return true;
3795     }
3796 
3797     if (a->rn == 31) {
3798         gen_check_sp_alignment(s);
3799     }
3800 
3801     /* For our purposes, bytes are always little-endian.  */
3802     endian = s->be_data;
3803     if (size == 0) {
3804         endian = MO_LE;
3805     }
3806 
3807     total = a->rpt * a->selem * (a->q ? 16 : 8);
3808     tcg_rn = cpu_reg_sp(s, a->rn);
3809 
3810     /*
3811      * Issue the MTE check vs the logical repeat count, before we
3812      * promote consecutive little-endian elements below.
3813      */
3814     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
3815                                 finalize_memop_asimd(s, size));
3816 
3817     /*
3818      * Consecutive little-endian elements from a single register
3819      * can be promoted to a larger little-endian operation.
3820      */
3821     align = MO_ALIGN;
3822     if (a->selem == 1 && endian == MO_LE) {
3823         align = pow2_align(size);
3824         size = 3;
3825     }
3826     if (!s->align_mem) {
3827         align = 0;
3828     }
3829     mop = endian | size | align;
3830 
3831     elements = (a->q ? 16 : 8) >> size;
3832     tcg_ebytes = tcg_constant_i64(1 << size);
3833     for (r = 0; r < a->rpt; r++) {
3834         int e;
3835         for (e = 0; e < elements; e++) {
3836             int xs;
3837             for (xs = 0; xs < a->selem; xs++) {
3838                 int tt = (a->rt + r + xs) % 32;
3839                 do_vec_st(s, tt, e, clean_addr, mop);
3840                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3841             }
3842         }
3843     }
3844 
3845     if (a->p) {
3846         if (a->rm == 31) {
3847             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3848         } else {
3849             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3850         }
3851     }
3852     return true;
3853 }
3854 
3855 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
3856 {
3857     int xs, total, rt;
3858     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3859     MemOp mop;
3860 
3861     if (!a->p && a->rm != 0) {
3862         return false;
3863     }
3864     if (!fp_access_check(s)) {
3865         return true;
3866     }
3867 
3868     if (a->rn == 31) {
3869         gen_check_sp_alignment(s);
3870     }
3871 
3872     total = a->selem << a->scale;
3873     tcg_rn = cpu_reg_sp(s, a->rn);
3874 
3875     mop = finalize_memop_asimd(s, a->scale);
3876     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
3877                                 total, mop);
3878 
3879     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3880     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3881         do_vec_st(s, rt, a->index, clean_addr, mop);
3882         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3883     }
3884 
3885     if (a->p) {
3886         if (a->rm == 31) {
3887             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3888         } else {
3889             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3890         }
3891     }
3892     return true;
3893 }
3894 
3895 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
3896 {
3897     int xs, total, rt;
3898     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3899     MemOp mop;
3900 
3901     if (!a->p && a->rm != 0) {
3902         return false;
3903     }
3904     if (!fp_access_check(s)) {
3905         return true;
3906     }
3907 
3908     if (a->rn == 31) {
3909         gen_check_sp_alignment(s);
3910     }
3911 
3912     total = a->selem << a->scale;
3913     tcg_rn = cpu_reg_sp(s, a->rn);
3914 
3915     mop = finalize_memop_asimd(s, a->scale);
3916     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3917                                 total, mop);
3918 
3919     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3920     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3921         do_vec_ld(s, rt, a->index, clean_addr, mop);
3922         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3923     }
3924 
3925     if (a->p) {
3926         if (a->rm == 31) {
3927             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3928         } else {
3929             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3930         }
3931     }
3932     return true;
3933 }
3934 
3935 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
3936 {
3937     int xs, total, rt;
3938     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3939     MemOp mop;
3940 
3941     if (!a->p && a->rm != 0) {
3942         return false;
3943     }
3944     if (!fp_access_check(s)) {
3945         return true;
3946     }
3947 
3948     if (a->rn == 31) {
3949         gen_check_sp_alignment(s);
3950     }
3951 
3952     total = a->selem << a->scale;
3953     tcg_rn = cpu_reg_sp(s, a->rn);
3954 
3955     mop = finalize_memop_asimd(s, a->scale);
3956     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3957                                 total, mop);
3958 
3959     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3960     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3961         /* Load and replicate to all elements */
3962         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3963 
3964         tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
3965         tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
3966                              (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
3967         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3968     }
3969 
3970     if (a->p) {
3971         if (a->rm == 31) {
3972             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3973         } else {
3974             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3975         }
3976     }
3977     return true;
3978 }
3979 
3980 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
3981 {
3982     TCGv_i64 addr, clean_addr, tcg_rt;
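    /* dcz_blocksize is DCZID_EL0.BS, the log2 block size in words. */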
3983     int size = 4 << s->dcz_blocksize;
3984 
3985     if (!dc_isar_feature(aa64_mte, s)) {
3986         return false;
3987     }
3988     if (s->current_el == 0) {
3989         return false;
3990     }
3991 
3992     if (a->rn == 31) {
3993         gen_check_sp_alignment(s);
3994     }
3995 
3996     addr = read_cpu_reg_sp(s, a->rn, true);
3997     tcg_gen_addi_i64(addr, addr, a->imm);
3998     tcg_rt = cpu_reg(s, a->rt);
3999 
4000     if (s->ata[0]) {
4001         gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
4002     }
4003     /*
4004      * The non-tags portion of STZGM is mostly like DC_ZVA,
4005      * except the alignment happens before the access.
4006      */
4007     clean_addr = clean_data_tbi(s, addr);
4008     tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4009     gen_helper_dc_zva(tcg_env, clean_addr);
4010     return true;
4011 }
4012 
4013 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
4014 {
4015     TCGv_i64 addr, clean_addr, tcg_rt;
4016 
4017     if (!dc_isar_feature(aa64_mte, s)) {
4018         return false;
4019     }
4020     if (s->current_el == 0) {
4021         return false;
4022     }
4023 
4024     if (a->rn == 31) {
4025         gen_check_sp_alignment(s);
4026     }
4027 
4028     addr = read_cpu_reg_sp(s, a->rn, true);
4029     tcg_gen_addi_i64(addr, addr, a->imm);
4030     tcg_rt = cpu_reg(s, a->rt);
4031 
4032     if (s->ata[0]) {
4033         gen_helper_stgm(tcg_env, addr, tcg_rt);
4034     } else {
4035         MMUAccessType acc = MMU_DATA_STORE;
4036         int size = 4 << s->gm_blocksize;
4037 
4038         clean_addr = clean_data_tbi(s, addr);
4039         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4040         gen_probe_access(s, clean_addr, acc, size);
4041     }
4042     return true;
4043 }
4044 
4045 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
4046 {
4047     TCGv_i64 addr, clean_addr, tcg_rt;
4048 
4049     if (!dc_isar_feature(aa64_mte, s)) {
4050         return false;
4051     }
4052     if (s->current_el == 0) {
4053         return false;
4054     }
4055 
4056     if (a->rn == 31) {
4057         gen_check_sp_alignment(s);
4058     }
4059 
4060     addr = read_cpu_reg_sp(s, a->rn, true);
4061     tcg_gen_addi_i64(addr, addr, a->imm);
4062     tcg_rt = cpu_reg(s, a->rt);
4063 
4064     if (s->ata[0]) {
4065         gen_helper_ldgm(tcg_rt, tcg_env, addr);
4066     } else {
4067         MMUAccessType acc = MMU_DATA_LOAD;
4068         int size = 4 << s->gm_blocksize;
4069 
4070         clean_addr = clean_data_tbi(s, addr);
4071         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4072         gen_probe_access(s, clean_addr, acc, size);
4073         /* The result tags are zeros.  */
4074         tcg_gen_movi_i64(tcg_rt, 0);
4075     }
4076     return true;
4077 }
4078 
4079 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
4080 {
4081     TCGv_i64 addr, clean_addr, tcg_rt;
4082 
4083     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
4084         return false;
4085     }
4086 
4087     if (a->rn == 31) {
4088         gen_check_sp_alignment(s);
4089     }
4090 
4091     addr = read_cpu_reg_sp(s, a->rn, true);
4092     if (!a->p) {
4093         /* pre-index or signed offset */
4094         tcg_gen_addi_i64(addr, addr, a->imm);
4095     }
4096 
4097     tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4098     tcg_rt = cpu_reg(s, a->rt);
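    /* LDG replaces only the tag bits of Xt, so the old Xt value is passed in. */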
4099     if (s->ata[0]) {
4100         gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
4101     } else {
4102         /*
4103          * Tag access disabled: we must check for aborts on the load
4104          * from [rn+offset], and then insert a 0 tag into rt.
4105          */
4106         clean_addr = clean_data_tbi(s, addr);
4107         gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4108         gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
4109     }
4110 
4111     if (a->w) {
4112         /* pre-index or post-index */
4113         if (a->p) {
4114             /* post-index */
4115             tcg_gen_addi_i64(addr, addr, a->imm);
4116         }
4117         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4118     }
4119     return true;
4120 }
4121 
4122 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
4123 {
4124     TCGv_i64 addr, tcg_rt;
4125 
4126     if (a->rn == 31) {
4127         gen_check_sp_alignment(s);
4128     }
4129 
4130     addr = read_cpu_reg_sp(s, a->rn, true);
4131     if (!a->p) {
4132         /* pre-index or signed offset */
4133         tcg_gen_addi_i64(addr, addr, a->imm);
4134     }
4135     tcg_rt = cpu_reg_sp(s, a->rt);
4136     if (!s->ata[0]) {
4137         /*
4138          * For STG and ST2G, we need to check alignment and probe memory.
4139          * TODO: For STZG and STZ2G, we could rely on the stores below,
4140          * at least for system mode; user-only won't enforce alignment.
4141          */
4142         if (is_pair) {
4143             gen_helper_st2g_stub(tcg_env, addr);
4144         } else {
4145             gen_helper_stg_stub(tcg_env, addr);
4146         }
4147     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4148         if (is_pair) {
4149             gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4150         } else {
4151             gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4152         }
4153     } else {
4154         if (is_pair) {
4155             gen_helper_st2g(tcg_env, addr, tcg_rt);
4156         } else {
4157             gen_helper_stg(tcg_env, addr, tcg_rt);
4158         }
4159     }
4160 
4161     if (is_zero) {
4162         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4163         TCGv_i64 zero64 = tcg_constant_i64(0);
4164         TCGv_i128 zero128 = tcg_temp_new_i128();
4165         int mem_index = get_mem_index(s);
4166         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4167 
4168         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4169 
4170         /* This is 1 or 2 atomic 16-byte operations. */
4171         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4172         if (is_pair) {
4173             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4174             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4175         }
4176     }
4177 
4178     if (a->w) {
4179         /* pre-index or post-index */
4180         if (a->p) {
4181             /* post-index */
4182             tcg_gen_addi_i64(addr, addr, a->imm);
4183         }
4184         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4185     }
4186     return true;
4187 }
4188 
4189 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
4190 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
4191 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
4192 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
4193 
4194 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4195 
4196 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4197                    bool is_setg, SetFn fn)
4198 {
4199     int memidx;
4200     uint32_t syndrome, desc = 0;
4201 
4202     if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4203         return false;
4204     }
4205 
4206     /*
4207      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4208      * us to pull this check before the CheckMOPSEnabled() test
4209      * (which we do in the helper function)
4210      */
4211     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4212         a->rd == 31 || a->rn == 31) {
4213         return false;
4214     }
4215 
4216     memidx = get_a64_user_mem_index(s, a->unpriv);
4217 
4218     /*
4219      * We pass option_a == true, matching our implementation;
4220      * we pass wrong_option == false: helper function may set that bit.
4221      */
4222     syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4223                        is_epilogue, false, true, a->rd, a->rs, a->rn);
4224 
4225     if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4226         /* We may need to do MTE tag checking, so assemble the descriptor */
4227         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4228         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4229         desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4230         /* SIZEM1 and ALIGN we leave 0 (byte write) */
4231     }
4232     /* The helper function always needs the memidx even with MTE disabled */
4233     desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4234 
4235     /*
4236      * The helper needs the register numbers, but since they're in
4237      * the syndrome anyway, we let it extract them from there rather
4238      * than passing in an extra three integer arguments.
4239      */
4240     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4241     return true;
4242 }
4243 
4244 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
4245 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
4246 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
4247 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
4248 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
4249 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
4250 
4251 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4252 
4253 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4254 {
4255     int rmemidx, wmemidx;
4256     uint32_t syndrome, rdesc = 0, wdesc = 0;
4257     bool wunpriv = extract32(a->options, 0, 1);
4258     bool runpriv = extract32(a->options, 1, 1);
4259 
4260     /*
4261      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4262      * us to pull this check before the CheckMOPSEnabled() test
4263      * (which we do in the helper function)
4264      */
4265     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4266         a->rd == 31 || a->rs == 31 || a->rn == 31) {
4267         return false;
4268     }
4269 
4270     rmemidx = get_a64_user_mem_index(s, runpriv);
4271     wmemidx = get_a64_user_mem_index(s, wunpriv);
4272 
4273     /*
4274      * We pass option_a == true, matching our implementation;
4275      * we pass wrong_option == false: helper function may set that bit.
4276      */
4277     syndrome = syn_mop(false, false, a->options, is_epilogue,
4278                        false, true, a->rd, a->rs, a->rn);
4279 
4280     /* If we need to do MTE tag checking, assemble the descriptors */
4281     if (s->mte_active[runpriv]) {
4282         rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4283         rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4284     }
4285     if (s->mte_active[wunpriv]) {
4286         wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4287         wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4288         wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4289     }
4290     /* The helper function needs these parts of the descriptor regardless */
4291     rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4292     wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4293 
4294     /*
4295      * The helper needs the register numbers, but since they're in
4296      * the syndrome anyway, we let it extract them from there rather
4297      * than passing in an extra three integer arguments.
4298      */
4299     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4300        tcg_constant_i32(rdesc));
4301     return true;
4302 }
4303 
4304 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4305 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4306 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4307 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4308 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4309 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
4310 
4311 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4312 
4313 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4314                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4315 {
4316     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4317     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4318     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4319 
4320     fn(tcg_rd, tcg_rn, tcg_imm);
4321     if (!a->sf) {
4322         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4323     }
4324     return true;
4325 }
4326 
4327 /*
4328  * PC-rel. addressing
4329  */
4330 
4331 static bool trans_ADR(DisasContext *s, arg_ri *a)
4332 {
4333     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4334     return true;
4335 }
4336 
4337 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4338 {
4339     int64_t offset = (int64_t)a->imm << 12;
4340 
4341     /* The page offset is ok for CF_PCREL. */
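    /* The result is Xd = (pc_curr & ~0xfff) + (a->imm << 12). */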
4342     offset -= s->pc_curr & 0xfff;
4343     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4344     return true;
4345 }
4346 
4347 /*
4348  * Add/subtract (immediate)
4349  */
4350 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4351 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4352 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4353 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4354 
4355 /*
4356  * Add/subtract (immediate, with tags)
4357  */
4358 
4359 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4360                                       bool sub_op)
4361 {
4362     TCGv_i64 tcg_rn, tcg_rd;
4363     int imm;
4364 
4365     imm = a->uimm6 << LOG2_TAG_GRANULE;
4366     if (sub_op) {
4367         imm = -imm;
4368     }
4369 
4370     tcg_rn = cpu_reg_sp(s, a->rn);
4371     tcg_rd = cpu_reg_sp(s, a->rd);
4372 
4373     if (s->ata[0]) {
4374         gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4375                            tcg_constant_i32(imm),
4376                            tcg_constant_i32(a->uimm4));
4377     } else {
4378         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4379         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4380     }
4381     return true;
4382 }
4383 
4384 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4385 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4386 
4387 /* The input should be a value in the bottom e bits (with higher
4388  * bits zero); returns that value replicated into every element
4389  * of size e in a 64 bit integer.
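 * E.g. bitfield_replicate(0x3, 4) == 0x3333333333333333ull.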
4390  */
4391 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4392 {
4393     assert(e != 0);
4394     while (e < 64) {
4395         mask |= mask << e;
4396         e *= 2;
4397     }
4398     return mask;
4399 }
4400 
4401 /*
4402  * Logical (immediate)
4403  */
4404 
4405 /*
4406  * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4407  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4408  * value (ie should cause a guest UNDEF exception), and true if they are
4409  * valid, in which case the decoded bit pattern is written to result.
4410  */
4411 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4412                             unsigned int imms, unsigned int immr)
4413 {
4414     uint64_t mask;
4415     unsigned e, levels, s, r;
4416     int len;
4417 
4418     assert(immn < 2 && imms < 64 && immr < 64);
4419 
4420     /* The bit patterns we create here are 64 bit patterns which
4421      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4422      * 64 bits each. Each element contains the same value: a run
4423      * of between 1 and e-1 non-zero bits, rotated within the
4424      * element by between 0 and e-1 bits.
4425      *
4426      * The element size and run length are encoded into immn (1 bit)
4427      * and imms (6 bits) as follows:
4428      * 64 bit elements: immn = 1, imms = <length of run - 1>
4429      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4430      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4431      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4432      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4433      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4434      * Notice that immn = 0, imms = 11111x is the only combination
4435      * not covered by one of the above options; this is reserved.
4436      * Further, <length of run - 1> = all-ones is a reserved pattern.
4437      *
4438      * In all cases the rotation is by immr % e (and immr is 6 bits).
4439      */
4440 
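    /*
     * Worked example: immn == 0, imms == 0b100000, immr == 0 selects
     * 16-bit elements each containing a single set bit, giving
     * 0x0001000100010001.
     */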
4441     /* First determine the element size */
4442     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4443     if (len < 1) {
4444         /* This is the immn == 0, imms == 11111x case */
4445         return false;
4446     }
4447     e = 1 << len;
4448 
4449     levels = e - 1;
4450     s = imms & levels;
4451     r = immr & levels;
4452 
4453     if (s == levels) {
4454         /* <length of run - 1> mustn't be all-ones. */
4455         return false;
4456     }
4457 
4458     /* Create the value of one element: s+1 set bits rotated
4459      * by r within the element (which is e bits wide)...
4460      */
4461     mask = MAKE_64BIT_MASK(0, s + 1);
4462     if (r) {
4463         mask = (mask >> r) | (mask << (e - r));
4464         mask &= MAKE_64BIT_MASK(0, e);
4465     }
4466     /* ...then replicate the element over the whole 64 bit value */
4467     mask = bitfield_replicate(mask, e);
4468     *result = mask;
4469     return true;
4470 }
4471 
4472 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4473                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4474 {
4475     TCGv_i64 tcg_rd, tcg_rn;
4476     uint64_t imm;
4477 
4478     /* Some immediate field values are reserved. */
4479     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4480                                 extract32(a->dbm, 0, 6),
4481                                 extract32(a->dbm, 6, 6))) {
4482         return false;
4483     }
4484     if (!a->sf) {
4485         imm &= 0xffffffffull;
4486     }
4487 
4488     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4489     tcg_rn = cpu_reg(s, a->rn);
4490 
4491     fn(tcg_rd, tcg_rn, imm);
4492     if (set_cc) {
4493         gen_logic_CC(a->sf, tcg_rd);
4494     }
4495     if (!a->sf) {
4496         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4497     }
4498     return true;
4499 }
4500 
4501 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4502 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4503 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4504 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4505 
4506 /*
4507  * Move wide (immediate)
4508  */
4509 
4510 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4511 {
4512     int pos = a->hw << 4;
4513     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4514     return true;
4515 }
4516 
4517 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4518 {
4519     int pos = a->hw << 4;
4520     uint64_t imm = a->imm;
4521 
4522     imm = ~(imm << pos);
4523     if (!a->sf) {
4524         imm = (uint32_t)imm;
4525     }
4526     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4527     return true;
4528 }
4529 
4530 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4531 {
4532     int pos = a->hw << 4;
4533     TCGv_i64 tcg_rd, tcg_im;
4534 
4535     tcg_rd = cpu_reg(s, a->rd);
4536     tcg_im = tcg_constant_i64(a->imm);
4537     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4538     if (!a->sf) {
4539         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4540     }
4541     return true;
4542 }
4543 
4544 /*
4545  * Bitfield
4546  */
4547 
4548 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4549 {
4550     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4551     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4552     unsigned int bitsize = a->sf ? 64 : 32;
4553     unsigned int ri = a->immr;
4554     unsigned int si = a->imms;
4555     unsigned int pos, len;
4556 
4557     if (si >= ri) {
4558         /* Wd<s-r:0> = Wn<s:r> */
4559         len = (si - ri) + 1;
4560         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4561         if (!a->sf) {
4562             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4563         }
4564     } else {
4565         /* Wd<32+s-r,32-r> = Wn<s:0> */
4566         len = si + 1;
4567         pos = (bitsize - ri) & (bitsize - 1);
4568 
4569         if (len < ri) {
4570             /*
4571              * Sign extend the destination field from len to fill the
4572              * balance of the word.  Let the deposit below insert all
4573              * of those sign bits.
4574              */
4575             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4576             len = ri;
4577         }
4578 
4579         /*
4580          * We start with zero, and we haven't modified any bits outside
4581          * bitsize, therefore no final zero-extension is needed for !sf.
4582          */
4583         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4584     }
4585     return true;
4586 }
4587 
4588 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4589 {
4590     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4591     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4592     unsigned int bitsize = a->sf ? 64 : 32;
4593     unsigned int ri = a->immr;
4594     unsigned int si = a->imms;
4595     unsigned int pos, len;
4596 
4600     if (si >= ri) {
4601         /* Wd<s-r:0> = Wn<s:r> */
4602         len = (si - ri) + 1;
4603         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4604     } else {
4605         /* Wd<32+s-r,32-r> = Wn<s:0> */
4606         len = si + 1;
4607         pos = (bitsize - ri) & (bitsize - 1);
4608         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4609     }
4610     return true;
4611 }
4612 
4613 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4614 {
4615     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4616     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4617     unsigned int bitsize = a->sf ? 64 : 32;
4618     unsigned int ri = a->immr;
4619     unsigned int si = a->imms;
4620     unsigned int pos, len;
4621 
4625     if (si >= ri) {
4626         /* Wd<s-r:0> = Wn<s:r> */
4627         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4628         len = (si - ri) + 1;
4629         pos = 0;
4630     } else {
4631         /* Wd<32+s-r,32-r> = Wn<s:0> */
4632         len = si + 1;
4633         pos = (bitsize - ri) & (bitsize - 1);
4634     }
4635 
4636     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4637     if (!a->sf) {
4638         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4639     }
4640     return true;
4641 }
4642 
4643 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4644 {
4645     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4646 
4647     tcg_rd = cpu_reg(s, a->rd);
4648 
4649     if (unlikely(a->imm == 0)) {
4650         /*
4651          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4652          * so an extract from bit 0 is a special case.
4653          */
4654         if (a->sf) {
4655             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4656         } else {
4657             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4658         }
4659     } else {
4660         tcg_rm = cpu_reg(s, a->rm);
4661         tcg_rn = cpu_reg(s, a->rn);
4662 
4663         if (a->sf) {
4664             /* Specialization to ROR happens in EXTRACT2.  */
4665             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4666         } else {
4667             TCGv_i32 t0 = tcg_temp_new_i32();
4668 
4669             tcg_gen_extrl_i64_i32(t0, tcg_rm);
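            /* Rm == Rn is the ROR (immediate) alias; use a plain rotate. */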
4670             if (a->rm == a->rn) {
4671                 tcg_gen_rotri_i32(t0, t0, a->imm);
4672             } else {
4673                 TCGv_i32 t1 = tcg_temp_new_i32();
4674                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4675                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4676             }
4677             tcg_gen_extu_i32_i64(tcg_rd, t0);
4678         }
4679     }
4680     return true;
4681 }
4682 
4683 /*
4684  * Cryptographic AES, SHA, SHA512
4685  */
4686 
4687 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
4688 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
4689 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
4690 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
4691 
4692 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
4693 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
4694 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
4695 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)
4696 
4697 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
4698 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
4699 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
4700 
4701 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
4702 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
4703 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
4704 
4705 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
4706 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
4707 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
4708 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
4709 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
4710 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
4711 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
4712 
4713 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
4714 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
4715 
4716 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
4717 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
4718 
4719 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
4720 {
4721     if (!dc_isar_feature(aa64_sm3, s)) {
4722         return false;
4723     }
4724     if (fp_access_check(s)) {
4725         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
4726         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
4727         TCGv_i32 tcg_op3 = tcg_temp_new_i32();
4728         TCGv_i32 tcg_res = tcg_temp_new_i32();
4729         unsigned vsz, dofs;
4730 
4731         read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
4732         read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
4733         read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
4734 
4735         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
4736         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
4737         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
4738         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
4739 
4740         /* Clear the whole register first, then store bits [127:96]. */
4741         vsz = vec_full_reg_size(s);
4742         dofs = vec_full_reg_offset(s, a->rd);
4743         tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
4744         write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
4745     }
4746     return true;
4747 }
4748 
4749 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
4750 {
4751     if (fp_access_check(s)) {
4752         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
4753     }
4754     return true;
4755 }
4756 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
4757 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
4758 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
4759 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
4760 
4761 static bool trans_XAR(DisasContext *s, arg_XAR *a)
4762 {
4763     if (!dc_isar_feature(aa64_sha3, s)) {
4764         return false;
4765     }
4766     if (fp_access_check(s)) {
4767         gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
4768                      vec_full_reg_offset(s, a->rn),
4769                      vec_full_reg_offset(s, a->rm), a->imm, 16,
4770                      vec_full_reg_size(s));
4771     }
4772     return true;
4773 }
4774 
4775 /*
4776  * Advanced SIMD copy
4777  */
4778 
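/*
 * The immediate encodes both the element size and the index: the lowest
 * set bit gives log2 of the element size, and the bits above it give
 * the index.  Encodings with no valid size bit (esz > MO_64) are reserved.
 */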
4779 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
4780 {
4781     unsigned esz = ctz32(imm);
4782     if (esz <= MO_64) {
4783         *pesz = esz;
4784         *pidx = imm >> (esz + 1);
4785         return true;
4786     }
4787     return false;
4788 }
4789 
4790 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
4791 {
4792     MemOp esz;
4793     unsigned idx;
4794 
4795     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4796         return false;
4797     }
4798     if (fp_access_check(s)) {
4799         /*
4800          * This instruction just extracts the specified element and
4801          * zero-extends it into the bottom of the destination register.
4802          */
4803         TCGv_i64 tmp = tcg_temp_new_i64();
4804         read_vec_element(s, tmp, a->rn, idx, esz);
4805         write_fp_dreg(s, a->rd, tmp);
4806     }
4807     return true;
4808 }
4809 
4810 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
4811 {
4812     MemOp esz;
4813     unsigned idx;
4814 
4815     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4816         return false;
4817     }
4818     if (esz == MO_64 && !a->q) {
4819         return false;
4820     }
4821     if (fp_access_check(s)) {
4822         tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
4823                              vec_reg_offset(s, a->rn, idx, esz),
4824                              a->q ? 16 : 8, vec_full_reg_size(s));
4825     }
4826     return true;
4827 }
4828 
4829 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
4830 {
4831     MemOp esz;
4832     unsigned idx;
4833 
4834     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4835         return false;
4836     }
4837     if (esz == MO_64 && !a->q) {
4838         return false;
4839     }
4840     if (fp_access_check(s)) {
4841         tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4842                              a->q ? 16 : 8, vec_full_reg_size(s),
4843                              cpu_reg(s, a->rn));
4844     }
4845     return true;
4846 }
4847 
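/*
 * SMOV sign-extends into a W or X register, so a 64-bit element is
 * never legal and a 32-bit element requires the X form (Q set).
 * UMOV zero-extends: 64-bit elements require Q set, smaller ones Q clear.
 */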
4848 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
4849 {
4850     MemOp esz;
4851     unsigned idx;
4852 
4853     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4854         return false;
4855     }
4856     if (is_signed) {
4857         if (esz == MO_64 || (esz == MO_32 && !a->q)) {
4858             return false;
4859         }
4860     } else {
4861         if (esz == MO_64 ? !a->q : a->q) {
4862             return false;
4863         }
4864     }
4865     if (fp_access_check(s)) {
4866         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4867         read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
4868         if (is_signed && !a->q) {
4869             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4870         }
4871     }
4872     return true;
4873 }
4874 
4875 TRANS(SMOV, do_smov_umov, a, MO_SIGN)
4876 TRANS(UMOV, do_smov_umov, a, 0)
4877 
4878 static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
4879 {
4880     MemOp esz;
4881     unsigned idx;
4882 
4883     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4884         return false;
4885     }
4886     if (fp_access_check(s)) {
4887         write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
4888         clear_vec_high(s, true, a->rd);
4889     }
4890     return true;
4891 }
4892 
4893 static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
4894 {
4895     MemOp esz;
4896     unsigned didx, sidx;
4897 
4898     if (!decode_esz_idx(a->di, &esz, &didx)) {
4899         return false;
4900     }
4901     sidx = a->si >> esz;
4902     if (fp_access_check(s)) {
4903         TCGv_i64 tmp = tcg_temp_new_i64();
4904 
4905         read_vec_element(s, tmp, a->rn, sidx, esz);
4906         write_vec_element(s, tmp, a->rd, didx, esz);
4907 
4908         /* INS is considered a 128-bit write for SVE. */
4909         clear_vec_high(s, true, a->rd);
4910     }
4911     return true;
4912 }
4913 
4914 /*
4915  * Advanced SIMD three same
4916  */
4917 
4918 typedef struct FPScalar {
4919     void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
4920     void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
4921     void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
4922 } FPScalar;
4923 
4924 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
4925 {
4926     switch (a->esz) {
4927     case MO_64:
4928         if (fp_access_check(s)) {
4929             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
4930             TCGv_i64 t1 = read_fp_dreg(s, a->rm);
4931             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
4932             write_fp_dreg(s, a->rd, t0);
4933         }
4934         break;
4935     case MO_32:
4936         if (fp_access_check(s)) {
4937             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
4938             TCGv_i32 t1 = read_fp_sreg(s, a->rm);
4939             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
4940             write_fp_sreg(s, a->rd, t0);
4941         }
4942         break;
4943     case MO_16:
4944         if (!dc_isar_feature(aa64_fp16, s)) {
4945             return false;
4946         }
4947         if (fp_access_check(s)) {
4948             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
4949             TCGv_i32 t1 = read_fp_hreg(s, a->rm);
4950             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
4951             write_fp_sreg(s, a->rd, t0);
4952         }
4953         break;
4954     default:
4955         return false;
4956     }
4957     return true;
4958 }
4959 
4960 static const FPScalar f_scalar_fadd = {
4961     gen_helper_vfp_addh,
4962     gen_helper_vfp_adds,
4963     gen_helper_vfp_addd,
4964 };
4965 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd)
4966 
4967 static const FPScalar f_scalar_fsub = {
4968     gen_helper_vfp_subh,
4969     gen_helper_vfp_subs,
4970     gen_helper_vfp_subd,
4971 };
4972 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub)
4973 
4974 static const FPScalar f_scalar_fdiv = {
4975     gen_helper_vfp_divh,
4976     gen_helper_vfp_divs,
4977     gen_helper_vfp_divd,
4978 };
4979 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv)
4980 
4981 static const FPScalar f_scalar_fmul = {
4982     gen_helper_vfp_mulh,
4983     gen_helper_vfp_muls,
4984     gen_helper_vfp_muld,
4985 };
4986 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul)
4987 
4988 static const FPScalar f_scalar_fmax = {
4989     gen_helper_advsimd_maxh,
4990     gen_helper_vfp_maxs,
4991     gen_helper_vfp_maxd,
4992 };
4993 TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax)
4994 
4995 static const FPScalar f_scalar_fmin = {
4996     gen_helper_advsimd_minh,
4997     gen_helper_vfp_mins,
4998     gen_helper_vfp_mind,
4999 };
5000 TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin)
5001 
5002 static const FPScalar f_scalar_fmaxnm = {
5003     gen_helper_advsimd_maxnumh,
5004     gen_helper_vfp_maxnums,
5005     gen_helper_vfp_maxnumd,
5006 };
5007 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm)
5008 
5009 static const FPScalar f_scalar_fminnm = {
5010     gen_helper_advsimd_minnumh,
5011     gen_helper_vfp_minnums,
5012     gen_helper_vfp_minnumd,
5013 };
5014 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm)
5015 
5016 static const FPScalar f_scalar_fmulx = {
5017     gen_helper_advsimd_mulxh,
5018     gen_helper_vfp_mulxs,
5019     gen_helper_vfp_mulxd,
5020 };
5021 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx)
5022 
5023 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5024 {
5025     gen_helper_vfp_mulh(d, n, m, s);
5026     gen_vfp_negh(d, d);
5027 }
5028 
5029 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5030 {
5031     gen_helper_vfp_muls(d, n, m, s);
5032     gen_vfp_negs(d, d);
5033 }
5034 
5035 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5036 {
5037     gen_helper_vfp_muld(d, n, m, s);
5038     gen_vfp_negd(d, d);
5039 }
5040 
5041 static const FPScalar f_scalar_fnmul = {
5042     gen_fnmul_h,
5043     gen_fnmul_s,
5044     gen_fnmul_d,
5045 };
5046 TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul)
5047 
5048 static const FPScalar f_scalar_fcmeq = {
5049     gen_helper_advsimd_ceq_f16,
5050     gen_helper_neon_ceq_f32,
5051     gen_helper_neon_ceq_f64,
5052 };
5053 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq)
5054 
5055 static const FPScalar f_scalar_fcmge = {
5056     gen_helper_advsimd_cge_f16,
5057     gen_helper_neon_cge_f32,
5058     gen_helper_neon_cge_f64,
5059 };
5060 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge)
5061 
5062 static const FPScalar f_scalar_fcmgt = {
5063     gen_helper_advsimd_cgt_f16,
5064     gen_helper_neon_cgt_f32,
5065     gen_helper_neon_cgt_f64,
5066 };
5067 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt)
5068 
5069 static const FPScalar f_scalar_facge = {
5070     gen_helper_advsimd_acge_f16,
5071     gen_helper_neon_acge_f32,
5072     gen_helper_neon_acge_f64,
5073 };
5074 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge)
5075 
5076 static const FPScalar f_scalar_facgt = {
5077     gen_helper_advsimd_acgt_f16,
5078     gen_helper_neon_acgt_f32,
5079     gen_helper_neon_acgt_f64,
5080 };
5081 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt)
5082 
5083 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5084 {
5085     gen_helper_vfp_subh(d, n, m, s);
5086     gen_vfp_absh(d, d);
5087 }
5088 
5089 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5090 {
5091     gen_helper_vfp_subs(d, n, m, s);
5092     gen_vfp_abss(d, d);
5093 }
5094 
5095 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5096 {
5097     gen_helper_vfp_subd(d, n, m, s);
5098     gen_vfp_absd(d, d);
5099 }
5100 
5101 static const FPScalar f_scalar_fabd = {
5102     gen_fabd_h,
5103     gen_fabd_s,
5104     gen_fabd_d,
5105 };
5106 TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)
5107 
5108 static const FPScalar f_scalar_frecps = {
5109     gen_helper_recpsf_f16,
5110     gen_helper_recpsf_f32,
5111     gen_helper_recpsf_f64,
5112 };
5113 TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps)
5114 
5115 static const FPScalar f_scalar_frsqrts = {
5116     gen_helper_rsqrtsf_f16,
5117     gen_helper_rsqrtsf_f32,
5118     gen_helper_rsqrtsf_f64,
5119 };
5120 TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts)
5121 
5122 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a,
5123                 MemOp sgn_n, MemOp sgn_m,
5124                 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp),
5125                 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
5126 {
5127     TCGv_i64 t0, t1, t2, qc;
5128     MemOp esz = a->esz;
5129 
5130     if (!fp_access_check(s)) {
5131         return true;
5132     }
5133 
5134     t0 = tcg_temp_new_i64();
5135     t1 = tcg_temp_new_i64();
5136     t2 = tcg_temp_new_i64();
5137     qc = tcg_temp_new_i64();
5138     read_vec_element(s, t1, a->rn, 0, esz | sgn_n);
5139     read_vec_element(s, t2, a->rm, 0, esz | sgn_m);
5140     tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5141 
5142     if (esz == MO_64) {
5143         gen_d(t0, qc, t1, t2);
5144     } else {
5145         gen_bhs(t0, qc, t1, t2, esz);
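        /* The helper produces a 64-bit value; zero-extend from the element size. */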
5146         tcg_gen_ext_i64(t0, t0, esz);
5147     }
5148 
5149     write_fp_dreg(s, a->rd, t0);
5150     tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5151     return true;
5152 }
5153 
5154 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d)
5155 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d)
5156 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d)
5157 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d)
5158 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d)
5159 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d)
5160 
5161 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a,
5162                              void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
5163 {
5164     if (fp_access_check(s)) {
5165         TCGv_i64 t0 = tcg_temp_new_i64();
5166         TCGv_i64 t1 = tcg_temp_new_i64();
5167 
5168         read_vec_element(s, t0, a->rn, 0, MO_64);
5169         read_vec_element(s, t1, a->rm, 0, MO_64);
5170         fn(t0, t0, t1);
5171         write_fp_dreg(s, a->rd, t0);
5172     }
5173     return true;
5174 }
5175 
5176 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64)
5177 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64)
5178 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64)
5179 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64)
5180 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64)
5181 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64)
5182 
5183 typedef struct ENVScalar2 {
5184     NeonGenTwoOpEnvFn *gen_bhs[3];
5185     NeonGenTwo64OpEnvFn *gen_d;
5186 } ENVScalar2;
5187 
5188 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f)
5189 {
5190     if (!fp_access_check(s)) {
5191         return true;
5192     }
5193     if (a->esz == MO_64) {
5194         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5195         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5196         f->gen_d(t0, tcg_env, t0, t1);
5197         write_fp_dreg(s, a->rd, t0);
5198     } else {
5199         TCGv_i32 t0 = tcg_temp_new_i32();
5200         TCGv_i32 t1 = tcg_temp_new_i32();
5201 
5202         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5203         read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5204         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
5205         write_fp_sreg(s, a->rd, t0);
5206     }
5207     return true;
5208 }
5209 
5210 static const ENVScalar2 f_scalar_sqshl = {
5211     { gen_helper_neon_qshl_s8,
5212       gen_helper_neon_qshl_s16,
5213       gen_helper_neon_qshl_s32 },
5214     gen_helper_neon_qshl_s64,
5215 };
5216 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl)
5217 
5218 static const ENVScalar2 f_scalar_uqshl = {
5219     { gen_helper_neon_qshl_u8,
5220       gen_helper_neon_qshl_u16,
5221       gen_helper_neon_qshl_u32 },
5222     gen_helper_neon_qshl_u64,
5223 };
5224 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl)
5225 
5226 static const ENVScalar2 f_scalar_sqrshl = {
5227     { gen_helper_neon_qrshl_s8,
5228       gen_helper_neon_qrshl_s16,
5229       gen_helper_neon_qrshl_s32 },
5230     gen_helper_neon_qrshl_s64,
5231 };
5232 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl)
5233 
5234 static const ENVScalar2 f_scalar_uqrshl = {
5235     { gen_helper_neon_qrshl_u8,
5236       gen_helper_neon_qrshl_u16,
5237       gen_helper_neon_qrshl_u32 },
5238     gen_helper_neon_qrshl_u64,
5239 };
5240 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl)
5241 
5242 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a,
5243                               const ENVScalar2 *f)
5244 {
5245     if (a->esz == MO_16 || a->esz == MO_32) {
5246         return do_env_scalar2(s, a, f);
5247     }
5248     return false;
5249 }
5250 
5251 static const ENVScalar2 f_scalar_sqdmulh = {
5252     { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 }
5253 };
5254 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh)
5255 
5256 static const ENVScalar2 f_scalar_sqrdmulh = {
5257     { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 }
5258 };
5259 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
5260 
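/* The gen_hs[] table is indexed by esz - 1: MO_16, MO_32. */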
5261 typedef struct ENVScalar3 {
5262     NeonGenThreeOpEnvFn *gen_hs[2];
5263 } ENVScalar3;
5264 
5265 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
5266                               const ENVScalar3 *f)
5267 {
5268     TCGv_i32 t0, t1, t2;
5269 
5270     if (a->esz != MO_16 && a->esz != MO_32) {
5271         return false;
5272     }
5273     if (!fp_access_check(s)) {
5274         return true;
5275     }
5276 
5277     t0 = tcg_temp_new_i32();
5278     t1 = tcg_temp_new_i32();
5279     t2 = tcg_temp_new_i32();
5280     read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5281     read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5282     read_vec_element_i32(s, t2, a->rd, 0, a->esz);
5283     f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
5284     write_fp_sreg(s, a->rd, t0);
5285     return true;
5286 }
5287 
5288 static const ENVScalar3 f_scalar_sqrdmlah = {
5289     { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
5290 };
5291 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
5292 
5293 static const ENVScalar3 f_scalar_sqrdmlsh = {
5294     { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
5295 };
5296 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
5297 
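/*
 * Scalar integer compares want an all-ones/all-zeros result, which
 * negsetcond (-(cond ? 1 : 0)) yields directly; TCG_COND_TSTNE
 * implements CMTST's (Rn & Rm) != 0 test.
 */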
5298 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
5299 {
5300     if (fp_access_check(s)) {
5301         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5302         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5303         tcg_gen_negsetcond_i64(cond, t0, t0, t1);
5304         write_fp_dreg(s, a->rd, t0);
5305     }
5306     return true;
5307 }
5308 
5309 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT)
5310 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU)
5311 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE)
5312 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
5313 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
5314 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
5315 
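/* The fns[] tables below are indexed by esz - 1: MO_16, MO_32, MO_64. */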
5316 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
5317                           gen_helper_gvec_3_ptr * const fns[3])
5318 {
5319     MemOp esz = a->esz;
5320 
5321     switch (esz) {
5322     case MO_64:
5323         if (!a->q) {
5324             return false;
5325         }
5326         break;
5327     case MO_32:
5328         break;
5329     case MO_16:
5330         if (!dc_isar_feature(aa64_fp16, s)) {
5331             return false;
5332         }
5333         break;
5334     default:
5335         return false;
5336     }
5337     if (fp_access_check(s)) {
5338         gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
5339                           esz == MO_16, data, fns[esz - 1]);
5340     }
5341     return true;
5342 }
5343 
5344 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
5345     gen_helper_gvec_fadd_h,
5346     gen_helper_gvec_fadd_s,
5347     gen_helper_gvec_fadd_d,
5348 };
5349 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)
5350 
5351 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
5352     gen_helper_gvec_fsub_h,
5353     gen_helper_gvec_fsub_s,
5354     gen_helper_gvec_fsub_d,
5355 };
5356 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)
5357 
5358 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
5359     gen_helper_gvec_fdiv_h,
5360     gen_helper_gvec_fdiv_s,
5361     gen_helper_gvec_fdiv_d,
5362 };
5363 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)
5364 
5365 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
5366     gen_helper_gvec_fmul_h,
5367     gen_helper_gvec_fmul_s,
5368     gen_helper_gvec_fmul_d,
5369 };
5370 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)
5371 
5372 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
5373     gen_helper_gvec_fmax_h,
5374     gen_helper_gvec_fmax_s,
5375     gen_helper_gvec_fmax_d,
5376 };
5377 TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax)
5378 
5379 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
5380     gen_helper_gvec_fmin_h,
5381     gen_helper_gvec_fmin_s,
5382     gen_helper_gvec_fmin_d,
5383 };
5384 TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin)
5385 
5386 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
5387     gen_helper_gvec_fmaxnum_h,
5388     gen_helper_gvec_fmaxnum_s,
5389     gen_helper_gvec_fmaxnum_d,
5390 };
5391 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)
5392 
5393 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
5394     gen_helper_gvec_fminnum_h,
5395     gen_helper_gvec_fminnum_s,
5396     gen_helper_gvec_fminnum_d,
5397 };
5398 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)
5399 
5400 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
5401     gen_helper_gvec_fmulx_h,
5402     gen_helper_gvec_fmulx_s,
5403     gen_helper_gvec_fmulx_d,
5404 };
5405 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)
5406 
5407 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
5408     gen_helper_gvec_vfma_h,
5409     gen_helper_gvec_vfma_s,
5410     gen_helper_gvec_vfma_d,
5411 };
5412 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)
5413 
5414 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
5415     gen_helper_gvec_vfms_h,
5416     gen_helper_gvec_vfms_s,
5417     gen_helper_gvec_vfms_d,
5418 };
5419 TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls)
5420 
5421 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
5422     gen_helper_gvec_fceq_h,
5423     gen_helper_gvec_fceq_s,
5424     gen_helper_gvec_fceq_d,
5425 };
5426 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)
5427 
5428 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
5429     gen_helper_gvec_fcge_h,
5430     gen_helper_gvec_fcge_s,
5431     gen_helper_gvec_fcge_d,
5432 };
5433 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)
5434 
5435 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
5436     gen_helper_gvec_fcgt_h,
5437     gen_helper_gvec_fcgt_s,
5438     gen_helper_gvec_fcgt_d,
5439 };
5440 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)
5441 
5442 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
5443     gen_helper_gvec_facge_h,
5444     gen_helper_gvec_facge_s,
5445     gen_helper_gvec_facge_d,
5446 };
5447 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)
5448 
5449 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
5450     gen_helper_gvec_facgt_h,
5451     gen_helper_gvec_facgt_s,
5452     gen_helper_gvec_facgt_d,
5453 };
5454 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt)
5455 
5456 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
5457     gen_helper_gvec_fabd_h,
5458     gen_helper_gvec_fabd_s,
5459     gen_helper_gvec_fabd_d,
5460 };
5461 TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd)
5462 
5463 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
5464     gen_helper_gvec_recps_h,
5465     gen_helper_gvec_recps_s,
5466     gen_helper_gvec_recps_d,
5467 };
5468 TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps)
5469 
5470 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
5471     gen_helper_gvec_rsqrts_h,
5472     gen_helper_gvec_rsqrts_s,
5473     gen_helper_gvec_rsqrts_d,
5474 };
5475 TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts)
5476 
5477 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
5478     gen_helper_gvec_faddp_h,
5479     gen_helper_gvec_faddp_s,
5480     gen_helper_gvec_faddp_d,
5481 };
5482 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp)
5483 
5484 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
5485     gen_helper_gvec_fmaxp_h,
5486     gen_helper_gvec_fmaxp_s,
5487     gen_helper_gvec_fmaxp_d,
5488 };
5489 TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp)
5490 
5491 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
5492     gen_helper_gvec_fminp_h,
5493     gen_helper_gvec_fminp_s,
5494     gen_helper_gvec_fminp_d,
5495 };
5496 TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp)
5497 
5498 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
5499     gen_helper_gvec_fmaxnump_h,
5500     gen_helper_gvec_fmaxnump_s,
5501     gen_helper_gvec_fmaxnump_d,
5502 };
5503 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp)
5504 
5505 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
5506     gen_helper_gvec_fminnump_h,
5507     gen_helper_gvec_fminnump_s,
5508     gen_helper_gvec_fminnump_d,
5509 };
5510 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
5511 
5512 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
5513 {
5514     if (fp_access_check(s)) {
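        /* is_s selects the FMLSL forms, is_2 the FMLAL2/FMLSL2 forms. */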
5515         int data = (is_2 << 1) | is_s;
5516         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
5517                            vec_full_reg_offset(s, a->rn),
5518                            vec_full_reg_offset(s, a->rm), tcg_env,
5519                            a->q ? 16 : 8, vec_full_reg_size(s),
5520                            data, gen_helper_gvec_fmlal_a64);
5521     }
5522     return true;
5523 }
5524 
5525 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
5526 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
5527 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
5528 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
5529 
5530 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
5531 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
5532 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
5533 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
5534 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)
5535 
5536 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
5537 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
5538 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
5539 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
5540 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
5541 
5542 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
5543 {
5544     if (fp_access_check(s)) {
5545         gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
5546     }
5547     return true;
5548 }
5549 
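/*
 * The bitsel sources are (select, if-set, if-clear); BSL, BIT and
 * BIF differ only in which register plays which role.
 */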
5550 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
5551 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
5552 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
5553 
5554 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc)
5555 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc)
5556 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc)
5557 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc)
5558 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc)
5559 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc)
5560 
5561 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl)
5562 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl)
5563 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl)
5564 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl)
5565 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl)
5566 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl)
5567 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl)
5568 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl)
5569 
5570 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add)
5571 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub)
5572 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd)
5573 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd)
5574 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub)
5575 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub)
5576 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd)
5577 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd)
5578 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax)
5579 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax)
5580 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin)
5581 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin)
5582 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba)
5583 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba)
5584 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd)
5585 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd)
5586 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul)
5587 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b)
5588 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla)
5589 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls)
5590 
5591 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond)
5592 {
5593     if (a->esz == MO_64 && !a->q) {
5594         return false;
5595     }
5596     if (fp_access_check(s)) {
5597         tcg_gen_gvec_cmp(cond, a->esz,
5598                          vec_full_reg_offset(s, a->rd),
5599                          vec_full_reg_offset(s, a->rn),
5600                          vec_full_reg_offset(s, a->rm),
5601                          a->q ? 16 : 8, vec_full_reg_size(s));
5602     }
5603     return true;
5604 }
5605 
5606 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT)
5607 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU)
5608 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE)
5609 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
5610 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
5611 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
5612 
5613 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
5614 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
5615 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
5616 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
5617 
5618 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
5619                           gen_helper_gvec_4 *fn)
5620 {
5621     if (fp_access_check(s)) {
5622         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
5623     }
5624     return true;
5625 }
5626 
5627 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
5628                               gen_helper_gvec_4_ptr *fn)
5629 {
5630     if (fp_access_check(s)) {
5631         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
5632     }
5633     return true;
5634 }
5635 
5636 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
5637 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
5638 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
5639 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
5640 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
5641 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
5642 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
5643 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
5644 
5645 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
5646 {
5647     if (!dc_isar_feature(aa64_bf16, s)) {
5648         return false;
5649     }
5650     if (fp_access_check(s)) {
5651         /* Q bit selects BFMLALB vs BFMLALT. */
5652         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q,
5653                           gen_helper_gvec_bfmlal);
5654     }
5655     return true;
5656 }
5657 
5658 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
5659     gen_helper_gvec_fcaddh,
5660     gen_helper_gvec_fcadds,
5661     gen_helper_gvec_fcaddd,
5662 };
5663 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd)
5664 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd)
5665 
5666 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
5667 {
5668     gen_helper_gvec_4_ptr *fn;
5669 
5670     if (!dc_isar_feature(aa64_fcma, s)) {
5671         return false;
5672     }
5673     switch (a->esz) {
5674     case MO_64:
5675         if (!a->q) {
5676             return false;
5677         }
5678         fn = gen_helper_gvec_fcmlad;
5679         break;
5680     case MO_32:
5681         fn = gen_helper_gvec_fcmlas;
5682         break;
5683     case MO_16:
5684         if (!dc_isar_feature(aa64_fp16, s)) {
5685             return false;
5686         }
5687         fn = gen_helper_gvec_fcmlah;
5688         break;
5689     default:
5690         return false;
5691     }
5692     if (fp_access_check(s)) {
5693         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
5694                           a->esz == MO_16, a->rot, fn);
5695     }
5696     return true;
5697 }
5698 
5699 /*
5700  * Widening vector x vector/indexed.
5701  *
5702  * These read from the top or bottom half of a 128-bit vector.
5703  * After widening, optionally accumulate with a 128-bit vector.
5704  * Implement these inline, as the number of elements is limited
5705  * and the related SVE and SME operations on larger vectors use
5706  * even/odd elements instead of top/bottom half.
5707  *
5708  * If idx >= 0, operand 2 is indexed, otherwise vector.
5709  * If acc, operand 0 is preloaded from rd for accumulation.
5710  */
5711 
5712 /* Widen and operate per element, in an order safe when rd overlaps rn/rm. */
5713 static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
5714                             int rd, int rn, int rm, int idx,
5715                             NeonGenTwo64OpFn *fn, bool acc)
5716 {
5717     TCGv_i64 tcg_op0 = tcg_temp_new_i64();
5718     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
5719     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
5720     MemOp esz = memop & MO_SIZE;
5721     int half = 8 >> esz;
5722     int top_swap, top_half;
5723 
5724     /* There are no 64x64->128 bit operations. */
5725     if (esz >= MO_64) {
5726         return false;
5727     }
5728     if (!fp_access_check(s)) {
5729         return true;
5730     }
5731 
5732     if (idx >= 0) {
5733         read_vec_element(s, tcg_op2, rm, idx, memop);
5734     }
5735 
5736     /*
5737      * For top half inputs, iterate forward; backward for bottom half.
5738      * This means the store to the destination will not occur until
5739      * overlapping inputs are consumed.
5740      * Use top_swap to conditionally invert the forward iteration index.
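     * E.g. with half == 4, the bottom half visits elements 3,2,1,0
     * while the top half visits elements 4,5,6,7 in order.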
5741      */
5742     top_swap = top ? 0 : half - 1;
5743     top_half = top ? half : 0;
5744 
5745     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
5746         int elt = elt_fwd ^ top_swap;
5747 
5748         read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
5749         if (idx < 0) {
5750             read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
5751         }
5752         if (acc) {
5753             read_vec_element(s, tcg_op0, rd, elt, memop + 1);
5754         }
5755         fn(tcg_op0, tcg_op1, tcg_op2);
5756         write_vec_element(s, tcg_op0, rd, elt, esz + 1);
5757     }
5758     clear_vec_high(s, 1, rd);
5759     return true;
5760 }
5761 
5762 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5763 {
5764     TCGv_i64 t = tcg_temp_new_i64();
5765     tcg_gen_mul_i64(t, n, m);
5766     tcg_gen_add_i64(d, d, t);
5767 }
5768 
5769 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5770 {
5771     TCGv_i64 t = tcg_temp_new_i64();
5772     tcg_gen_mul_i64(t, n, m);
5773     tcg_gen_sub_i64(d, d, t);
5774 }
5775 
5776 TRANS(SMULL_v, do_3op_widening,
5777       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5778       tcg_gen_mul_i64, false)
5779 TRANS(UMULL_v, do_3op_widening,
5780       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5781       tcg_gen_mul_i64, false)
5782 TRANS(SMLAL_v, do_3op_widening,
5783       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5784       gen_muladd_i64, true)
5785 TRANS(UMLAL_v, do_3op_widening,
5786       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5787       gen_muladd_i64, true)
5788 TRANS(SMLSL_v, do_3op_widening,
5789       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5790       gen_mulsub_i64, true)
5791 TRANS(UMLSL_v, do_3op_widening,
5792       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5793       gen_mulsub_i64, true)
5794 
5795 TRANS(SMULL_vi, do_3op_widening,
5796       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5797       tcg_gen_mul_i64, false)
5798 TRANS(UMULL_vi, do_3op_widening,
5799       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5800       tcg_gen_mul_i64, false)
5801 TRANS(SMLAL_vi, do_3op_widening,
5802       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5803       gen_muladd_i64, true)
5804 TRANS(UMLAL_vi, do_3op_widening,
5805       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5806       gen_muladd_i64, true)
5807 TRANS(SMLSL_vi, do_3op_widening,
5808       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5809       gen_mulsub_i64, true)
5810 TRANS(UMLSL_vi, do_3op_widening,
5811       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5812       gen_mulsub_i64, true)
5813 
5814 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5815 {
5816     TCGv_i64 t1 = tcg_temp_new_i64();
5817     TCGv_i64 t2 = tcg_temp_new_i64();
5818 
5819     tcg_gen_sub_i64(t1, n, m);
5820     tcg_gen_sub_i64(t2, m, n);
5821     tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
5822 }
5823 
5824 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5825 {
5826     TCGv_i64 t1 = tcg_temp_new_i64();
5827     TCGv_i64 t2 = tcg_temp_new_i64();
5828 
5829     tcg_gen_sub_i64(t1, n, m);
5830     tcg_gen_sub_i64(t2, m, n);
5831     tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
5832 }
5833 
5834 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5835 {
5836     TCGv_i64 t = tcg_temp_new_i64();
5837     gen_sabd_i64(t, n, m);
5838     tcg_gen_add_i64(d, d, t);
5839 }
5840 
5841 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5842 {
5843     TCGv_i64 t = tcg_temp_new_i64();
5844     gen_uabd_i64(t, n, m);
5845     tcg_gen_add_i64(d, d, t);
5846 }
5847 
5848 TRANS(SADDL_v, do_3op_widening,
5849       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5850       tcg_gen_add_i64, false)
5851 TRANS(UADDL_v, do_3op_widening,
5852       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5853       tcg_gen_add_i64, false)
5854 TRANS(SSUBL_v, do_3op_widening,
5855       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5856       tcg_gen_sub_i64, false)
5857 TRANS(USUBL_v, do_3op_widening,
5858       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5859       tcg_gen_sub_i64, false)
5860 TRANS(SABDL_v, do_3op_widening,
5861       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5862       gen_sabd_i64, false)
5863 TRANS(UABDL_v, do_3op_widening,
5864       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5865       gen_uabd_i64, false)
5866 TRANS(SABAL_v, do_3op_widening,
5867       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5868       gen_saba_i64, true)
5869 TRANS(UABAL_v, do_3op_widening,
5870       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5871       gen_uaba_i64, true)
5872 
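/*
 * The saturating doubling in SQDMULL is implemented as a saturating
 * add of the product to itself, which also sets QC on overflow.
 */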
5873 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5874 {
5875     tcg_gen_mul_i64(d, n, m);
5876     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
5877 }
5878 
5879 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5880 {
5881     tcg_gen_mul_i64(d, n, m);
5882     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
5883 }
5884 
5885 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5886 {
5887     TCGv_i64 t = tcg_temp_new_i64();
5888 
5889     tcg_gen_mul_i64(t, n, m);
5890     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
5891     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
5892 }
5893 
5894 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5895 {
5896     TCGv_i64 t = tcg_temp_new_i64();
5897 
5898     tcg_gen_mul_i64(t, n, m);
5899     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
5900     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
5901 }
5902 
5903 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5904 {
5905     TCGv_i64 t = tcg_temp_new_i64();
5906 
5907     tcg_gen_mul_i64(t, n, m);
5908     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
5909     tcg_gen_neg_i64(t, t);
5910     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
5911 }
5912 
5913 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5914 {
5915     TCGv_i64 t = tcg_temp_new_i64();
5916 
5917     tcg_gen_mul_i64(t, n, m);
5918     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
5919     tcg_gen_neg_i64(t, t);
5920     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
5921 }
5922 
5923 TRANS(SQDMULL_v, do_3op_widening,
5924       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5925       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
5926 TRANS(SQDMLAL_v, do_3op_widening,
5927       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5928       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
5929 TRANS(SQDMLSL_v, do_3op_widening,
5930       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5931       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
5932 
5933 TRANS(SQDMULL_vi, do_3op_widening,
5934       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5935       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
5936 TRANS(SQDMLAL_vi, do_3op_widening,
5937       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5938       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
5939 TRANS(SQDMLSL_vi, do_3op_widening,
5940       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5941       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
5942 
5943 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
5944                            MemOp sign, bool sub)
5945 {
5946     TCGv_i64 tcg_op0, tcg_op1;
5947     MemOp esz = a->esz;
5948     int half = 8 >> esz;
5949     bool top = a->q;
5950     int top_swap = top ? 0 : half - 1;
5951     int top_half = top ? half : 0;
5952 
5953     /* There are no 64x64->128 bit operations. */
5954     if (esz >= MO_64) {
5955         return false;
5956     }
5957     if (!fp_access_check(s)) {
5958         return true;
5959     }
5960     tcg_op0 = tcg_temp_new_i64();
5961     tcg_op1 = tcg_temp_new_i64();
5962 
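    /*
     * Only Rm is widened, from the register half selected by Q;
     * Rn is consumed at the wide element size.
     */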
5963     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
5964         int elt = elt_fwd ^ top_swap;
5965 
5966         read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
5967         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
5968         if (sub) {
5969             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
5970         } else {
5971             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
5972         }
5973         write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
5974     }
5975     clear_vec_high(s, 1, a->rd);
5976     return true;
5977 }
5978 
5979 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
5980 TRANS(UADDW, do_addsub_wide, a, 0, false)
5981 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
5982 TRANS(USUBW, do_addsub_wide, a, 0, true)
5983 
5984 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
5985                                  bool sub, bool round)
5986 {
5987     TCGv_i64 tcg_op0, tcg_op1;
5988     MemOp esz = a->esz;
5989     int half = 8 >> esz;
5990     bool top = a->q;
5991     int ebits = 8 << esz;
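    /* Rounding constant: half of the low part that is shifted out. */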
5992     uint64_t rbit = 1ull << (ebits - 1);
5993     int top_swap, top_half;
5994 
5995     /* There are no 128x128->64 bit operations. */
5996     if (esz >= MO_64) {
5997         return false;
5998     }
5999     if (!fp_access_check(s)) {
6000         return true;
6001     }
6002     tcg_op0 = tcg_temp_new_i64();
6003     tcg_op1 = tcg_temp_new_i64();
6004 
6005     /*
6006      * For top half inputs, iterate backward; forward for bottom half.
6007      * This means the store to the destination will not occur until
6008      * overlapping inputs are consumed.
6009      */
6010     top_swap = top ? half - 1 : 0;
6011     top_half = top ? half : 0;
6012 
6013     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6014         int elt = elt_fwd ^ top_swap;
6015 
6016         read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
6017         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6018         if (sub) {
6019             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6020         } else {
6021             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6022         }
6023         if (round) {
6024             tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
6025         }
6026         tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
6027         write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
6028     }
6029     clear_vec_high(s, top, a->rd);
6030     return true;
6031 }
6032 
6033 TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
6034 TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
6035 TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
6036 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
6037 
6038 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
6039 {
6040     if (fp_access_check(s)) {
6041         /* The Q field specifies lo/hi half input for these insns.  */
6042         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
6043     }
6044     return true;
6045 }
6046 
6047 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
6048 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
6049 
6050 /*
6051  * Advanced SIMD scalar/vector x indexed element
6052  */
6053 
6054 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
6055 {
6056     switch (a->esz) {
6057     case MO_64:
6058         if (fp_access_check(s)) {
6059             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
6060             TCGv_i64 t1 = tcg_temp_new_i64();
6061 
6062             read_vec_element(s, t1, a->rm, a->idx, MO_64);
6063             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6064             write_fp_dreg(s, a->rd, t0);
6065         }
6066         break;
6067     case MO_32:
6068         if (fp_access_check(s)) {
6069             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
6070             TCGv_i32 t1 = tcg_temp_new_i32();
6071 
6072             read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
6073             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6074             write_fp_sreg(s, a->rd, t0);
6075         }
6076         break;
6077     case MO_16:
6078         if (!dc_isar_feature(aa64_fp16, s)) {
6079             return false;
6080         }
6081         if (fp_access_check(s)) {
6082             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
6083             TCGv_i32 t1 = tcg_temp_new_i32();
6084 
6085             read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
6086             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
6087             write_fp_sreg(s, a->rd, t0);
6088         }
6089         break;
6090     default:
6091         g_assert_not_reached();
6092     }
6093     return true;
6094 }
6095 
6096 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
6097 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
6098 
6099 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
6100 {
6101     switch (a->esz) {
6102     case MO_64:
6103         if (fp_access_check(s)) {
6104             TCGv_i64 t0 = read_fp_dreg(s, a->rd);
6105             TCGv_i64 t1 = read_fp_dreg(s, a->rn);
6106             TCGv_i64 t2 = tcg_temp_new_i64();
6107 
6108             read_vec_element(s, t2, a->rm, a->idx, MO_64);
6109             if (neg) {
6110                 gen_vfp_negd(t1, t1);
6111             }
6112             gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
6113             write_fp_dreg(s, a->rd, t0);
6114         }
6115         break;
6116     case MO_32:
6117         if (fp_access_check(s)) {
6118             TCGv_i32 t0 = read_fp_sreg(s, a->rd);
6119             TCGv_i32 t1 = read_fp_sreg(s, a->rn);
6120             TCGv_i32 t2 = tcg_temp_new_i32();
6121 
6122             read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
6123             if (neg) {
6124                 gen_vfp_negs(t1, t1);
6125             }
6126             gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
6127             write_fp_sreg(s, a->rd, t0);
6128         }
6129         break;
6130     case MO_16:
6131         if (!dc_isar_feature(aa64_fp16, s)) {
6132             return false;
6133         }
6134         if (fp_access_check(s)) {
6135             TCGv_i32 t0 = read_fp_hreg(s, a->rd);
6136             TCGv_i32 t1 = read_fp_hreg(s, a->rn);
6137             TCGv_i32 t2 = tcg_temp_new_i32();
6138 
6139             read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
6140             if (neg) {
6141                 gen_vfp_negh(t1, t1);
6142             }
6143             gen_helper_advsimd_muladdh(t0, t1, t2, t0,
6144                                        fpstatus_ptr(FPST_FPCR_F16));
6145             write_fp_sreg(s, a->rd, t0);
6146         }
6147         break;
6148     default:
6149         g_assert_not_reached();
6150     }
6151     return true;
6152 }
6153 
6154 TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
6155 TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
6156 
6157 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
6158                                   const ENVScalar2 *f)
6159 {
6160     if (a->esz < MO_16 || a->esz > MO_32) {
6161         return false;
6162     }
6163     if (fp_access_check(s)) {
6164         TCGv_i32 t0 = tcg_temp_new_i32();
6165         TCGv_i32 t1 = tcg_temp_new_i32();
6166 
6167         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6168         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6169         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
6170         write_fp_sreg(s, a->rd, t0);
6171     }
6172     return true;
6173 }
6174 
6175 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
6176 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
6177 
6178 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
6179                                   const ENVScalar3 *f)
6180 {
6181     if (a->esz < MO_16 || a->esz > MO_32) {
6182         return false;
6183     }
6184     if (fp_access_check(s)) {
6185         TCGv_i32 t0 = tcg_temp_new_i32();
6186         TCGv_i32 t1 = tcg_temp_new_i32();
6187         TCGv_i32 t2 = tcg_temp_new_i32();
6188 
6189         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6190         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6191         read_vec_element_i32(s, t2, a->rd, 0, a->esz);
6192         f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
6193         write_fp_sreg(s, a->rd, t0);
6194     }
6195     return true;
6196 }
6197 
6198 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
6199 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
6200 
6201 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
6202                                           NeonGenTwo64OpFn *fn, bool acc)
6203 {
6204     if (fp_access_check(s)) {
6205         TCGv_i64 t0 = tcg_temp_new_i64();
6206         TCGv_i64 t1 = tcg_temp_new_i64();
6207         TCGv_i64 t2 = tcg_temp_new_i64();
6208         unsigned vsz, dofs;
6209 
6210         if (acc) {
6211             read_vec_element(s, t0, a->rd, 0, a->esz + 1);
6212         }
6213         read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
6214         read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
6215         fn(t0, t1, t2);
6216 
6217         /* Clear the whole register first, then store scalar. */
6218         vsz = vec_full_reg_size(s);
6219         dofs = vec_full_reg_offset(s, a->rd);
6220         tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
6221         write_vec_element(s, t0, a->rd, 0, a->esz + 1);
6222     }
6223     return true;
6224 }
6225 
6226 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
6227       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6228 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
6229       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6230 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
6231       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6232 
6233 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6234                               gen_helper_gvec_3_ptr * const fns[3])
6235 {
6236     MemOp esz = a->esz;
6237 
6238     switch (esz) {
6239     case MO_64:
6240         if (!a->q) {
6241             return false;
6242         }
6243         break;
6244     case MO_32:
6245         break;
6246     case MO_16:
6247         if (!dc_isar_feature(aa64_fp16, s)) {
6248             return false;
6249         }
6250         break;
6251     default:
6252         g_assert_not_reached();
6253     }
6254     if (fp_access_check(s)) {
6255         gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
6256                           esz == MO_16, a->idx, fns[esz - 1]);
6257     }
6258     return true;
6259 }
6260 
6261 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
6262     gen_helper_gvec_fmul_idx_h,
6263     gen_helper_gvec_fmul_idx_s,
6264     gen_helper_gvec_fmul_idx_d,
6265 };
6266 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
6267 
6268 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
6269     gen_helper_gvec_fmulx_idx_h,
6270     gen_helper_gvec_fmulx_idx_s,
6271     gen_helper_gvec_fmulx_idx_d,
6272 };
6273 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
6274 
6275 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
6276 {
6277     static gen_helper_gvec_4_ptr * const fns[3] = {
6278         gen_helper_gvec_fmla_idx_h,
6279         gen_helper_gvec_fmla_idx_s,
6280         gen_helper_gvec_fmla_idx_d,
6281     };
6282     MemOp esz = a->esz;
6283 
6284     switch (esz) {
6285     case MO_64:
6286         if (!a->q) {
6287             return false;
6288         }
6289         break;
6290     case MO_32:
6291         break;
6292     case MO_16:
6293         if (!dc_isar_feature(aa64_fp16, s)) {
6294             return false;
6295         }
6296         break;
6297     default:
6298         g_assert_not_reached();
6299     }
6300     if (fp_access_check(s)) {
6301         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6302                           esz == MO_16, (a->idx << 1) | neg,
6303                           fns[esz - 1]);
6304     }
6305     return true;
6306 }
6307 
6308 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
6309 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
6310 
6311 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
6312 {
6313     if (fp_access_check(s)) {
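        /* Pack the index with the is_2/is_s flags for the helper. */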
6314         int data = (a->idx << 2) | (is_2 << 1) | is_s;
6315         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
6316                            vec_full_reg_offset(s, a->rn),
6317                            vec_full_reg_offset(s, a->rm), tcg_env,
6318                            a->q ? 16 : 8, vec_full_reg_size(s),
6319                            data, gen_helper_gvec_fmlal_idx_a64);
6320     }
6321     return true;
6322 }
6323 
6324 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
6325 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
6326 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
6327 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
6328 
6329 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6330                                gen_helper_gvec_3 * const fns[2])
6331 {
6332     assert(a->esz == MO_16 || a->esz == MO_32);
6333     if (fp_access_check(s)) {
6334         gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]);
6335     }
6336     return true;
6337 }
6338 
6339 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = {
6340     gen_helper_gvec_mul_idx_h,
6341     gen_helper_gvec_mul_idx_s,
6342 };
6343 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul)
6344 
6345 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub)
6346 {
6347     static gen_helper_gvec_4 * const fns[2][2] = {
6348         { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h },
6349         { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s },
6350     };
6351 
6352     assert(a->esz == MO_16 || a->esz == MO_32);
6353     if (fp_access_check(s)) {
6354         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd,
6355                          a->idx, fns[a->esz - 1][sub]);
6356     }
6357     return true;
6358 }
6359 
6360 TRANS(MLA_vi, do_mla_vector_idx, a, false)
6361 TRANS(MLS_vi, do_mla_vector_idx, a, true)
6362 
6363 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a,
6364                                   gen_helper_gvec_4 * const fns[2])
6365 {
6366     assert(a->esz == MO_16 || a->esz == MO_32);
6367     if (fp_access_check(s)) {
6368         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
6369                            vec_full_reg_offset(s, a->rn),
6370                            vec_full_reg_offset(s, a->rm),
6371                            offsetof(CPUARMState, vfp.qc),
6372                            a->q ? 16 : 8, vec_full_reg_size(s),
6373                            a->idx, fns[a->esz - 1]);
6374     }
6375     return true;
6376 }
6377 
6378 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = {
6379     gen_helper_neon_sqdmulh_idx_h,
6380     gen_helper_neon_sqdmulh_idx_s,
6381 };
6382 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh)
6383 
6384 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
6385     gen_helper_neon_sqrdmulh_idx_h,
6386     gen_helper_neon_sqrdmulh_idx_s,
6387 };
6388 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
6389 
6390 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = {
6391     gen_helper_neon_sqrdmlah_idx_h,
6392     gen_helper_neon_sqrdmlah_idx_s,
6393 };
6394 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6395            f_vector_idx_sqrdmlah)
6396 
6397 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = {
6398     gen_helper_neon_sqrdmlsh_idx_h,
6399     gen_helper_neon_sqrdmlsh_idx_s,
6400 };
6401 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6402            f_vector_idx_sqrdmlsh)
6403 
6404 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
6405                               gen_helper_gvec_4 *fn)
6406 {
6407     if (fp_access_check(s)) {
6408         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6409     }
6410     return true;
6411 }
6412 
6413 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
6414                                   gen_helper_gvec_4_ptr *fn)
6415 {
6416     if (fp_access_check(s)) {
6417         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6418     }
6419     return true;
6420 }
6421 
6422 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
6423 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
6424 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6425            gen_helper_gvec_sudot_idx_b)
6426 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6427            gen_helper_gvec_usdot_idx_b)
6428 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
6429            gen_helper_gvec_bfdot_idx)
6430 
6431 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
6432 {
6433     if (!dc_isar_feature(aa64_bf16, s)) {
6434         return false;
6435     }
6436     if (fp_access_check(s)) {
6437         /* Q bit selects BFMLALB vs BFMLALT. */
6438         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0,
6439                           (a->idx << 1) | a->q,
6440                           gen_helper_gvec_bfmlal_idx);
6441     }
6442     return true;
6443 }
6444 
6445 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
6446 {
6447     gen_helper_gvec_4_ptr *fn;
6448 
6449     if (!dc_isar_feature(aa64_fcma, s)) {
6450         return false;
6451     }
6452     switch (a->esz) {
6453     case MO_16:
6454         if (!dc_isar_feature(aa64_fp16, s)) {
6455             return false;
6456         }
6457         fn = gen_helper_gvec_fcmlah_idx;
6458         break;
6459     case MO_32:
6460         fn = gen_helper_gvec_fcmlas_idx;
6461         break;
6462     default:
6463         g_assert_not_reached();
6464     }
6465     if (fp_access_check(s)) {
6466         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6467                           a->esz == MO_16, (a->idx << 2) | a->rot, fn);
6468     }
6469     return true;
6470 }
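/*
 * Added note: the packed argument (a->idx << 2) | a->rot hands the
 * helper both the selected element index and the 2-bit rotation;
 * 'rot' selects the 0/90/180/270 degree rotation of the complex
 * multiply-accumulate.
 */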
6471 
6472 /*
6473  * Advanced SIMD scalar pairwise
6474  */
6475 
6476 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
6477 {
6478     switch (a->esz) {
6479     case MO_64:
6480         if (fp_access_check(s)) {
6481             TCGv_i64 t0 = tcg_temp_new_i64();
6482             TCGv_i64 t1 = tcg_temp_new_i64();
6483 
6484             read_vec_element(s, t0, a->rn, 0, MO_64);
6485             read_vec_element(s, t1, a->rn, 1, MO_64);
6486             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6487             write_fp_dreg(s, a->rd, t0);
6488         }
6489         break;
6490     case MO_32:
6491         if (fp_access_check(s)) {
6492             TCGv_i32 t0 = tcg_temp_new_i32();
6493             TCGv_i32 t1 = tcg_temp_new_i32();
6494 
6495             read_vec_element_i32(s, t0, a->rn, 0, MO_32);
6496             read_vec_element_i32(s, t1, a->rn, 1, MO_32);
6497             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6498             write_fp_sreg(s, a->rd, t0);
6499         }
6500         break;
6501     case MO_16:
6502         if (!dc_isar_feature(aa64_fp16, s)) {
6503             return false;
6504         }
6505         if (fp_access_check(s)) {
6506             TCGv_i32 t0 = tcg_temp_new_i32();
6507             TCGv_i32 t1 = tcg_temp_new_i32();
6508 
6509             read_vec_element_i32(s, t0, a->rn, 0, MO_16);
6510             read_vec_element_i32(s, t1, a->rn, 1, MO_16);
6511             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
6512             write_fp_sreg(s, a->rd, t0);
6513         }
6514         break;
6515     default:
6516         g_assert_not_reached();
6517     }
6518     return true;
6519 }
6520 
6521 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
6522 TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax)
6523 TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin)
6524 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
6525 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
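/*
 * Added note: the scalar pairwise forms above reduce the two lowest
 * elements of Rn, e.g. FADDP D0, V1.2D computes V1.D[0] + V1.D[1];
 * only the half-precision variant is gated on FEAT_FP16.
 */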
6526 
6527 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
6528 {
6529     if (fp_access_check(s)) {
6530         TCGv_i64 t0 = tcg_temp_new_i64();
6531         TCGv_i64 t1 = tcg_temp_new_i64();
6532 
6533         read_vec_element(s, t0, a->rn, 0, MO_64);
6534         read_vec_element(s, t1, a->rn, 1, MO_64);
6535         tcg_gen_add_i64(t0, t0, t1);
6536         write_fp_dreg(s, a->rd, t0);
6537     }
6538     return true;
6539 }
6540 
6541 /*
6542  * Floating-point conditional select
6543  */
6544 
6545 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
6546 {
6547     TCGv_i64 t_true, t_false;
6548     DisasCompare64 c;
6549 
6550     switch (a->esz) {
6551     case MO_32:
6552     case MO_64:
6553         break;
6554     case MO_16:
6555         if (!dc_isar_feature(aa64_fp16, s)) {
6556             return false;
6557         }
6558         break;
6559     default:
6560         return false;
6561     }
6562 
6563     if (!fp_access_check(s)) {
6564         return true;
6565     }
6566 
6567     /* Zero extend sreg & hreg inputs to 64 bits now.  */
6568     t_true = tcg_temp_new_i64();
6569     t_false = tcg_temp_new_i64();
6570     read_vec_element(s, t_true, a->rn, 0, a->esz);
6571     read_vec_element(s, t_false, a->rm, 0, a->esz);
6572 
6573     a64_test_cc(&c, a->cond);
6574     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
6575                         t_true, t_false);
6576 
6577     /*
6578      * Note that sregs & hregs write back zeros to the high bits,
6579      * and we've already done the zero-extension.
6580      */
6581     write_fp_dreg(s, a->rd, t_true);
6582     return true;
6583 }
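/*
 * Added note: FCSEL writes Rn to Rd if the condition holds and Rm
 * otherwise, e.g. FCSEL S0, S1, S2, GE. Both inputs are widened to
 * 64 bits up front so that a single movcond plus write_fp_dreg
 * serves all three element sizes.
 */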
6584 
6585 /*
6586  * Floating-point data-processing (3 source)
6587  */
6588 
6589 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
6590 {
6591     TCGv_ptr fpst;
6592 
6593     /*
6594      * These are fused multiply-add.  Note that doing the negations here
6595      * as separate steps is correct: an input NaN should come out with
6596      * its sign bit flipped if it is a negated-input.
6597      * its sign bit flipped if it is a negated input.
6598     switch (a->esz) {
6599     case MO_64:
6600         if (fp_access_check(s)) {
6601             TCGv_i64 tn = read_fp_dreg(s, a->rn);
6602             TCGv_i64 tm = read_fp_dreg(s, a->rm);
6603             TCGv_i64 ta = read_fp_dreg(s, a->ra);
6604 
6605             if (neg_a) {
6606                 gen_vfp_negd(ta, ta);
6607             }
6608             if (neg_n) {
6609                 gen_vfp_negd(tn, tn);
6610             }
6611             fpst = fpstatus_ptr(FPST_FPCR);
6612             gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
6613             write_fp_dreg(s, a->rd, ta);
6614         }
6615         break;
6616 
6617     case MO_32:
6618         if (fp_access_check(s)) {
6619             TCGv_i32 tn = read_fp_sreg(s, a->rn);
6620             TCGv_i32 tm = read_fp_sreg(s, a->rm);
6621             TCGv_i32 ta = read_fp_sreg(s, a->ra);
6622 
6623             if (neg_a) {
6624                 gen_vfp_negs(ta, ta);
6625             }
6626             if (neg_n) {
6627                 gen_vfp_negs(tn, tn);
6628             }
6629             fpst = fpstatus_ptr(FPST_FPCR);
6630             gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
6631             write_fp_sreg(s, a->rd, ta);
6632         }
6633         break;
6634 
6635     case MO_16:
6636         if (!dc_isar_feature(aa64_fp16, s)) {
6637             return false;
6638         }
6639         if (fp_access_check(s)) {
6640             TCGv_i32 tn = read_fp_hreg(s, a->rn);
6641             TCGv_i32 tm = read_fp_hreg(s, a->rm);
6642             TCGv_i32 ta = read_fp_hreg(s, a->ra);
6643 
6644             if (neg_a) {
6645                 gen_vfp_negh(ta, ta);
6646             }
6647             if (neg_n) {
6648                 gen_vfp_negh(tn, tn);
6649             }
6650             fpst = fpstatus_ptr(FPST_FPCR_F16);
6651             gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
6652             write_fp_sreg(s, a->rd, ta);
6653         }
6654         break;
6655 
6656     default:
6657         return false;
6658     }
6659     return true;
6660 }
6661 
6662 TRANS(FMADD, do_fmadd, a, false, false)
6663 TRANS(FNMADD, do_fmadd, a, true, true)
6664 TRANS(FMSUB, do_fmadd, a, false, true)
6665 TRANS(FNMSUB, do_fmadd, a, true, false)
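/*
 * Added note: the four expansions above map onto the (neg_a, neg_n)
 * flags of do_fmadd as follows:
 *   FMADD  -> fma( Rn, Rm,  Ra)  i.e.  Ra + Rn * Rm
 *   FNMADD -> fma(-Rn, Rm, -Ra)  i.e. -Ra - Rn * Rm
 *   FMSUB  -> fma(-Rn, Rm,  Ra)  i.e.  Ra - Rn * Rm
 *   FNMSUB -> fma( Rn, Rm, -Ra)  i.e. -Ra + Rn * Rm
 */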
6666 
6667 /* Shift a TCGv src by a TCGv shift_amount, put the result in dst.
6668  * Note that it is the caller's responsibility to ensure that the
6669  * shift amount is in range (i.e. 0..31 or 0..63) and to provide the
6670  * ARM-mandated semantics for out-of-range shifts.
6671  */
6672 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
6673                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
6674 {
6675     switch (shift_type) {
6676     case A64_SHIFT_TYPE_LSL:
6677         tcg_gen_shl_i64(dst, src, shift_amount);
6678         break;
6679     case A64_SHIFT_TYPE_LSR:
6680         tcg_gen_shr_i64(dst, src, shift_amount);
6681         break;
6682     case A64_SHIFT_TYPE_ASR:
6683         if (!sf) {
6684             tcg_gen_ext32s_i64(dst, src);
6685         }
6686         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
6687         break;
6688     case A64_SHIFT_TYPE_ROR:
6689         if (sf) {
6690             tcg_gen_rotr_i64(dst, src, shift_amount);
6691         } else {
6692             TCGv_i32 t0, t1;
6693             t0 = tcg_temp_new_i32();
6694             t1 = tcg_temp_new_i32();
6695             tcg_gen_extrl_i64_i32(t0, src);
6696             tcg_gen_extrl_i64_i32(t1, shift_amount);
6697             tcg_gen_rotr_i32(t0, t0, t1);
6698             tcg_gen_extu_i32_i64(dst, t0);
6699         }
6700         break;
6701     default:
6702         g_assert_not_reached(); /* all shift types handled above */
6703         break;
6704     }
6705 
6706     if (!sf) { /* zero extend final result */
6707         tcg_gen_ext32u_i64(dst, dst);
6708     }
6709 }
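/*
 * Added usage note: callers mask the variable shift amount themselves,
 * as the LSLV/LSRV/ASRV/RORV path below does:
 *
 *     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
 *     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
 *
 * which implements the ARM rule that register-specified shifts use
 * the amount modulo the register width.
 */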
6710 
6711 /* Shift a TCGv src by an immediate, put the result in dst.
6712  * The shift amount must be in range (this should always be true as the
6713  * relevant instructions will UNDEF on bad shift immediates).
6714  */
6715 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
6716                           enum a64_shift_type shift_type, unsigned int shift_i)
6717 {
6718     assert(shift_i < (sf ? 64 : 32));
6719 
6720     if (shift_i == 0) {
6721         tcg_gen_mov_i64(dst, src);
6722     } else {
6723         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
6724     }
6725 }
6726 
6727 /* Logical (shifted register)
6728  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
6729  * +----+-----+-----------+-------+---+------+--------+------+------+
6730  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
6731  * +----+-----+-----------+-------+---+------+--------+------+------+
6732  */
6733 static void disas_logic_reg(DisasContext *s, uint32_t insn)
6734 {
6735     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
6736     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
6737 
6738     sf = extract32(insn, 31, 1);
6739     opc = extract32(insn, 29, 2);
6740     shift_type = extract32(insn, 22, 2);
6741     invert = extract32(insn, 21, 1);
6742     rm = extract32(insn, 16, 5);
6743     shift_amount = extract32(insn, 10, 6);
6744     rn = extract32(insn, 5, 5);
6745     rd = extract32(insn, 0, 5);
6746 
6747     if (!sf && (shift_amount & (1 << 5))) {
6748         unallocated_encoding(s);
6749         return;
6750     }
6751 
6752     tcg_rd = cpu_reg(s, rd);
6753 
6754     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
6755         /* Unshifted ORR and ORN with WZR/XZR are the standard encodings
6756          * for register-register MOV and MVN, so they are worth special-casing.
6757          */
6758         tcg_rm = cpu_reg(s, rm);
6759         if (invert) {
6760             tcg_gen_not_i64(tcg_rd, tcg_rm);
6761             if (!sf) {
6762                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
6763             }
6764         } else {
6765             if (sf) {
6766                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
6767             } else {
6768                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
6769             }
6770         }
6771         return;
6772     }
6773 
6774     tcg_rm = read_cpu_reg(s, rm, sf);
6775 
6776     if (shift_amount) {
6777         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
6778     }
6779 
6780     tcg_rn = cpu_reg(s, rn);
6781 
6782     switch (opc | (invert << 2)) {
6783     case 0: /* AND */
6784     case 3: /* ANDS */
6785         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
6786         break;
6787     case 1: /* ORR */
6788         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
6789         break;
6790     case 2: /* EOR */
6791         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
6792         break;
6793     case 4: /* BIC */
6794     case 7: /* BICS */
6795         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
6796         break;
6797     case 5: /* ORN */
6798         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
6799         break;
6800     case 6: /* EON */
6801         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
6802         break;
6803     default:
6804         g_assert_not_reached();
6805         break;
6806     }
6807 
6808     if (!sf) {
6809         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
6810     }
6811 
6812     if (opc == 3) {
6813         gen_logic_CC(sf, tcg_rd);
6814     }
6815 }
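/*
 * Added example: the canonical aliases funnel through this decoder,
 * e.g. "MOV x0, x1" assembles to "ORR x0, xzr, x1" (opc == 1,
 * shift_amount == 0, rn == 31) and so takes the special-cased mov/mvn
 * path above instead of materialising an OR with zero.
 */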
6816 
6817 /*
6818  * Add/subtract (extended register)
6819  *
6820  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
6821  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
6822  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
6823  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
6824  *
6825  *  sf: 0 -> 32bit, 1 -> 64bit
6826  *  op: 0 -> add  , 1 -> sub
6827  *   S: 1 -> set flags
6828  * opt: 00
6829  * option: extension type (see DecodeRegExtend)
6830  * imm3: optional shift to Rm
6831  *
6832  * Rd = Rn + LSL(extend(Rm), amount)
6833  */
6834 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
6835 {
6836     int rd = extract32(insn, 0, 5);
6837     int rn = extract32(insn, 5, 5);
6838     int imm3 = extract32(insn, 10, 3);
6839     int option = extract32(insn, 13, 3);
6840     int rm = extract32(insn, 16, 5);
6841     int opt = extract32(insn, 22, 2);
6842     bool setflags = extract32(insn, 29, 1);
6843     bool sub_op = extract32(insn, 30, 1);
6844     bool sf = extract32(insn, 31, 1);
6845 
6846     TCGv_i64 tcg_rm, tcg_rn; /* temps */
6847     TCGv_i64 tcg_rd;
6848     TCGv_i64 tcg_result;
6849 
6850     if (imm3 > 4 || opt != 0) {
6851         unallocated_encoding(s);
6852         return;
6853     }
6854 
6855     /* Non-flag-setting ops may use SP. */
6856     if (!setflags) {
6857         tcg_rd = cpu_reg_sp(s, rd);
6858     } else {
6859         tcg_rd = cpu_reg(s, rd);
6860     }
6861     tcg_rn = read_cpu_reg_sp(s, rn, sf);
6862 
6863     tcg_rm = read_cpu_reg(s, rm, sf);
6864     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
6865 
6866     tcg_result = tcg_temp_new_i64();
6867 
6868     if (!setflags) {
6869         if (sub_op) {
6870             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
6871         } else {
6872             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
6873         }
6874     } else {
6875         if (sub_op) {
6876             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
6877         } else {
6878             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
6879         }
6880     }
6881 
6882     if (sf) {
6883         tcg_gen_mov_i64(tcg_rd, tcg_result);
6884     } else {
6885         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
6886     }
6887 }
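/*
 * Added example: "ADD x0, sp, w1, UXTW #2" decodes here with rn == 31
 * read as SP (the extended-register forms always allow SP for Rn,
 * while Rd allows SP only when not setting flags), 'option' selecting
 * the 32-bit zero-extension of Rm and imm3 == 2 giving the left
 * shift; imm3 values above 4 are unallocated.
 */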
6888 
6889 /*
6890  * Add/subtract (shifted register)
6891  *
6892  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
6893  * +--+--+--+-----------+-----+--+-------+---------+------+------+
6894  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
6895  * +--+--+--+-----------+-----+--+-------+---------+------+------+
6896  *
6897  *    sf: 0 -> 32bit, 1 -> 64bit
6898  *    op: 0 -> add  , 1 -> sub
6899  *     S: 1 -> set flags
6900  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
6901  *  imm6: Shift amount to apply to Rm before the add/sub
6902  */
6903 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
6904 {
6905     int rd = extract32(insn, 0, 5);
6906     int rn = extract32(insn, 5, 5);
6907     int imm6 = extract32(insn, 10, 6);
6908     int rm = extract32(insn, 16, 5);
6909     int shift_type = extract32(insn, 22, 2);
6910     bool setflags = extract32(insn, 29, 1);
6911     bool sub_op = extract32(insn, 30, 1);
6912     bool sf = extract32(insn, 31, 1);
6913 
6914     TCGv_i64 tcg_rd = cpu_reg(s, rd);
6915     TCGv_i64 tcg_rn, tcg_rm;
6916     TCGv_i64 tcg_result;
6917 
6918     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
6919         unallocated_encoding(s);
6920         return;
6921     }
6922 
6923     tcg_rn = read_cpu_reg(s, rn, sf);
6924     tcg_rm = read_cpu_reg(s, rm, sf);
6925 
6926     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
6927 
6928     tcg_result = tcg_temp_new_i64();
6929 
6930     if (!setflags) {
6931         if (sub_op) {
6932             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
6933         } else {
6934             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
6935         }
6936     } else {
6937         if (sub_op) {
6938             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
6939         } else {
6940             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
6941         }
6942     }
6943 
6944     if (sf) {
6945         tcg_gen_mov_i64(tcg_rd, tcg_result);
6946     } else {
6947         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
6948     }
6949 }
6950 
6951 /* Data-processing (3 source)
6952  *
6953  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
6954  *  +--+------+-----------+------+------+----+------+------+------+
6955  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6956  *  +--+------+-----------+------+------+----+------+------+------+
6957  */
6958 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
6959 {
6960     int rd = extract32(insn, 0, 5);
6961     int rn = extract32(insn, 5, 5);
6962     int ra = extract32(insn, 10, 5);
6963     int rm = extract32(insn, 16, 5);
6964     int op_id = (extract32(insn, 29, 3) << 4) |
6965         (extract32(insn, 21, 3) << 1) |
6966         extract32(insn, 15, 1);
6967     bool sf = extract32(insn, 31, 1);
6968     bool is_sub = extract32(op_id, 0, 1);
6969     bool is_high = extract32(op_id, 2, 1);
6970     bool is_signed = false;
6971     TCGv_i64 tcg_op1;
6972     TCGv_i64 tcg_op2;
6973     TCGv_i64 tcg_tmp;
6974 
6975     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
6976     switch (op_id) {
6977     case 0x42: /* SMADDL */
6978     case 0x43: /* SMSUBL */
6979     case 0x44: /* SMULH */
6980         is_signed = true;
6981         break;
6982     case 0x0: /* MADD (32bit) */
6983     case 0x1: /* MSUB (32bit) */
6984     case 0x40: /* MADD (64bit) */
6985     case 0x41: /* MSUB (64bit) */
6986     case 0x4a: /* UMADDL */
6987     case 0x4b: /* UMSUBL */
6988     case 0x4c: /* UMULH */
6989         break;
6990     default:
6991         unallocated_encoding(s);
6992         return;
6993     }
6994 
6995     if (is_high) {
6996         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
6997         TCGv_i64 tcg_rd = cpu_reg(s, rd);
6998         TCGv_i64 tcg_rn = cpu_reg(s, rn);
6999         TCGv_i64 tcg_rm = cpu_reg(s, rm);
7000 
7001         if (is_signed) {
7002             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
7003         } else {
7004             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
7005         }
7006         return;
7007     }
7008 
7009     tcg_op1 = tcg_temp_new_i64();
7010     tcg_op2 = tcg_temp_new_i64();
7011     tcg_tmp = tcg_temp_new_i64();
7012 
7013     if (op_id < 0x42) {
7014         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
7015         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
7016     } else {
7017         if (is_signed) {
7018             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
7019             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
7020         } else {
7021             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
7022             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
7023         }
7024     }
7025 
7026     if (ra == 31 && !is_sub) {
7027         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
7028         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
7029     } else {
7030         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
7031         if (is_sub) {
7032             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
7033         } else {
7034             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
7035         }
7036     }
7037 
7038     if (!sf) {
7039         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
7040     }
7041 }
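/*
 * Added worked example of the op_id packing (sf:op54:op31:o0) used
 * above: SMADDL has sf=1, op54=00, op31=001, o0=0, i.e.
 * (0b100 << 4) | (0b001 << 1) | 0 == 0x42, matching its case label.
 */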
7042 
7043 /* Add/subtract (with carry)
7044  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
7045  * +--+--+--+------------------------+------+-------------+------+-----+
7046  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
7047  * +--+--+--+------------------------+------+-------------+------+-----+
7048  */
7049 
7050 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
7051 {
7052     unsigned int sf, op, setflags, rm, rn, rd;
7053     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
7054 
7055     sf = extract32(insn, 31, 1);
7056     op = extract32(insn, 30, 1);
7057     setflags = extract32(insn, 29, 1);
7058     rm = extract32(insn, 16, 5);
7059     rn = extract32(insn, 5, 5);
7060     rd = extract32(insn, 0, 5);
7061 
7062     tcg_rd = cpu_reg(s, rd);
7063     tcg_rn = cpu_reg(s, rn);
7064 
7065     if (op) {
7066         tcg_y = tcg_temp_new_i64();
7067         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
7068     } else {
7069         tcg_y = cpu_reg(s, rm);
7070     }
7071 
7072     if (setflags) {
7073         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
7074     } else {
7075         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
7076     }
7077 }
7078 
7079 /*
7080  * Rotate right into flags
7081  *  31 30 29                21       15          10      5  4      0
7082  * +--+--+--+-----------------+--------+-----------+------+--+------+
7083  * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
7084  * +--+--+--+-----------------+--------+-----------+------+--+------+
7085  */
7086 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
7087 {
7088     int mask = extract32(insn, 0, 4);
7089     int o2 = extract32(insn, 4, 1);
7090     int rn = extract32(insn, 5, 5);
7091     int imm6 = extract32(insn, 15, 6);
7092     int sf_op_s = extract32(insn, 29, 3);
7093     TCGv_i64 tcg_rn;
7094     TCGv_i32 nzcv;
7095 
7096     if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
7097         unallocated_encoding(s);
7098         return;
7099     }
7100 
7101     tcg_rn = read_cpu_reg(s, rn, 1);
7102     tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
7103 
7104     nzcv = tcg_temp_new_i32();
7105     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
7106 
7107     if (mask & 8) { /* N */
7108         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
7109     }
7110     if (mask & 4) { /* Z */
7111         tcg_gen_not_i32(cpu_ZF, nzcv);
7112         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
7113     }
7114     if (mask & 2) { /* C */
7115         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
7116     }
7117     if (mask & 1) { /* V */
7118         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
7119     }
7120 }
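/*
 * Added note: this is the FEAT_FlagM RMIF instruction: Xn is rotated
 * right by imm6 and bits <3:0> of the result supply replacement
 * N/Z/C/V values, but only for the flags selected by 'mask';
 * unselected flags are left untouched.
 */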
7121 
7122 /*
7123  * Evaluate into flags
7124  *  31 30 29                21        15   14        10      5  4      0
7125  * +--+--+--+-----------------+---------+----+---------+------+--+------+
7126  * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
7127  * +--+--+--+-----------------+---------+----+---------+------+--+------+
7128  */
7129 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
7130 {
7131     int o3_mask = extract32(insn, 0, 5);
7132     int rn = extract32(insn, 5, 5);
7133     int o2 = extract32(insn, 15, 6);
7134     int sz = extract32(insn, 14, 1);
7135     int sf_op_s = extract32(insn, 29, 3);
7136     TCGv_i32 tmp;
7137     int shift;
7138 
7139     if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
7140         !dc_isar_feature(aa64_condm_4, s)) {
7141         unallocated_encoding(s);
7142         return;
7143     }
7144     shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
7145 
7146     tmp = tcg_temp_new_i32();
7147     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
7148     tcg_gen_shli_i32(cpu_NF, tmp, shift);
7149     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
7150     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
7151     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
7152 }
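/*
 * Added note on the SETF8/SETF16 flag computation above: shifting the
 * operand left by 24 (or 16) puts the sign bit of the byte (halfword)
 * into bit 31 of NF, and the same shifted value is zero exactly when
 * the byte (halfword) is zero, so it doubles as ZF. VF ends up as the
 * bit just above the sign bit XORed with the sign bit itself, the
 * architected overflow value.
 */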
7153 
7154 /* Conditional compare (immediate / register)
7155  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
7156  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
7157  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
7158  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
7159  *        [1]                             y                [0]       [0]
7160  */
7161 static void disas_cc(DisasContext *s, uint32_t insn)
7162 {
7163     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
7164     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
7165     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
7166     DisasCompare c;
7167 
7168     if (!extract32(insn, 29, 1)) {
7169         unallocated_encoding(s);
7170         return;
7171     }
7172     if (insn & (1 << 10 | 1 << 4)) {
7173         unallocated_encoding(s);
7174         return;
7175     }
7176     sf = extract32(insn, 31, 1);
7177     op = extract32(insn, 30, 1);
7178     is_imm = extract32(insn, 11, 1);
7179     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
7180     cond = extract32(insn, 12, 4);
7181     rn = extract32(insn, 5, 5);
7182     nzcv = extract32(insn, 0, 4);
7183 
7184     /* Set T0 = !COND.  */
7185     tcg_t0 = tcg_temp_new_i32();
7186     arm_test_cc(&c, cond);
7187     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
7188 
7189     /* Load the arguments for the new comparison.  */
7190     if (is_imm) {
7191         tcg_y = tcg_temp_new_i64();
7192         tcg_gen_movi_i64(tcg_y, y);
7193     } else {
7194         tcg_y = cpu_reg(s, y);
7195     }
7196     tcg_rn = cpu_reg(s, rn);
7197 
7198     /* Set the flags for the new comparison.  */
7199     tcg_tmp = tcg_temp_new_i64();
7200     if (op) {
7201         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
7202     } else {
7203         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
7204     }
7205 
7206     /* If COND was false, force the flags to #nzcv.  Compute two masks
7207      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
7208      * For tcg hosts that support ANDC, we can make do with just T1.
7209      * In either case, allow the tcg optimizer to delete any unused mask.
7210      */
7211     tcg_t1 = tcg_temp_new_i32();
7212     tcg_t2 = tcg_temp_new_i32();
7213     tcg_gen_neg_i32(tcg_t1, tcg_t0);
7214     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
7215 
7216     if (nzcv & 8) { /* N */
7217         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
7218     } else {
7219         if (TCG_TARGET_HAS_andc_i32) {
7220             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
7221         } else {
7222             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
7223         }
7224     }
7225     if (nzcv & 4) { /* Z */
7226         if (TCG_TARGET_HAS_andc_i32) {
7227             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
7228         } else {
7229             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
7230         }
7231     } else {
7232         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
7233     }
7234     if (nzcv & 2) { /* C */
7235         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
7236     } else {
7237         if (TCG_TARGET_HAS_andc_i32) {
7238             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
7239         } else {
7240             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
7241         }
7242     }
7243     if (nzcv & 1) { /* V */
7244         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
7245     } else {
7246         if (TCG_TARGET_HAS_andc_i32) {
7247             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
7248         } else {
7249             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
7250         }
7251     }
7252 }
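/*
 * Added worked example: "CCMP x0, #5, #4, eq" reaches here with
 * is_imm set, y == 5, cond == EQ and nzcv == 0b0100. If EQ holds the
 * flags become those of x0 - 5; otherwise T0 == 1 and the or/andc
 * masking above forces NZCV to 0100, i.e. just Z set.
 */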
7253 
7254 /* Conditional select
7255  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
7256  * +----+----+---+-----------------+------+------+-----+------+------+
7257  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
7258  * +----+----+---+-----------------+------+------+-----+------+------+
7259  */
7260 static void disas_cond_select(DisasContext *s, uint32_t insn)
7261 {
7262     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
7263     TCGv_i64 tcg_rd, zero;
7264     DisasCompare64 c;
7265 
7266     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
7267         /* S == 1 or op2<1> == 1 */
7268         unallocated_encoding(s);
7269         return;
7270     }
7271     sf = extract32(insn, 31, 1);
7272     else_inv = extract32(insn, 30, 1);
7273     rm = extract32(insn, 16, 5);
7274     cond = extract32(insn, 12, 4);
7275     else_inc = extract32(insn, 10, 1);
7276     rn = extract32(insn, 5, 5);
7277     rd = extract32(insn, 0, 5);
7278 
7279     tcg_rd = cpu_reg(s, rd);
7280 
7281     a64_test_cc(&c, cond);
7282     zero = tcg_constant_i64(0);
7283 
7284     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
7285         /* CSET & CSETM.  */
7286         if (else_inv) {
7287             tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
7288                                    tcg_rd, c.value, zero);
7289         } else {
7290             tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
7291                                 tcg_rd, c.value, zero);
7292         }
7293     } else {
7294         TCGv_i64 t_true = cpu_reg(s, rn);
7295         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
7296         if (else_inv && else_inc) {
7297             tcg_gen_neg_i64(t_false, t_false);
7298         } else if (else_inv) {
7299             tcg_gen_not_i64(t_false, t_false);
7300         } else if (else_inc) {
7301             tcg_gen_addi_i64(t_false, t_false, 1);
7302         }
7303         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
7304     }
7305 
7306     if (!sf) {
7307         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7308     }
7309 }
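/*
 * Added note: CSET and CSETM are aliases of CSINC/CSINV with
 * Rn == Rm == ZR, e.g. "CSET w0, lt" is "CSINC w0, wzr, wzr, ge";
 * the rn == rm == 31 fast path above emits a single setcond (or
 * negsetcond) rather than a movcond between two constructed values.
 */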
7310 
7311 static void handle_clz(DisasContext *s, unsigned int sf,
7312                        unsigned int rn, unsigned int rd)
7313 {
7314     TCGv_i64 tcg_rd, tcg_rn;
7315     tcg_rd = cpu_reg(s, rd);
7316     tcg_rn = cpu_reg(s, rn);
7317 
7318     if (sf) {
7319         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
7320     } else {
7321         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
7322         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
7323         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
7324         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
7325     }
7326 }
7327 
7328 static void handle_cls(DisasContext *s, unsigned int sf,
7329                        unsigned int rn, unsigned int rd)
7330 {
7331     TCGv_i64 tcg_rd, tcg_rn;
7332     tcg_rd = cpu_reg(s, rd);
7333     tcg_rn = cpu_reg(s, rn);
7334 
7335     if (sf) {
7336         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
7337     } else {
7338         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
7339         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
7340         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
7341         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
7342     }
7343 }
7344 
7345 static void handle_rbit(DisasContext *s, unsigned int sf,
7346                         unsigned int rn, unsigned int rd)
7347 {
7348     TCGv_i64 tcg_rd, tcg_rn;
7349     tcg_rd = cpu_reg(s, rd);
7350     tcg_rn = cpu_reg(s, rn);
7351 
7352     if (sf) {
7353         gen_helper_rbit64(tcg_rd, tcg_rn);
7354     } else {
7355         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
7356         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
7357         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
7358         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
7359     }
7360 }
7361 
7362 /* REV with sf==1, opcode==3 ("REV64") */
7363 static void handle_rev64(DisasContext *s, unsigned int sf,
7364                          unsigned int rn, unsigned int rd)
7365 {
7366     if (!sf) {
7367         unallocated_encoding(s);
7368         return;
7369     }
7370     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
7371 }
7372 
7373 /* REV (sf==0, opcode==2) and
7374  * REV32 (sf==1, opcode==2)
7375  */
7376 static void handle_rev32(DisasContext *s, unsigned int sf,
7377                          unsigned int rn, unsigned int rd)
7378 {
7379     TCGv_i64 tcg_rd = cpu_reg(s, rd);
7380     TCGv_i64 tcg_rn = cpu_reg(s, rn);
7381 
7382     if (sf) {
7383         tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
7384         tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
7385     } else {
7386         tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
7387     }
7388 }
7389 
7390 /* REV16 (opcode==1) */
7391 static void handle_rev16(DisasContext *s, unsigned int sf,
7392                          unsigned int rn, unsigned int rd)
7393 {
7394     TCGv_i64 tcg_rd = cpu_reg(s, rd);
7395     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7396     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
7397     TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
7398 
7399     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
7400     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
7401     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
7402     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
7403     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
7404 }
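/*
 * Added worked example of the mask trick above (sf == 0): with
 * rn == 0xAABBCCDD, tcg_tmp becomes 0x00AA00CC, the masked rd value
 * 0x00BB00DD shifts up to 0xBB00DD00, and the final OR yields
 * 0xBBAADDCC, i.e. the bytes swapped within each 16-bit lane.
 */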
7405 
7406 /* Data-processing (1 source)
7407  *   31  30  29  28             21 20     16 15    10 9    5 4    0
7408  * +----+---+---+-----------------+---------+--------+------+------+
7409  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
7410  * +----+---+---+-----------------+---------+--------+------+------+
7411  */
7412 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
7413 {
7414     unsigned int sf, opcode, opcode2, rn, rd;
7415     TCGv_i64 tcg_rd;
7416 
7417     if (extract32(insn, 29, 1)) {
7418         unallocated_encoding(s);
7419         return;
7420     }
7421 
7422     sf = extract32(insn, 31, 1);
7423     opcode = extract32(insn, 10, 6);
7424     opcode2 = extract32(insn, 16, 5);
7425     rn = extract32(insn, 5, 5);
7426     rd = extract32(insn, 0, 5);
7427 
7428 #define MAP(SF, O2, O1) ((SF) | ((O1) << 1) | ((O2) << 7))
7429 
7430     switch (MAP(sf, opcode2, opcode)) {
7431     case MAP(0, 0x00, 0x00): /* RBIT */
7432     case MAP(1, 0x00, 0x00):
7433         handle_rbit(s, sf, rn, rd);
7434         break;
7435     case MAP(0, 0x00, 0x01): /* REV16 */
7436     case MAP(1, 0x00, 0x01):
7437         handle_rev16(s, sf, rn, rd);
7438         break;
7439     case MAP(0, 0x00, 0x02): /* REV/REV32 */
7440     case MAP(1, 0x00, 0x02):
7441         handle_rev32(s, sf, rn, rd);
7442         break;
7443     case MAP(1, 0x00, 0x03): /* REV64 */
7444         handle_rev64(s, sf, rn, rd);
7445         break;
7446     case MAP(0, 0x00, 0x04): /* CLZ */
7447     case MAP(1, 0x00, 0x04):
7448         handle_clz(s, sf, rn, rd);
7449         break;
7450     case MAP(0, 0x00, 0x05): /* CLS */
7451     case MAP(1, 0x00, 0x05):
7452         handle_cls(s, sf, rn, rd);
7453         break;
7454     case MAP(1, 0x01, 0x00): /* PACIA */
7455         if (s->pauth_active) {
7456             tcg_rd = cpu_reg(s, rd);
7457             gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
7458         } else if (!dc_isar_feature(aa64_pauth, s)) {
7459             goto do_unallocated;
7460         }
7461         break;
7462     case MAP(1, 0x01, 0x01): /* PACIB */
7463         if (s->pauth_active) {
7464             tcg_rd = cpu_reg(s, rd);
7465             gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
7466         } else if (!dc_isar_feature(aa64_pauth, s)) {
7467             goto do_unallocated;
7468         }
7469         break;
7470     case MAP(1, 0x01, 0x02): /* PACDA */
7471         if (s->pauth_active) {
7472             tcg_rd = cpu_reg(s, rd);
7473             gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
7474         } else if (!dc_isar_feature(aa64_pauth, s)) {
7475             goto do_unallocated;
7476         }
7477         break;
7478     case MAP(1, 0x01, 0x03): /* PACDB */
7479         if (s->pauth_active) {
7480             tcg_rd = cpu_reg(s, rd);
7481             gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
7482         } else if (!dc_isar_feature(aa64_pauth, s)) {
7483             goto do_unallocated;
7484         }
7485         break;
7486     case MAP(1, 0x01, 0x04): /* AUTIA */
7487         if (s->pauth_active) {
7488             tcg_rd = cpu_reg(s, rd);
7489             gen_helper_autia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
7490         } else if (!dc_isar_feature(aa64_pauth, s)) {
7491             goto do_unallocated;
7492         }
7493         break;
7494     case MAP(1, 0x01, 0x05): /* AUTIB */
7495         if (s->pauth_active) {
7496             tcg_rd = cpu_reg(s, rd);
7497             gen_helper_autib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
7498         } else if (!dc_isar_feature(aa64_pauth, s)) {
7499             goto do_unallocated;
7500         }
7501         break;
7502     case MAP(1, 0x01, 0x06): /* AUTDA */
7503         if (s->pauth_active) {
7504             tcg_rd = cpu_reg(s, rd);
7505             gen_helper_autda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
7506         } else if (!dc_isar_feature(aa64_pauth, s)) {
7507             goto do_unallocated;
7508         }
7509         break;
7510     case MAP(1, 0x01, 0x07): /* AUTDB */
7511         if (s->pauth_active) {
7512             tcg_rd = cpu_reg(s, rd);
7513             gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
7514         } else if (!dc_isar_feature(aa64_pauth, s)) {
7515             goto do_unallocated;
7516         }
7517         break;
7518     case MAP(1, 0x01, 0x08): /* PACIZA */
7519         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7520             goto do_unallocated;
7521         } else if (s->pauth_active) {
7522             tcg_rd = cpu_reg(s, rd);
7523             gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
7524         }
7525         break;
7526     case MAP(1, 0x01, 0x09): /* PACIZB */
7527         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7528             goto do_unallocated;
7529         } else if (s->pauth_active) {
7530             tcg_rd = cpu_reg(s, rd);
7531             gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
7532         }
7533         break;
7534     case MAP(1, 0x01, 0x0a): /* PACDZA */
7535         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7536             goto do_unallocated;
7537         } else if (s->pauth_active) {
7538             tcg_rd = cpu_reg(s, rd);
7539             gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
7540         }
7541         break;
7542     case MAP(1, 0x01, 0x0b): /* PACDZB */
7543         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7544             goto do_unallocated;
7545         } else if (s->pauth_active) {
7546             tcg_rd = cpu_reg(s, rd);
7547             gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
7548         }
7549         break;
7550     case MAP(1, 0x01, 0x0c): /* AUTIZA */
7551         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7552             goto do_unallocated;
7553         } else if (s->pauth_active) {
7554             tcg_rd = cpu_reg(s, rd);
7555             gen_helper_autia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
7556         }
7557         break;
7558     case MAP(1, 0x01, 0x0d): /* AUTIZB */
7559         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7560             goto do_unallocated;
7561         } else if (s->pauth_active) {
7562             tcg_rd = cpu_reg(s, rd);
7563             gen_helper_autib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
7564         }
7565         break;
7566     case MAP(1, 0x01, 0x0e): /* AUTDZA */
7567         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7568             goto do_unallocated;
7569         } else if (s->pauth_active) {
7570             tcg_rd = cpu_reg(s, rd);
7571             gen_helper_autda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
7572         }
7573         break;
7574     case MAP(1, 0x01, 0x0f): /* AUTDZB */
7575         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7576             goto do_unallocated;
7577         } else if (s->pauth_active) {
7578             tcg_rd = cpu_reg(s, rd);
7579             gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
7580         }
7581         break;
7582     case MAP(1, 0x01, 0x10): /* XPACI */
7583         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7584             goto do_unallocated;
7585         } else if (s->pauth_active) {
7586             tcg_rd = cpu_reg(s, rd);
7587             gen_helper_xpaci(tcg_rd, tcg_env, tcg_rd);
7588         }
7589         break;
7590     case MAP(1, 0x01, 0x11): /* XPACD */
7591         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
7592             goto do_unallocated;
7593         } else if (s->pauth_active) {
7594             tcg_rd = cpu_reg(s, rd);
7595             gen_helper_xpacd(tcg_rd, tcg_env, tcg_rd);
7596         }
7597         break;
7598     default:
7599     do_unallocated:
7600         unallocated_encoding(s);
7601         break;
7602     }
7603 
7604 #undef MAP
7605 }
7606 
7607 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
7608                        unsigned int rm, unsigned int rn, unsigned int rd)
7609 {
7610     TCGv_i64 tcg_n, tcg_m, tcg_rd;
7611     tcg_rd = cpu_reg(s, rd);
7612 
7613     if (!sf && is_signed) {
7614         tcg_n = tcg_temp_new_i64();
7615         tcg_m = tcg_temp_new_i64();
7616         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
7617         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
7618     } else {
7619         tcg_n = read_cpu_reg(s, rn, sf);
7620         tcg_m = read_cpu_reg(s, rm, sf);
7621     }
7622 
7623     if (is_signed) {
7624         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
7625     } else {
7626         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
7627     }
7628 
7629     if (!sf) { /* zero extend final result */
7630         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7631     }
7632 }
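/*
 * Added note: the 32-bit signed case must sign-extend both inputs so
 * that the 64-bit helper divides the intended values for negative
 * operands; the helpers also return the ARM-mandated result of 0 for
 * division by zero.
 */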
7633 
7634 /* LSLV, LSRV, ASRV, RORV */
7635 static void handle_shift_reg(DisasContext *s,
7636                              enum a64_shift_type shift_type, unsigned int sf,
7637                              unsigned int rm, unsigned int rn, unsigned int rd)
7638 {
7639     TCGv_i64 tcg_shift = tcg_temp_new_i64();
7640     TCGv_i64 tcg_rd = cpu_reg(s, rd);
7641     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
7642 
7643     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
7644     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
7645 }
7646 
7647 /* CRC32[BHWX], CRC32C[BHWX] */
7648 static void handle_crc32(DisasContext *s,
7649                          unsigned int sf, unsigned int sz, bool crc32c,
7650                          unsigned int rm, unsigned int rn, unsigned int rd)
7651 {
7652     TCGv_i64 tcg_acc, tcg_val;
7653     TCGv_i32 tcg_bytes;
7654 
7655     if (!dc_isar_feature(aa64_crc32, s)
7656         || (sf == 1 && sz != 3)
7657         || (sf == 0 && sz == 3)) {
7658         unallocated_encoding(s);
7659         return;
7660     }
7661 
7662     if (sz == 3) {
7663         tcg_val = cpu_reg(s, rm);
7664     } else {
7665         uint64_t mask;
7666         switch (sz) {
7667         case 0:
7668             mask = 0xFF;
7669             break;
7670         case 1:
7671             mask = 0xFFFF;
7672             break;
7673         case 2:
7674             mask = 0xFFFFFFFF;
7675             break;
7676         default:
7677             g_assert_not_reached();
7678         }
7679         tcg_val = tcg_temp_new_i64();
7680         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
7681     }
7682 
7683     tcg_acc = cpu_reg(s, rn);
7684     tcg_bytes = tcg_constant_i32(1 << sz);
7685 
7686     if (crc32c) {
7687         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
7688     } else {
7689         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
7690     }
7691 }
7692 
7693 /* Data-processing (2 source)
7694  *   31   30  29 28             21 20  16 15    10 9    5 4    0
7695  * +----+---+---+-----------------+------+--------+------+------+
7696  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
7697  * +----+---+---+-----------------+------+--------+------+------+
7698  */
7699 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
7700 {
7701     unsigned int sf, rm, opcode, rn, rd, setflag;
7702     sf = extract32(insn, 31, 1);
7703     setflag = extract32(insn, 29, 1);
7704     rm = extract32(insn, 16, 5);
7705     opcode = extract32(insn, 10, 6);
7706     rn = extract32(insn, 5, 5);
7707     rd = extract32(insn, 0, 5);
7708 
7709     if (setflag && opcode != 0) {
7710         unallocated_encoding(s);
7711         return;
7712     }
7713 
7714     switch (opcode) {
7715     case 0: /* SUBP(S) */
7716         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
7717             goto do_unallocated;
7718         } else {
7719             TCGv_i64 tcg_n, tcg_m, tcg_d;
7720 
7721             tcg_n = read_cpu_reg_sp(s, rn, true);
7722             tcg_m = read_cpu_reg_sp(s, rm, true);
7723             tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
7724             tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
7725             tcg_d = cpu_reg(s, rd);
7726 
7727             if (setflag) {
7728                 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
7729             } else {
7730                 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
7731             }
7732         }
7733         break;
7734     case 2: /* UDIV */
7735         handle_div(s, false, sf, rm, rn, rd);
7736         break;
7737     case 3: /* SDIV */
7738         handle_div(s, true, sf, rm, rn, rd);
7739         break;
7740     case 4: /* IRG */
7741         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
7742             goto do_unallocated;
7743         }
7744         if (s->ata[0]) {
7745             gen_helper_irg(cpu_reg_sp(s, rd), tcg_env,
7746                            cpu_reg_sp(s, rn), cpu_reg(s, rm));
7747         } else {
7748             gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
7749                                              cpu_reg_sp(s, rn));
7750         }
7751         break;
7752     case 5: /* GMI */
7753         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
7754             goto do_unallocated;
7755         } else {
7756             TCGv_i64 t = tcg_temp_new_i64();
7757 
7758             tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
7759             tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
7760             tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
7761         }
7762         break;
7763     case 8: /* LSLV */
7764         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
7765         break;
7766     case 9: /* LSRV */
7767         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
7768         break;
7769     case 10: /* ASRV */
7770         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
7771         break;
7772     case 11: /* RORV */
7773         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
7774         break;
7775     case 12: /* PACGA */
7776         if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
7777             goto do_unallocated;
7778         }
7779         gen_helper_pacga(cpu_reg(s, rd), tcg_env,
7780                          cpu_reg(s, rn), cpu_reg_sp(s, rm));
7781         break;
7782     case 16:
7783     case 17:
7784     case 18:
7785     case 19:
7786     case 20:
7787     case 21:
7788     case 22:
7789     case 23: /* CRC32 */
7790     {
7791         int sz = extract32(opcode, 0, 2);
7792         bool crc32c = extract32(opcode, 2, 1);
7793         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
7794         break;
7795     }
7796     default:
7797     do_unallocated:
7798         unallocated_encoding(s);
7799         break;
7800     }
7801 }
7802 
7803 /*
7804  * Data processing - register
7805  *  31  30 29  28      25    21  20  16      10         0
7806  * +--+---+--+---+-------+-----+-------+-------+---------+
7807  * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
7808  * +--+---+--+---+-------+-----+-------+-------+---------+
7809  */
7810 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
7811 {
7812     int op0 = extract32(insn, 30, 1);
7813     int op1 = extract32(insn, 28, 1);
7814     int op2 = extract32(insn, 21, 4);
7815     int op3 = extract32(insn, 10, 6);
7816 
7817     if (!op1) {
7818         if (op2 & 8) {
7819             if (op2 & 1) {
7820                 /* Add/sub (extended register) */
7821                 disas_add_sub_ext_reg(s, insn);
7822             } else {
7823                 /* Add/sub (shifted register) */
7824                 disas_add_sub_reg(s, insn);
7825             }
7826         } else {
7827             /* Logical (shifted register) */
7828             disas_logic_reg(s, insn);
7829         }
7830         return;
7831     }
7832 
7833     switch (op2) {
7834     case 0x0:
7835         switch (op3) {
7836         case 0x00: /* Add/subtract (with carry) */
7837             disas_adc_sbc(s, insn);
7838             break;
7839 
7840         case 0x01: /* Rotate right into flags */
7841         case 0x21:
7842             disas_rotate_right_into_flags(s, insn);
7843             break;
7844 
7845         case 0x02: /* Evaluate into flags */
7846         case 0x12:
7847         case 0x22:
7848         case 0x32:
7849             disas_evaluate_into_flags(s, insn);
7850             break;
7851 
7852         default:
7853             goto do_unallocated;
7854         }
7855         break;
7856 
7857     case 0x2: /* Conditional compare */
7858         disas_cc(s, insn); /* both imm and reg forms */
7859         break;
7860 
7861     case 0x4: /* Conditional select */
7862         disas_cond_select(s, insn);
7863         break;
7864 
7865     case 0x6: /* Data-processing */
7866         if (op0) {    /* (1 source) */
7867             disas_data_proc_1src(s, insn);
7868         } else {      /* (2 source) */
7869             disas_data_proc_2src(s, insn);
7870         }
7871         break;
7872     case 0x8 ... 0xf: /* (3 source) */
7873         disas_data_proc_3src(s, insn);
7874         break;
7875 
7876     default:
7877     do_unallocated:
7878         unallocated_encoding(s);
7879         break;
7880     }
7881 }
7882 
7883 static void handle_fp_compare(DisasContext *s, int size,
7884                               unsigned int rn, unsigned int rm,
7885                               bool cmp_with_zero, bool signal_all_nans)
7886 {
7887     TCGv_i64 tcg_flags = tcg_temp_new_i64();
7888     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7889 
7890     if (size == MO_64) {
7891         TCGv_i64 tcg_vn, tcg_vm;
7892 
7893         tcg_vn = read_fp_dreg(s, rn);
7894         if (cmp_with_zero) {
7895             tcg_vm = tcg_constant_i64(0);
7896         } else {
7897             tcg_vm = read_fp_dreg(s, rm);
7898         }
7899         if (signal_all_nans) {
7900             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7901         } else {
7902             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7903         }
7904     } else {
7905         TCGv_i32 tcg_vn = tcg_temp_new_i32();
7906         TCGv_i32 tcg_vm = tcg_temp_new_i32();
7907 
7908         read_vec_element_i32(s, tcg_vn, rn, 0, size);
7909         if (cmp_with_zero) {
7910             tcg_gen_movi_i32(tcg_vm, 0);
7911         } else {
7912             read_vec_element_i32(s, tcg_vm, rm, 0, size);
7913         }
7914 
7915         switch (size) {
7916         case MO_32:
7917             if (signal_all_nans) {
7918                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7919             } else {
7920                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7921             }
7922             break;
7923         case MO_16:
7924             if (signal_all_nans) {
7925                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7926             } else {
7927                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7928             }
7929             break;
7930         default:
7931             g_assert_not_reached();
7932         }
7933     }
7934 
7935     gen_set_nzcv(tcg_flags);
7936 }
7937 
7938 /* Floating point compare
7939  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
7940  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
7941  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
7942  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
7943  */
7944 static void disas_fp_compare(DisasContext *s, uint32_t insn)
7945 {
7946     unsigned int mos, type, rm, op, rn, opc, op2r;
7947     int size;
7948 
7949     mos = extract32(insn, 29, 3);
7950     type = extract32(insn, 22, 2);
7951     rm = extract32(insn, 16, 5);
7952     op = extract32(insn, 14, 2);
7953     rn = extract32(insn, 5, 5);
7954     opc = extract32(insn, 3, 2);
7955     op2r = extract32(insn, 0, 3);
7956 
7957     if (mos || op || op2r) {
7958         unallocated_encoding(s);
7959         return;
7960     }
7961 
7962     switch (type) {
7963     case 0:
7964         size = MO_32;
7965         break;
7966     case 1:
7967         size = MO_64;
7968         break;
7969     case 3:
7970         size = MO_16;
7971         if (dc_isar_feature(aa64_fp16, s)) {
7972             break;
7973         }
7974         /* fallthru */
7975     default:
7976         unallocated_encoding(s);
7977         return;
7978     }
7979 
7980     if (!fp_access_check(s)) {
7981         return;
7982     }
7983 
7984     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
7985 }
7986 
7987 /* Floating point conditional compare
7988  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
7989  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
7990  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
7991  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
7992  */
7993 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
7994 {
7995     unsigned int mos, type, rm, cond, rn, op, nzcv;
7996     TCGLabel *label_continue = NULL;
7997     int size;
7998 
7999     mos = extract32(insn, 29, 3);
8000     type = extract32(insn, 22, 2);
8001     rm = extract32(insn, 16, 5);
8002     cond = extract32(insn, 12, 4);
8003     rn = extract32(insn, 5, 5);
8004     op = extract32(insn, 4, 1);
8005     nzcv = extract32(insn, 0, 4);
8006 
8007     if (mos) {
8008         unallocated_encoding(s);
8009         return;
8010     }
8011 
8012     switch (type) {
8013     case 0:
8014         size = MO_32;
8015         break;
8016     case 1:
8017         size = MO_64;
8018         break;
8019     case 3:
8020         size = MO_16;
8021         if (dc_isar_feature(aa64_fp16, s)) {
8022             break;
8023         }
8024         /* fallthru */
8025     default:
8026         unallocated_encoding(s);
8027         return;
8028     }
8029 
8030     if (!fp_access_check(s)) {
8031         return;
8032     }
8033 
8034     if (cond < 0x0e) { /* not always */
8035         TCGLabel *label_match = gen_new_label();
8036         label_continue = gen_new_label();
8037         arm_gen_test_cc(cond, label_match);
8038         /* nomatch: */
8039         gen_set_nzcv(tcg_constant_i64(nzcv << 28));
8040         tcg_gen_br(label_continue);
8041         gen_set_label(label_match);
8042     }
8043 
8044     handle_fp_compare(s, size, rn, rm, false, op);
8045 
8046     if (cond < 0x0e) {
8047         gen_set_label(label_continue);
8048     }
8049 }
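/*
 * Added note on the design: unlike the integer disas_cc above, which
 * computes both outcomes and merges them branchlessly with masks,
 * FCCMP is generated with real branches: when the condition fails we
 * load the immediate nzcv and jump over the compare, presumably
 * because the FP compare is a helper call and too heavyweight to run
 * unconditionally.
 */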
8050 
8051 /* Floating-point data-processing (1 source) - half precision */
8052 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
8053 {
8054     TCGv_ptr fpst = NULL;
8055     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
8056     TCGv_i32 tcg_res = tcg_temp_new_i32();
8057 
8058     switch (opcode) {
8059     case 0x0: /* FMOV */
8060         tcg_gen_mov_i32(tcg_res, tcg_op);
8061         break;
8062     case 0x1: /* FABS */
8063         gen_vfp_absh(tcg_res, tcg_op);
8064         break;
8065     case 0x2: /* FNEG */
8066         gen_vfp_negh(tcg_res, tcg_op);
8067         break;
8068     case 0x3: /* FSQRT */
8069         fpst = fpstatus_ptr(FPST_FPCR_F16);
8070         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
8071         break;
8072     case 0x8: /* FRINTN */
8073     case 0x9: /* FRINTP */
8074     case 0xa: /* FRINTM */
8075     case 0xb: /* FRINTZ */
8076     case 0xc: /* FRINTA */
8077     {
8078         TCGv_i32 tcg_rmode;
8079 
8080         fpst = fpstatus_ptr(FPST_FPCR_F16);
8081         tcg_rmode = gen_set_rmode(opcode & 7, fpst);
8082         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
8083         gen_restore_rmode(tcg_rmode, fpst);
8084         break;
8085     }
8086     case 0xe: /* FRINTX */
8087         fpst = fpstatus_ptr(FPST_FPCR_F16);
8088         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
8089         break;
8090     case 0xf: /* FRINTI */
8091         fpst = fpstatus_ptr(FPST_FPCR_F16);
8092         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
8093         break;
8094     default:
8095         g_assert_not_reached();
8096     }
8097 
8098     write_fp_sreg(s, rd, tcg_res);
8099 }
8100 
8101 /* Floating-point data-processing (1 source) - single precision */
8102 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
8103 {
8104     void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
8105     TCGv_i32 tcg_op, tcg_res;
8106     TCGv_ptr fpst;
8107     int rmode = -1;
8108 
8109     tcg_op = read_fp_sreg(s, rn);
8110     tcg_res = tcg_temp_new_i32();
8111 
8112     switch (opcode) {
8113     case 0x0: /* FMOV */
8114         tcg_gen_mov_i32(tcg_res, tcg_op);
8115         goto done;
8116     case 0x1: /* FABS */
8117         gen_vfp_abss(tcg_res, tcg_op);
8118         goto done;
8119     case 0x2: /* FNEG */
8120         gen_vfp_negs(tcg_res, tcg_op);
8121         goto done;
8122     case 0x3: /* FSQRT */
8123         gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
8124         goto done;
8125     case 0x6: /* BFCVT */
8126         gen_fpst = gen_helper_bfcvt;
8127         break;
8128     case 0x8: /* FRINTN */
8129     case 0x9: /* FRINTP */
8130     case 0xa: /* FRINTM */
8131     case 0xb: /* FRINTZ */
8132     case 0xc: /* FRINTA */
8133         rmode = opcode & 7;
8134         gen_fpst = gen_helper_rints;
8135         break;
8136     case 0xe: /* FRINTX */
8137         gen_fpst = gen_helper_rints_exact;
8138         break;
8139     case 0xf: /* FRINTI */
8140         gen_fpst = gen_helper_rints;
8141         break;
8142     case 0x10: /* FRINT32Z */
8143         rmode = FPROUNDING_ZERO;
8144         gen_fpst = gen_helper_frint32_s;
8145         break;
8146     case 0x11: /* FRINT32X */
8147         gen_fpst = gen_helper_frint32_s;
8148         break;
8149     case 0x12: /* FRINT64Z */
8150         rmode = FPROUNDING_ZERO;
8151         gen_fpst = gen_helper_frint64_s;
8152         break;
8153     case 0x13: /* FRINT64X */
8154         gen_fpst = gen_helper_frint64_s;
8155         break;
8156     default:
8157         g_assert_not_reached();
8158     }
8159 
8160     fpst = fpstatus_ptr(FPST_FPCR);
8161     if (rmode >= 0) {
8162         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
8163         gen_fpst(tcg_res, tcg_op, fpst);
8164         gen_restore_rmode(tcg_rmode, fpst);
8165     } else {
8166         gen_fpst(tcg_res, tcg_op, fpst);
8167     }
8168 
8169  done:
8170     write_fp_sreg(s, rd, tcg_res);
8171 }
8172 
8173 /* Floating-point data-processing (1 source) - double precision */
8174 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
8175 {
8176     void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
8177     TCGv_i64 tcg_op, tcg_res;
8178     TCGv_ptr fpst;
8179     int rmode = -1;
8180 
8181     switch (opcode) {
8182     case 0x0: /* FMOV */
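        /* Done as a gvec move so that the rest of the destination
         * vector register is zeroed, as required for a scalar write.
         */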
8183         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
8184         return;
8185     }
8186 
8187     tcg_op = read_fp_dreg(s, rn);
8188     tcg_res = tcg_temp_new_i64();
8189 
8190     switch (opcode) {
8191     case 0x1: /* FABS */
8192         gen_vfp_absd(tcg_res, tcg_op);
8193         goto done;
8194     case 0x2: /* FNEG */
8195         gen_vfp_negd(tcg_res, tcg_op);
8196         goto done;
8197     case 0x3: /* FSQRT */
8198         gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env);
8199         goto done;
8200     case 0x8: /* FRINTN */
8201     case 0x9: /* FRINTP */
8202     case 0xa: /* FRINTM */
8203     case 0xb: /* FRINTZ */
8204     case 0xc: /* FRINTA */
8205         rmode = opcode & 7;
8206         gen_fpst = gen_helper_rintd;
8207         break;
8208     case 0xe: /* FRINTX */
8209         gen_fpst = gen_helper_rintd_exact;
8210         break;
8211     case 0xf: /* FRINTI */
8212         gen_fpst = gen_helper_rintd;
8213         break;
8214     case 0x10: /* FRINT32Z */
8215         rmode = FPROUNDING_ZERO;
8216         gen_fpst = gen_helper_frint32_d;
8217         break;
8218     case 0x11: /* FRINT32X */
8219         gen_fpst = gen_helper_frint32_d;
8220         break;
8221     case 0x12: /* FRINT64Z */
8222         rmode = FPROUNDING_ZERO;
8223         gen_fpst = gen_helper_frint64_d;
8224         break;
8225     case 0x13: /* FRINT64X */
8226         gen_fpst = gen_helper_frint64_d;
8227         break;
8228     default:
8229         g_assert_not_reached();
8230     }
8231 
8232     fpst = fpstatus_ptr(FPST_FPCR);
8233     if (rmode >= 0) {
8234         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
8235         gen_fpst(tcg_res, tcg_op, fpst);
8236         gen_restore_rmode(tcg_rmode, fpst);
8237     } else {
8238         gen_fpst(tcg_res, tcg_op, fpst);
8239     }
8240 
8241  done:
8242     write_fp_dreg(s, rd, tcg_res);
8243 }
8244 
8245 static void handle_fp_fcvt(DisasContext *s, int opcode,
8246                            int rd, int rn, int dtype, int ntype)
8247 {
8248     switch (ntype) {
8249     case 0x0:
8250     {
8251         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
8252         if (dtype == 1) {
8253             /* Single to double */
8254             TCGv_i64 tcg_rd = tcg_temp_new_i64();
8255             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env);
8256             write_fp_dreg(s, rd, tcg_rd);
8257         } else {
8258             /* Single to half */
8259             TCGv_i32 tcg_rd = tcg_temp_new_i32();
8260             TCGv_i32 ahp = get_ahp_flag();
8261             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
8262 
8263             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
8264             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
8265             write_fp_sreg(s, rd, tcg_rd);
8266         }
8267         break;
8268     }
8269     case 0x1:
8270     {
8271         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8272         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8273         if (dtype == 0) {
8274             /* Double to single */
8275             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env);
8276         } else {
8277             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
8278             TCGv_i32 ahp = get_ahp_flag();
8279             /* Double to half */
8280             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
8281             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
8282         }
8283         write_fp_sreg(s, rd, tcg_rd);
8284         break;
8285     }
8286     case 0x3:
8287     {
8288         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
8289         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
8290         TCGv_i32 tcg_ahp = get_ahp_flag();
8291         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
8292         if (dtype == 0) {
8293             /* Half to single */
8294             TCGv_i32 tcg_rd = tcg_temp_new_i32();
8295             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
8296             write_fp_sreg(s, rd, tcg_rd);
8297         } else {
8298             /* Half to double */
8299             TCGv_i64 tcg_rd = tcg_temp_new_i64();
8300             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
8301             write_fp_dreg(s, rd, tcg_rd);
8302         }
8303         break;
8304     }
8305     default:
8306         g_assert_not_reached();
8307     }
8308 }
8309 
8310 /* Floating point data-processing (1 source)
8311  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
8312  * +---+---+---+-----------+------+---+--------+-----------+------+------+
8313  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
8314  * +---+---+---+-----------+------+---+--------+-----------+------+------+
8315  */
8316 static void disas_fp_1src(DisasContext *s, uint32_t insn)
8317 {
8318     int mos = extract32(insn, 29, 3);
8319     int type = extract32(insn, 22, 2);
8320     int opcode = extract32(insn, 15, 6);
8321     int rn = extract32(insn, 5, 5);
8322     int rd = extract32(insn, 0, 5);
8323 
8324     if (mos) {
8325         goto do_unallocated;
8326     }
8327 
8328     switch (opcode) {
8329     case 0x4: case 0x5: case 0x7:
8330     {
8331         /* FCVT between half, single and double precision */
8332         int dtype = extract32(opcode, 0, 2);
8333         if (type == 2 || dtype == type) {
8334             goto do_unallocated;
8335         }
8336         if (!fp_access_check(s)) {
8337             return;
8338         }
8339 
8340         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
8341         break;
8342     }
8343 
8344     case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
8345         if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
8346             goto do_unallocated;
8347         }
8348         /* fall through */
8349     case 0x0 ... 0x3:
8350     case 0x8 ... 0xc:
8351     case 0xe ... 0xf:
8352         /* 32-to-32 and 64-to-64 ops */
8353         switch (type) {
8354         case 0:
8355             if (!fp_access_check(s)) {
8356                 return;
8357             }
8358             handle_fp_1src_single(s, opcode, rd, rn);
8359             break;
8360         case 1:
8361             if (!fp_access_check(s)) {
8362                 return;
8363             }
8364             handle_fp_1src_double(s, opcode, rd, rn);
8365             break;
8366         case 3:
8367             if (!dc_isar_feature(aa64_fp16, s)) {
8368                 goto do_unallocated;
8369             }
8370 
8371             if (!fp_access_check(s)) {
8372                 return;
8373             }
8374             handle_fp_1src_half(s, opcode, rd, rn);
8375             break;
8376         default:
8377             goto do_unallocated;
8378         }
8379         break;
8380 
8381     case 0x6:
8382         switch (type) {
8383         case 1: /* BFCVT */
8384             if (!dc_isar_feature(aa64_bf16, s)) {
8385                 goto do_unallocated;
8386             }
8387             if (!fp_access_check(s)) {
8388                 return;
8389             }
8390             handle_fp_1src_single(s, opcode, rd, rn);
8391             break;
8392         default:
8393             goto do_unallocated;
8394         }
8395         break;
8396 
8397     default:
8398     do_unallocated:
8399         unallocated_encoding(s);
8400         break;
8401     }
8402 }
8403 
8404 /* Floating point immediate
8405  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
8406  * +---+---+---+-----------+------+---+------------+-------+------+------+
8407  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
8408  * +---+---+---+-----------+------+---+------------+-------+------+------+
8409  */
8410 static void disas_fp_imm(DisasContext *s, uint32_t insn)
8411 {
8412     int rd = extract32(insn, 0, 5);
8413     int imm5 = extract32(insn, 5, 5);
8414     int imm8 = extract32(insn, 13, 8);
8415     int type = extract32(insn, 22, 2);
8416     int mos = extract32(insn, 29, 3);
8417     uint64_t imm;
8418     MemOp sz;
8419 
8420     if (mos || imm5) {
8421         unallocated_encoding(s);
8422         return;
8423     }
8424 
8425     switch (type) {
8426     case 0:
8427         sz = MO_32;
8428         break;
8429     case 1:
8430         sz = MO_64;
8431         break;
8432     case 3:
8433         sz = MO_16;
8434         if (dc_isar_feature(aa64_fp16, s)) {
8435             break;
8436         }
8437         /* fallthru */
8438     default:
8439         unallocated_encoding(s);
8440         return;
8441     }
8442 
8443     if (!fp_access_check(s)) {
8444         return;
8445     }
8446 
8447     imm = vfp_expand_imm(sz, imm8);
8448     write_fp_dreg(s, rd, tcg_constant_i64(imm));
8449 }
8450 
8451 /* Handle floating point <=> fixed point conversions. Note that we can
8452  * also deal with fp <=> integer conversions as a special case (scale == 64).
8453  * OPTME: consider handling that special case separately, or at least skipping
8454  * the call to scalbn in the helpers for zero shifts.
8455  */
8456 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
8457                            bool itof, int rmode, int scale, int sf, int type)
8458 {
8459     bool is_signed = !(opcode & 1);
8460     TCGv_ptr tcg_fpstatus;
8461     TCGv_i32 tcg_shift, tcg_single;
8462     TCGv_i64 tcg_double;
8463 
8464     tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
8465 
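    /*
     * The A64 "scale" field encodes 64 - fbits, so 64 - scale is the
     * number of fractional bits; the fp <-> integer special case passes
     * scale == 64 and therefore shifts by zero.
     */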
8466     tcg_shift = tcg_constant_i32(64 - scale);
8467 
8468     if (itof) {
8469         TCGv_i64 tcg_int = cpu_reg(s, rn);
8470         if (!sf) {
8471             TCGv_i64 tcg_extend = tcg_temp_new_i64();
8472 
8473             if (is_signed) {
8474                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
8475             } else {
8476                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
8477             }
8478 
8479             tcg_int = tcg_extend;
8480         }
8481 
8482         switch (type) {
8483         case 1: /* float64 */
8484             tcg_double = tcg_temp_new_i64();
8485             if (is_signed) {
8486                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
8487                                      tcg_shift, tcg_fpstatus);
8488             } else {
8489                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
8490                                      tcg_shift, tcg_fpstatus);
8491             }
8492             write_fp_dreg(s, rd, tcg_double);
8493             break;
8494 
8495         case 0: /* float32 */
8496             tcg_single = tcg_temp_new_i32();
8497             if (is_signed) {
8498                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
8499                                      tcg_shift, tcg_fpstatus);
8500             } else {
8501                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
8502                                      tcg_shift, tcg_fpstatus);
8503             }
8504             write_fp_sreg(s, rd, tcg_single);
8505             break;
8506 
8507         case 3: /* float16 */
8508             tcg_single = tcg_temp_new_i32();
8509             if (is_signed) {
8510                 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
8511                                      tcg_shift, tcg_fpstatus);
8512             } else {
8513                 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
8514                                      tcg_shift, tcg_fpstatus);
8515             }
8516             write_fp_sreg(s, rd, tcg_single);
8517             break;
8518 
8519         default:
8520             g_assert_not_reached();
8521         }
8522     } else {
8523         TCGv_i64 tcg_int = cpu_reg(s, rd);
8524         TCGv_i32 tcg_rmode;
8525 
8526         if (extract32(opcode, 2, 1)) {
8527             /* There are too many rounding modes to all fit into rmode,
8528              * so FCVTA[US] is a special case.
8529              */
8530             rmode = FPROUNDING_TIEAWAY;
8531         }
8532 
8533         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
8534 
8535         switch (type) {
8536         case 1: /* float64 */
8537             tcg_double = read_fp_dreg(s, rn);
8538             if (is_signed) {
8539                 if (!sf) {
8540                     gen_helper_vfp_tosld(tcg_int, tcg_double,
8541                                          tcg_shift, tcg_fpstatus);
8542                 } else {
8543                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
8544                                          tcg_shift, tcg_fpstatus);
8545                 }
8546             } else {
8547                 if (!sf) {
8548                     gen_helper_vfp_tould(tcg_int, tcg_double,
8549                                          tcg_shift, tcg_fpstatus);
8550                 } else {
8551                     gen_helper_vfp_touqd(tcg_int, tcg_double,
8552                                          tcg_shift, tcg_fpstatus);
8553                 }
8554             }
8555             if (!sf) {
8556                 tcg_gen_ext32u_i64(tcg_int, tcg_int);
8557             }
8558             break;
8559 
8560         case 0: /* float32 */
8561             tcg_single = read_fp_sreg(s, rn);
8562             if (sf) {
8563                 if (is_signed) {
8564                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
8565                                          tcg_shift, tcg_fpstatus);
8566                 } else {
8567                     gen_helper_vfp_touqs(tcg_int, tcg_single,
8568                                          tcg_shift, tcg_fpstatus);
8569                 }
8570             } else {
8571                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
8572                 if (is_signed) {
8573                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
8574                                          tcg_shift, tcg_fpstatus);
8575                 } else {
8576                     gen_helper_vfp_touls(tcg_dest, tcg_single,
8577                                          tcg_shift, tcg_fpstatus);
8578                 }
8579                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
8580             }
8581             break;
8582 
8583         case 3: /* float16 */
8584             tcg_single = read_fp_sreg(s, rn);
8585             if (sf) {
8586                 if (is_signed) {
8587                     gen_helper_vfp_tosqh(tcg_int, tcg_single,
8588                                          tcg_shift, tcg_fpstatus);
8589                 } else {
8590                     gen_helper_vfp_touqh(tcg_int, tcg_single,
8591                                          tcg_shift, tcg_fpstatus);
8592                 }
8593             } else {
8594                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
8595                 if (is_signed) {
8596                     gen_helper_vfp_toslh(tcg_dest, tcg_single,
8597                                          tcg_shift, tcg_fpstatus);
8598                 } else {
8599                     gen_helper_vfp_toulh(tcg_dest, tcg_single,
8600                                          tcg_shift, tcg_fpstatus);
8601                 }
8602                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
8603             }
8604             break;
8605 
8606         default:
8607             g_assert_not_reached();
8608         }
8609 
8610         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
8611     }
8612 }
8613 
8614 /* Floating point <-> fixed point conversions
8615  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
8616  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
8617  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
8618  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
8619  */
8620 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
8621 {
8622     int rd = extract32(insn, 0, 5);
8623     int rn = extract32(insn, 5, 5);
8624     int scale = extract32(insn, 10, 6);
8625     int opcode = extract32(insn, 16, 3);
8626     int rmode = extract32(insn, 19, 2);
8627     int type = extract32(insn, 22, 2);
8628     bool sbit = extract32(insn, 29, 1);
8629     bool sf = extract32(insn, 31, 1);
8630     bool itof;
8631 
8632     if (sbit || (!sf && scale < 32)) {
8633         unallocated_encoding(s);
8634         return;
8635     }
8636 
8637     switch (type) {
8638     case 0: /* float32 */
8639     case 1: /* float64 */
8640         break;
8641     case 3: /* float16 */
8642         if (dc_isar_feature(aa64_fp16, s)) {
8643             break;
8644         }
8645         /* fallthru */
8646     default:
8647         unallocated_encoding(s);
8648         return;
8649     }
8650 
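    /* rmode and opcode are decoded together: rmode occupies bits [4:3]
     * and opcode bits [2:0] of the switch value.
     */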
8651     switch ((rmode << 3) | opcode) {
8652     case 0x2: /* SCVTF */
8653     case 0x3: /* UCVTF */
8654         itof = true;
8655         break;
8656     case 0x18: /* FCVTZS */
8657     case 0x19: /* FCVTZU */
8658         itof = false;
8659         break;
8660     default:
8661         unallocated_encoding(s);
8662         return;
8663     }
8664 
8665     if (!fp_access_check(s)) {
8666         return;
8667     }
8668 
8669     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
8670 }
8671 
8672 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
8673 {
8674     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
8675      * without conversion.
8676      */
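    /* type: 0 = 32-bit, 1 = 64-bit, 2 = top half of the 128-bit
     * register, 3 = 16-bit.
     */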
8677 
8678     if (itof) {
8679         TCGv_i64 tcg_rn = cpu_reg(s, rn);
8680         TCGv_i64 tmp;
8681 
8682         switch (type) {
8683         case 0:
8684             /* 32 bit */
8685             tmp = tcg_temp_new_i64();
8686             tcg_gen_ext32u_i64(tmp, tcg_rn);
8687             write_fp_dreg(s, rd, tmp);
8688             break;
8689         case 1:
8690             /* 64 bit */
8691             write_fp_dreg(s, rd, tcg_rn);
8692             break;
8693         case 2:
8694             /* 64 bit to top half. */
8695             tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, rd));
8696             clear_vec_high(s, true, rd);
8697             break;
8698         case 3:
8699             /* 16 bit */
8700             tmp = tcg_temp_new_i64();
8701             tcg_gen_ext16u_i64(tmp, tcg_rn);
8702             write_fp_dreg(s, rd, tmp);
8703             break;
8704         default:
8705             g_assert_not_reached();
8706         }
8707     } else {
8708         TCGv_i64 tcg_rd = cpu_reg(s, rd);
8709 
8710         switch (type) {
8711         case 0:
8712             /* 32 bit */
8713             tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_32));
8714             break;
8715         case 1:
8716             /* 64 bit */
8717             tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_64));
8718             break;
8719         case 2:
8720             /* 64 bits from top half */
8721             tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, rn));
8722             break;
8723         case 3:
8724             /* 16 bit */
8725             tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_16));
8726             break;
8727         default:
8728             g_assert_not_reached();
8729         }
8730     }
8731 }
8732 
8733 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
8734 {
8735     TCGv_i64 t = read_fp_dreg(s, rn);
8736     TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
8737 
8738     gen_helper_fjcvtzs(t, t, fpstatus);
8739 
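    /* The low half of t holds the 32-bit result; the high half supplies
     * the value for cpu_ZF. C, N and V are all cleared.
     */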
8740     tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
8741     tcg_gen_extrh_i64_i32(cpu_ZF, t);
8742     tcg_gen_movi_i32(cpu_CF, 0);
8743     tcg_gen_movi_i32(cpu_NF, 0);
8744     tcg_gen_movi_i32(cpu_VF, 0);
8745 }
8746 
8747 /* Floating point <-> integer conversions
8748  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
8749  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
8750  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
8751  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
8752  */
8753 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
8754 {
8755     int rd = extract32(insn, 0, 5);
8756     int rn = extract32(insn, 5, 5);
8757     int opcode = extract32(insn, 16, 3);
8758     int rmode = extract32(insn, 19, 2);
8759     int type = extract32(insn, 22, 2);
8760     bool sbit = extract32(insn, 29, 1);
8761     bool sf = extract32(insn, 31, 1);
8762     bool itof = false;
8763 
8764     if (sbit) {
8765         goto do_unallocated;
8766     }
8767 
8768     switch (opcode) {
8769     case 2: /* SCVTF */
8770     case 3: /* UCVTF */
8771         itof = true;
8772         /* fallthru */
8773     case 4: /* FCVTAS */
8774     case 5: /* FCVTAU */
8775         if (rmode != 0) {
8776             goto do_unallocated;
8777         }
8778         /* fallthru */
8779     case 0: /* FCVT[NPMZ]S */
8780     case 1: /* FCVT[NPMZ]U */
8781         switch (type) {
8782         case 0: /* float32 */
8783         case 1: /* float64 */
8784             break;
8785         case 3: /* float16 */
8786             if (!dc_isar_feature(aa64_fp16, s)) {
8787                 goto do_unallocated;
8788             }
8789             break;
8790         default:
8791             goto do_unallocated;
8792         }
8793         if (!fp_access_check(s)) {
8794             return;
8795         }
8796         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
8797         break;
8798 
8799     default:
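        /* The remaining encodings are keyed on the packed value
         * sf:type:rmode:opcode (1+2+2+3 bits); e.g. 0b01100110 is
         * sf=0, type=3 (half), rmode=0, opcode=6.
         */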
8800         switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
8801         case 0b01100110: /* FMOV half <-> 32-bit int */
8802         case 0b01100111:
8803         case 0b11100110: /* FMOV half <-> 64-bit int */
8804         case 0b11100111:
8805             if (!dc_isar_feature(aa64_fp16, s)) {
8806                 goto do_unallocated;
8807             }
8808             /* fallthru */
8809         case 0b00000110: /* FMOV 32-bit */
8810         case 0b00000111:
8811         case 0b10100110: /* FMOV 64-bit */
8812         case 0b10100111:
8813         case 0b11001110: /* FMOV top half of 128-bit */
8814         case 0b11001111:
8815             if (!fp_access_check(s)) {
8816                 return;
8817             }
8818             itof = opcode & 1;
8819             handle_fmov(s, rd, rn, type, itof);
8820             break;
8821 
8822         case 0b00111110: /* FJCVTZS */
8823             if (!dc_isar_feature(aa64_jscvt, s)) {
8824                 goto do_unallocated;
8825             } else if (fp_access_check(s)) {
8826                 handle_fjcvtzs(s, rd, rn);
8827             }
8828             break;
8829 
8830         default:
8831         do_unallocated:
8832             unallocated_encoding(s);
8833             return;
8834         }
8835         break;
8836     }
8837 }
8838 
8839 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
8840  *   31  30  29 28     25 24                          0
8841  * +---+---+---+---------+-----------------------------+
8842  * |   | 0 |   | 1 1 1 1 |                             |
8843  * +---+---+---+---------+-----------------------------+
8844  */
8845 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
8846 {
8847     if (extract32(insn, 24, 1)) {
8848         unallocated_encoding(s); /* in decodetree */
8849     } else if (extract32(insn, 21, 1) == 0) {
8850         /* Floating point <-> fixed point conversions */
8851         disas_fp_fixed_conv(s, insn);
8852     } else {
8853         switch (extract32(insn, 10, 2)) {
8854         case 1:
8855             /* Floating point conditional compare */
8856             disas_fp_ccomp(s, insn);
8857             break;
8858         case 2:
8859             /* Floating point data-processing (2 source) */
8860             unallocated_encoding(s); /* in decodetree */
8861             break;
8862         case 3:
8863             /* Floating point conditional select */
8864             unallocated_encoding(s); /* in decodetree */
8865             break;
8866         case 0:
8867             switch (ctz32(extract32(insn, 12, 4))) {
8868             case 0: /* [15:12] == xxx1 */
8869                 /* Floating point immediate */
8870                 disas_fp_imm(s, insn);
8871                 break;
8872             case 1: /* [15:12] == xx10 */
8873                 /* Floating point compare */
8874                 disas_fp_compare(s, insn);
8875                 break;
8876             case 2: /* [15:12] == x100 */
8877                 /* Floating point data-processing (1 source) */
8878                 disas_fp_1src(s, insn);
8879                 break;
8880             case 3: /* [15:12] == 1000 */
8881                 unallocated_encoding(s);
8882                 break;
8883             default: /* [15:12] == 0000 */
8884                 /* Floating point <-> integer conversions */
8885                 disas_fp_int_conv(s, insn);
8886                 break;
8887             }
8888             break;
8889         }
8890     }
8891 }
8892 
8893 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
8894                      int pos)
8895 {
8896     /* Extract 64 bits from the middle of two concatenated 64 bit
8897      * vector register slices left:right. The extracted bits start
8898      * at 'pos' bits into the right (least significant) side.
8899      * We return the result in tcg_right, and guarantee not to
8900      * trash tcg_left.
8901      */
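    /* For example, pos == 8 yields (left:right)[71:8], computed as
     * (right >> 8) | (left << 56).
     */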
8902     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8903     assert(pos > 0 && pos < 64);
8904 
8905     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
8906     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
8907     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
8908 }
8909 
8910 /* EXT
8911  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
8912  * +---+---+-------------+-----+---+------+---+------+---+------+------+
8913  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
8914  * +---+---+-------------+-----+---+------+---+------+---+------+------+
8915  */
8916 static void disas_simd_ext(DisasContext *s, uint32_t insn)
8917 {
8918     int is_q = extract32(insn, 30, 1);
8919     int op2 = extract32(insn, 22, 2);
8920     int imm4 = extract32(insn, 11, 4);
8921     int rm = extract32(insn, 16, 5);
8922     int rn = extract32(insn, 5, 5);
8923     int rd = extract32(insn, 0, 5);
8924     int pos = imm4 << 3;
8925     TCGv_i64 tcg_resl, tcg_resh;
8926 
8927     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
8928         unallocated_encoding(s);
8929         return;
8930     }
8931 
8932     if (!fp_access_check(s)) {
8933         return;
8934     }
8935 
8936     tcg_resh = tcg_temp_new_i64();
8937     tcg_resl = tcg_temp_new_i64();
8938 
8939     /* Vd gets bits starting at pos bits into Vm:Vn. This is
8940      * either extracting 128 bits from a 128:128 concatenation, or
8941      * extracting 64 bits from a 64:64 concatenation.
8942      */
8943     if (!is_q) {
8944         read_vec_element(s, tcg_resl, rn, 0, MO_64);
8945         if (pos != 0) {
8946             read_vec_element(s, tcg_resh, rm, 0, MO_64);
8947             do_ext64(s, tcg_resh, tcg_resl, pos);
8948         }
8949     } else {
8950         TCGv_i64 tcg_hh;
8951         typedef struct {
8952             int reg;
8953             int elt;
8954         } EltPosns;
8955         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
8956         EltPosns *elt = eltposns;
8957 
8958         if (pos >= 64) {
8959             elt++;
8960             pos -= 64;
8961         }
8962 
8963         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
8964         elt++;
8965         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
8966         elt++;
8967         if (pos != 0) {
8968             do_ext64(s, tcg_resh, tcg_resl, pos);
8969             tcg_hh = tcg_temp_new_i64();
8970             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
8971             do_ext64(s, tcg_hh, tcg_resh, pos);
8972         }
8973     }
8974 
8975     write_vec_element(s, tcg_resl, rd, 0, MO_64);
8976     if (is_q) {
8977         write_vec_element(s, tcg_resh, rd, 1, MO_64);
8978     }
8979     clear_vec_high(s, is_q, rd);
8980 }
8981 
8982 /* TBL/TBX
8983  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
8984  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
8985  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
8986  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
8987  */
8988 static void disas_simd_tb(DisasContext *s, uint32_t insn)
8989 {
8990     int op2 = extract32(insn, 22, 2);
8991     int is_q = extract32(insn, 30, 1);
8992     int rm = extract32(insn, 16, 5);
8993     int rn = extract32(insn, 5, 5);
8994     int rd = extract32(insn, 0, 5);
8995     int is_tbx = extract32(insn, 12, 1);
8996     int len = (extract32(insn, 13, 2) + 1) * 16;
8997 
8998     if (op2 != 0) {
8999         unallocated_encoding(s);
9000         return;
9001     }
9002 
9003     if (!fp_access_check(s)) {
9004         return;
9005     }
9006 
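    /* The simd_data word packs the table base register into bits [4:0],
     * the TBX flag into bit 5 and the table length in bytes (16, 32,
     * 48 or 64) from bit 6 upwards.
     */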
9007     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
9008                        vec_full_reg_offset(s, rm), tcg_env,
9009                        is_q ? 16 : 8, vec_full_reg_size(s),
9010                        (len << 6) | (is_tbx << 5) | rn,
9011                        gen_helper_simd_tblx);
9012 }
9013 
9014 /* ZIP/UZP/TRN
9015  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
9016  * +---+---+-------------+------+---+------+---+-----+-----+------+------+
9017  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
9018  * +---+---+-------------+------+---+------+---+-----+-----+------+------+
9019  */
9020 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
9021 {
9022     int rd = extract32(insn, 0, 5);
9023     int rn = extract32(insn, 5, 5);
9024     int rm = extract32(insn, 16, 5);
9025     int size = extract32(insn, 22, 2);
9026     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
9027      * bit 2 indicates 1 vs 2 variant of the insn.
9028      */
9029     int opcode = extract32(insn, 12, 2);
9030     bool part = extract32(insn, 14, 1);
9031     bool is_q = extract32(insn, 30, 1);
9032     int esize = 8 << size;
9033     int i;
9034     int datasize = is_q ? 128 : 64;
9035     int elements = datasize / esize;
9036     TCGv_i64 tcg_res[2], tcg_ele;
9037 
9038     if (opcode == 0 || (size == 3 && !is_q)) {
9039         unallocated_encoding(s);
9040         return;
9041     }
9042 
9043     if (!fp_access_check(s)) {
9044         return;
9045     }
9046 
9047     tcg_res[0] = tcg_temp_new_i64();
9048     tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
9049     tcg_ele = tcg_temp_new_i64();
9050 
9051     for (i = 0; i < elements; i++) {
9052         int o, w;
9053 
9054         switch (opcode) {
9055         case 1: /* UZP1/2 */
9056         {
9057             int midpoint = elements / 2;
9058             if (i < midpoint) {
9059                 read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
9060             } else {
9061                 read_vec_element(s, tcg_ele, rm,
9062                                  2 * (i - midpoint) + part, size);
9063             }
9064             break;
9065         }
9066         case 2: /* TRN1/2 */
9067             if (i & 1) {
9068                 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
9069             } else {
9070                 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
9071             }
9072             break;
9073         case 3: /* ZIP1/2 */
9074         {
9075             int base = part * elements / 2;
9076             if (i & 1) {
9077                 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
9078             } else {
9079                 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
9080             }
9081             break;
9082         }
9083         default:
9084             g_assert_not_reached();
9085         }
9086 
9087         w = (i * esize) / 64;
9088         o = (i * esize) % 64;
9089         if (o == 0) {
9090             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
9091         } else {
9092             tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
9093             tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
9094         }
9095     }
9096 
9097     for (i = 0; i <= is_q; ++i) {
9098         write_vec_element(s, tcg_res[i], rd, i, MO_64);
9099     }
9100     clear_vec_high(s, is_q, rd);
9101 }
9102 
9103 /*
9104  * do_reduction_op helper
9105  *
9106  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
9107  * important for correct NaN propagation that we do these
9108  * operations in exactly the order specified by the pseudocode.
9109  *
9110  * This is a recursive function; TCG temps should be freed by the
9111  * calling function once it is done with the values.
9112  */
9113 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
9114                                 int esize, int size, int vmap, TCGv_ptr fpst)
9115 {
9116     if (esize == size) {
9117         int element;
9118         MemOp msize = esize == 16 ? MO_16 : MO_32;
9119         TCGv_i32 tcg_elem;
9120 
9121         /* We should have one register left here */
9122         assert(ctpop8(vmap) == 1);
9123         element = ctz32(vmap);
9124         assert(element < 8);
9125 
9126         tcg_elem = tcg_temp_new_i32();
9127         read_vec_element_i32(s, tcg_elem, rn, element, msize);
9128         return tcg_elem;
9129     } else {
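        /* Split the element bitmap in half and reduce each half
         * recursively; e.g. vmap == 0b1111 splits into vmap_lo == 0b0011
         * and vmap_hi == 0b1100.
         */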
9130         int bits = size / 2;
9131         int shift = ctpop8(vmap) / 2;
9132         int vmap_lo = (vmap >> shift) & vmap;
9133         int vmap_hi = (vmap & ~vmap_lo);
9134         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
9135 
9136         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
9137         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
9138         tcg_res = tcg_temp_new_i32();
9139 
9140         switch (fpopcode) {
9141         case 0x0c: /* fmaxnmv half-precision */
9142             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
9143             break;
9144         case 0x0f: /* fmaxv half-precision */
9145             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
9146             break;
9147         case 0x1c: /* fminnmv half-precision */
9148             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
9149             break;
9150         case 0x1f: /* fminv half-precision */
9151             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
9152             break;
9153         case 0x2c: /* fmaxnmv */
9154             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
9155             break;
9156         case 0x2f: /* fmaxv */
9157             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
9158             break;
9159         case 0x3c: /* fminnmv */
9160             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
9161             break;
9162         case 0x3f: /* fminv */
9163             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
9164             break;
9165         default:
9166             g_assert_not_reached();
9167         }
9168         return tcg_res;
9169     }
9170 }
9171 
9172 /* AdvSIMD across lanes
9173  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9174  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9175  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9176  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9177  */
9178 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
9179 {
9180     int rd = extract32(insn, 0, 5);
9181     int rn = extract32(insn, 5, 5);
9182     int size = extract32(insn, 22, 2);
9183     int opcode = extract32(insn, 12, 5);
9184     bool is_q = extract32(insn, 30, 1);
9185     bool is_u = extract32(insn, 29, 1);
9186     bool is_fp = false;
9187     bool is_min = false;
9188     int esize;
9189     int elements;
9190     int i;
9191     TCGv_i64 tcg_res, tcg_elt;
9192 
9193     switch (opcode) {
9194     case 0x1b: /* ADDV */
9195         if (is_u) {
9196             unallocated_encoding(s);
9197             return;
9198         }
9199         /* fall through */
9200     case 0x3: /* SADDLV, UADDLV */
9201     case 0xa: /* SMAXV, UMAXV */
9202     case 0x1a: /* SMINV, UMINV */
9203         if (size == 3 || (size == 2 && !is_q)) {
9204             unallocated_encoding(s);
9205             return;
9206         }
9207         break;
9208     case 0xc: /* FMAXNMV, FMINNMV */
9209     case 0xf: /* FMAXV, FMINV */
9210         /* Bit 1 of the size field encodes min vs max, and the actual size
9211          * depends on the encoding of the U bit. If U is not set (and FP16
9212          * is enabled) then we do half-precision float instead of single
9213          * precision.
9214          */
9215         is_min = extract32(size, 1, 1);
9216         is_fp = true;
9217         if (!is_u && dc_isar_feature(aa64_fp16, s)) {
9218             size = 1;
9219         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
9220             unallocated_encoding(s);
9221             return;
9222         } else {
9223             size = 2;
9224         }
9225         break;
9226     default:
9227         unallocated_encoding(s);
9228         return;
9229     }
9230 
9231     if (!fp_access_check(s)) {
9232         return;
9233     }
9234 
9235     esize = 8 << size;
9236     elements = (is_q ? 128 : 64) / esize;
9237 
9238     tcg_res = tcg_temp_new_i64();
9239     tcg_elt = tcg_temp_new_i64();
9240 
9241     /* These instructions operate across all lanes of a vector
9242      * to produce a single result. We can guarantee that a 64
9243      * bit intermediate is sufficient:
9244      *  + for [US]ADDLV the maximum element size is 32 bits, and
9245      *    the result type is 64 bits
9246      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
9247      *    same as the element size, which is 32 bits at most
9248      * For the integer operations we can choose to work at 64
9249      * or 32 bits and truncate at the end; for simplicity
9250      * we use 64 bits always. The floating point
9251      * ops do require 32 bit intermediates, though.
9252      */
9253     if (!is_fp) {
9254         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
9255 
9256         for (i = 1; i < elements; i++) {
9257             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
9258 
9259             switch (opcode) {
9260             case 0x03: /* SADDLV / UADDLV */
9261             case 0x1b: /* ADDV */
9262                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
9263                 break;
9264             case 0x0a: /* SMAXV / UMAXV */
9265                 if (is_u) {
9266                     tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
9267                 } else {
9268                     tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
9269                 }
9270                 break;
9271             case 0x1a: /* SMINV / UMINV */
9272                 if (is_u) {
9273                     tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
9274                 } else {
9275                     tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
9276                 }
9277                 break;
9278             default:
9279                 g_assert_not_reached();
9280             }
9281 
9282         }
9283     } else {
9284         /* Floating point vector reduction ops which work across 32
9285          * bit (single) or 16 bit (half-precision) intermediates.
9286          * Note that correct NaN propagation requires that we do these
9287          * operations in exactly the order specified by the pseudocode.
9288          */
9289         TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9290         int fpopcode = opcode | is_min << 4 | is_u << 5;
9291         int vmap = (1 << elements) - 1;
9292         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
9293                                              (is_q ? 128 : 64), vmap, fpst);
9294         tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
9295     }
9296 
9297     /* Now truncate the result to the width required for the final output */
9298     if (opcode == 0x03) {
9299         /* SADDLV, UADDLV: result is 2*esize */
9300         size++;
9301     }
9302 
9303     switch (size) {
9304     case 0:
9305         tcg_gen_ext8u_i64(tcg_res, tcg_res);
9306         break;
9307     case 1:
9308         tcg_gen_ext16u_i64(tcg_res, tcg_res);
9309         break;
9310     case 2:
9311         tcg_gen_ext32u_i64(tcg_res, tcg_res);
9312         break;
9313     case 3:
9314         break;
9315     default:
9316         g_assert_not_reached();
9317     }
9318 
9319     write_fp_dreg(s, rd, tcg_res);
9320 }
9321 
9322 /* AdvSIMD modified immediate
9323  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
9324  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
9325  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
9326  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
9327  *
9328  * There are a number of operations that can be carried out here:
9329  *   MOVI - move (shifted) imm into register
9330  *   MVNI - move inverted (shifted) imm into register
9331  *   ORR  - bitwise OR of (shifted) imm with register
9332  *   BIC  - bitwise clear of (shifted) imm with register
9333  * With ARMv8.2 we also have:
9334  *   FMOV half-precision
9335  */
9336 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
9337 {
9338     int rd = extract32(insn, 0, 5);
9339     int cmode = extract32(insn, 12, 4);
9340     int o2 = extract32(insn, 11, 1);
9341     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
9342     bool is_neg = extract32(insn, 29, 1);
9343     bool is_q = extract32(insn, 30, 1);
9344     uint64_t imm = 0;
9345 
9346     if (o2) {
9347         if (cmode != 0xf || is_neg) {
9348             unallocated_encoding(s);
9349             return;
9350         }
9351         /* FMOV (vector, immediate) - half-precision */
9352         if (!dc_isar_feature(aa64_fp16, s)) {
9353             unallocated_encoding(s);
9354             return;
9355         }
9356         imm = vfp_expand_imm(MO_16, abcdefgh);
9357         /* now duplicate across the lanes */
9358         imm = dup_const(MO_16, imm);
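        /* e.g. abcdefgh == 0x70 expands to 0x3c00 (half-precision 1.0)
         * and is then replicated into every 16-bit lane.
         */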
9359     } else {
9360         if (cmode == 0xf && is_neg && !is_q) {
9361             unallocated_encoding(s);
9362             return;
9363         }
9364         imm = asimd_imm_const(abcdefgh, cmode, is_neg);
9365     }
9366 
9367     if (!fp_access_check(s)) {
9368         return;
9369     }
9370 
9371     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
9372         /* MOVI or MVNI, with MVNI negation handled above.  */
9373         tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
9374                              vec_full_reg_size(s), imm);
9375     } else {
9376         /* ORR or BIC, with BIC negation to AND handled above.  */
9377         if (is_neg) {
9378             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
9379         } else {
9380             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
9381         }
9382     }
9383 }
9384 
9385 /*
9386  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
9387  *
9388  * This handles the common shift logic and is used by both
9389  * the vector and scalar code paths.
9390  */
9391 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
9392                                     TCGv_i64 tcg_rnd, bool accumulate,
9393                                     bool is_u, int size, int shift)
9394 {
9395     bool extended_result = false;
9396     bool round = tcg_rnd != NULL;
9397     int ext_lshift = 0;
9398     TCGv_i64 tcg_src_hi;
9399 
9400     if (round && size == 3) {
9401         extended_result = true;
9402         ext_lshift = 64 - shift;
9403         tcg_src_hi = tcg_temp_new_i64();
9404     } else if (shift == 64) {
9405         if (!accumulate && is_u) {
9406             /* result is zero */
9407             tcg_gen_movi_i64(tcg_res, 0);
9408             return;
9409         }
9410     }
9411 
9412     /* Deal with the rounding step */
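    /* Callers pass tcg_rnd == 1 << (shift - 1), so adding it before the
     * right shift rounds to nearest, with ties rounded up.
     */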
9413     if (round) {
9414         if (extended_result) {
9415             TCGv_i64 tcg_zero = tcg_constant_i64(0);
9416             if (!is_u) {
9417                 /* take care of sign extending tcg_res */
9418                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
9419                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
9420                                  tcg_src, tcg_src_hi,
9421                                  tcg_rnd, tcg_zero);
9422             } else {
9423                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
9424                                  tcg_src, tcg_zero,
9425                                  tcg_rnd, tcg_zero);
9426             }
9427         } else {
9428             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
9429         }
9430     }
9431 
9432     /* Now do the shift right */
9433     if (round && extended_result) {
9434         /* extended case, >64 bit precision required */
9435         if (ext_lshift == 0) {
9436             /* special case, only high bits matter */
9437             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
9438         } else {
9439             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
9440             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
9441             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
9442         }
9443     } else {
9444         if (is_u) {
9445             if (shift == 64) {
9446                 /* essentially shifting in 64 zeros */
9447                 tcg_gen_movi_i64(tcg_src, 0);
9448             } else {
9449                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
9450             }
9451         } else {
9452             if (shift == 64) {
9453                 /* effectively extending the sign-bit */
9454                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
9455             } else {
9456                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
9457             }
9458         }
9459     }
9460 
9461     if (accumulate) {
9462         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
9463     } else {
9464         tcg_gen_mov_i64(tcg_res, tcg_src);
9465     }
9466 }
9467 
9468 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
9469 static void handle_scalar_simd_shri(DisasContext *s,
9470                                     bool is_u, int immh, int immb,
9471                                     int opcode, int rn, int rd)
9472 {
9473     const int size = 3;
9474     int immhb = immh << 3 | immb;
9475     int shift = 2 * (8 << size) - immhb;
9476     bool accumulate = false;
9477     bool round = false;
9478     bool insert = false;
9479     TCGv_i64 tcg_rn;
9480     TCGv_i64 tcg_rd;
9481     TCGv_i64 tcg_round;
9482 
9483     if (!extract32(immh, 3, 1)) {
9484         unallocated_encoding(s);
9485         return;
9486     }
9487 
9488     if (!fp_access_check(s)) {
9489         return;
9490     }
9491 
9492     switch (opcode) {
9493     case 0x02: /* SSRA / USRA (accumulate) */
9494         accumulate = true;
9495         break;
9496     case 0x04: /* SRSHR / URSHR (rounding) */
9497         round = true;
9498         break;
9499     case 0x06: /* SRSRA / URSRA (accum + rounding) */
9500         accumulate = round = true;
9501         break;
9502     case 0x08: /* SRI */
9503         insert = true;
9504         break;
9505     }
9506 
9507     if (round) {
9508         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
9509     } else {
9510         tcg_round = NULL;
9511     }
9512 
9513     tcg_rn = read_fp_dreg(s, rn);
9514     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
9515 
9516     if (insert) {
9517         /* A shift count equal to the element size is valid but does nothing;
9518          * special-case it to avoid a potential shift by 64.
9519          */
9520         int esize = 8 << size;
9521         if (shift != esize) {
9522             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
9523             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
9524         }
9525     } else {
9526         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
9527                                 accumulate, is_u, size, shift);
9528     }
9529 
9530     write_fp_dreg(s, rd, tcg_rd);
9531 }
9532 
9533 /* SHL/SLI - Scalar shift left */
9534 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
9535                                     int immh, int immb, int opcode,
9536                                     int rn, int rd)
9537 {
9538     int size = 32 - clz32(immh) - 1;
9539     int immhb = immh << 3 | immb;
9540     int shift = immhb - (8 << size);
9541     TCGv_i64 tcg_rn;
9542     TCGv_i64 tcg_rd;
9543 
9544     if (!extract32(immh, 3, 1)) {
9545         unallocated_encoding(s);
9546         return;
9547     }
9548 
9549     if (!fp_access_check(s)) {
9550         return;
9551     }
9552 
9553     tcg_rn = read_fp_dreg(s, rn);
9554     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
9555 
9556     if (insert) {
9557         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
9558     } else {
9559         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
9560     }
9561 
9562     write_fp_dreg(s, rd, tcg_rd);
9563 }
9564 
9565 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
9566  * (signed/unsigned) narrowing */
9567 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
9568                                    bool is_u_shift, bool is_u_narrow,
9569                                    int immh, int immb, int opcode,
9570                                    int rn, int rd)
9571 {
9572     int immhb = immh << 3 | immb;
9573     int size = 32 - clz32(immh) - 1;
9574     int esize = 8 << size;
9575     int shift = (2 * esize) - immhb;
9576     int elements = is_scalar ? 1 : (64 / esize);
9577     bool round = extract32(opcode, 0, 1);
9578     MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
9579     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
9580     TCGv_i32 tcg_rd_narrowed;
9581     TCGv_i64 tcg_final;
9582 
9583     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
9584         { gen_helper_neon_narrow_sat_s8,
9585           gen_helper_neon_unarrow_sat8 },
9586         { gen_helper_neon_narrow_sat_s16,
9587           gen_helper_neon_unarrow_sat16 },
9588         { gen_helper_neon_narrow_sat_s32,
9589           gen_helper_neon_unarrow_sat32 },
9590         { NULL, NULL },
9591     };
9592     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
9593         gen_helper_neon_narrow_sat_u8,
9594         gen_helper_neon_narrow_sat_u16,
9595         gen_helper_neon_narrow_sat_u32,
9596         NULL
9597     };
9598     NeonGenNarrowEnvFn *narrowfn;
9599 
9600     int i;
9601 
9602     assert(size < 4);
9603 
9604     if (extract32(immh, 3, 1)) {
9605         unallocated_encoding(s);
9606         return;
9607     }
9608 
9609     if (!fp_access_check(s)) {
9610         return;
9611     }
9612 
9613     if (is_u_shift) {
9614         narrowfn = unsigned_narrow_fns[size];
9615     } else {
9616         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
9617     }
9618 
9619     tcg_rn = tcg_temp_new_i64();
9620     tcg_rd = tcg_temp_new_i64();
9621     tcg_rd_narrowed = tcg_temp_new_i32();
9622     tcg_final = tcg_temp_new_i64();
9623 
9624     if (round) {
9625         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
9626     } else {
9627         tcg_round = NULL;
9628     }
9629 
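    /* Shift and round each source element, saturate-narrow it to esize
     * bits, then pack the narrowed results into tcg_final from element 0
     * upwards.
     */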
9630     for (i = 0; i < elements; i++) {
9631         read_vec_element(s, tcg_rn, rn, i, ldop);
9632         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
9633                                 false, is_u_shift, size+1, shift);
9634         narrowfn(tcg_rd_narrowed, tcg_env, tcg_rd);
9635         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
9636         if (i == 0) {
9637             tcg_gen_extract_i64(tcg_final, tcg_rd, 0, esize);
9638         } else {
9639             tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
9640         }
9641     }
9642 
9643     if (!is_q) {
9644         write_vec_element(s, tcg_final, rd, 0, MO_64);
9645     } else {
9646         write_vec_element(s, tcg_final, rd, 1, MO_64);
9647     }
9648     clear_vec_high(s, is_q, rd);
9649 }
9650 
9651 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
9652 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
9653                              bool src_unsigned, bool dst_unsigned,
9654                              int immh, int immb, int rn, int rd)
9655 {
9656     int immhb = immh << 3 | immb;
9657     int size = 32 - clz32(immh) - 1;
9658     int shift = immhb - (8 << size);
9659     int pass;
9660 
9661     assert(immh != 0);
9662     assert(!(scalar && is_q));
9663 
9664     if (!scalar) {
9665         if (!is_q && extract32(immh, 3, 1)) {
9666             unallocated_encoding(s);
9667             return;
9668         }
9669 
9670         /* Since we use the variable-shift helpers we must
9671          * replicate the shift count into each element of
9672          * the tcg_shift value.
9673          */
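        /* e.g. for byte elements the 8-bit count is replicated into all
         * four bytes of the 32-bit shift value (5 becomes 0x05050505).
         */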
9674         switch (size) {
9675         case 0:
9676             shift |= shift << 8;
9677             /* fall through */
9678         case 1:
9679             shift |= shift << 16;
9680             break;
9681         case 2:
9682         case 3:
9683             break;
9684         default:
9685             g_assert_not_reached();
9686         }
9687     }
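    /*
     * E.g. for size == 0 a shift count of 3 is replicated above to
     * 0x03030303, so every byte lane of the shift operand sees the same
     * count; for size == 1 it becomes 0x00030003.
     */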
9688 
9689     if (!fp_access_check(s)) {
9690         return;
9691     }
9692 
9693     if (size == 3) {
9694         TCGv_i64 tcg_shift = tcg_constant_i64(shift);
9695         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
9696             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
9697             { NULL, gen_helper_neon_qshl_u64 },
9698         };
9699         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
9700         int maxpass = is_q ? 2 : 1;
9701 
9702         for (pass = 0; pass < maxpass; pass++) {
9703             TCGv_i64 tcg_op = tcg_temp_new_i64();
9704 
9705             read_vec_element(s, tcg_op, rn, pass, MO_64);
9706             genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
9707             write_vec_element(s, tcg_op, rd, pass, MO_64);
9708         }
9709         clear_vec_high(s, is_q, rd);
9710     } else {
9711         TCGv_i32 tcg_shift = tcg_constant_i32(shift);
9712         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
9713             {
9714                 { gen_helper_neon_qshl_s8,
9715                   gen_helper_neon_qshl_s16,
9716                   gen_helper_neon_qshl_s32 },
9717                 { gen_helper_neon_qshlu_s8,
9718                   gen_helper_neon_qshlu_s16,
9719                   gen_helper_neon_qshlu_s32 }
9720             }, {
9721                 { NULL, NULL, NULL },
9722                 { gen_helper_neon_qshl_u8,
9723                   gen_helper_neon_qshl_u16,
9724                   gen_helper_neon_qshl_u32 }
9725             }
9726         };
9727         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
9728         MemOp memop = scalar ? size : MO_32;
9729         int maxpass = scalar ? 1 : is_q ? 4 : 2;
9730 
9731         for (pass = 0; pass < maxpass; pass++) {
9732             TCGv_i32 tcg_op = tcg_temp_new_i32();
9733 
9734             read_vec_element_i32(s, tcg_op, rn, pass, memop);
9735             genfn(tcg_op, tcg_env, tcg_op, tcg_shift);
9736             if (scalar) {
9737                 switch (size) {
9738                 case 0:
9739                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
9740                     break;
9741                 case 1:
9742                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
9743                     break;
9744                 case 2:
9745                     break;
9746                 default:
9747                     g_assert_not_reached();
9748                 }
9749                 write_fp_sreg(s, rd, tcg_op);
9750             } else {
9751                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
9752             }
9753         }
9754 
9755         if (!scalar) {
9756             clear_vec_high(s, is_q, rd);
9757         }
9758     }
9759 }
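/*
 * For reference, the (src_unsigned, dst_unsigned) pairs accepted above map
 * onto the three encodings: (0, 0) is SQSHL, (0, 1) is SQSHLU (signed
 * input, unsigned saturated result) and (1, 1) is UQSHL; (1, 0) is not a
 * valid encoding, hence the NULL entries in the function tables.
 */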
9760 
9761 /* Common vector code for handling integer to FP conversion */
9762 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
9763                                    int elements, int is_signed,
9764                                    int fracbits, int size)
9765 {
9766     TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9767     TCGv_i32 tcg_shift = NULL;
9768 
9769     MemOp mop = size | (is_signed ? MO_SIGN : 0);
9770     int pass;
9771 
9772     if (fracbits || size == MO_64) {
9773         tcg_shift = tcg_constant_i32(fracbits);
9774     }
9775 
9776     if (size == MO_64) {
9777         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
9778         TCGv_i64 tcg_double = tcg_temp_new_i64();
9779 
9780         for (pass = 0; pass < elements; pass++) {
9781             read_vec_element(s, tcg_int64, rn, pass, mop);
9782 
9783             if (is_signed) {
9784                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
9785                                      tcg_shift, tcg_fpst);
9786             } else {
9787                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
9788                                      tcg_shift, tcg_fpst);
9789             }
9790             if (elements == 1) {
9791                 write_fp_dreg(s, rd, tcg_double);
9792             } else {
9793                 write_vec_element(s, tcg_double, rd, pass, MO_64);
9794             }
9795         }
9796     } else {
9797         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
9798         TCGv_i32 tcg_float = tcg_temp_new_i32();
9799 
9800         for (pass = 0; pass < elements; pass++) {
9801             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
9802 
9803             switch (size) {
9804             case MO_32:
9805                 if (fracbits) {
9806                     if (is_signed) {
9807                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
9808                                              tcg_shift, tcg_fpst);
9809                     } else {
9810                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
9811                                              tcg_shift, tcg_fpst);
9812                     }
9813                 } else {
9814                     if (is_signed) {
9815                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
9816                     } else {
9817                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
9818                     }
9819                 }
9820                 break;
9821             case MO_16:
9822                 if (fracbits) {
9823                     if (is_signed) {
9824                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
9825                                              tcg_shift, tcg_fpst);
9826                     } else {
9827                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
9828                                              tcg_shift, tcg_fpst);
9829                     }
9830                 } else {
9831                     if (is_signed) {
9832                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
9833                     } else {
9834                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
9835                     }
9836                 }
9837                 break;
9838             default:
9839                 g_assert_not_reached();
9840             }
9841 
9842             if (elements == 1) {
9843                 write_fp_sreg(s, rd, tcg_float);
9844             } else {
9845                 write_vec_element_i32(s, tcg_float, rd, pass, size);
9846             }
9847         }
9848     }
9849 
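    /*
     * elements << size is the number of bytes produced, so it equals 16
     * exactly when this was a full 128-bit operation; narrower ops must
     * zero the high half of the destination.
     */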
9850     clear_vec_high(s, elements << size == 16, rd);
9851 }
9852 
9853 /* UCVTF/SCVTF - Integer to FP conversion */
9854 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
9855                                          bool is_q, bool is_u,
9856                                          int immh, int immb, int opcode,
9857                                          int rn, int rd)
9858 {
9859     int size, elements, fracbits;
9860     int immhb = immh << 3 | immb;
9861 
9862     if (immh & 8) {
9863         size = MO_64;
9864         if (!is_scalar && !is_q) {
9865             unallocated_encoding(s);
9866             return;
9867         }
9868     } else if (immh & 4) {
9869         size = MO_32;
9870     } else if (immh & 2) {
9871         size = MO_16;
9872         if (!dc_isar_feature(aa64_fp16, s)) {
9873             unallocated_encoding(s);
9874             return;
9875         }
9876     } else {
9877         /* immh == 0 would be a failure of the decode logic */
9878         g_assert(immh == 1);
9879         unallocated_encoding(s);
9880         return;
9881     }
9882 
9883     if (is_scalar) {
9884         elements = 1;
9885     } else {
9886         elements = (8 << is_q) >> size;
9887     }
9888     fracbits = (16 << size) - immhb;
9889 
9890     if (!fp_access_check(s)) {
9891         return;
9892     }
9893 
9894     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
9895 }
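/*
 * Worked example (illustrative): scalar SCVTF with immh = 0b0100 and
 * immb = 0b000 selects size = MO_32 with immhb = 32, so
 * fracbits = (16 << 2) - 32 = 32: the 32-bit integer input is treated as
 * a fixed-point value with 32 fraction bits before conversion.
 */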
9896 
9897 /* FCVTZS, FCVTZU - FP to fixed-point conversion */
9898 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
9899                                          bool is_q, bool is_u,
9900                                          int immh, int immb, int rn, int rd)
9901 {
9902     int immhb = immh << 3 | immb;
9903     int pass, size, fracbits;
9904     TCGv_ptr tcg_fpstatus;
9905     TCGv_i32 tcg_rmode, tcg_shift;
9906 
9907     if (immh & 0x8) {
9908         size = MO_64;
9909         if (!is_scalar && !is_q) {
9910             unallocated_encoding(s);
9911             return;
9912         }
9913     } else if (immh & 0x4) {
9914         size = MO_32;
9915     } else if (immh & 0x2) {
9916         size = MO_16;
9917         if (!dc_isar_feature(aa64_fp16, s)) {
9918             unallocated_encoding(s);
9919             return;
9920         }
9921     } else {
9922         /* Should have split out AdvSIMD modified immediate earlier.  */
9923         assert(immh == 1);
9924         unallocated_encoding(s);
9925         return;
9926     }
9927 
9928     if (!fp_access_check(s)) {
9929         return;
9930     }
9931 
9932     assert(!(is_scalar && is_q));
9933 
9934     tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9935     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
9936     fracbits = (16 << size) - immhb;
9937     tcg_shift = tcg_constant_i32(fracbits);
9938 
9939     if (size == MO_64) {
9940         int maxpass = is_scalar ? 1 : 2;
9941 
9942         for (pass = 0; pass < maxpass; pass++) {
9943             TCGv_i64 tcg_op = tcg_temp_new_i64();
9944 
9945             read_vec_element(s, tcg_op, rn, pass, MO_64);
9946             if (is_u) {
9947                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9948             } else {
9949                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9950             }
9951             write_vec_element(s, tcg_op, rd, pass, MO_64);
9952         }
9953         clear_vec_high(s, is_q, rd);
9954     } else {
9955         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
9956         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
9957 
9958         switch (size) {
9959         case MO_16:
9960             if (is_u) {
9961                 fn = gen_helper_vfp_touhh;
9962             } else {
9963                 fn = gen_helper_vfp_toshh;
9964             }
9965             break;
9966         case MO_32:
9967             if (is_u) {
9968                 fn = gen_helper_vfp_touls;
9969             } else {
9970                 fn = gen_helper_vfp_tosls;
9971             }
9972             break;
9973         default:
9974             g_assert_not_reached();
9975         }
9976 
9977         for (pass = 0; pass < maxpass; pass++) {
9978             TCGv_i32 tcg_op = tcg_temp_new_i32();
9979 
9980             read_vec_element_i32(s, tcg_op, rn, pass, size);
9981             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9982             if (is_scalar) {
9983                 if (size == MO_16 && !is_u) {
9984                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
9985                 }
9986                 write_fp_sreg(s, rd, tcg_op);
9987             } else {
9988                 write_vec_element_i32(s, tcg_op, rd, pass, size);
9989             }
9990         }
9991         if (!is_scalar) {
9992             clear_vec_high(s, is_q, rd);
9993         }
9994     }
9995 
9996     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
9997 }
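/*
 * Note the bracketing pattern above: gen_set_rmode() forces
 * FPROUNDING_ZERO for the duration of the conversion and hands back the
 * mode it displaced, which gen_restore_rmode() reinstates at the end.
 */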
9998 
9999 /* AdvSIMD scalar shift by immediate
10000  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
10001  * +-----+---+-------------+------+------+--------+---+------+------+
10002  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10003  * +-----+---+-------------+------+------+--------+---+------+------+
10004  *
10005  * This is the scalar version, so it works on fixed size registers.
10006  */
10007 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
10008 {
10009     int rd = extract32(insn, 0, 5);
10010     int rn = extract32(insn, 5, 5);
10011     int opcode = extract32(insn, 11, 5);
10012     int immb = extract32(insn, 16, 3);
10013     int immh = extract32(insn, 19, 4);
10014     bool is_u = extract32(insn, 29, 1);
10015 
10016     if (immh == 0) {
10017         unallocated_encoding(s);
10018         return;
10019     }
10020 
10021     switch (opcode) {
10022     case 0x08: /* SRI */
10023         if (!is_u) {
10024             unallocated_encoding(s);
10025             return;
10026         }
10027         /* fall through */
10028     case 0x00: /* SSHR / USHR */
10029     case 0x02: /* SSRA / USRA */
10030     case 0x04: /* SRSHR / URSHR */
10031     case 0x06: /* SRSRA / URSRA */
10032         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
10033         break;
10034     case 0x0a: /* SHL / SLI */
10035         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
10036         break;
10037     case 0x1c: /* SCVTF, UCVTF */
10038         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
10039                                      opcode, rn, rd);
10040         break;
10041     case 0x10: /* SQSHRUN, SQSHRUN2 */
10042     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
10043         if (!is_u) {
10044             unallocated_encoding(s);
10045             return;
10046         }
10047         handle_vec_simd_sqshrn(s, true, false, false, true,
10048                                immh, immb, opcode, rn, rd);
10049         break;
10050     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
10051     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
10052         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
10053                                immh, immb, opcode, rn, rd);
10054         break;
10055     case 0xc: /* SQSHLU */
10056         if (!is_u) {
10057             unallocated_encoding(s);
10058             return;
10059         }
10060         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
10061         break;
10062     case 0xe: /* SQSHL, UQSHL */
10063         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
10064         break;
10065     case 0x1f: /* FCVTZS, FCVTZU */
10066         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
10067         break;
10068     default:
10069         unallocated_encoding(s);
10070         break;
10071     }
10072 }
10073 
10074 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
10075                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
10076                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
10077 {
10078     /* Handle 64->64 opcodes which are shared between the scalar and
10079      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
10080      * is valid in either group and also the double-precision fp ops.
10081      * The caller need only provide tcg_rmode and tcg_fpstatus if the op
10082      * requires them.
10083      */
10084     TCGCond cond;
10085 
10086     switch (opcode) {
10087     case 0x4: /* CLS, CLZ */
10088         if (u) {
10089             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
10090         } else {
10091             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
10092         }
10093         break;
10094     case 0x5: /* NOT */
10095         /* This opcode is shared with CNT and RBIT but we have earlier
10096          * enforced that size == 3 if and only if this is the NOT insn.
10097          */
10098         tcg_gen_not_i64(tcg_rd, tcg_rn);
10099         break;
10100     case 0x7: /* SQABS, SQNEG */
10101         if (u) {
10102             gen_helper_neon_qneg_s64(tcg_rd, tcg_env, tcg_rn);
10103         } else {
10104             gen_helper_neon_qabs_s64(tcg_rd, tcg_env, tcg_rn);
10105         }
10106         break;
10107     case 0xa: /* CMLT */
10108         cond = TCG_COND_LT;
10109     do_cmop:
10110         /* 64 bit integer comparison against zero, result is test ? -1 : 0. */
10111         tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0));
10112         break;
10113     case 0x8: /* CMGT, CMGE */
10114         cond = u ? TCG_COND_GE : TCG_COND_GT;
10115         goto do_cmop;
10116     case 0x9: /* CMEQ, CMLE */
10117         cond = u ? TCG_COND_LE : TCG_COND_EQ;
10118         goto do_cmop;
10119     case 0xb: /* ABS, NEG */
10120         if (u) {
10121             tcg_gen_neg_i64(tcg_rd, tcg_rn);
10122         } else {
10123             tcg_gen_abs_i64(tcg_rd, tcg_rn);
10124         }
10125         break;
10126     case 0x2f: /* FABS */
10127         gen_vfp_absd(tcg_rd, tcg_rn);
10128         break;
10129     case 0x6f: /* FNEG */
10130         gen_vfp_negd(tcg_rd, tcg_rn);
10131         break;
10132     case 0x7f: /* FSQRT */
10133         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env);
10134         break;
10135     case 0x1a: /* FCVTNS */
10136     case 0x1b: /* FCVTMS */
10137     case 0x1c: /* FCVTAS */
10138     case 0x3a: /* FCVTPS */
10139     case 0x3b: /* FCVTZS */
10140         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
10141         break;
10142     case 0x5a: /* FCVTNU */
10143     case 0x5b: /* FCVTMU */
10144     case 0x5c: /* FCVTAU */
10145     case 0x7a: /* FCVTPU */
10146     case 0x7b: /* FCVTZU */
10147         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
10148         break;
10149     case 0x18: /* FRINTN */
10150     case 0x19: /* FRINTM */
10151     case 0x38: /* FRINTP */
10152     case 0x39: /* FRINTZ */
10153     case 0x58: /* FRINTA */
10154     case 0x79: /* FRINTI */
10155         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
10156         break;
10157     case 0x59: /* FRINTX */
10158         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
10159         break;
10160     case 0x1e: /* FRINT32Z */
10161     case 0x5e: /* FRINT32X */
10162         gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
10163         break;
10164     case 0x1f: /* FRINT64Z */
10165     case 0x5f: /* FRINT64X */
10166         gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
10167         break;
10168     default:
10169         g_assert_not_reached();
10170     }
10171 }
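/*
 * tcg_gen_negsetcond_i64() computes -(rn cond 0), so e.g. CMGT yields
 * all-ones (-1) when rn > 0 and 0 otherwise, which is exactly the
 * AdvSIMD convention for comparison results.
 */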
10172 
10173 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
10174                                    bool is_scalar, bool is_u, bool is_q,
10175                                    int size, int rn, int rd)
10176 {
10177     bool is_double = (size == MO_64);
10178     TCGv_ptr fpst;
10179 
10180     if (!fp_access_check(s)) {
10181         return;
10182     }
10183 
10184     fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
10185 
10186     if (is_double) {
10187         TCGv_i64 tcg_op = tcg_temp_new_i64();
10188         TCGv_i64 tcg_zero = tcg_constant_i64(0);
10189         TCGv_i64 tcg_res = tcg_temp_new_i64();
10190         NeonGenTwoDoubleOpFn *genfn;
10191         bool swap = false;
10192         int pass;
10193 
10194         switch (opcode) {
10195         case 0x2e: /* FCMLT (zero) */
10196             swap = true;
10197             /* fall through */
10198         case 0x2c: /* FCMGT (zero) */
10199             genfn = gen_helper_neon_cgt_f64;
10200             break;
10201         case 0x2d: /* FCMEQ (zero) */
10202             genfn = gen_helper_neon_ceq_f64;
10203             break;
10204         case 0x6d: /* FCMLE (zero) */
10205             swap = true;
10206             /* fall through */
10207         case 0x6c: /* FCMGE (zero) */
10208             genfn = gen_helper_neon_cge_f64;
10209             break;
10210         default:
10211             g_assert_not_reached();
10212         }
10213 
10214         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10215             read_vec_element(s, tcg_op, rn, pass, MO_64);
10216             if (swap) {
10217                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
10218             } else {
10219                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
10220             }
10221             write_vec_element(s, tcg_res, rd, pass, MO_64);
10222         }
10223 
10224         clear_vec_high(s, !is_scalar, rd);
10225     } else {
10226         TCGv_i32 tcg_op = tcg_temp_new_i32();
10227         TCGv_i32 tcg_zero = tcg_constant_i32(0);
10228         TCGv_i32 tcg_res = tcg_temp_new_i32();
10229         NeonGenTwoSingleOpFn *genfn;
10230         bool swap = false;
10231         int pass, maxpasses;
10232 
10233         if (size == MO_16) {
10234             switch (opcode) {
10235             case 0x2e: /* FCMLT (zero) */
10236                 swap = true;
10237                 /* fall through */
10238             case 0x2c: /* FCMGT (zero) */
10239                 genfn = gen_helper_advsimd_cgt_f16;
10240                 break;
10241             case 0x2d: /* FCMEQ (zero) */
10242                 genfn = gen_helper_advsimd_ceq_f16;
10243                 break;
10244             case 0x6d: /* FCMLE (zero) */
10245                 swap = true;
10246                 /* fall through */
10247             case 0x6c: /* FCMGE (zero) */
10248                 genfn = gen_helper_advsimd_cge_f16;
10249                 break;
10250             default:
10251                 g_assert_not_reached();
10252             }
10253         } else {
10254             switch (opcode) {
10255             case 0x2e: /* FCMLT (zero) */
10256                 swap = true;
10257                 /* fall through */
10258             case 0x2c: /* FCMGT (zero) */
10259                 genfn = gen_helper_neon_cgt_f32;
10260                 break;
10261             case 0x2d: /* FCMEQ (zero) */
10262                 genfn = gen_helper_neon_ceq_f32;
10263                 break;
10264             case 0x6d: /* FCMLE (zero) */
10265                 swap = true;
10266                 /* fall through */
10267             case 0x6c: /* FCMGE (zero) */
10268                 genfn = gen_helper_neon_cge_f32;
10269                 break;
10270             default:
10271                 g_assert_not_reached();
10272             }
10273         }
10274 
10275         if (is_scalar) {
10276             maxpasses = 1;
10277         } else {
10278             int vector_size = 8 << is_q;
10279             maxpasses = vector_size >> size;
10280         }
10281 
10282         for (pass = 0; pass < maxpasses; pass++) {
10283             read_vec_element_i32(s, tcg_op, rn, pass, size);
10284             if (swap) {
10285                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
10286             } else {
10287                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
10288             }
10289             if (is_scalar) {
10290                 write_fp_sreg(s, rd, tcg_res);
10291             } else {
10292                 write_vec_element_i32(s, tcg_res, rd, pass, size);
10293             }
10294         }
10295 
10296         if (!is_scalar) {
10297             clear_vec_high(s, is_q, rd);
10298         }
10299     }
10300 }
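/*
 * The swap flag exploits the identity x < 0 == 0 > x: FCMLT and FCMLE
 * have no helpers of their own, so they reuse the FCMGT/FCMGE helpers
 * with the operands exchanged.
 */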
10301 
10302 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
10303                                     bool is_scalar, bool is_u, bool is_q,
10304                                     int size, int rn, int rd)
10305 {
10306     bool is_double = (size == 3);
10307     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10308 
10309     if (is_double) {
10310         TCGv_i64 tcg_op = tcg_temp_new_i64();
10311         TCGv_i64 tcg_res = tcg_temp_new_i64();
10312         int pass;
10313 
10314         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10315             read_vec_element(s, tcg_op, rn, pass, MO_64);
10316             switch (opcode) {
10317             case 0x3d: /* FRECPE */
10318                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
10319                 break;
10320             case 0x3f: /* FRECPX */
10321                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
10322                 break;
10323             case 0x7d: /* FRSQRTE */
10324                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
10325                 break;
10326             default:
10327                 g_assert_not_reached();
10328             }
10329             write_vec_element(s, tcg_res, rd, pass, MO_64);
10330         }
10331         clear_vec_high(s, !is_scalar, rd);
10332     } else {
10333         TCGv_i32 tcg_op = tcg_temp_new_i32();
10334         TCGv_i32 tcg_res = tcg_temp_new_i32();
10335         int pass, maxpasses;
10336 
10337         if (is_scalar) {
10338             maxpasses = 1;
10339         } else {
10340             maxpasses = is_q ? 4 : 2;
10341         }
10342 
10343         for (pass = 0; pass < maxpasses; pass++) {
10344             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10345 
10346             switch (opcode) {
10347             case 0x3c: /* URECPE */
10348                 gen_helper_recpe_u32(tcg_res, tcg_op);
10349                 break;
10350             case 0x3d: /* FRECPE */
10351                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
10352                 break;
10353             case 0x3f: /* FRECPX */
10354                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
10355                 break;
10356             case 0x7d: /* FRSQRTE */
10357                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
10358                 break;
10359             default:
10360                 g_assert_not_reached();
10361             }
10362 
10363             if (is_scalar) {
10364                 write_fp_sreg(s, rd, tcg_res);
10365             } else {
10366                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10367             }
10368         }
10369         if (!is_scalar) {
10370             clear_vec_high(s, is_q, rd);
10371         }
10372     }
10373 }
10374 
10375 static void handle_2misc_narrow(DisasContext *s, bool scalar,
10376                                 int opcode, bool u, bool is_q,
10377                                 int size, int rn, int rd)
10378 {
10379     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
10380      * in the source becomes a size element in the destination).
10381      */
10382     int pass;
10383     TCGv_i32 tcg_res[2];
10384     int destelt = is_q ? 2 : 0;
10385     int passes = scalar ? 1 : 2;
10386 
10387     if (scalar) {
10388         tcg_res[1] = tcg_constant_i32(0);
10389     }
10390 
10391     for (pass = 0; pass < passes; pass++) {
10392         TCGv_i64 tcg_op = tcg_temp_new_i64();
10393         NeonGenNarrowFn *genfn = NULL;
10394         NeonGenNarrowEnvFn *genenvfn = NULL;
10395 
10396         if (scalar) {
10397             read_vec_element(s, tcg_op, rn, pass, size + 1);
10398         } else {
10399             read_vec_element(s, tcg_op, rn, pass, MO_64);
10400         }
10401         tcg_res[pass] = tcg_temp_new_i32();
10402 
10403         switch (opcode) {
10404         case 0x12: /* XTN, SQXTUN */
10405         {
10406             static NeonGenNarrowFn * const xtnfns[3] = {
10407                 gen_helper_neon_narrow_u8,
10408                 gen_helper_neon_narrow_u16,
10409                 tcg_gen_extrl_i64_i32,
10410             };
10411             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
10412                 gen_helper_neon_unarrow_sat8,
10413                 gen_helper_neon_unarrow_sat16,
10414                 gen_helper_neon_unarrow_sat32,
10415             };
10416             if (u) {
10417                 genenvfn = sqxtunfns[size];
10418             } else {
10419                 genfn = xtnfns[size];
10420             }
10421             break;
10422         }
10423         case 0x14: /* SQXTN, UQXTN */
10424         {
10425             static NeonGenNarrowEnvFn * const fns[3][2] = {
10426                 { gen_helper_neon_narrow_sat_s8,
10427                   gen_helper_neon_narrow_sat_u8 },
10428                 { gen_helper_neon_narrow_sat_s16,
10429                   gen_helper_neon_narrow_sat_u16 },
10430                 { gen_helper_neon_narrow_sat_s32,
10431                   gen_helper_neon_narrow_sat_u32 },
10432             };
10433             genenvfn = fns[size][u];
10434             break;
10435         }
10436         case 0x16: /* FCVTN, FCVTN2 */
10437             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
10438             if (size == 2) {
10439                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, tcg_env);
10440             } else {
10441                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
10442                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
10443                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10444                 TCGv_i32 ahp = get_ahp_flag();
10445 
10446                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
10447                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
10448                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
10449                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
10450             }
10451             break;
10452         case 0x36: /* BFCVTN, BFCVTN2 */
10453             {
10454                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10455                 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
10456             }
10457             break;
10458         case 0x56:  /* FCVTXN, FCVTXN2 */
10459             /* 64 bit to 32 bit float conversion
10460              * with von Neumann rounding (round to odd)
10461              */
10462             assert(size == 2);
10463             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, tcg_env);
10464             break;
10465         default:
10466             g_assert_not_reached();
10467         }
10468 
10469         if (genfn) {
10470             genfn(tcg_res[pass], tcg_op);
10471         } else if (genenvfn) {
10472             genenvfn(tcg_res[pass], tcg_env, tcg_op);
10473         }
10474     }
10475 
10476     for (pass = 0; pass < 2; pass++) {
10477         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
10478     }
10479     clear_vec_high(s, is_q, rd);
10480 }
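/*
 * destelt selects where the narrowed results land: the base insns
 * (is_q == 0) write elements 0 and 1 and zero the high half, while the
 * "2" variants (is_q == 1) write elements 2 and 3, filling the upper
 * half of the destination and leaving the lower half intact.
 */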
10481 
10482 /* AdvSIMD scalar two reg misc
10483  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
10484  * +-----+---+-----------+------+-----------+--------+-----+------+------+
10485  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10486  * +-----+---+-----------+------+-----------+--------+-----+------+------+
10487  */
10488 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
10489 {
10490     int rd = extract32(insn, 0, 5);
10491     int rn = extract32(insn, 5, 5);
10492     int opcode = extract32(insn, 12, 5);
10493     int size = extract32(insn, 22, 2);
10494     bool u = extract32(insn, 29, 1);
10495     bool is_fcvt = false;
10496     int rmode;
10497     TCGv_i32 tcg_rmode;
10498     TCGv_ptr tcg_fpstatus;
10499 
10500     switch (opcode) {
10501     case 0x7: /* SQABS / SQNEG */
10502         break;
10503     case 0xa: /* CMLT */
10504         if (u) {
10505             unallocated_encoding(s);
10506             return;
10507         }
10508         /* fall through */
10509     case 0x8: /* CMGT, CMGE */
10510     case 0x9: /* CMEQ, CMLE */
10511     case 0xb: /* ABS, NEG */
10512         if (size != 3) {
10513             unallocated_encoding(s);
10514             return;
10515         }
10516         break;
10517     case 0x12: /* SQXTUN */
10518         if (!u) {
10519             unallocated_encoding(s);
10520             return;
10521         }
10522         /* fall through */
10523     case 0x14: /* SQXTN, UQXTN */
10524         if (size == 3) {
10525             unallocated_encoding(s);
10526             return;
10527         }
10528         if (!fp_access_check(s)) {
10529             return;
10530         }
10531         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
10532         return;
10533     case 0xc ... 0xf:
10534     case 0x16 ... 0x1d:
10535     case 0x1f:
10536         /* Floating point: U, size[1] and opcode indicate operation;
10537          * size[0] indicates single or double precision.
10538          */
10539         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10540         size = extract32(size, 0, 1) ? 3 : 2;
10541         switch (opcode) {
10542         case 0x2c: /* FCMGT (zero) */
10543         case 0x2d: /* FCMEQ (zero) */
10544         case 0x2e: /* FCMLT (zero) */
10545         case 0x6c: /* FCMGE (zero) */
10546         case 0x6d: /* FCMLE (zero) */
10547             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
10548             return;
10549         case 0x1d: /* SCVTF */
10550         case 0x5d: /* UCVTF */
10551         {
10552             bool is_signed = (opcode == 0x1d);
10553             if (!fp_access_check(s)) {
10554                 return;
10555             }
10556             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
10557             return;
10558         }
10559         case 0x3d: /* FRECPE */
10560         case 0x3f: /* FRECPX */
10561         case 0x7d: /* FRSQRTE */
10562             if (!fp_access_check(s)) {
10563                 return;
10564             }
10565             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
10566             return;
10567         case 0x1a: /* FCVTNS */
10568         case 0x1b: /* FCVTMS */
10569         case 0x3a: /* FCVTPS */
10570         case 0x3b: /* FCVTZS */
10571         case 0x5a: /* FCVTNU */
10572         case 0x5b: /* FCVTMU */
10573         case 0x7a: /* FCVTPU */
10574         case 0x7b: /* FCVTZU */
10575             is_fcvt = true;
10576             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10577             break;
10578         case 0x1c: /* FCVTAS */
10579         case 0x5c: /* FCVTAU */
10580             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
10581             is_fcvt = true;
10582             rmode = FPROUNDING_TIEAWAY;
10583             break;
10584         case 0x56: /* FCVTXN, FCVTXN2 */
10585             if (size == 2) {
10586                 unallocated_encoding(s);
10587                 return;
10588             }
10589             if (!fp_access_check(s)) {
10590                 return;
10591             }
10592             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
10593             return;
10594         default:
10595             unallocated_encoding(s);
10596             return;
10597         }
10598         break;
10599     default:
10600     case 0x3: /* USQADD / SUQADD */
10601         unallocated_encoding(s);
10602         return;
10603     }
10604 
10605     if (!fp_access_check(s)) {
10606         return;
10607     }
10608 
10609     if (is_fcvt) {
10610         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
10611         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
10612     } else {
10613         tcg_fpstatus = NULL;
10614         tcg_rmode = NULL;
10615     }
10616 
10617     if (size == 3) {
10618         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
10619         TCGv_i64 tcg_rd = tcg_temp_new_i64();
10620 
10621         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
10622         write_fp_dreg(s, rd, tcg_rd);
10623     } else {
10624         TCGv_i32 tcg_rn = tcg_temp_new_i32();
10625         TCGv_i32 tcg_rd = tcg_temp_new_i32();
10626 
10627         read_vec_element_i32(s, tcg_rn, rn, 0, size);
10628 
10629         switch (opcode) {
10630         case 0x7: /* SQABS, SQNEG */
10631         {
10632             NeonGenOneOpEnvFn *genfn;
10633             static NeonGenOneOpEnvFn * const fns[3][2] = {
10634                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10635                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10636                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
10637             };
10638             genfn = fns[size][u];
10639             genfn(tcg_rd, tcg_env, tcg_rn);
10640             break;
10641         }
10642         case 0x1a: /* FCVTNS */
10643         case 0x1b: /* FCVTMS */
10644         case 0x1c: /* FCVTAS */
10645         case 0x3a: /* FCVTPS */
10646         case 0x3b: /* FCVTZS */
10647             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10648                                  tcg_fpstatus);
10649             break;
10650         case 0x5a: /* FCVTNU */
10651         case 0x5b: /* FCVTMU */
10652         case 0x5c: /* FCVTAU */
10653         case 0x7a: /* FCVTPU */
10654         case 0x7b: /* FCVTZU */
10655             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10656                                  tcg_fpstatus);
10657             break;
10658         default:
10659             g_assert_not_reached();
10660         }
10661 
10662         write_fp_sreg(s, rd, tcg_rd);
10663     }
10664 
10665     if (is_fcvt) {
10666         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
10667     }
10668 }
10669 
10670 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
10671 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
10672                                  int immh, int immb, int opcode, int rn, int rd)
10673 {
10674     int size = 32 - clz32(immh) - 1;
10675     int immhb = immh << 3 | immb;
10676     int shift = 2 * (8 << size) - immhb;
10677     GVecGen2iFn *gvec_fn;
10678 
10679     if (extract32(immh, 3, 1) && !is_q) {
10680         unallocated_encoding(s);
10681         return;
10682     }
10683     tcg_debug_assert(size <= 3);
10684 
10685     if (!fp_access_check(s)) {
10686         return;
10687     }
10688 
10689     switch (opcode) {
10690     case 0x02: /* SSRA / USRA (accumulate) */
10691         gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
10692         break;
10693 
10694     case 0x08: /* SRI */
10695         gvec_fn = gen_gvec_sri;
10696         break;
10697 
10698     case 0x00: /* SSHR / USHR */
10699         if (is_u) {
10700             if (shift == 8 << size) {
10701                 /* Shift count the same size as element size produces zero.  */
10702                 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
10703                                      is_q ? 16 : 8, vec_full_reg_size(s), 0);
10704                 return;
10705             }
10706             gvec_fn = tcg_gen_gvec_shri;
10707         } else {
10708             /* Shift count the same size as element size produces all sign.  */
10709             if (shift == 8 << size) {
10710                 shift -= 1;
10711             }
10712             gvec_fn = tcg_gen_gvec_sari;
10713         }
10714         break;
10715 
10716     case 0x04: /* SRSHR / URSHR (rounding) */
10717         gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
10718         break;
10719 
10720     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10721         gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
10722         break;
10723 
10724     default:
10725         g_assert_not_reached();
10726     }
10727 
10728     gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
10729 }
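/*
 * E.g. for SSHR .8B with shift == 8 (esize == 8) the architecture
 * defines the result as a copy of the sign bit, which the clamp above
 * produces via an arithmetic shift by 7; the corresponding USHR case
 * yields zero and is handled by the dup_imm early return.
 */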
10730 
10731 /* SHL/SLI - Vector shift left */
10732 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10733                                  int immh, int immb, int opcode, int rn, int rd)
10734 {
10735     int size = 32 - clz32(immh) - 1;
10736     int immhb = immh << 3 | immb;
10737     int shift = immhb - (8 << size);
10738 
10739     /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10740     assert(size >= 0 && size <= 3);
10741 
10742     if (extract32(immh, 3, 1) && !is_q) {
10743         unallocated_encoding(s);
10744         return;
10745     }
10746 
10747     if (!fp_access_check(s)) {
10748         return;
10749     }
10750 
10751     if (insert) {
10752         gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
10753     } else {
10754         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10755     }
10756 }
10757 
10758 /* SSHLL/USHLL - Vector shift left with widening */
10759 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10760                                  int immh, int immb, int opcode, int rn, int rd)
10761 {
10762     int size = 32 - clz32(immh) - 1;
10763     int immhb = immh << 3 | immb;
10764     int shift = immhb - (8 << size);
10765     int dsize = 64;
10766     int esize = 8 << size;
10767     int elements = dsize/esize;
10768     TCGv_i64 tcg_rn = tcg_temp_new_i64();
10769     TCGv_i64 tcg_rd = tcg_temp_new_i64();
10770     int i;
10771 
10772     if (size >= 3) {
10773         unallocated_encoding(s);
10774         return;
10775     }
10776 
10777     if (!fp_access_check(s)) {
10778         return;
10779     }
10780 
10781     /* For the LL variants the store is larger than the load,
10782      * so if rd == rn we would overwrite parts of our input.
10783      * Load everything up front and use shifts in the main loop.
10784      */
10785     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10786 
10787     for (i = 0; i < elements; i++) {
10788         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
10789         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10790         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10791         write_vec_element(s, tcg_rd, rd, i, size + 1);
10792     }
10793     clear_vec_high(s, true, rd);
10794 }
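/*
 * Worked example (illustrative): USHLL V0.8H, V1.8B, #3 has size = 0,
 * so esize = 8 and elements = 8: each of the eight source bytes is
 * zero-extended to 16 bits and shifted left by 3, which is why the whole
 * 64-bit source must be read before any result is written when rd == rn.
 */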
10795 
10796 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10797 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10798                                  int immh, int immb, int opcode, int rn, int rd)
10799 {
10800     int immhb = immh << 3 | immb;
10801     int size = 32 - clz32(immh) - 1;
10802     int dsize = 64;
10803     int esize = 8 << size;
10804     int elements = dsize/esize;
10805     int shift = (2 * esize) - immhb;
10806     bool round = extract32(opcode, 0, 1);
10807     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10808     TCGv_i64 tcg_round;
10809     int i;
10810 
10811     if (extract32(immh, 3, 1)) {
10812         unallocated_encoding(s);
10813         return;
10814     }
10815 
10816     if (!fp_access_check(s)) {
10817         return;
10818     }
10819 
10820     tcg_rn = tcg_temp_new_i64();
10821     tcg_rd = tcg_temp_new_i64();
10822     tcg_final = tcg_temp_new_i64();
10823     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10824 
10825     if (round) {
10826         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
10827     } else {
10828         tcg_round = NULL;
10829     }
10830 
10831     for (i = 0; i < elements; i++) {
10832         read_vec_element(s, tcg_rn, rn, i, size+1);
10833         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10834                                 false, true, size+1, shift);
10835 
10836         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10837     }
10838 
10839     if (!is_q) {
10840         write_vec_element(s, tcg_final, rd, 0, MO_64);
10841     } else {
10842         write_vec_element(s, tcg_final, rd, 1, MO_64);
10843     }
10844 
10845     clear_vec_high(s, is_q, rd);
10846 }
10847 
10848 
10849 /* AdvSIMD shift by immediate
10850  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10851  * +---+---+---+-------------+------+------+--------+---+------+------+
10852  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10853  * +---+---+---+-------------+------+------+--------+---+------+------+
10854  */
10855 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10856 {
10857     int rd = extract32(insn, 0, 5);
10858     int rn = extract32(insn, 5, 5);
10859     int opcode = extract32(insn, 11, 5);
10860     int immb = extract32(insn, 16, 3);
10861     int immh = extract32(insn, 19, 4);
10862     bool is_u = extract32(insn, 29, 1);
10863     bool is_q = extract32(insn, 30, 1);
10864 
10865     /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
10866     assert(immh != 0);
10867 
10868     switch (opcode) {
10869     case 0x08: /* SRI */
10870         if (!is_u) {
10871             unallocated_encoding(s);
10872             return;
10873         }
10874         /* fall through */
10875     case 0x00: /* SSHR / USHR */
10876     case 0x02: /* SSRA / USRA (accumulate) */
10877     case 0x04: /* SRSHR / URSHR (rounding) */
10878     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10879         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10880         break;
10881     case 0x0a: /* SHL / SLI */
10882         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10883         break;
10884     case 0x10: /* SHRN */
10885     case 0x11: /* RSHRN / SQRSHRUN */
10886         if (is_u) {
10887             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10888                                    opcode, rn, rd);
10889         } else {
10890             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10891         }
10892         break;
10893     case 0x12: /* SQSHRN / UQSHRN */
10894     case 0x13: /* SQRSHRN / UQRSHRN */
10895         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10896                                opcode, rn, rd);
10897         break;
10898     case 0x14: /* SSHLL / USHLL */
10899         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10900         break;
10901     case 0x1c: /* SCVTF / UCVTF */
10902         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10903                                      opcode, rn, rd);
10904         break;
10905     case 0xc: /* SQSHLU */
10906         if (!is_u) {
10907             unallocated_encoding(s);
10908             return;
10909         }
10910         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10911         break;
10912     case 0xe: /* SQSHL, UQSHL */
10913         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10914         break;
10915     case 0x1f: /* FCVTZS / FCVTZU */
10916         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10917         return;
10918     default:
10919         unallocated_encoding(s);
10920         return;
10921     }
10922 }
10923 
10924 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
10925                                   int size, int rn, int rd)
10926 {
10927     /* Handle 2-reg-misc ops which are widening (so each size element
10928      * in the source becomes a 2*size element in the destination).
10929      * The only instruction like this is FCVTL.
10930      */
10931     int pass;
10932 
10933     if (size == 3) {
10934         /* 32 -> 64 bit fp conversion */
10935         TCGv_i64 tcg_res[2];
10936         int srcelt = is_q ? 2 : 0;
10937 
10938         for (pass = 0; pass < 2; pass++) {
10939             TCGv_i32 tcg_op = tcg_temp_new_i32();
10940             tcg_res[pass] = tcg_temp_new_i64();
10941 
10942             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
10943             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env);
10944         }
10945         for (pass = 0; pass < 2; pass++) {
10946             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10947         }
10948     } else {
10949         /* 16 -> 32 bit fp conversion */
10950         int srcelt = is_q ? 4 : 0;
10951         TCGv_i32 tcg_res[4];
10952         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10953         TCGv_i32 ahp = get_ahp_flag();
10954 
10955         for (pass = 0; pass < 4; pass++) {
10956             tcg_res[pass] = tcg_temp_new_i32();
10957 
10958             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
10959             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
10960                                            fpst, ahp);
10961         }
10962         for (pass = 0; pass < 4; pass++) {
10963             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10964         }
10965     }
10966 }
10967 
10968 static void handle_rev(DisasContext *s, int opcode, bool u,
10969                        bool is_q, int size, int rn, int rd)
10970 {
10971     int op = (opcode << 1) | u;
10972     int opsz = op + size;
10973     int grp_size = 3 - opsz;
10974     int dsize = is_q ? 128 : 64;
10975     int i;
10976 
10977     if (opsz >= 3) {
10978         unallocated_encoding(s);
10979         return;
10980     }
10981 
10982     if (!fp_access_check(s)) {
10983         return;
10984     }
10985 
10986     if (size == 0) {
10987         /* Special case bytes, use bswap op on each group of elements */
10988         int groups = dsize / (8 << grp_size);
10989 
10990         for (i = 0; i < groups; i++) {
10991             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
10992 
10993             read_vec_element(s, tcg_tmp, rn, i, grp_size);
10994             switch (grp_size) {
10995             case MO_16:
10996                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
10997                 break;
10998             case MO_32:
10999                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11000                 break;
11001             case MO_64:
11002                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11003                 break;
11004             default:
11005                 g_assert_not_reached();
11006             }
11007             write_vec_element(s, tcg_tmp, rd, i, grp_size);
11008         }
11009         clear_vec_high(s, is_q, rd);
11010     } else {
11011         int revmask = (1 << grp_size) - 1;
11012         int esize = 8 << size;
11013         int elements = dsize / esize;
11014         TCGv_i64 tcg_rn = tcg_temp_new_i64();
11015         TCGv_i64 tcg_rd[2];
11016 
11017         for (i = 0; i < 2; i++) {
11018             tcg_rd[i] = tcg_temp_new_i64();
11019             tcg_gen_movi_i64(tcg_rd[i], 0);
11020         }
11021 
11022         for (i = 0; i < elements; i++) {
11023             int e_rev = (i & 0xf) ^ revmask;
11024             int w = (e_rev * esize) / 64;
11025             int o = (e_rev * esize) % 64;
11026 
11027             read_vec_element(s, tcg_rn, rn, i, size);
11028             tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
11029         }
11030 
11031         for (i = 0; i < 2; i++) {
11032             write_vec_element(s, tcg_rd[i], rd, i, MO_64);
11033         }
11034         clear_vec_high(s, true, rd);
11035     }
11036 }
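/*
 * Worked example (illustrative): REV64 with 16-bit elements (op == 0,
 * size == 1) has grp_size = 2 and revmask = 3, so each element index is
 * XORed with 3, reversing the order of the four halfwords within every
 * 64-bit group.
 */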
11037 
11038 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11039                                   bool is_q, int size, int rn, int rd)
11040 {
11041     /* Implement the pairwise operations from 2-misc:
11042      * SADDLP, UADDLP, SADALP, UADALP.
11043      * These all add pairs of elements in the input to produce a
11044      * double-width result element in the output (possibly accumulating).
11045      */
11046     bool accum = (opcode == 0x6);
11047     int maxpass = is_q ? 2 : 1;
11048     int pass;
11049     TCGv_i64 tcg_res[2];
11050 
11051     if (size == 2) {
11052         /* 32 + 32 -> 64 op */
11053         MemOp memop = size + (u ? 0 : MO_SIGN);
11054 
11055         for (pass = 0; pass < maxpass; pass++) {
11056             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11057             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11058 
11059             tcg_res[pass] = tcg_temp_new_i64();
11060 
11061             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11062             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11063             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11064             if (accum) {
11065                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
11066                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11067             }
11068         }
11069     } else {
11070         for (pass = 0; pass < maxpass; pass++) {
11071             TCGv_i64 tcg_op = tcg_temp_new_i64();
11072             NeonGenOne64OpFn *genfn;
11073             static NeonGenOne64OpFn * const fns[2][2] = {
11074                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
11075                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
11076             };
11077 
11078             genfn = fns[size][u];
11079 
11080             tcg_res[pass] = tcg_temp_new_i64();
11081 
11082             read_vec_element(s, tcg_op, rn, pass, MO_64);
11083             genfn(tcg_res[pass], tcg_op);
11084 
11085             if (accum) {
11086                 read_vec_element(s, tcg_op, rd, pass, MO_64);
11087                 if (size == 0) {
11088                     gen_helper_neon_addl_u16(tcg_res[pass],
11089                                              tcg_res[pass], tcg_op);
11090                 } else {
11091                     gen_helper_neon_addl_u32(tcg_res[pass],
11092                                              tcg_res[pass], tcg_op);
11093                 }
11094             }
11095         }
11096     }
11097     if (!is_q) {
11098         tcg_res[1] = tcg_constant_i64(0);
11099     }
11100     for (pass = 0; pass < 2; pass++) {
11101         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11102     }
11103 }
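/*
 * E.g. SADDLP V0.4H, V1.8B sign-extends each pair of adjacent source
 * bytes and adds them, producing four 16-bit sums; the accumulating
 * SADALP/UADALP forms then add those sums into the existing destination
 * elements.
 */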
11104 
11105 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11106 {
11107     /* Implement SHLL and SHLL2 */
11108     int pass;
11109     int part = is_q ? 2 : 0;
11110     TCGv_i64 tcg_res[2];
11111 
11112     for (pass = 0; pass < 2; pass++) {
11113         static NeonGenWidenFn * const widenfns[3] = {
11114             gen_helper_neon_widen_u8,
11115             gen_helper_neon_widen_u16,
11116             tcg_gen_extu_i32_i64,
11117         };
11118         NeonGenWidenFn *widenfn = widenfns[size];
11119         TCGv_i32 tcg_op = tcg_temp_new_i32();
11120 
11121         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11122         tcg_res[pass] = tcg_temp_new_i64();
11123         widenfn(tcg_res[pass], tcg_op);
11124         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11125     }
11126 
11127     for (pass = 0; pass < 2; pass++) {
11128         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11129     }
11130 }
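/*
 * E.g. SHLL V0.8H, V1.8B, #8: each byte is zero-extended to 16 bits and
 * the packed 64-bit result is shifted left by esize (8). The shift
 * cannot carry between lanes because the top byte of every widened lane
 * is zero, so each source byte simply lands in the high half of its
 * result element.
 */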
11131 
11132 /* AdvSIMD two reg misc
11133  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
11134  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11135  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11136  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11137  */
11138 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11139 {
11140     int size = extract32(insn, 22, 2);
11141     int opcode = extract32(insn, 12, 5);
11142     bool u = extract32(insn, 29, 1);
11143     bool is_q = extract32(insn, 30, 1);
11144     int rn = extract32(insn, 5, 5);
11145     int rd = extract32(insn, 0, 5);
11146     bool need_fpstatus = false;
11147     int rmode = -1;
11148     TCGv_i32 tcg_rmode;
11149     TCGv_ptr tcg_fpstatus;
11150 
11151     switch (opcode) {
11152     case 0x0: /* REV64, REV32 */
11153     case 0x1: /* REV16 */
11154         handle_rev(s, opcode, u, is_q, size, rn, rd);
11155         return;
11156     case 0x5: /* CNT, NOT, RBIT */
11157         if (u && size == 0) {
11158             /* NOT */
11159             break;
11160         } else if (u && size == 1) {
11161             /* RBIT */
11162             break;
11163         } else if (!u && size == 0) {
11164             /* CNT */
11165             break;
11166         }
11167         unallocated_encoding(s);
11168         return;
11169     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11170     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11171         if (size == 3) {
11172             unallocated_encoding(s);
11173             return;
11174         }
11175         if (!fp_access_check(s)) {
11176             return;
11177         }
11178 
11179         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
11180         return;
11181     case 0x4: /* CLS, CLZ */
11182         if (size == 3) {
11183             unallocated_encoding(s);
11184             return;
11185         }
11186         break;
11187     case 0x2: /* SADDLP, UADDLP */
11188     case 0x6: /* SADALP, UADALP */
11189         if (size == 3) {
11190             unallocated_encoding(s);
11191             return;
11192         }
11193         if (!fp_access_check(s)) {
11194             return;
11195         }
11196         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
11197         return;
11198     case 0x13: /* SHLL, SHLL2 */
11199         if (u == 0 || size == 3) {
11200             unallocated_encoding(s);
11201             return;
11202         }
11203         if (!fp_access_check(s)) {
11204             return;
11205         }
11206         handle_shll(s, is_q, size, rn, rd);
11207         return;
11208     case 0xa: /* CMLT */
11209         if (u == 1) {
11210             unallocated_encoding(s);
11211             return;
11212         }
11213         /* fall through */
11214     case 0x8: /* CMGT, CMGE */
11215     case 0x9: /* CMEQ, CMLE */
11216     case 0xb: /* ABS, NEG */
11217         if (size == 3 && !is_q) {
11218             unallocated_encoding(s);
11219             return;
11220         }
11221         break;
11222     case 0x7: /* SQABS, SQNEG */
11223         if (size == 3 && !is_q) {
11224             unallocated_encoding(s);
11225             return;
11226         }
11227         break;
11228     case 0xc ... 0xf:
11229     case 0x16 ... 0x1f:
11230     {
11231         /* Floating point: U, size[1] and opcode indicate operation;
11232          * size[0] indicates single or double precision.
11233          */
11234         int is_double = extract32(size, 0, 1);
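              /*
               * e.g. FNEG (vector) has U == 1, size == 1x and opcode == 0xf,
               * combining to 0xf | (1 << 5) | (1 << 6) == 0x6f below.
               */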
11235         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
11236         size = is_double ? 3 : 2;
11237         switch (opcode) {
11238         case 0x2f: /* FABS */
11239         case 0x6f: /* FNEG */
11240             if (size == 3 && !is_q) {
11241                 unallocated_encoding(s);
11242                 return;
11243             }
11244             break;
11245         case 0x1d: /* SCVTF */
11246         case 0x5d: /* UCVTF */
11247         {
11248             bool is_signed = (opcode == 0x1d);
11249             int elements = is_double ? 2 : (is_q ? 4 : 2);
11250             if (is_double && !is_q) {
11251                 unallocated_encoding(s);
11252                 return;
11253             }
11254             if (!fp_access_check(s)) {
11255                 return;
11256             }
11257             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
11258             return;
11259         }
11260         case 0x2c: /* FCMGT (zero) */
11261         case 0x2d: /* FCMEQ (zero) */
11262         case 0x2e: /* FCMLT (zero) */
11263         case 0x6c: /* FCMGE (zero) */
11264         case 0x6d: /* FCMLE (zero) */
11265             if (size == 3 && !is_q) {
11266                 unallocated_encoding(s);
11267                 return;
11268             }
11269             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
11270             return;
11271         case 0x7f: /* FSQRT */
11272             if (size == 3 && !is_q) {
11273                 unallocated_encoding(s);
11274                 return;
11275             }
11276             break;
11277         case 0x1a: /* FCVTNS */
11278         case 0x1b: /* FCVTMS */
11279         case 0x3a: /* FCVTPS */
11280         case 0x3b: /* FCVTZS */
11281         case 0x5a: /* FCVTNU */
11282         case 0x5b: /* FCVTMU */
11283         case 0x7a: /* FCVTPU */
11284         case 0x7b: /* FCVTZU */
11285             need_fpstatus = true;
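                  /*
                   * Bits [5] and [0] of the combined opcode select the
                   * rounding mode: 0 TIEEVEN, 1 POSINF, 2 NEGINF, 3 ZERO.
                   */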
11286             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
11287             if (size == 3 && !is_q) {
11288                 unallocated_encoding(s);
11289                 return;
11290             }
11291             break;
11292         case 0x5c: /* FCVTAU */
11293         case 0x1c: /* FCVTAS */
11294             need_fpstatus = true;
11295             rmode = FPROUNDING_TIEAWAY;
11296             if (size == 3 && !is_q) {
11297                 unallocated_encoding(s);
11298                 return;
11299             }
11300             break;
11301         case 0x3c: /* URECPE */
11302             if (size == 3) {
11303                 unallocated_encoding(s);
11304                 return;
11305             }
11306             /* fall through */
11307         case 0x3d: /* FRECPE */
11308         case 0x7d: /* FRSQRTE */
11309             if (size == 3 && !is_q) {
11310                 unallocated_encoding(s);
11311                 return;
11312             }
11313             if (!fp_access_check(s)) {
11314                 return;
11315             }
11316             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
11317             return;
11318         case 0x56: /* FCVTXN, FCVTXN2 */
11319             if (size == 2) {
11320                 unallocated_encoding(s);
11321                 return;
11322             }
11323             /* fall through */
11324         case 0x16: /* FCVTN, FCVTN2 */
11325             /* handle_2misc_narrow does a 2*size -> size operation, but these
11326              * instructions encode the source size rather than dest size.
11327              */
11328             if (!fp_access_check(s)) {
11329                 return;
11330             }
11331             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
11332             return;
11333         case 0x36: /* BFCVTN, BFCVTN2 */
11334             if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
11335                 unallocated_encoding(s);
11336                 return;
11337             }
11338             if (!fp_access_check(s)) {
11339                 return;
11340             }
11341             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
11342             return;
11343         case 0x17: /* FCVTL, FCVTL2 */
11344             if (!fp_access_check(s)) {
11345                 return;
11346             }
11347             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
11348             return;
11349         case 0x18: /* FRINTN */
11350         case 0x19: /* FRINTM */
11351         case 0x38: /* FRINTP */
11352         case 0x39: /* FRINTZ */
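                  /* Rounding mode encoded in bits [5] and [0], as for FCVT* above. */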
11353             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
11354             /* fall through */
11355         case 0x59: /* FRINTX */
11356         case 0x79: /* FRINTI */
11357             need_fpstatus = true;
11358             if (size == 3 && !is_q) {
11359                 unallocated_encoding(s);
11360                 return;
11361             }
11362             break;
11363         case 0x58: /* FRINTA */
11364             rmode = FPROUNDING_TIEAWAY;
11365             need_fpstatus = true;
11366             if (size == 3 && !is_q) {
11367                 unallocated_encoding(s);
11368                 return;
11369             }
11370             break;
11371         case 0x7c: /* URSQRTE */
11372             if (size == 3) {
11373                 unallocated_encoding(s);
11374                 return;
11375             }
11376             break;
11377         case 0x1e: /* FRINT32Z */
11378         case 0x1f: /* FRINT64Z */
11379             rmode = FPROUNDING_ZERO;
11380             /* fall through */
11381         case 0x5e: /* FRINT32X */
11382         case 0x5f: /* FRINT64X */
11383             need_fpstatus = true;
11384             if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
11385                 unallocated_encoding(s);
11386                 return;
11387             }
11388             break;
11389         default:
11390             unallocated_encoding(s);
11391             return;
11392         }
11393         break;
11394     }
11395     default:
11396     case 0x3: /* SUQADD, USQADD */
11397         unallocated_encoding(s);
11398         return;
11399     }
11400 
11401     if (!fp_access_check(s)) {
11402         return;
11403     }
11404 
11405     if (need_fpstatus || rmode >= 0) {
11406         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
11407     } else {
11408         tcg_fpstatus = NULL;
11409     }
11410     if (rmode >= 0) {
11411         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
11412     } else {
11413         tcg_rmode = NULL;
11414     }
11415 
11416     switch (opcode) {
11417     case 0x5:
11418         if (u && size == 0) { /* NOT */
11419             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
11420             return;
11421         }
11422         break;
11423     case 0x8: /* CMGT, CMGE */
11424         if (u) {
11425             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
11426         } else {
11427             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
11428         }
11429         return;
11430     case 0x9: /* CMEQ, CMLE */
11431         if (u) {
11432             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
11433         } else {
11434             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
11435         }
11436         return;
11437     case 0xa: /* CMLT */
11438         gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
11439         return;
11440     case 0xb:
11441         if (u) { /* ABS, NEG */
11442             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
11443         } else {
11444             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
11445         }
11446         return;
11447     }
11448 
11449     if (size == 3) {
11450         /* All 64-bit element operations can be shared with scalar 2misc */
11451         int pass;
11452 
11453         /* Coverity claims (size == 3 && !is_q) has been eliminated
11454          * from all paths leading to here.
11455          */
11456         tcg_debug_assert(is_q);
11457         for (pass = 0; pass < 2; pass++) {
11458             TCGv_i64 tcg_op = tcg_temp_new_i64();
11459             TCGv_i64 tcg_res = tcg_temp_new_i64();
11460 
11461             read_vec_element(s, tcg_op, rn, pass, MO_64);
11462 
11463             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
11464                             tcg_rmode, tcg_fpstatus);
11465 
11466             write_vec_element(s, tcg_res, rd, pass, MO_64);
11467         }
11468     } else {
11469         int pass;
11470 
11471         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11472             TCGv_i32 tcg_op = tcg_temp_new_i32();
11473             TCGv_i32 tcg_res = tcg_temp_new_i32();
11474 
11475             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
11476 
11477             if (size == 2) {
11478                 /* Special cases for 32 bit elements */
11479                 switch (opcode) {
11480                 case 0x4: /* CLS, CLZ */
11481                     if (u) {
11482                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
11483                     } else {
11484                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
11485                     }
11486                     break;
11487                 case 0x7: /* SQABS, SQNEG */
11488                     if (u) {
11489                         gen_helper_neon_qneg_s32(tcg_res, tcg_env, tcg_op);
11490                     } else {
11491                         gen_helper_neon_qabs_s32(tcg_res, tcg_env, tcg_op);
11492                     }
11493                     break;
11494                 case 0x2f: /* FABS */
11495                     gen_vfp_abss(tcg_res, tcg_op);
11496                     break;
11497                 case 0x6f: /* FNEG */
11498                     gen_vfp_negs(tcg_res, tcg_op);
11499                     break;
11500                 case 0x7f: /* FSQRT */
11501                     gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
11502                     break;
11503                 case 0x1a: /* FCVTNS */
11504                 case 0x1b: /* FCVTMS */
11505                 case 0x1c: /* FCVTAS */
11506                 case 0x3a: /* FCVTPS */
11507                 case 0x3b: /* FCVTZS */
11508                     gen_helper_vfp_tosls(tcg_res, tcg_op,
11509                                          tcg_constant_i32(0), tcg_fpstatus);
11510                     break;
11511                 case 0x5a: /* FCVTNU */
11512                 case 0x5b: /* FCVTMU */
11513                 case 0x5c: /* FCVTAU */
11514                 case 0x7a: /* FCVTPU */
11515                 case 0x7b: /* FCVTZU */
11516                     gen_helper_vfp_touls(tcg_res, tcg_op,
11517                                          tcg_constant_i32(0), tcg_fpstatus);
11518                     break;
11519                 case 0x18: /* FRINTN */
11520                 case 0x19: /* FRINTM */
11521                 case 0x38: /* FRINTP */
11522                 case 0x39: /* FRINTZ */
11523                 case 0x58: /* FRINTA */
11524                 case 0x79: /* FRINTI */
11525                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
11526                     break;
11527                 case 0x59: /* FRINTX */
11528                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
11529                     break;
11530                 case 0x7c: /* URSQRTE */
11531                     gen_helper_rsqrte_u32(tcg_res, tcg_op);
11532                     break;
11533                 case 0x1e: /* FRINT32Z */
11534                 case 0x5e: /* FRINT32X */
11535                     gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
11536                     break;
11537                 case 0x1f: /* FRINT64Z */
11538                 case 0x5f: /* FRINT64X */
11539                     gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
11540                     break;
11541                 default:
11542                     g_assert_not_reached();
11543                 }
11544             } else {
11545                 /* Use helpers for 8 and 16 bit elements */
11546                 switch (opcode) {
11547                 case 0x5: /* CNT, RBIT */
11548                     /* For these two insns size is part of the opcode specifier
11549                      * (handled earlier); they always operate on byte elements.
11550                      */
11551                     if (u) {
11552                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
11553                     } else {
11554                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
11555                     }
11556                     break;
11557                 case 0x7: /* SQABS, SQNEG */
11558                 {
11559                     NeonGenOneOpEnvFn *genfn;
11560                     static NeonGenOneOpEnvFn * const fns[2][2] = {
11561                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
11562                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
11563                     };
11564                     genfn = fns[size][u];
11565                     genfn(tcg_res, tcg_env, tcg_op);
11566                     break;
11567                 }
11568                 case 0x4: /* CLS, CLZ */
11569                     if (u) {
11570                         if (size == 0) {
11571                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
11572                         } else {
11573                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
11574                         }
11575                     } else {
11576                         if (size == 0) {
11577                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
11578                         } else {
11579                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
11580                         }
11581                     }
11582                     break;
11583                 default:
11584                     g_assert_not_reached();
11585                 }
11586             }
11587 
11588             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11589         }
11590     }
11591     clear_vec_high(s, is_q, rd);
11592 
11593     if (tcg_rmode) {
11594         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
11595     }
11596 }
11597 
11598 /* AdvSIMD [scalar] two register miscellaneous (FP16)
11599  *
11600  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
11601  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
11602  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11603  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
11604  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
11605  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
11606  *
11607  * This covers two groups, with scalar access selected by bit 28.
11608  * Several of the instructions (the float-to-integral ones) exist
11609  * only in the vector form and are unallocated for the scalar
11610  * decode. Also, in the scalar decode Q is always 1.
11611  */
11612 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
11613 {
11614     int fpop, opcode, a, u;
11615     int rn, rd;
11616     bool is_q;
11617     bool is_scalar;
11618     bool only_in_vector = false;
11619 
11620     int pass;
11621     TCGv_i32 tcg_rmode = NULL;
11622     TCGv_ptr tcg_fpstatus = NULL;
11623     bool need_fpst = true;
11624     int rmode = -1;
11625 
11626     if (!dc_isar_feature(aa64_fp16, s)) {
11627         unallocated_encoding(s);
11628         return;
11629     }
11630 
11631     rd = extract32(insn, 0, 5);
11632     rn = extract32(insn, 5, 5);
11633 
11634     a = extract32(insn, 23, 1);
11635     u = extract32(insn, 29, 1);
11636     is_scalar = extract32(insn, 28, 1);
11637     is_q = extract32(insn, 30, 1);
11638 
11639     opcode = extract32(insn, 12, 5);
11640     fpop = deposit32(opcode, 5, 1, a);
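          /*
           * Pack a and u around the opcode to form a single decode value,
           * mirroring the opcode/size[1]/U packing of the non-FP16 group.
           */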
11641     fpop = deposit32(fpop, 6, 1, u);
11642 
11643     switch (fpop) {
11644     case 0x1d: /* SCVTF */
11645     case 0x5d: /* UCVTF */
11646     {
11647         int elements;
11648 
11649         if (is_scalar) {
11650             elements = 1;
11651         } else {
11652             elements = (is_q ? 8 : 4);
11653         }
11654 
11655         if (!fp_access_check(s)) {
11656             return;
11657         }
11658         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
11659         return;
11660     }
11661     break;
11663     case 0x2d: /* FCMEQ (zero) */
11664     case 0x2e: /* FCMLT (zero) */
11665     case 0x6c: /* FCMGE (zero) */
11666     case 0x6d: /* FCMLE (zero) */
11667         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
11668         return;
11669     case 0x3d: /* FRECPE */
11670     case 0x3f: /* FRECPX */
11671         break;
11672     case 0x18: /* FRINTN */
11673         only_in_vector = true;
11674         rmode = FPROUNDING_TIEEVEN;
11675         break;
11676     case 0x19: /* FRINTM */
11677         only_in_vector = true;
11678         rmode = FPROUNDING_NEGINF;
11679         break;
11680     case 0x38: /* FRINTP */
11681         only_in_vector = true;
11682         rmode = FPROUNDING_POSINF;
11683         break;
11684     case 0x39: /* FRINTZ */
11685         only_in_vector = true;
11686         rmode = FPROUNDING_ZERO;
11687         break;
11688     case 0x58: /* FRINTA */
11689         only_in_vector = true;
11690         rmode = FPROUNDING_TIEAWAY;
11691         break;
11692     case 0x59: /* FRINTX */
11693     case 0x79: /* FRINTI */
11694         only_in_vector = true;
11695         /* current rounding mode */
11696         break;
11697     case 0x1a: /* FCVTNS */
11698         rmode = FPROUNDING_TIEEVEN;
11699         break;
11700     case 0x1b: /* FCVTMS */
11701         rmode = FPROUNDING_NEGINF;
11702         break;
11703     case 0x1c: /* FCVTAS */
11704         rmode = FPROUNDING_TIEAWAY;
11705         break;
11706     case 0x3a: /* FCVTPS */
11707         rmode = FPROUNDING_POSINF;
11708         break;
11709     case 0x3b: /* FCVTZS */
11710         rmode = FPROUNDING_ZERO;
11711         break;
11712     case 0x5a: /* FCVTNU */
11713         rmode = FPROUNDING_TIEEVEN;
11714         break;
11715     case 0x5b: /* FCVTMU */
11716         rmode = FPROUNDING_NEGINF;
11717         break;
11718     case 0x5c: /* FCVTAU */
11719         rmode = FPROUNDING_TIEAWAY;
11720         break;
11721     case 0x7a: /* FCVTPU */
11722         rmode = FPROUNDING_POSINF;
11723         break;
11724     case 0x7b: /* FCVTZU */
11725         rmode = FPROUNDING_ZERO;
11726         break;
11727     case 0x2f: /* FABS */
11728     case 0x6f: /* FNEG */
11729         need_fpst = false;
11730         break;
11731     case 0x7d: /* FRSQRTE */
11732     case 0x7f: /* FSQRT (vector) */
11733         break;
11734     default:
11735         unallocated_encoding(s);
11736         return;
11737     }
11738
11740     /* Check additional constraints for the scalar encoding */
11741     if (is_scalar) {
11742         if (!is_q) {
11743             unallocated_encoding(s);
11744             return;
11745         }
11746         /* FRINTxx is only in the vector form */
11747         if (only_in_vector) {
11748             unallocated_encoding(s);
11749             return;
11750         }
11751     }
11752 
11753     if (!fp_access_check(s)) {
11754         return;
11755     }
11756 
11757     if (rmode >= 0 || need_fpst) {
11758         tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
11759     }
11760 
11761     if (rmode >= 0) {
11762         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
11763     }
11764 
11765     if (is_scalar) {
11766         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
11767         TCGv_i32 tcg_res = tcg_temp_new_i32();
11768 
11769         switch (fpop) {
11770         case 0x1a: /* FCVTNS */
11771         case 0x1b: /* FCVTMS */
11772         case 0x1c: /* FCVTAS */
11773         case 0x3a: /* FCVTPS */
11774         case 0x3b: /* FCVTZS */
11775             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
11776             break;
11777         case 0x3d: /* FRECPE */
11778             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
11779             break;
11780         case 0x3f: /* FRECPX */
11781             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
11782             break;
11783         case 0x5a: /* FCVTNU */
11784         case 0x5b: /* FCVTMU */
11785         case 0x5c: /* FCVTAU */
11786         case 0x7a: /* FCVTPU */
11787         case 0x7b: /* FCVTZU */
11788             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
11789             break;
11790         case 0x6f: /* FNEG */
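                  /* Negate by flipping the half-precision sign bit. */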
11791             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
11792             break;
11793         case 0x7d: /* FRSQRTE */
11794             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
11795             break;
11796         default:
11797             g_assert_not_reached();
11798         }
11799 
11800         /* limit any sign extension going on */
11801         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
11802         write_fp_sreg(s, rd, tcg_res);
11803     } else {
11804         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
11805             TCGv_i32 tcg_op = tcg_temp_new_i32();
11806             TCGv_i32 tcg_res = tcg_temp_new_i32();
11807 
11808             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
11809 
11810             switch (fpop) {
11811             case 0x1a: /* FCVTNS */
11812             case 0x1b: /* FCVTMS */
11813             case 0x1c: /* FCVTAS */
11814             case 0x3a: /* FCVTPS */
11815             case 0x3b: /* FCVTZS */
11816                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
11817                 break;
11818             case 0x3d: /* FRECPE */
11819                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
11820                 break;
11821             case 0x5a: /* FCVTNU */
11822             case 0x5b: /* FCVTMU */
11823             case 0x5c: /* FCVTAU */
11824             case 0x7a: /* FCVTPU */
11825             case 0x7b: /* FCVTZU */
11826                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
11827                 break;
11828             case 0x18: /* FRINTN */
11829             case 0x19: /* FRINTM */
11830             case 0x38: /* FRINTP */
11831             case 0x39: /* FRINTZ */
11832             case 0x58: /* FRINTA */
11833             case 0x79: /* FRINTI */
11834                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
11835                 break;
11836             case 0x59: /* FRINTX */
11837                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
11838                 break;
11839             case 0x2f: /* FABS */
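                      /* Absolute value: clear the half-precision sign bit. */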
11840                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
11841                 break;
11842             case 0x6f: /* FNEG */
11843                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
11844                 break;
11845             case 0x7d: /* FRSQRTE */
11846                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
11847                 break;
11848             case 0x7f: /* FSQRT */
11849                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
11850                 break;
11851             default:
11852                 g_assert_not_reached();
11853             }
11854 
11855             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11856         }
11857 
11858         clear_vec_high(s, is_q, rd);
11859     }
11860 
11861     if (tcg_rmode) {
11862         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
11863     }
11864 }
11865 
11866 /* C3.6 Data processing - SIMD, inc Crypto
11867  *
11868  * As the decode gets a little complex we are using a table based
11869  * approach for this part of the decode.
11870  */
11871 static const AArch64DecodeTable data_proc_simd[] = {
11872     /* pattern  ,  mask     ,  fn                        */
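          /*
           * lookup_disas_fn() returns the first entry for which
           * (insn & mask) == pattern; the all-zeroes entry ends the table.
           */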
11873     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
11874     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
11875     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
11876     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
11877     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
11878     { 0x0e000000, 0xbf208c00, disas_simd_tb },
11879     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
11880     { 0x2e000000, 0xbf208400, disas_simd_ext },
11881     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
11882     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
11883     { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
11884     { 0x00000000, 0x00000000, NULL }
11885 };
11886 
11887 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
11888 {
11889     /* Note that this is called with all non-FP cases from
11890      * table C3-6 so it must UNDEF for entries not specifically
11891      * allocated to instructions in that table.
11892      */
11893     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
11894     if (fn) {
11895         fn(s, insn);
11896     } else {
11897         unallocated_encoding(s);
11898     }
11899 }
11900 
11901 /* C3.6 Data processing - SIMD and floating point */
11902 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
11903 {
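          /* Bit 28 set with bit 30 clear selects the scalar FP group. */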
11904     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
11905         disas_data_proc_fp(s, insn);
11906     } else {
11907         /* SIMD, including crypto */
11908         disas_data_proc_simd(s, insn);
11909     }
11910 }
11911 
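      /*
       * trans_OK and trans_FAIL back the generated SME FA64 decoder:
       * OK accepts an insn as streaming-compatible, while FAIL marks it
       * non-streaming so that a later access check can raise the trap.
       */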
11912 static bool trans_OK(DisasContext *s, arg_OK *a)
11913 {
11914     return true;
11915 }
11916 
11917 static bool trans_FAIL(DisasContext *s, arg_OK *a)
11918 {
11919     s->is_nonstreaming = true;
11920     return true;
11921 }
11922 
11923 /**
11924  * btype_destination_ok:
11925  * @insn: The instruction at the branch destination
11926  * @bt: SCTLR_ELx.BT
11927  * @btype: PSTATE.BTYPE, and is non-zero
11928  *
11929  * On a guarded page, there are a limited number of insns
11930  * that may be present at the branch target:
11931  *   - branch target identifiers,
11932  *   - PACIASP, PACIBSP,
11933  *   - BRK insn,
11934  *   - HLT insn.
11935  * Anything else causes a Branch Target Exception.
11936  *
11937  * Return true if the branch is compatible, false to raise BTITRAP.
11938  */
11939 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
11940 {
11941     if ((insn & 0xfffff01fu) == 0xd503201fu) {
11942         /* HINT space */
11943         switch (extract32(insn, 5, 7)) {
11944         case 0b011001: /* PACIASP */
11945         case 0b011011: /* PACIBSP */
11946             /*
11947              * If SCTLR_ELx.BT, then PACI*SP are not compatible
11948              * with btype == 3.  Otherwise all btype are ok.
11949              */
11950             return !bt || btype != 3;
11951         case 0b100000: /* BTI */
11952             /* Not compatible with any btype.  */
11953             return false;
11954         case 0b100010: /* BTI c */
11955             /* Not compatible with btype == 3 */
11956             return btype != 3;
11957         case 0b100100: /* BTI j */
11958             /* Not compatible with btype == 2 */
11959             return btype != 2;
11960         case 0b100110: /* BTI jc */
11961             /* Compatible with any btype.  */
11962             return true;
11963         }
11964     } else {
11965         switch (insn & 0xffe0001fu) {
11966         case 0xd4200000u: /* BRK */
11967         case 0xd4400000u: /* HLT */
11968             /* Give priority to the breakpoint exception.  */
11969             return true;
11970         }
11971     }
11972     return false;
11973 }
11974 
11975 /* C3.1 A64 instruction index by encoding */
11976 static void disas_a64_legacy(DisasContext *s, uint32_t insn)
11977 {
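          /* Dispatch on the op0 field, insn[28:25]. */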
11978     switch (extract32(insn, 25, 4)) {
11979     case 0x5:
11980     case 0xd:      /* Data processing - register */
11981         disas_data_proc_reg(s, insn);
11982         break;
11983     case 0x7:
11984     case 0xf:      /* Data processing - SIMD and floating point */
11985         disas_data_proc_simd_fp(s, insn);
11986         break;
11987     default:
11988         unallocated_encoding(s);
11989         break;
11990     }
11991 }
11992 
11993 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
11994                                           CPUState *cpu)
11995 {
11996     DisasContext *dc = container_of(dcbase, DisasContext, base);
11997     CPUARMState *env = cpu_env(cpu);
11998     ARMCPU *arm_cpu = env_archcpu(env);
11999     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
12000     int bound, core_mmu_idx;
12001 
12002     dc->isar = &arm_cpu->isar;
12003     dc->condjmp = 0;
12004     dc->pc_save = dc->base.pc_first;
12005     dc->aarch64 = true;
12006     dc->thumb = false;
12007     dc->sctlr_b = 0;
12008     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
12009     dc->condexec_mask = 0;
12010     dc->condexec_cond = 0;
12011     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
12012     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
12013     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
12014     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
12015     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
12016     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx, false);
12017 #if !defined(CONFIG_USER_ONLY)
12018     dc->user = (dc->current_el == 0);
12019 #endif
12020     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
12021     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
12022     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
12023     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
12024     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
12025     dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
12026     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
12027     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
12028     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
12029     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
12030     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
12031     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
12032     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
12033     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
12034     dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
12035     dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
12036     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
12037     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
12038     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
12039     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
12040     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
12041     dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
12042     dc->nv = EX_TBFLAG_A64(tb_flags, NV);
12043     dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
12044     dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
12045     dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
12046     dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
12047     dc->vec_len = 0;
12048     dc->vec_stride = 0;
12049     dc->cp_regs = arm_cpu->cp_regs;
12050     dc->features = env->features;
12051     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
12052     dc->gm_blocksize = arm_cpu->gm_blocksize;
12053 
12054 #ifdef CONFIG_USER_ONLY
12055     /* In sve_probe_page, we assume TBI is enabled. */
12056     tcg_debug_assert(dc->tbid & 1);
12057 #endif
12058 
12059     dc->lse2 = dc_isar_feature(aa64_lse2, dc);
12060 
12061     /* Single step state. The code-generation logic here is:
12062      *  SS_ACTIVE == 0:
12063      *   generate code with no special handling for single-stepping (except
12064      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
12065      *   this happens anyway because those changes are all system register or
12066      *   PSTATE writes).
12067      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
12068      *   emit code for one insn
12069      *   emit code to clear PSTATE.SS
12070      *   emit code to generate software step exception for completed step
12071      *   end TB (as usual for having generated an exception)
12072      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
12073      *   emit code to generate a software step exception
12074      *   end the TB
12075      */
12076     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
12077     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
12078     dc->is_ldex = false;
12079 
12080     /* Bound the number of insns to execute to those left on the page.  */
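          /* -(pc_first | TARGET_PAGE_MASK) is the byte count to the page end. */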
12081     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
12082 
12083     /* If architectural single step active, limit to 1.  */
12084     if (dc->ss_active) {
12085         bound = 1;
12086     }
12087     dc->base.max_insns = MIN(dc->base.max_insns, bound);
12088 }
12089 
12090 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
12091 {
12092 }
12093 
12094 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
12095 {
12096     DisasContext *dc = container_of(dcbase, DisasContext, base);
12097     target_ulong pc_arg = dc->base.pc_next;
12098 
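          /* With CF_PCREL, record only the offset of the pc within the page. */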
12099     if (tb_cflags(dcbase->tb) & CF_PCREL) {
12100         pc_arg &= ~TARGET_PAGE_MASK;
12101     }
12102     tcg_gen_insn_start(pc_arg, 0, 0);
12103     dc->insn_start_updated = false;
12104 }
12105 
12106 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
12107 {
12108     DisasContext *s = container_of(dcbase, DisasContext, base);
12109     CPUARMState *env = cpu_env(cpu);
12110     uint64_t pc = s->base.pc_next;
12111     uint32_t insn;
12112 
12113     /* Singlestep exceptions have the highest priority. */
12114     if (s->ss_active && !s->pstate_ss) {
12115         /* Singlestep state is Active-pending.
12116          * If we're in this state at the start of a TB then either
12117          *  a) we just took an exception to an EL which is being debugged
12118          *     and this is the first insn in the exception handler
12119          *  b) debug exceptions were masked and we just unmasked them
12120          *     without changing EL (eg by clearing PSTATE.D)
12121          * In either case we're going to take a swstep exception in the
12122          * "did not step an insn" case, and so the syndrome ISV and EX
12123          * bits should be zero.
12124          */
12125         assert(s->base.num_insns == 1);
12126         gen_swstep_exception(s, 0, 0);
12127         s->base.is_jmp = DISAS_NORETURN;
12128         s->base.pc_next = pc + 4;
12129         return;
12130     }
12131 
12132     if (pc & 3) {
12133         /*
12134          * PC alignment fault.  This has priority over the instruction abort
12135          * that we would receive from a translation fault via arm_ldl_code.
12136          * This should only be possible after an indirect branch, at the
12137          * start of the TB.
12138          */
12139         assert(s->base.num_insns == 1);
12140         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
12141         s->base.is_jmp = DISAS_NORETURN;
12142         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
12143         return;
12144     }
12145 
12146     s->pc_curr = pc;
12147     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
12148     s->insn = insn;
12149     s->base.pc_next = pc + 4;
12150 
12151     s->fp_access_checked = false;
12152     s->sve_access_checked = false;
12153 
12154     if (s->pstate_il) {
12155         /*
12156          * Illegal execution state. This has priority over BTI
12157          * exceptions, but comes after instruction abort exceptions.
12158          */
12159         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
12160         return;
12161     }
12162 
12163     if (dc_isar_feature(aa64_bti, s)) {
12164         if (s->base.num_insns == 1) {
12165             /* First insn can have btype set to non-zero.  */
12166             tcg_debug_assert(s->btype >= 0);
12167 
12168             /*
12169              * Note that the Branch Target Exception has fairly high
12170              * priority -- below debugging exceptions but above almost
12171              * everything else.  This allows us to handle it now
12172              * instead of waiting until the insn is otherwise decoded.
12173              *
12174              * We can check all but the guarded page check here;
12175              * defer the latter to a helper.
12176              */
12177             if (s->btype != 0
12178                 && !btype_destination_ok(insn, s->bt, s->btype)) {
12179                 gen_helper_guarded_page_check(tcg_env);
12180             }
12181         } else {
12182             /* Not the first insn: btype must be 0.  */
12183             tcg_debug_assert(s->btype == 0);
12184         }
12185     }
12186 
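          /*
           * Run the SME FA64 decoder first: it flags insns that are illegal
           * in streaming mode so the access checks can raise the trap.
           */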
12187     s->is_nonstreaming = false;
12188     if (s->sme_trap_nonstreaming) {
12189         disas_sme_fa64(s, insn);
12190     }
12191 
12192     if (!disas_a64(s, insn) &&
12193         !disas_sme(s, insn) &&
12194         !disas_sve(s, insn)) {
12195         disas_a64_legacy(s, insn);
12196     }
12197 
12198     /*
12199      * After execution of most insns, btype is reset to 0.
12200      * Note that we set btype == -1 when the insn sets btype.
12201      */
12202     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
12203         reset_btype(s);
12204     }
12205 }
12206 
12207 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
12208 {
12209     DisasContext *dc = container_of(dcbase, DisasContext, base);
12210 
12211     if (unlikely(dc->ss_active)) {
12212         /* Note that this means single stepping WFI doesn't halt the CPU.
12213          * For conditional branch insns this is harmless unreachable code as
12214          * gen_goto_tb() has already handled emitting the debug exception
12215          * (and thus a tb-jump is not possible when singlestepping).
12216          */
12217         switch (dc->base.is_jmp) {
12218         default:
12219             gen_a64_update_pc(dc, 4);
12220             /* fall through */
12221         case DISAS_EXIT:
12222         case DISAS_JUMP:
12223             gen_step_complete_exception(dc);
12224             break;
12225         case DISAS_NORETURN:
12226             break;
12227         }
12228     } else {
12229         switch (dc->base.is_jmp) {
12230         case DISAS_NEXT:
12231         case DISAS_TOO_MANY:
12232             gen_goto_tb(dc, 1, 4);
12233             break;
12234         default:
12235         case DISAS_UPDATE_EXIT:
12236             gen_a64_update_pc(dc, 4);
12237             /* fall through */
12238         case DISAS_EXIT:
12239             tcg_gen_exit_tb(NULL, 0);
12240             break;
12241         case DISAS_UPDATE_NOCHAIN:
12242             gen_a64_update_pc(dc, 4);
12243             /* fall through */
12244         case DISAS_JUMP:
12245             tcg_gen_lookup_and_goto_ptr();
12246             break;
12247         case DISAS_NORETURN:
12248         case DISAS_SWI:
12249             break;
12250         case DISAS_WFE:
12251             gen_a64_update_pc(dc, 4);
12252             gen_helper_wfe(tcg_env);
12253             break;
12254         case DISAS_YIELD:
12255             gen_a64_update_pc(dc, 4);
12256             gen_helper_yield(tcg_env);
12257             break;
12258         case DISAS_WFI:
12259             /*
12260              * This is a special case because we don't want to just halt
12261              * the CPU if trying to debug across a WFI.
12262              */
12263             gen_a64_update_pc(dc, 4);
12264             gen_helper_wfi(tcg_env, tcg_constant_i32(4));
12265             /*
12266              * The helper doesn't necessarily throw an exception, but we
12267              * must go back to the main loop to check for interrupts anyway.
12268              */
12269             tcg_gen_exit_tb(NULL, 0);
12270             break;
12271         }
12272     }
12273 }
12274 
12275 const TranslatorOps aarch64_translator_ops = {
12276     .init_disas_context = aarch64_tr_init_disas_context,
12277     .tb_start           = aarch64_tr_tb_start,
12278     .insn_start         = aarch64_tr_insn_start,
12279     .translate_insn     = aarch64_tr_translate_insn,
12280     .tb_stop            = aarch64_tr_tb_stop,
12281 };
12282