/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "exec/exec-all.h"
#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"
#include "cpregs.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Helpers for extracting complex instruction fields
 */

/*
 * For load/store with an unsigned 12 bit immediate scaled by the element
 * size. The input has the immediate field in bits [14:3] and the element
 * size in [2:0].
 */
static int uimm_scaled(DisasContext *s, int x)
{
    unsigned imm = x >> 3;
    unsigned scale = extract32(x, 0, 3);
    return imm << scale;
}
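
/*
 * Worked example: a 64-bit LDR (immediate, unsigned offset) has an
 * element size of 3, so an immediate field of 2 decodes to a byte
 * offset of 2 << 3 == 16.
 */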

/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
static int scale_by_log2_tag_granule(DisasContext *s, int x)
{
    return x << LOG2_TAG_GRANULE;
}
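
/*
 * With LOG2_TAG_GRANULE == 4 (MTE tags cover 16-byte granules), an
 * encoded offset of 3 becomes a byte offset of 3 << 4 == 48.
 */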

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(tcg_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 load/store insns which
 * have an "unprivileged load/store" variant. Those insns access
 * EL0 if executed from an EL which has control over EL0 (usually
 * EL1) but behave like normal loads and stores if executed from
 * elsewhere (eg EL3).
 *
 * @unpriv : true for the unprivileged encoding; false for the
 *           normal encoding (in which case we will return the same
 *           thing as get_mem_index()).
 */
static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (unpriv && s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}
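
/*
 * For example, an LDTR executed at EL1 arrives here with mmu_idx
 * ARMMMUIdx_E10_1 and is downgraded to the ARMMMUIdx_E10_0 (EL0)
 * regime, while the same insn at EL3 fails the unpriv test above
 * and uses the normal mmu_idx.
 */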

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}
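
/*
 * Under CF_PCREL, cpu_pc is known to hold the PC as of s->pc_save,
 * so adding (s->pc_curr - s->pc_save) + diff yields s->pc_curr + diff
 * without encoding an absolute address into the generated code.
 */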

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}
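
/*
 * Worked example for tbi == 1 (TBI0 set, TBI1 clear): an address with
 * bit 55 clear, e.g. 0xff00_0000_0000_1234, sign-extends to a zero
 * top byte and the AND keeps the tag stripped, while an address with
 * bit 55 set, e.g. 0x1280_0000_0000_1234, sign-extends to a 0xff top
 * byte and the AND restores the original tag, leaving it unmodified.
 */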

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register.  But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}
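
/*
 * MAKE_64BIT_MASK(56, 4) covers bits [59:56], the allocation tag, so
 * e.g. 0x5f00_0000_0000_1000 becomes 0x5000_0000_0000_1000 while
 * bits [63:60] are left intact.
 */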

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(tcg_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access.  This probes a single
 * address, the exact one specified.  The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      MemOp memop, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, MemOp memop)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int total_size, MemOp single_mop)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

/*
 * Generate the special alignment check that applies to AccType_ATOMIC
 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
 * naturally aligned, but it must not cross a 16-byte boundary.
 * See AArch64.CheckAlignment().
 */
static void check_lse2_align(DisasContext *s, int rn, int imm,
                             bool is_write, MemOp mop)
{
    TCGv_i32 tmp;
    TCGv_i64 addr;
    TCGLabel *over_label;
    MMUAccessType type;
    int mmu_idx;

    tmp = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
    tcg_gen_addi_i32(tmp, tmp, imm & 15);
    tcg_gen_andi_i32(tmp, tmp, 15);
    tcg_gen_addi_i32(tmp, tmp, memop_size(mop));

    over_label = gen_new_label();
    tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);

    type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
    mmu_idx = get_mem_index(s);
    gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
                                tcg_constant_i32(mmu_idx));

    gen_set_label(over_label);
}
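
/*
 * Worked example: an 8-byte access to an address with low nibble 0xc
 * computes 12 + 8 == 20 > 16, crosses a 16-byte boundary, and takes
 * the alignment fault; with low nibble 0x8 it computes 8 + 8 == 16
 * and falls through to over_label.
 */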

/* Handle the alignment check for AccType_ATOMIC instructions. */
static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }

    /*
     * If size == MO_128, this is a LDXP, and the operation is single-copy
     * atomic for each doubleword, not the entire quadword; it still must
     * be quadword aligned.
     */
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (dc_isar_feature(aa64_lse2, s)) {
        check_lse2_align(s, rn, 0, true, mop);
    } else {
        mop |= MO_ALIGN;
    }
    return finalize_memop(s, mop);
}

/* Handle the alignment check for AccType_ORDERED instructions. */
static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
                                 bool is_write, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (!dc_isar_feature(aa64_lse2, s)) {
        mop |= MO_ALIGN;
    } else if (!s->naa) {
        check_lse2_align(s, rn, imm, is_write, mop);
    }
    return finalize_memop(s, mop);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
458 {
459     /* We just completed step of an insn. Move from Active-not-pending
460      * to Active-pending, and then also take the swstep exception.
461      * This corresponds to making the (IMPDEF) choice to prioritize
462      * swstep exceptions over asynchronous exceptions taken to an exception
463      * level where debug is disabled. This choice has the advantage that
464      * we do not need to maintain internal state corresponding to the
465      * ISV/EX syndrome bits between completion of the step and generation
466      * of the exception, and our syndrome information is always correct.
467      */
468     gen_ss_advance(s);
469     gen_swstep_exception(s, 1, s->is_ldex);
470     s->base.is_jmp = DISAS_NORETURN;
471 }
472 
473 static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
474 {
475     if (s->ss_active) {
476         return false;
477     }
478     return translator_use_goto_tb(&s->base, dest);
479 }
480 
481 static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
482 {
483     if (use_goto_tb(s, s->pc_curr + diff)) {
484         /*
485          * For pcrel, the pc must always be up-to-date on entry to
486          * the linked TB, so that it can use simple additions for all
487          * further adjustments.  For !pcrel, the linked TB is compiled
488          * to know its full virtual address, so we can delay the
489          * update to pc to the unlinked path.  A long chain of links
490          * can thus avoid many updates to the PC.
491          */
492         if (tb_cflags(s->base.tb) & CF_PCREL) {
493             gen_a64_update_pc(s, diff);
494             tcg_gen_goto_tb(n);
495         } else {
496             tcg_gen_goto_tb(n);
497             gen_a64_update_pc(s, diff);
498         }
499         tcg_gen_exit_tb(s->base.tb, n);
500         s->base.is_jmp = DISAS_NORETURN;
501     } else {
502         gen_a64_update_pc(s, diff);
503         if (s->ss_active) {
504             gen_step_complete_exception(s);
505         } else {
506             tcg_gen_lookup_and_goto_ptr();
507             s->base.is_jmp = DISAS_NORETURN;
508         }
509     }
510 }
511 
512 /*
513  * Register access functions
514  *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
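/*
 * For example, "ADD X0, X1, XZR" reads register 31 via cpu_reg and
 * sees zero, whereas "ADD X0, SP, #16" reads it via cpu_reg_sp and
 * sees the stack pointer.
 */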
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that, as with the GP register accessors, the values returned
 * by the read functions are auto-freed temporaries.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

static void clear_vec(DisasContext *s, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0);
}

/*
 * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}
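
/*
 * The gvec mov copies the low 8 or 16 bytes onto themselves and zeroes
 * the tail up to vec_full_reg_size(): e.g. with a 256-bit SVE vector
 * and is_q false, bytes [8, 32) of the register are cleared.
 */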

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, tcg_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper.  */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 4-operand operation using an out-of-line helper.  */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand operation using an out-of-line helper that takes
 * a pointer to the CPU env.
 */
static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data,
                             gen_helper_gvec_4_ptr *fn)
{
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       tcg_env,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
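
/*
 * QEMU stores Z as "cpu_ZF == 0 means Z is set" and N as bit 31 of
 * cpu_NF, so OR-ing the two halves into cpu_ZF makes it zero exactly
 * when the 64-bit result is zero, while cpu_NF's bit 31 is the
 * result's bit 63.
 */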

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 result, flag, tmp;
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    tcg_gen_movi_i64(tmp, 0);
    tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    gen_set_NZ64(result);

    tcg_gen_xor_i64(flag, result, t0);
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_andc_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);

    tcg_gen_mov_i64(dest, result);
}
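
/*
 * The V computation uses the usual rule that signed overflow occurs
 * when both addends have the same sign but the result does not, i.e.
 * bit 63 of (result ^ t0) & ~(t0 ^ t1).  E.g. adding
 * 0x4000000000000000 to itself yields 0x8000000000000000 and sets V.
 */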

static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp = tcg_temp_new_i32();

    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_add64_CC(dest, t0, t1);
    } else {
        gen_add32_CC(dest, t0, t1);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 64 bit arithmetic */
    TCGv_i64 result, flag, tmp;

    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tcg_gen_sub_i64(result, t0, t1);

    gen_set_NZ64(result);

    tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    tcg_gen_xor_i64(flag, result, t0);
    tmp = tcg_temp_new_i64();
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_and_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);
    tcg_gen_mov_i64(dest, result);
}
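
/*
 * For subtraction AArch64 defines C as "no borrow", hence the GEU
 * setcond above, and signed overflow requires operands of differing
 * sign, hence the AND with (t0 ^ t1) rather than the ANDC used for
 * addition.
 */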

static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 32 bit arithmetic */
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp;

    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_sub64_CC(dest, t0, t1);
    } else {
        gen_sub32_CC(dest, t0, t1);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();

    tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i64 tmphi = tcg_temp_new_i64();
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);

        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);

        tmphi = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
    }

    tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = true;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Return <0 for non-supported element sizes, with MO_16 controlled by
 * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
 */
static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz)
{
    switch (esz) {
    case MO_64:
    case MO_32:
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return -1;
        }
        break;
    default:
        return -1;
    }
    return fp_access_check(s);
}

/* Likewise, but vector MO_64 must have two elements. */
static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
{
    switch (esz) {
    case MO_64:
        if (!is_q) {
            return -1;
        }
        break;
    case MO_32:
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return -1;
        }
        break;
    default:
        return -1;
    }
    return fp_access_check(s);
}

/*
 * Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        assert(dc_isar_feature(aa64_sme, s));
        if (!sme_sm_enabled_check(s)) {
            goto fail_exit;
        }
    } else if (s->sve_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        goto fail_exit;
    }
    s->sve_access_checked = true;
    return fp_access_check(s);

 fail_exit:
    /* Assert that we only raise one exception per instruction. */
    assert(!s->sve_access_checked);
    s->sve_access_checked = true;
    return false;
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority.  This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        s->fp_access_checked = true;
        return sme_access_check(s);
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * Expanders for AdvSIMD translation functions.
 */

static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
                            gen_helper_gvec_2 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
    }
    return true;
}

static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
    }
    return true;
}

static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_8) {
        return false;
    }
    return do_gvec_fn3_no64(s, a, fn);
}

static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
    tcg_gen_shli_i64(tcg_out, tcg_out, shift);
}
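
/*
 * Worked example: option 0b010 with shift 2, as in
 * "ADD X0, X1, W2, UXTW #2", zero-extends the low 32 bits of the
 * input and then shifts the result left by two.
 */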

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

static bool trans_B(DisasContext *s, arg_i *a)
{
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_BL(DisasContext *s, arg_i *a)
{
    gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_CBZ(DisasContext *s, arg_cbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_TBZ(DisasContext *s, arg_tbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
{
    /* BC.cond is only present with FEAT_HBC */
    if (a->c && !dc_isar_feature(aa64_hbc, s)) {
        return false;
    }
    reset_btype(s);
    if (a->cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(a->cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, a->imm);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, a->imm);
    }
    return true;
}

static void set_btype_for_br(DisasContext *s, int rn)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BR to {x16,x17} or !guard -> 1, else 3.  */
        if (rn == 16 || rn == 17) {
            set_btype(s, 1);
        } else {
            TCGv_i64 pc = tcg_temp_new_i64();
            gen_pc_plus_diff(s, pc, 0);
            gen_helper_guarded_page_br(tcg_env, pc);
            s->btype = -1;
        }
    }
}

static void set_btype_for_blr(DisasContext *s)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BLR sets BTYPE to 2, regardless of source guarded page.  */
        set_btype(s, 2);
    }
}

static bool trans_BR(DisasContext *s, arg_r *a)
{
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLR(DisasContext *s, arg_r *a)
{
    TCGv_i64 dst = cpu_reg(s, a->rn);
    TCGv_i64 lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RET(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
                                   TCGv_i64 modifier, bool use_key_a)
{
    TCGv_i64 truedst;
    /*
     * Return the branch target for a BRAA/RETA/etc, which is either
     * just the destination dst, or that value with the pauth check
     * done and the code removed from the high bits.
     */
    if (!s->pauth_active) {
        return dst;
    }

    truedst = tcg_temp_new_i64();
    if (use_key_a) {
        gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
    } else {
        gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
    }
    return truedst;
}

static bool trans_BRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst, lr;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
1668     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1669     gen_a64_set_pc(s, dst);
1670     set_btype_for_br(s, a->rn);
1671     s->base.is_jmp = DISAS_JUMP;
1672     return true;
1673 }
1674 
1675 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1676 {
1677     TCGv_i64 dst, lr;
1678 
1679     if (!dc_isar_feature(aa64_pauth, s)) {
1680         return false;
1681     }
1682     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1683     lr = cpu_reg(s, 30);
1684     if (dst == lr) {
1685         TCGv_i64 tmp = tcg_temp_new_i64();
1686         tcg_gen_mov_i64(tmp, dst);
1687         dst = tmp;
1688     }
1689     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1690     gen_a64_set_pc(s, dst);
1691     set_btype_for_blr(s);
1692     s->base.is_jmp = DISAS_JUMP;
1693     return true;
1694 }
1695 
1696 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1697 {
1698     TCGv_i64 dst;
1699 
1700     if (s->current_el == 0) {
1701         return false;
1702     }
1703     if (s->trap_eret) {
1704         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1705         return true;
1706     }
1707     dst = tcg_temp_new_i64();
1708     tcg_gen_ld_i64(dst, tcg_env,
1709                    offsetof(CPUARMState, elr_el[s->current_el]));
1710 
1711     translator_io_start(&s->base);
1712 
1713     gen_helper_exception_return(tcg_env, dst);
1714     /* Must exit loop to check unmasked IRQs */
1715     s->base.is_jmp = DISAS_EXIT;
1716     return true;
1717 }
1718 
1719 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1720 {
1721     TCGv_i64 dst;
1722 
1723     if (!dc_isar_feature(aa64_pauth, s)) {
1724         return false;
1725     }
1726     if (s->current_el == 0) {
1727         return false;
1728     }
1729     /* The FGT trap takes precedence over an auth trap. */
1730     if (s->trap_eret) {
1731         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1732         return true;
1733     }
1734     dst = tcg_temp_new_i64();
1735     tcg_gen_ld_i64(dst, tcg_env,
1736                    offsetof(CPUARMState, elr_el[s->current_el]));
1737 
1738     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1739 
1740     translator_io_start(&s->base);
1741 
1742     gen_helper_exception_return(tcg_env, dst);
1743     /* Must exit loop to check unmasked IRQs */
1744     s->base.is_jmp = DISAS_EXIT;
1745     return true;
1746 }
1747 
1748 static bool trans_NOP(DisasContext *s, arg_NOP *a)
1749 {
1750     return true;
1751 }
1752 
1753 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1754 {
1755     /*
1756      * When running in MTTCG we don't generate jumps to the yield and
1757      * WFE helpers as it won't affect the scheduling of other vCPUs.
1758      * If we wanted to more completely model WFE/SEV so we don't busy
1759      * spin unnecessarily we would need to do something more involved.
1760      */
1761     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1762         s->base.is_jmp = DISAS_YIELD;
1763     }
1764     return true;
1765 }
1766 
1767 static bool trans_WFI(DisasContext *s, arg_WFI *a)
1768 {
1769     s->base.is_jmp = DISAS_WFI;
1770     return true;
1771 }
1772 
1773 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1774 {
1775     /*
1776      * When running in MTTCG we don't generate jumps to the yield and
1777      * WFE helpers as it won't affect the scheduling of other vCPUs.
1778      * If we wanted to more completely model WFE/SEV so we don't busy
1779      * spin unnecessarily we would need to do something more involved.
1780      */
1781     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1782         s->base.is_jmp = DISAS_WFE;
1783     }
1784     return true;
1785 }
1786 
1787 static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
1788 {
1789     if (!dc_isar_feature(aa64_wfxt, s)) {
1790         return false;
1791     }
1792 
1793     /*
1794      * Because we need to pass the register value to the helper,
1795      * it's easier to emit the code now, unlike trans_WFI which
1796      * defers it to aarch64_tr_tb_stop(). That means we need to
1797      * check ss_active so that single-stepping a WFIT doesn't halt.
1798      */
1799     if (s->ss_active) {
1800         /* Act like a NOP under architectural singlestep */
1801         return true;
1802     }
1803 
1804     gen_a64_update_pc(s, 4);
1805     gen_helper_wfit(tcg_env, cpu_reg(s, a->rd));
1806     /* Go back to the main loop to check for interrupts */
1807     s->base.is_jmp = DISAS_EXIT;
1808     return true;
1809 }
1810 
1811 static bool trans_WFET(DisasContext *s, arg_WFET *a)
1812 {
1813     if (!dc_isar_feature(aa64_wfxt, s)) {
1814         return false;
1815     }
1816 
1817     /*
1818      * We rely here on our WFE implementation being a NOP, so we
1819      * don't need to do anything different to handle the WFET timeout
1820      * from what trans_WFE does.
1821      */
1822     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1823         s->base.is_jmp = DISAS_WFE;
1824     }
1825     return true;
1826 }
1827 
1828 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1829 {
1830     if (s->pauth_active) {
1831         gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
1832     }
1833     return true;
1834 }
1835 
1836 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
1837 {
1838     if (s->pauth_active) {
1839         gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1840     }
1841     return true;
1842 }
1843 
1844 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
1845 {
1846     if (s->pauth_active) {
1847         gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1848     }
1849     return true;
1850 }
1851 
1852 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
1853 {
1854     if (s->pauth_active) {
1855         gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1856     }
1857     return true;
1858 }
1859 
1860 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
1861 {
1862     if (s->pauth_active) {
1863         gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1864     }
1865     return true;
1866 }
1867 
1868 static bool trans_ESB(DisasContext *s, arg_ESB *a)
1869 {
1870     /* Without RAS, we must implement this as a NOP. */
1871     if (dc_isar_feature(aa64_ras, s)) {
1872         /*
1873          * QEMU does not have a source of physical SErrors,
1874          * so we are only concerned with virtual SErrors.
1875          * The pseudocode in the ARM for this case is
1876          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
1877          *      AArch64.vESBOperation();
1878          * Most of the condition can be evaluated at translation time.
1879          * Test for EL2 present, and defer test for SEL2 to runtime.
1880          */
1881         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
1882             gen_helper_vesb(tcg_env);
1883         }
1884     }
1885     return true;
1886 }
1887 
1888 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
1889 {
1890     if (s->pauth_active) {
1891         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1892     }
1893     return true;
1894 }
1895 
1896 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
1897 {
1898     if (s->pauth_active) {
1899         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1900     }
1901     return true;
1902 }
1903 
1904 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
1905 {
1906     if (s->pauth_active) {
1907         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1908     }
1909     return true;
1910 }
1911 
1912 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
1913 {
1914     if (s->pauth_active) {
1915         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1916     }
1917     return true;
1918 }
1919 
1920 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
1921 {
1922     if (s->pauth_active) {
1923         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1924     }
1925     return true;
1926 }
1927 
1928 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
1929 {
1930     if (s->pauth_active) {
1931         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1932     }
1933     return true;
1934 }
1935 
1936 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
1937 {
1938     if (s->pauth_active) {
1939         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1940     }
1941     return true;
1942 }
1943 
1944 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
1945 {
1946     if (s->pauth_active) {
1947         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1948     }
1949     return true;
1950 }
1951 
1952 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
1953 {
1954     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1955     return true;
1956 }
1957 
1958 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
1959 {
1960     /* We handle DSB and DMB the same way */
1961     TCGBar bar;
1962 
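         /*
          * a->types is CRm<1:0> of the barrier option: DMB ISHLD and
          * friends encode 1 (reads), DMB ISHST and friends encode 2
          * (writes), and everything else is a full barrier.
          */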
1963     switch (a->types) {
1964     case 1: /* MBReqTypes_Reads */
1965         bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1966         break;
1967     case 2: /* MBReqTypes_Writes */
1968         bar = TCG_BAR_SC | TCG_MO_ST_ST;
1969         break;
1970     default: /* MBReqTypes_All */
1971         bar = TCG_BAR_SC | TCG_MO_ALL;
1972         break;
1973     }
1974     tcg_gen_mb(bar);
1975     return true;
1976 }
1977 
1978 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a)
1979 {
1980     if (!dc_isar_feature(aa64_xs, s)) {
1981         return false;
1982     }
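         /*
          * The nXS qualifier only relaxes completion rules for accesses
          * with the XS attribute, which TCG does not model, so emit the
          * same full barrier as for a plain DSB.
          */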
1983     tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
1984     return true;
1985 }
1986 
1987 static bool trans_ISB(DisasContext *s, arg_ISB *a)
1988 {
1989     /*
1990      * We need to break the TB after this insn to execute
1991      * self-modifying code correctly and also to take
1992      * any pending interrupts immediately.
1993      */
1994     reset_btype(s);
1995     gen_goto_tb(s, 0, 4);
1996     return true;
1997 }
1998 
1999 static bool trans_SB(DisasContext *s, arg_SB *a)
2000 {
2001     if (!dc_isar_feature(aa64_sb, s)) {
2002         return false;
2003     }
2004     /*
2005      * TODO: There is no speculation barrier opcode for TCG;
2006      * MB and end the TB instead.
2007      */
2008     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
2009     gen_goto_tb(s, 0, 4);
2010     return true;
2011 }
2012 
2013 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
2014 {
2015     if (!dc_isar_feature(aa64_condm_4, s)) {
2016         return false;
2017     }
2018     tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
2019     return true;
2020 }
2021 
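     /*
      * FEAT_FlagM2: XAFLAG converts the NZCV flags from the "external"
      * floating-point comparison format back to the Arm format.  The
      * identities in the comments below derive branch-free TCG for
      * each flag.
      */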
2022 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
2023 {
2024     TCGv_i32 z;
2025 
2026     if (!dc_isar_feature(aa64_condm_5, s)) {
2027         return false;
2028     }
2029 
2030     z = tcg_temp_new_i32();
2031 
2032     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
2033 
2034     /*
2035      * (!C & !Z) << 31
2036      * (!(C | Z)) << 31
2037      * ~((C | Z) << 31)
2038      * ~-(C | Z)
2039      * (C | Z) - 1
2040      */
2041     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
2042     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
2043 
2044     /* !(Z & C) */
2045     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
2046     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
2047 
2048     /* (!C & Z) << 31 -> -(Z & ~C) */
2049     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
2050     tcg_gen_neg_i32(cpu_VF, cpu_VF);
2051 
2052     /* C | Z */
2053     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
2054 
2055     return true;
2056 }
2057 
2058 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
2059 {
2060     if (!dc_isar_feature(aa64_condm_5, s)) {
2061         return false;
2062     }
2063 
2064     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
2065     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
2066 
2067     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
2068     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
2069 
2070     tcg_gen_movi_i32(cpu_NF, 0);
2071     tcg_gen_movi_i32(cpu_VF, 0);
2072 
2073     return true;
2074 }
2075 
2076 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
2077 {
2078     if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
2079         return false;
2080     }
2081     if (a->imm & 1) {
2082         set_pstate_bits(PSTATE_UAO);
2083     } else {
2084         clear_pstate_bits(PSTATE_UAO);
2085     }
2086     gen_rebuild_hflags(s);
2087     s->base.is_jmp = DISAS_TOO_MANY;
2088     return true;
2089 }
2090 
2091 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
2092 {
2093     if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
2094         return false;
2095     }
2096     if (a->imm & 1) {
2097         set_pstate_bits(PSTATE_PAN);
2098     } else {
2099         clear_pstate_bits(PSTATE_PAN);
2100     }
2101     gen_rebuild_hflags(s);
2102     s->base.is_jmp = DISAS_TOO_MANY;
2103     return true;
2104 }
2105 
2106 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
2107 {
2108     if (s->current_el == 0) {
2109         return false;
2110     }
2111     gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
2112     s->base.is_jmp = DISAS_TOO_MANY;
2113     return true;
2114 }
2115 
2116 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
2117 {
2118     if (!dc_isar_feature(aa64_ssbs, s)) {
2119         return false;
2120     }
2121     if (a->imm & 1) {
2122         set_pstate_bits(PSTATE_SSBS);
2123     } else {
2124         clear_pstate_bits(PSTATE_SSBS);
2125     }
2126     /* Don't need to rebuild hflags since SSBS is a nop */
2127     s->base.is_jmp = DISAS_TOO_MANY;
2128     return true;
2129 }
2130 
2131 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
2132 {
2133     if (!dc_isar_feature(aa64_dit, s)) {
2134         return false;
2135     }
2136     if (a->imm & 1) {
2137         set_pstate_bits(PSTATE_DIT);
2138     } else {
2139         clear_pstate_bits(PSTATE_DIT);
2140     }
2141     /* There's no need to rebuild hflags because DIT is a nop */
2142     s->base.is_jmp = DISAS_TOO_MANY;
2143     return true;
2144 }
2145 
2146 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2147 {
2148     if (dc_isar_feature(aa64_mte, s)) {
2149         /* Full MTE is enabled -- set the TCO bit as directed. */
2150         if (a->imm & 1) {
2151             set_pstate_bits(PSTATE_TCO);
2152         } else {
2153             clear_pstate_bits(PSTATE_TCO);
2154         }
2155         gen_rebuild_hflags(s);
2156         /* Many factors, including TCO, go into MTE_ACTIVE. */
2157         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2158         return true;
2159     } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2160         /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
2161         return true;
2162     } else {
2163         /* Insn not present */
2164         return false;
2165     }
2166 }
2167 
2168 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2169 {
2170     gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2171     s->base.is_jmp = DISAS_TOO_MANY;
2172     return true;
2173 }
2174 
2175 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2176 {
2177     gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2178     /* Exit the cpu loop to re-evaluate pending IRQs. */
2179     s->base.is_jmp = DISAS_UPDATE_EXIT;
2180     return true;
2181 }
2182 
2183 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a)
2184 {
2185     if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) {
2186         return false;
2187     }
2188 
2189     if (a->imm == 0) {
2190         clear_pstate_bits(PSTATE_ALLINT);
2191     } else if (s->current_el > 1) {
2192         set_pstate_bits(PSTATE_ALLINT);
2193     } else {
2194         gen_helper_msr_set_allint_el1(tcg_env);
2195     }
2196 
2197     /* Exit the cpu loop to re-evaluate pending IRQs. */
2198     s->base.is_jmp = DISAS_UPDATE_EXIT;
2199     return true;
2200 }
2201 
2202 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2203 {
2204     if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2205         return false;
2206     }
2207     if (sme_access_check(s)) {
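             /*
              * a->imm is a single bit; multiplying by 3 replicates it into
              * both the SM (bit 0) and ZA (bit 1) positions, and a->mask
              * selects which of those bits this MSR alias actually writes.
              */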
2208         int old = s->pstate_sm | (s->pstate_za << 1);
2209         int new = a->imm * 3;
2210 
2211         if ((old ^ new) & a->mask) {
2212             /* At least one bit changes. */
2213             gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2214                                 tcg_constant_i32(a->mask));
2215             s->base.is_jmp = DISAS_TOO_MANY;
2216         }
2217     }
2218     return true;
2219 }
2220 
2221 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2222 {
2223     TCGv_i32 tmp = tcg_temp_new_i32();
2224     TCGv_i32 nzcv = tcg_temp_new_i32();
2225 
2226     /* build bit 31, N */
2227     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2228     /* build bit 30, Z */
2229     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2230     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2231     /* build bit 29, C */
2232     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2233     /* build bit 28, V */
2234     tcg_gen_shri_i32(tmp, cpu_VF, 31);
2235     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2236     /* generate result */
2237     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2238 }
2239 
2240 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2241 {
2242     TCGv_i32 nzcv = tcg_temp_new_i32();
2243 
2244     /* take NZCV from R[t] */
2245     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2246 
2247     /* bit 31, N */
2248     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2249     /* bit 30, Z: cpu_ZF holds zero iff the Z flag is set */
2250     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2251     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2252     /* bit 29, C */
2253     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2254     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2255     /* bit 28, V */
2256     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2257     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2258 }
2259 
2260 static void gen_sysreg_undef(DisasContext *s, bool isread,
2261                              uint8_t op0, uint8_t op1, uint8_t op2,
2262                              uint8_t crn, uint8_t crm, uint8_t rt)
2263 {
2264     /*
2265      * Generate code to emit an UNDEF with correct syndrome
2266      * information for a failed system register access.
2267      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2268      * but if FEAT_IDST is implemented then read accesses to registers
2269      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2270      * syndrome.
2271      */
2272     uint32_t syndrome;
2273 
2274     if (isread && dc_isar_feature(aa64_ids, s) &&
2275         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2276         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2277     } else {
2278         syndrome = syn_uncategorized();
2279     }
2280     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2281 }
2282 
2283 /* MRS - move from system register
2284  * MSR (register) - move to system register
2285  * SYS
2286  * SYSL
2287  * These are all essentially the same insn in 'read' and 'write'
2288  * versions, with varying op0 fields.
2289  */
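     /*
      * For example, "MRS x0, CNTVCT_EL0" arrives here with isread set and
      * (op0, op1, crn, crm, op2) = (3, 3, 14, 0, 2), which is looked up
      * in the cpregs hashtable via ENCODE_AA64_CP_REG below.
      */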
2290 static void handle_sys(DisasContext *s, bool isread,
2291                        unsigned int op0, unsigned int op1, unsigned int op2,
2292                        unsigned int crn, unsigned int crm, unsigned int rt)
2293 {
2294     uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2295                                       crn, crm, op0, op1, op2);
2296     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2297     bool need_exit_tb = false;
2298     bool nv_trap_to_el2 = false;
2299     bool nv_redirect_reg = false;
2300     bool skip_fp_access_checks = false;
2301     bool nv2_mem_redirect = false;
2302     TCGv_ptr tcg_ri = NULL;
2303     TCGv_i64 tcg_rt;
2304     uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2305 
2306     if (crn == 11 || crn == 15) {
2307         /*
2308          * Check for TIDCP trap, which must take precedence over
2309          * the UNDEF for "no such register" etc.
2310          */
2311         switch (s->current_el) {
2312         case 0:
2313             if (dc_isar_feature(aa64_tidcp1, s)) {
2314                 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
2315             }
2316             break;
2317         case 1:
2318             gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
2319             break;
2320         }
2321     }
2322 
2323     if (!ri) {
2324         /* Unknown register; this might be a guest error or a feature
2325          * that QEMU does not implement.
2326          */
2327         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2328                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2329                       isread ? "read" : "write", op0, op1, crn, crm, op2);
2330         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2331         return;
2332     }
2333 
2334     if (s->nv2 && ri->nv2_redirect_offset) {
2335         /*
2336          * Some registers always redirect to memory; some only do so if
2337          * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
2338          * pairs which share an offset; see the table in R_CSRPQ).
2339          */
2340         if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
2341             nv2_mem_redirect = s->nv1;
2342         } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
2343             nv2_mem_redirect = !s->nv1;
2344         } else {
2345             nv2_mem_redirect = true;
2346         }
2347     }
2348 
2349     /* Check access permissions */
2350     if (!cp_access_ok(s->current_el, ri, isread)) {
2351         /*
2352          * FEAT_NV/NV2 handling does not do the usual FP access checks
2353          * for registers only accessible at EL2 (though it *does* do them
2354          * for registers accessible at EL1).
2355          */
2356         skip_fp_access_checks = true;
2357         if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
2358             /*
2359              * This is one of the few EL2 registers which should redirect
2360              * to the equivalent EL1 register. We do that after running
2361              * the EL2 register's accessfn.
2362              */
2363             nv_redirect_reg = true;
2364             assert(!nv2_mem_redirect);
2365         } else if (nv2_mem_redirect) {
2366             /*
2367              * NV2 redirect-to-memory takes precedence over trap to EL2 or
2368              * UNDEF to EL1.
2369              */
2370         } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
2371             /*
2372              * This register / instruction exists and is an EL2 register, so
2373              * we must trap to EL2 if accessed in nested virtualization EL1
2374              * instead of UNDEFing. We'll do that after the usual access checks.
2375              * (This makes a difference only for a couple of registers like
2376              * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
2377              * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
2378              * an accessfn which does nothing when called from EL1, because
2379              * the trap-to-EL3 controls which would apply to that register
2380              * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
2381              */
2382             nv_trap_to_el2 = true;
2383         } else {
2384             gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2385             return;
2386         }
2387     }
2388 
2389     if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2390         /* Emit code to perform further access permission checks at
2391          * runtime; this may result in an exception.
2392          */
2393         gen_a64_update_pc(s, 0);
2394         tcg_ri = tcg_temp_new_ptr();
2395         gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
2396                                        tcg_constant_i32(key),
2397                                        tcg_constant_i32(syndrome),
2398                                        tcg_constant_i32(isread));
2399     } else if (ri->type & ARM_CP_RAISES_EXC) {
2400         /*
2401          * The readfn or writefn might raise an exception;
2402          * synchronize the CPU state in case it does.
2403          */
2404         gen_a64_update_pc(s, 0);
2405     }
2406 
2407     if (!skip_fp_access_checks) {
2408         if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2409             return;
2410         } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2411             return;
2412         } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2413             return;
2414         }
2415     }
2416 
2417     if (nv_trap_to_el2) {
2418         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2419         return;
2420     }
2421 
2422     if (nv_redirect_reg) {
2423         /*
2424          * FEAT_NV2 redirection of an EL2 register to an EL1 register.
2425          * Conveniently in all cases the encoding of the EL1 register is
2426          * identical to the EL2 register except that opc1 is 0.
2427          * Get the reginfo for the EL1 register to use for the actual access.
2428          * We don't use the EL1 register's access function, and
2429          * fine-grained-traps on EL1 also do not apply here.
2430          */
2431         key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2432                                  crn, crm, op0, 0, op2);
2433         ri = get_arm_cp_reginfo(s->cp_regs, key);
2434         assert(ri);
2435         assert(cp_access_ok(s->current_el, ri, isread));
2436         /*
2437          * We might not have done an update_pc earlier, so check we don't
2438          * need it. We could support this in future if necessary.
2439          */
2440         assert(!(ri->type & ARM_CP_RAISES_EXC));
2441     }
2442 
2443     if (nv2_mem_redirect) {
2444         /*
2445          * This system register is being redirected into an EL2 memory access.
2446          * This means it is not an IO operation, doesn't change hflags,
2447          * and need not end the TB, because it has no side effects.
2448          *
2449          * The access is 64-bit single copy atomic, guaranteed aligned because
2450          * of the definition of VCNR_EL2. Its endianness depends on
2451          * SCTLR_EL2.EE, not on the data endianness of EL1.
2452          * It is done under either the EL2 translation regime or the EL2&0
2453          * translation regime, depending on HCR_EL2.E2H. It behaves as if
2454          * PSTATE.PAN is 0.
2455          */
2456         TCGv_i64 ptr = tcg_temp_new_i64();
2457         MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
2458         ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
2459         int memidx = arm_to_core_mmu_idx(armmemidx);
2460         uint32_t syn;
2461 
2462         mop |= (s->nv2_mem_be ? MO_BE : MO_LE);
2463 
2464         tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
2465         tcg_gen_addi_i64(ptr, ptr,
2466                          (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
2467         tcg_rt = cpu_reg(s, rt);
2468 
2469         syn = syn_data_abort_vncr(0, !isread, 0);
2470         disas_set_insn_syndrome(s, syn);
2471         if (isread) {
2472             tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
2473         } else {
2474             tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
2475         }
2476         return;
2477     }
2478 
2479     /* Handle special cases first */
2480     switch (ri->type & ARM_CP_SPECIAL_MASK) {
2481     case 0:
2482         break;
2483     case ARM_CP_NOP:
2484         return;
2485     case ARM_CP_NZCV:
2486         tcg_rt = cpu_reg(s, rt);
2487         if (isread) {
2488             gen_get_nzcv(tcg_rt);
2489         } else {
2490             gen_set_nzcv(tcg_rt);
2491         }
2492         return;
2493     case ARM_CP_CURRENTEL:
2494     {
2495         /*
2496          * Reads as current EL value from pstate, which is
2497          * guaranteed to be constant by the tb flags.
2498          * For nested virt we should report EL2.
2499          */
2500         int el = s->nv ? 2 : s->current_el;
2501         tcg_rt = cpu_reg(s, rt);
2502         tcg_gen_movi_i64(tcg_rt, el << 2);
2503         return;
2504     }
2505     case ARM_CP_DC_ZVA:
2506         /* Writes clear the aligned block of memory which rt points into. */
2507         if (s->mte_active[0]) {
2508             int desc = 0;
2509 
2510             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2511             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2512             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2513 
2514             tcg_rt = tcg_temp_new_i64();
2515             gen_helper_mte_check_zva(tcg_rt, tcg_env,
2516                                      tcg_constant_i32(desc), cpu_reg(s, rt));
2517         } else {
2518             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2519         }
2520         gen_helper_dc_zva(tcg_env, tcg_rt);
2521         return;
2522     case ARM_CP_DC_GVA:
2523         {
2524             TCGv_i64 clean_addr, tag;
2525 
2526             /*
2527              * DC_GVA, like DC_ZVA, requires that we supply the original
2528              * pointer for an invalid page.  Probe that address first.
2529              */
2530             tcg_rt = cpu_reg(s, rt);
2531             clean_addr = clean_data_tbi(s, tcg_rt);
2532             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2533 
2534             if (s->ata[0]) {
2535                 /* Extract the tag from the register to match STZGM.  */
2536                 tag = tcg_temp_new_i64();
2537                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2538                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2539             }
2540         }
2541         return;
2542     case ARM_CP_DC_GZVA:
2543         {
2544             TCGv_i64 clean_addr, tag;
2545 
2546             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2547             tcg_rt = cpu_reg(s, rt);
2548             clean_addr = clean_data_tbi(s, tcg_rt);
2549             gen_helper_dc_zva(tcg_env, clean_addr);
2550 
2551             if (s->ata[0]) {
2552                 /* Extract the tag from the register to match STZGM.  */
2553                 tag = tcg_temp_new_i64();
2554                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2555                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2556             }
2557         }
2558         return;
2559     default:
2560         g_assert_not_reached();
2561     }
2562 
2563     if (ri->type & ARM_CP_IO) {
2564         /* I/O operations must end the TB here (whether read or write) */
2565         need_exit_tb = translator_io_start(&s->base);
2566     }
2567 
2568     tcg_rt = cpu_reg(s, rt);
2569 
2570     if (isread) {
2571         if (ri->type & ARM_CP_CONST) {
2572             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2573         } else if (ri->readfn) {
2574             if (!tcg_ri) {
2575                 tcg_ri = gen_lookup_cp_reg(key);
2576             }
2577             gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
2578         } else {
2579             tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
2580         }
2581     } else {
2582         if (ri->type & ARM_CP_CONST) {
2583             /* If not forbidden by access permissions, treat as WI */
2584             return;
2585         } else if (ri->writefn) {
2586             if (!tcg_ri) {
2587                 tcg_ri = gen_lookup_cp_reg(key);
2588             }
2589             gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
2590         } else {
2591             tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
2592         }
2593     }
2594 
2595     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2596         /*
2597          * A write to any coprocessor register that ends a TB
2598          * must rebuild the hflags for the next TB.
2599          */
2600         gen_rebuild_hflags(s);
2601         /*
2602          * We default to ending the TB on a coprocessor register write,
2603          * but allow this to be suppressed by the register definition
2604          * (usually only necessary to work around guest bugs).
2605          */
2606         need_exit_tb = true;
2607     }
2608     if (need_exit_tb) {
2609         s->base.is_jmp = DISAS_UPDATE_EXIT;
2610     }
2611 }
2612 
2613 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2614 {
2615     handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2616     return true;
2617 }
2618 
2619 static bool trans_SVC(DisasContext *s, arg_i *a)
2620 {
2621     /*
2622      * For SVC, HVC and SMC we advance the single-step state
2623      * machine before taking the exception. This is architecturally
2624      * mandated, to ensure that single-stepping a system call
2625      * instruction works properly.
2626      */
2627     uint32_t syndrome = syn_aa64_svc(a->imm);
2628     if (s->fgt_svc) {
2629         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2630         return true;
2631     }
2632     gen_ss_advance(s);
2633     gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2634     return true;
2635 }
2636 
2637 static bool trans_HVC(DisasContext *s, arg_i *a)
2638 {
2639     int target_el = s->current_el == 3 ? 3 : 2;
2640 
2641     if (s->current_el == 0) {
2642         unallocated_encoding(s);
2643         return true;
2644     }
2645     /*
2646      * The pre HVC helper handles cases when HVC gets trapped
2647      * as an undefined insn by runtime configuration.
2648      */
2649     gen_a64_update_pc(s, 0);
2650     gen_helper_pre_hvc(tcg_env);
2651     /* Architecture requires ss advance before we do the actual work */
2652     gen_ss_advance(s);
2653     gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2654     return true;
2655 }
2656 
2657 static bool trans_SMC(DisasContext *s, arg_i *a)
2658 {
2659     if (s->current_el == 0) {
2660         unallocated_encoding(s);
2661         return true;
2662     }
2663     gen_a64_update_pc(s, 0);
2664     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2665     /* Architecture requires ss advance before we do the actual work */
2666     gen_ss_advance(s);
2667     gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2668     return true;
2669 }
2670 
2671 static bool trans_BRK(DisasContext *s, arg_i *a)
2672 {
2673     gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2674     return true;
2675 }
2676 
2677 static bool trans_HLT(DisasContext *s, arg_i *a)
2678 {
2679     /*
2680      * HLT. This has two purposes.
2681      * Architecturally, it is an external halting debug instruction.
2682      * Since QEMU doesn't implement external debug, we treat this as
2683      * the architecture requires when halting debug is disabled: it UNDEFs.
2684      * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2685      */
2686     if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2687         gen_exception_internal_insn(s, EXCP_SEMIHOST);
2688     } else {
2689         unallocated_encoding(s);
2690     }
2691     return true;
2692 }
2693 
2694 /*
2695  * Load/Store exclusive instructions are implemented by remembering
2696  * the value/address loaded, and seeing if these are the same
2697  * when the store is performed. This is not actually the architecturally
2698  * mandated semantics, but it works for typical guest code sequences
2699  * and avoids having to monitor regular stores.
2700  *
2701  * The store exclusive uses the atomic cmpxchg primitives to avoid
2702  * races in multi-threaded linux-user and when MTTCG softmmu is
2703  * enabled.
2704  */
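     /*
      * For example, a typical guest load/store-exclusive retry loop that
      * this scheme supports:
      *    retry:
      *        ldxr  x0, [x1]        ; record address and value loaded
      *        add   x0, x0, #1
      *        stxr  w2, x0, [x1]    ; cmpxchg against the recorded value
      *        cbnz  w2, retry       ; w2 != 0 means the exclusive failed
      */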
2705 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2706                                int size, bool is_pair)
2707 {
2708     int idx = get_mem_index(s);
2709     TCGv_i64 dirty_addr, clean_addr;
2710     MemOp memop = check_atomic_align(s, rn, size + is_pair);
2711 
2712     s->is_ldex = true;
2713     dirty_addr = cpu_reg_sp(s, rn);
2714     clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2715 
2716     g_assert(size <= 3);
2717     if (is_pair) {
2718         g_assert(size >= 2);
2719         if (size == 2) {
2720             tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2721             if (s->be_data == MO_LE) {
2722                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2723                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2724             } else {
2725                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2726                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2727             }
2728         } else {
2729             TCGv_i128 t16 = tcg_temp_new_i128();
2730 
2731             tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2732 
2733             if (s->be_data == MO_LE) {
2734                 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2735                                       cpu_exclusive_high, t16);
2736             } else {
2737                 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2738                                       cpu_exclusive_val, t16);
2739             }
2740             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2741             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2742         }
2743     } else {
2744         tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2745         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2746     }
2747     tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2748 }
2749 
2750 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2751                                 int rn, int size, int is_pair)
2752 {
2753     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2754      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2755      *     [addr] = {Rt};
2756      *     if (is_pair) {
2757      *         [addr + datasize] = {Rt2};
2758      *     }
2759      *     {Rd} = 0;
2760      * } else {
2761      *     {Rd} = 1;
2762      * }
2763      * env->exclusive_addr = -1;
2764      */
2765     TCGLabel *fail_label = gen_new_label();
2766     TCGLabel *done_label = gen_new_label();
2767     TCGv_i64 tmp, clean_addr;
2768     MemOp memop;
2769 
2770     /*
2771      * FIXME: We are out of spec here.  We have recorded only the address
2772      * from load_exclusive, not the entire range, and we assume that the
2773      * size of the access on both sides matches.  The architecture allows the
2774      * store to be smaller than the load, so long as the stored bytes are
2775      * within the range recorded by the load.
2776      */
2777 
2778     /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
2779     clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2780     tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2781 
2782     /*
2783      * The write, and any associated faults, only happen if the virtual
2784      * and physical addresses pass the exclusive monitor check.  These
2785      * faults are exceedingly unlikely, because normally the guest uses
2786      * the exact same address register for the load_exclusive, and we
2787      * would have recognized these faults there.
2788      *
2789      * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
2790      * unaligned 4-byte write within the range of an aligned 8-byte load.
2791      * With LSE2, the store would need to cross a 16-byte boundary when the
2792      * load did not, which would mean the store is outside the range
2793      * recorded for the monitor, which would have failed a corrected monitor
2794      * check above.  For now, we assume no size change and retain the
2795      * MO_ALIGN to let tcg know what we checked in the load_exclusive.
2796      *
2797      * It is possible to trigger an MTE fault, by performing the load with
2798      * a virtual address with a valid tag and performing the store with the
2799      * same virtual address and a different invalid tag.
2800      */
2801     memop = size + is_pair;
2802     if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
2803         memop |= MO_ALIGN;
2804     }
2805     memop = finalize_memop(s, memop);
2806     gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2807 
2808     tmp = tcg_temp_new_i64();
2809     if (is_pair) {
2810         if (size == 2) {
2811             if (s->be_data == MO_LE) {
2812                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2813             } else {
2814                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2815             }
2816             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2817                                        cpu_exclusive_val, tmp,
2818                                        get_mem_index(s), memop);
2819             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2820         } else {
2821             TCGv_i128 t16 = tcg_temp_new_i128();
2822             TCGv_i128 c16 = tcg_temp_new_i128();
2823             TCGv_i64 a, b;
2824 
2825             if (s->be_data == MO_LE) {
2826                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2827                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2828                                         cpu_exclusive_high);
2829             } else {
2830                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2831                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2832                                         cpu_exclusive_val);
2833             }
2834 
2835             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
2836                                         get_mem_index(s), memop);
2837 
2838             a = tcg_temp_new_i64();
2839             b = tcg_temp_new_i64();
2840             if (s->be_data == MO_LE) {
2841                 tcg_gen_extr_i128_i64(a, b, t16);
2842             } else {
2843                 tcg_gen_extr_i128_i64(b, a, t16);
2844             }
2845 
2846             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
2847             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
2848             tcg_gen_or_i64(tmp, a, b);
2849 
2850             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
2851         }
2852     } else {
2853         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2854                                    cpu_reg(s, rt), get_mem_index(s), memop);
2855         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2856     }
2857     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2858     tcg_gen_br(done_label);
2859 
2860     gen_set_label(fail_label);
2861     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2862     gen_set_label(done_label);
2863     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2864 }
2865 
2866 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2867                                  int rn, int size)
2868 {
2869     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2870     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2871     int memidx = get_mem_index(s);
2872     TCGv_i64 clean_addr;
2873     MemOp memop;
2874 
2875     if (rn == 31) {
2876         gen_check_sp_alignment(s);
2877     }
2878     memop = check_atomic_align(s, rn, size);
2879     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2880     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
2881                                memidx, memop);
2882 }
2883 
2884 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2885                                       int rn, int size)
2886 {
2887     TCGv_i64 s1 = cpu_reg(s, rs);
2888     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2889     TCGv_i64 t1 = cpu_reg(s, rt);
2890     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2891     TCGv_i64 clean_addr;
2892     int memidx = get_mem_index(s);
2893     MemOp memop;
2894 
2895     if (rn == 31) {
2896         gen_check_sp_alignment(s);
2897     }
2898 
2899     /* This is a single atomic access, despite the "pair". */
2900     memop = check_atomic_align(s, rn, size + 1);
2901     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2902 
2903     if (size == 2) {
2904         TCGv_i64 cmp = tcg_temp_new_i64();
2905         TCGv_i64 val = tcg_temp_new_i64();
2906 
2907         if (s->be_data == MO_LE) {
2908             tcg_gen_concat32_i64(val, t1, t2);
2909             tcg_gen_concat32_i64(cmp, s1, s2);
2910         } else {
2911             tcg_gen_concat32_i64(val, t2, t1);
2912             tcg_gen_concat32_i64(cmp, s2, s1);
2913         }
2914 
2915         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
2916 
2917         if (s->be_data == MO_LE) {
2918             tcg_gen_extr32_i64(s1, s2, cmp);
2919         } else {
2920             tcg_gen_extr32_i64(s2, s1, cmp);
2921         }
2922     } else {
2923         TCGv_i128 cmp = tcg_temp_new_i128();
2924         TCGv_i128 val = tcg_temp_new_i128();
2925 
2926         if (s->be_data == MO_LE) {
2927             tcg_gen_concat_i64_i128(val, t1, t2);
2928             tcg_gen_concat_i64_i128(cmp, s1, s2);
2929         } else {
2930             tcg_gen_concat_i64_i128(val, t2, t1);
2931             tcg_gen_concat_i64_i128(cmp, s2, s1);
2932         }
2933 
2934         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
2935 
2936         if (s->be_data == MO_LE) {
2937             tcg_gen_extr_i128_i64(s1, s2, cmp);
2938         } else {
2939             tcg_gen_extr_i128_i64(s2, s1, cmp);
2940         }
2941     }
2942 }
2943 
2944 /*
2945  * Compute the ISS.SF bit for syndrome information if an exception
2946  * is taken on a load or store. This indicates whether the instruction
2947  * is accessing a 32-bit or 64-bit register. This logic is derived
2948  * from the ARMv8 specs for LDR (Shared decode for all encodings).
2949  */
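     /*
      * For example: LDRSW x0, [x1] is sign && !ext, so SF = 1 (64-bit
      * result); LDRSB w0, [x1] is sign && ext, so SF = 0; and for
      * unsigned accesses SF simply reflects a 64-bit transfer size.
      */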
2950 static bool ldst_iss_sf(int size, bool sign, bool ext)
2951 {
2953     if (sign) {
2954         /*
2955          * Signed loads are 64 bit results if we are not going to
2956          * do a zero-extend from 32 to 64 after the load.
2957          * (For a store, sign and ext are always false.)
2958          */
2959         return !ext;
2960     } else {
2961         /* Unsigned loads/stores work at the specified size */
2962         return size == MO_64;
2963     }
2964 }
2965 
2966 static bool trans_STXR(DisasContext *s, arg_stxr *a)
2967 {
2968     if (a->rn == 31) {
2969         gen_check_sp_alignment(s);
2970     }
2971     if (a->lasr) {
2972         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2973     }
2974     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
2975     return true;
2976 }
2977 
2978 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
2979 {
2980     if (a->rn == 31) {
2981         gen_check_sp_alignment(s);
2982     }
2983     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
2984     if (a->lasr) {
2985         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2986     }
2987     return true;
2988 }
2989 
2990 static bool trans_STLR(DisasContext *s, arg_stlr *a)
2991 {
2992     TCGv_i64 clean_addr;
2993     MemOp memop;
2994     bool iss_sf = ldst_iss_sf(a->sz, false, false);
2995 
2996     /*
2997      * StoreLORelease is the same as Store-Release for QEMU, but
2998      * needs the feature-test.
2999      */
3000     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3001         return false;
3002     }
3003     /* Generate ISS for non-exclusive accesses including LASR.  */
3004     if (a->rn == 31) {
3005         gen_check_sp_alignment(s);
3006     }
3007     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3008     memop = check_ordered_align(s, a->rn, 0, true, a->sz);
3009     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3010                                 true, a->rn != 31, memop);
3011     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
3012               iss_sf, a->lasr);
3013     return true;
3014 }
3015 
3016 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
3017 {
3018     TCGv_i64 clean_addr;
3019     MemOp memop;
3020     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3021 
3022     /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
3023     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3024         return false;
3025     }
3026     /* Generate ISS for non-exclusive accesses including LASR.  */
3027     if (a->rn == 31) {
3028         gen_check_sp_alignment(s);
3029     }
3030     memop = check_ordered_align(s, a->rn, 0, false, a->sz);
3031     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3032                                 false, a->rn != 31, memop);
3033     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
3034               a->rt, iss_sf, a->lasr);
3035     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3036     return true;
3037 }
3038 
3039 static bool trans_STXP(DisasContext *s, arg_stxr *a)
3040 {
3041     if (a->rn == 31) {
3042         gen_check_sp_alignment(s);
3043     }
3044     if (a->lasr) {
3045         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3046     }
3047     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
3048     return true;
3049 }
3050 
3051 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
3052 {
3053     if (a->rn == 31) {
3054         gen_check_sp_alignment(s);
3055     }
3056     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
3057     if (a->lasr) {
3058         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3059     }
3060     return true;
3061 }
3062 
3063 static bool trans_CASP(DisasContext *s, arg_CASP *a)
3064 {
3065     if (!dc_isar_feature(aa64_atomics, s)) {
3066         return false;
3067     }
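         /*
          * CASP operates on the register pairs {Rs, Rs+1} and {Rt, Rt+1};
          * odd Rs or Rt encodings are unallocated.
          */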
3068     if (((a->rt | a->rs) & 1) != 0) {
3069         return false;
3070     }
3071 
3072     gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
3073     return true;
3074 }
3075 
3076 static bool trans_CAS(DisasContext *s, arg_CAS *a)
3077 {
3078     if (!dc_isar_feature(aa64_atomics, s)) {
3079         return false;
3080     }
3081     gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
3082     return true;
3083 }
3084 
3085 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
3086 {
3087     bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
3088     TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
3089     TCGv_i64 clean_addr = tcg_temp_new_i64();
3090     MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3091 
3092     gen_pc_plus_diff(s, clean_addr, a->imm);
3093     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3094               false, true, a->rt, iss_sf, false);
3095     return true;
3096 }
3097 
3098 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
3099 {
3100     /* Load register (literal), vector version */
3101     TCGv_i64 clean_addr;
3102     MemOp memop;
3103 
3104     if (!fp_access_check(s)) {
3105         return true;
3106     }
3107     memop = finalize_memop_asimd(s, a->sz);
3108     clean_addr = tcg_temp_new_i64();
3109     gen_pc_plus_diff(s, clean_addr, a->imm);
3110     do_fp_ld(s, a->rt, clean_addr, memop);
3111     return true;
3112 }
3113 
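     /*
      * Shared address setup for the load/store pair insns: signed-offset
      * and pre-index forms (!a->p) apply the immediate before the access,
      * post-index forms apply it afterwards, and a->w requests base
      * register writeback in op_addr_ldstpair_post().
      */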
3114 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
3115                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3116                                  uint64_t offset, bool is_store, MemOp mop)
3117 {
3118     if (a->rn == 31) {
3119         gen_check_sp_alignment(s);
3120     }
3121 
3122     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3123     if (!a->p) {
3124         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3125     }
3126 
3127     *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
3128                                  (a->w || a->rn != 31), 2 << a->sz, mop);
3129 }
3130 
3131 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
3132                                   TCGv_i64 dirty_addr, uint64_t offset)
3133 {
3134     if (a->w) {
3135         if (a->p) {
3136             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3137         }
3138         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3139     }
3140 }
3141 
3142 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
3143 {
3144     uint64_t offset = a->imm << a->sz;
3145     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3146     MemOp mop = finalize_memop(s, a->sz);
3147 
3148     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3149     tcg_rt = cpu_reg(s, a->rt);
3150     tcg_rt2 = cpu_reg(s, a->rt2);
3151     /*
3152      * We built mop above for the single logical access -- rebuild it
3153      * now for the paired operation.
3154      *
3155      * With LSE2, non-sign-extending pairs are treated atomically if
3156      * aligned, and if unaligned one of the pair will be completely
3157      * within a 16-byte block and that element will be atomic.
3158      * Otherwise each element is separately atomic.
3159      * In all cases, issue one operation with the correct atomicity.
3160      */
3161     mop = a->sz + 1;
3162     if (s->align_mem) {
3163         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3164     }
3165     mop = finalize_memop_pair(s, mop);
3166     if (a->sz == 2) {
3167         TCGv_i64 tmp = tcg_temp_new_i64();
3168 
3169         if (s->be_data == MO_LE) {
3170             tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3171         } else {
3172             tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3173         }
3174         tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3175     } else {
3176         TCGv_i128 tmp = tcg_temp_new_i128();
3177 
3178         if (s->be_data == MO_LE) {
3179             tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3180         } else {
3181             tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3182         }
3183         tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3184     }
3185     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3186     return true;
3187 }
3188 
3189 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3190 {
3191     uint64_t offset = a->imm << a->sz;
3192     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3193     MemOp mop = finalize_memop(s, a->sz);
3194 
3195     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3196     tcg_rt = cpu_reg(s, a->rt);
3197     tcg_rt2 = cpu_reg(s, a->rt2);
3198 
3199     /*
3200      * We built mop above for the single logical access -- rebuild it
3201      * now for the paired operation.
3202      *
3203      * With LSE2, non-sign-extending pairs are treated atomically if
3204      * aligned, and if unaligned one of the pair will be completely
3205      * within a 16-byte block and that element will be atomic.
3206      * Otherwise each element is separately atomic.
3207      * In all cases, issue one operation with the correct atomicity.
3208      *
3209      * This treats sign-extending loads like zero-extending loads,
3210      * since that reuses the most code below.
3211      */
3212     mop = a->sz + 1;
3213     if (s->align_mem) {
3214         mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3215     }
3216     mop = finalize_memop_pair(s, mop);
3217     if (a->sz == 2) {
3218         int o2 = s->be_data == MO_LE ? 32 : 0;
3219         int o1 = o2 ^ 32;
3220 
3221         tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3222         if (a->sign) {
3223             tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3224             tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3225         } else {
3226             tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3227             tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3228         }
3229     } else {
3230         TCGv_i128 tmp = tcg_temp_new_i128();
3231 
3232         tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3233         if (s->be_data == MO_LE) {
3234             tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3235         } else {
3236             tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3237         }
3238     }
3239     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3240     return true;
3241 }
3242 
3243 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3244 {
3245     uint64_t offset = a->imm << a->sz;
3246     TCGv_i64 clean_addr, dirty_addr;
3247     MemOp mop;
3248 
3249     if (!fp_access_check(s)) {
3250         return true;
3251     }
3252 
3253     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3254     mop = finalize_memop_asimd(s, a->sz);
3255     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3256     do_fp_st(s, a->rt, clean_addr, mop);
3257     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3258     do_fp_st(s, a->rt2, clean_addr, mop);
3259     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3260     return true;
3261 }
3262 
3263 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3264 {
3265     uint64_t offset = a->imm << a->sz;
3266     TCGv_i64 clean_addr, dirty_addr;
3267     MemOp mop;
3268 
3269     if (!fp_access_check(s)) {
3270         return true;
3271     }
3272 
3273     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3274     mop = finalize_memop_asimd(s, a->sz);
3275     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3276     do_fp_ld(s, a->rt, clean_addr, mop);
3277     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3278     do_fp_ld(s, a->rt2, clean_addr, mop);
3279     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3280     return true;
3281 }
3282 
3283 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3284 {
3285     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3286     uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3287     MemOp mop;
3288     TCGv_i128 tmp;
3289 
3290     /* STGP only comes in one size. */
3291     tcg_debug_assert(a->sz == MO_64);
3292 
3293     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3294         return false;
3295     }
3296 
3297     if (a->rn == 31) {
3298         gen_check_sp_alignment(s);
3299     }
3300 
3301     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3302     if (!a->p) {
3303         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3304     }
3305 
3306     clean_addr = clean_data_tbi(s, dirty_addr);
3307     tcg_rt = cpu_reg(s, a->rt);
3308     tcg_rt2 = cpu_reg(s, a->rt2);
3309 
3310     /*
3311      * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3312      * and one tag operation.  We implement it as one single aligned 16-byte
3313      * memory operation for convenience.  Note that the alignment ensures
3314      * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3315      */
3316     mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3317 
3318     tmp = tcg_temp_new_i128();
3319     if (s->be_data == MO_LE) {
3320         tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3321     } else {
3322         tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3323     }
3324     tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3325 
3326     /* Perform the tag store, if tag access enabled. */
3327     if (s->ata[0]) {
3328         if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3329             gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3330         } else {
3331             gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3332         }
3333     }
3334 
3335     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3336     return true;
3337 }
3338 
3339 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3340                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3341                                  uint64_t offset, bool is_store, MemOp mop)
3342 {
3343     int memidx;
3344 
3345     if (a->rn == 31) {
3346         gen_check_sp_alignment(s);
3347     }
3348 
3349     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3350     if (!a->p) {
3351         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3352     }
3353     memidx = get_a64_user_mem_index(s, a->unpriv);
3354     *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3355                                         a->w || a->rn != 31,
3356                                         mop, a->unpriv, memidx);
3357 }
3358 
3359 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3360                                   TCGv_i64 dirty_addr, uint64_t offset)
3361 {
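         /*
          * Writeback: pre-index (!a->p) already added the offset in the
          * _pre helper; post-index (a->p) adds it here.  Both then copy
          * the updated address back to Rn.
          */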
3362     if (a->w) {
3363         if (a->p) {
3364             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3365         }
3366         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3367     }
3368 }
3369 
3370 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3371 {
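         /* The ISS syndrome is only valid for forms without writeback. */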
3372     bool iss_sf, iss_valid = !a->w;
3373     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3374     int memidx = get_a64_user_mem_index(s, a->unpriv);
3375     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3376 
3377     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3378 
3379     tcg_rt = cpu_reg(s, a->rt);
3380     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3381 
3382     do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3383                      iss_valid, a->rt, iss_sf, false);
3384     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3385     return true;
3386 }
3387 
3388 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3389 {
3390     bool iss_sf, iss_valid = !a->w;
3391     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3392     int memidx = get_a64_user_mem_index(s, a->unpriv);
3393     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3394 
3395     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3396 
3397     tcg_rt = cpu_reg(s, a->rt);
3398     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3399 
3400     do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3401                      a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3402     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3403     return true;
3404 }
3405 
3406 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3407 {
3408     TCGv_i64 clean_addr, dirty_addr;
3409     MemOp mop;
3410 
3411     if (!fp_access_check(s)) {
3412         return true;
3413     }
3414     mop = finalize_memop_asimd(s, a->sz);
3415     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3416     do_fp_st(s, a->rt, clean_addr, mop);
3417     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3418     return true;
3419 }
3420 
3421 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3422 {
3423     TCGv_i64 clean_addr, dirty_addr;
3424     MemOp mop;
3425 
3426     if (!fp_access_check(s)) {
3427         return true;
3428     }
3429     mop = finalize_memop_asimd(s, a->sz);
3430     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3431     do_fp_ld(s, a->rt, clean_addr, mop);
3432     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3433     return true;
3434 }
3435 
3436 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3437                              TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3438                              bool is_store, MemOp memop)
3439 {
3440     TCGv_i64 tcg_rm;
3441 
3442     if (a->rn == 31) {
3443         gen_check_sp_alignment(s);
3444     }
3445     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3446 
3447     tcg_rm = read_cpu_reg(s, a->rm, 1);
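         /*
          * a->opt selects the extend type (UXTW/LSL/SXTW/SXTX); when a->s
          * is set, the offset is also shifted left by log2(access size).
          */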
3448     ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3449 
3450     tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3451     *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3452 }
3453 
3454 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3455 {
3456     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3457     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3458     MemOp memop;
3459 
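         /*
          * All valid extend options (UXTW, LSL, SXTW, SXTX) have bit 1
          * of the option field set; other encodings are UNDEFINED.
          */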
3460     if (extract32(a->opt, 1, 1) == 0) {
3461         return false;
3462     }
3463 
3464     memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3465     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3466     tcg_rt = cpu_reg(s, a->rt);
3467     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3468               a->ext, true, a->rt, iss_sf, false);
3469     return true;
3470 }
3471 
3472 static bool trans_STR(DisasContext *s, arg_ldst *a)
3473 {
3474     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3475     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3476     MemOp memop;
3477 
3478     if (extract32(a->opt, 1, 1) == 0) {
3479         return false;
3480     }
3481 
3482     memop = finalize_memop(s, a->sz);
3483     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3484     tcg_rt = cpu_reg(s, a->rt);
3485     do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3486     return true;
3487 }
3488 
3489 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3490 {
3491     TCGv_i64 clean_addr, dirty_addr;
3492     MemOp memop;
3493 
3494     if (extract32(a->opt, 1, 1) == 0) {
3495         return false;
3496     }
3497 
3498     if (!fp_access_check(s)) {
3499         return true;
3500     }
3501 
3502     memop = finalize_memop_asimd(s, a->sz);
3503     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3504     do_fp_ld(s, a->rt, clean_addr, memop);
3505     return true;
3506 }
3507 
3508 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3509 {
3510     TCGv_i64 clean_addr, dirty_addr;
3511     MemOp memop;
3512 
3513     if (extract32(a->opt, 1, 1) == 0) {
3514         return false;
3515     }
3516 
3517     if (!fp_access_check(s)) {
3518         return true;
3519     }
3520 
3521     memop = finalize_memop_asimd(s, a->sz);
3522     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3523     do_fp_st(s, a->rt, clean_addr, memop);
3524     return true;
3525 }
3526 
3528 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3529                          int sign, bool invert)
3530 {
3531     MemOp mop = a->sz | sign;
3532     TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3533 
3534     if (a->rn == 31) {
3535         gen_check_sp_alignment(s);
3536     }
3537     mop = check_atomic_align(s, a->rn, mop);
3538     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3539                                 a->rn != 31, mop);
3540     tcg_rs = read_cpu_reg(s, a->rs, true);
3541     tcg_rt = cpu_reg(s, a->rt);
3542     if (invert) {
3543         tcg_gen_not_i64(tcg_rs, tcg_rs);
3544     }
3545     /*
3546      * The tcg atomic primitives are all full barriers.  Therefore we
3547      * can ignore the Acquire and Release bits of this instruction.
3548      */
3549     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3550 
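         /*
          * For the signed min/max ops the memory operand was
          * sign-extended for the comparison, but Xt must receive the
          * loaded value zero-extended to 64 bits.
          */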
3551     if (mop & MO_SIGN) {
3552         switch (a->sz) {
3553         case MO_8:
3554             tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3555             break;
3556         case MO_16:
3557             tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3558             break;
3559         case MO_32:
3560             tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3561             break;
3562         case MO_64:
3563             break;
3564         default:
3565             g_assert_not_reached();
3566         }
3567     }
3568     return true;
3569 }
3570 
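     /*
      * LDCLR clears the bits set in Rs (a BIC); invert == true above
      * complements Rs so it can be implemented as an atomic fetch-AND.
      */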
3571 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3572 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3573 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3574 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3575 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3576 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3577 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3578 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3579 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3580 
3581 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3582 {
3583     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3584     TCGv_i64 clean_addr;
3585     MemOp mop;
3586 
3587     if (!dc_isar_feature(aa64_atomics, s) ||
3588         !dc_isar_feature(aa64_rcpc_8_3, s)) {
3589         return false;
3590     }
3591     if (a->rn == 31) {
3592         gen_check_sp_alignment(s);
3593     }
3594     mop = check_ordered_align(s, a->rn, 0, false, a->sz);
3595     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3596                                 a->rn != 31, mop);
3597     /*
3598      * LDAPR* are a special case because they are simple loads, not
3599      * fetch-and-do-something ops.
3600      * The architectural consistency requirements here are weaker than
3601      * full load-acquire (we only need "load-acquire processor consistent"),
3602      * but we choose to implement them as full LDAQ.
3603      */
3604     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3605               true, a->rt, iss_sf, true);
3606     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3607     return true;
3608 }
3609 
3610 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3611 {
3612     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3613     MemOp memop;
3614 
3615     /* Load with pointer authentication */
3616     if (!dc_isar_feature(aa64_pauth, s)) {
3617         return false;
3618     }
3619 
3620     if (a->rn == 31) {
3621         gen_check_sp_alignment(s);
3622     }
3623     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3624 
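         /* Authenticate the address: with PAuth inactive, AUTDA/AUTDB
          * behave as NOPs and the raw pointer is used unmodified. */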
3625     if (s->pauth_active) {
3626         if (!a->m) {
3627             gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3628                                       tcg_constant_i64(0));
3629         } else {
3630             gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3631                                       tcg_constant_i64(0));
3632         }
3633     }
3634 
3635     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3636 
3637     memop = finalize_memop(s, MO_64);
3638 
3639     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3640     clean_addr = gen_mte_check1(s, dirty_addr, false,
3641                                 a->w || a->rn != 31, memop);
3642 
3643     tcg_rt = cpu_reg(s, a->rt);
3644     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3645               /* extend */ false, /* iss_valid */ !a->w,
3646               /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3647 
3648     if (a->w) {
3649         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3650     }
3651     return true;
3652 }
3653 
3654 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3655 {
3656     TCGv_i64 clean_addr, dirty_addr;
3657     MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3658     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3659 
3660     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3661         return false;
3662     }
3663 
3664     if (a->rn == 31) {
3665         gen_check_sp_alignment(s);
3666     }
3667 
3668     mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3669     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3670     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3671     clean_addr = clean_data_tbi(s, dirty_addr);
3672 
3673     /*
3674      * Load-AcquirePC semantics; we implement as the slightly more
3675      * restrictive Load-Acquire.
3676      */
3677     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3678               a->rt, iss_sf, true);
3679     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3680     return true;
3681 }
3682 
3683 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3684 {
3685     TCGv_i64 clean_addr, dirty_addr;
3686     MemOp mop = a->sz;
3687     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3688 
3689     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3690         return false;
3691     }
3692 
3693     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3694 
3695     if (a->rn == 31) {
3696         gen_check_sp_alignment(s);
3697     }
3698 
3699     mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3700     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3701     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3702     clean_addr = clean_data_tbi(s, dirty_addr);
3703 
3704     /* Store-Release semantics */
3705     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3706     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3707     return true;
3708 }
3709 
3710 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3711 {
3712     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3713     MemOp endian, align, mop;
3714 
3715     int total;    /* total bytes */
3716     int elements; /* elements per vector */
3717     int r;
3718     int size = a->sz;
3719 
3720     if (!a->p && a->rm != 0) {
3721         /* For non-postindexed accesses the Rm field must be 0 */
3722         return false;
3723     }
3724     if (size == 3 && !a->q && a->selem != 1) {
3725         return false;
3726     }
3727     if (!fp_access_check(s)) {
3728         return true;
3729     }
3730 
3731     if (a->rn == 31) {
3732         gen_check_sp_alignment(s);
3733     }
3734 
3735     /* For our purposes, bytes are always little-endian.  */
3736     endian = s->be_data;
3737     if (size == 0) {
3738         endian = MO_LE;
3739     }
3740 
3741     total = a->rpt * a->selem * (a->q ? 16 : 8);
3742     tcg_rn = cpu_reg_sp(s, a->rn);
3743 
3744     /*
3745      * Issue the MTE check vs the logical repeat count, before we
3746      * promote consecutive little-endian elements below.
3747      */
3748     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3749                                 finalize_memop_asimd(s, size));
3750 
3751     /*
3752      * Consecutive little-endian elements from a single register
3753      * can be promoted to a larger little-endian operation.
3754      */
3755     align = MO_ALIGN;
3756     if (a->selem == 1 && endian == MO_LE) {
3757         align = pow2_align(size);
3758         size = 3;
3759     }
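         /*
          * e.g. LD1 {v0.4s}, [x0] (size 2, selem 1) becomes two 64-bit
          * little-endian loads, while pow2_align(2) keeps the alignment
          * check (when enabled) at the original 4-byte element size.
          */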
3760     if (!s->align_mem) {
3761         align = 0;
3762     }
3763     mop = endian | size | align;
3764 
3765     elements = (a->q ? 16 : 8) >> size;
3766     tcg_ebytes = tcg_constant_i64(1 << size);
3767     for (r = 0; r < a->rpt; r++) {
3768         int e;
3769         for (e = 0; e < elements; e++) {
3770             int xs;
3771             for (xs = 0; xs < a->selem; xs++) {
3772                 int tt = (a->rt + r + xs) % 32;
3773                 do_vec_ld(s, tt, e, clean_addr, mop);
3774                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3775             }
3776         }
3777     }
3778 
3779     /*
3780      * For non-quad operations, setting a slice of the low 64 bits of
3781      * the register clears the high 64 bits (in the ARM ARM pseudocode
3782      * this is implicit in the fact that 'rval' is a 64 bit wide
3783      * variable).  For quad operations, we might still need to zero
3784      * the high bits of SVE.
3785      */
3786     for (r = 0; r < a->rpt * a->selem; r++) {
3787         int tt = (a->rt + r) % 32;
3788         clear_vec_high(s, a->q, tt);
3789     }
3790 
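         /* Post-index writeback: Rm == 31 selects the immediate form,
          * advancing by the total transfer size; otherwise add Xm. */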
3791     if (a->p) {
3792         if (a->rm == 31) {
3793             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3794         } else {
3795             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3796         }
3797     }
3798     return true;
3799 }
3800 
3801 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3802 {
3803     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3804     MemOp endian, align, mop;
3805 
3806     int total;    /* total bytes */
3807     int elements; /* elements per vector */
3808     int r;
3809     int size = a->sz;
3810 
3811     if (!a->p && a->rm != 0) {
3812         /* For non-postindexed accesses the Rm field must be 0 */
3813         return false;
3814     }
3815     if (size == 3 && !a->q && a->selem != 1) {
3816         return false;
3817     }
3818     if (!fp_access_check(s)) {
3819         return true;
3820     }
3821 
3822     if (a->rn == 31) {
3823         gen_check_sp_alignment(s);
3824     }
3825 
3826     /* For our purposes, bytes are always little-endian.  */
3827     endian = s->be_data;
3828     if (size == 0) {
3829         endian = MO_LE;
3830     }
3831 
3832     total = a->rpt * a->selem * (a->q ? 16 : 8);
3833     tcg_rn = cpu_reg_sp(s, a->rn);
3834 
3835     /*
3836      * Issue the MTE check vs the logical repeat count, before we
3837      * promote consecutive little-endian elements below.
3838      */
3839     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
3840                                 finalize_memop_asimd(s, size));
3841 
3842     /*
3843      * Consecutive little-endian elements from a single register
3844      * can be promoted to a larger little-endian operation.
3845      */
3846     align = MO_ALIGN;
3847     if (a->selem == 1 && endian == MO_LE) {
3848         align = pow2_align(size);
3849         size = 3;
3850     }
3851     if (!s->align_mem) {
3852         align = 0;
3853     }
3854     mop = endian | size | align;
3855 
3856     elements = (a->q ? 16 : 8) >> size;
3857     tcg_ebytes = tcg_constant_i64(1 << size);
3858     for (r = 0; r < a->rpt; r++) {
3859         int e;
3860         for (e = 0; e < elements; e++) {
3861             int xs;
3862             for (xs = 0; xs < a->selem; xs++) {
3863                 int tt = (a->rt + r + xs) % 32;
3864                 do_vec_st(s, tt, e, clean_addr, mop);
3865                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3866             }
3867         }
3868     }
3869 
3870     if (a->p) {
3871         if (a->rm == 31) {
3872             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3873         } else {
3874             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3875         }
3876     }
3877     return true;
3878 }
3879 
3880 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
3881 {
3882     int xs, total, rt;
3883     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3884     MemOp mop;
3885 
3886     if (!a->p && a->rm != 0) {
3887         return false;
3888     }
3889     if (!fp_access_check(s)) {
3890         return true;
3891     }
3892 
3893     if (a->rn == 31) {
3894         gen_check_sp_alignment(s);
3895     }
3896 
3897     total = a->selem << a->scale;
3898     tcg_rn = cpu_reg_sp(s, a->rn);
3899 
3900     mop = finalize_memop_asimd(s, a->scale);
3901     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
3902                                 total, mop);
3903 
3904     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3905     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3906         do_vec_st(s, rt, a->index, clean_addr, mop);
3907         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3908     }
3909 
3910     if (a->p) {
3911         if (a->rm == 31) {
3912             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3913         } else {
3914             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3915         }
3916     }
3917     return true;
3918 }
3919 
3920 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
3921 {
3922     int xs, total, rt;
3923     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3924     MemOp mop;
3925 
3926     if (!a->p && a->rm != 0) {
3927         return false;
3928     }
3929     if (!fp_access_check(s)) {
3930         return true;
3931     }
3932 
3933     if (a->rn == 31) {
3934         gen_check_sp_alignment(s);
3935     }
3936 
3937     total = a->selem << a->scale;
3938     tcg_rn = cpu_reg_sp(s, a->rn);
3939 
3940     mop = finalize_memop_asimd(s, a->scale);
3941     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3942                                 total, mop);
3943 
3944     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3945     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3946         do_vec_ld(s, rt, a->index, clean_addr, mop);
3947         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3948     }
3949 
3950     if (a->p) {
3951         if (a->rm == 31) {
3952             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3953         } else {
3954             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3955         }
3956     }
3957     return true;
3958 }
3959 
3960 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
3961 {
3962     int xs, total, rt;
3963     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3964     MemOp mop;
3965 
3966     if (!a->p && a->rm != 0) {
3967         return false;
3968     }
3969     if (!fp_access_check(s)) {
3970         return true;
3971     }
3972 
3973     if (a->rn == 31) {
3974         gen_check_sp_alignment(s);
3975     }
3976 
3977     total = a->selem << a->scale;
3978     tcg_rn = cpu_reg_sp(s, a->rn);
3979 
3980     mop = finalize_memop_asimd(s, a->scale);
3981     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3982                                 total, mop);
3983 
3984     tcg_ebytes = tcg_constant_i64(1 << a->scale);
3985     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3986         /* Load and replicate to all elements */
3987         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3988 
3989         tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
3990         tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
3991                              (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
3992         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3993     }
3994 
3995     if (a->p) {
3996         if (a->rm == 31) {
3997             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3998         } else {
3999             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4000         }
4001     }
4002     return true;
4003 }
4004 
4005 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
4006 {
4007     TCGv_i64 addr, clean_addr, tcg_rt;
4008     int size = 4 << s->dcz_blocksize;
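         /* dcz_blocksize is log2(words), so size is the DC ZVA block
          * size in bytes. */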
4009 
4010     if (!dc_isar_feature(aa64_mte, s)) {
4011         return false;
4012     }
4013     if (s->current_el == 0) {
4014         return false;
4015     }
4016 
4017     if (a->rn == 31) {
4018         gen_check_sp_alignment(s);
4019     }
4020 
4021     addr = read_cpu_reg_sp(s, a->rn, true);
4022     tcg_gen_addi_i64(addr, addr, a->imm);
4023     tcg_rt = cpu_reg(s, a->rt);
4024 
4025     if (s->ata[0]) {
4026         gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
4027     }
4028     /*
4029      * The non-tags portion of STZGM is mostly like DC_ZVA,
4030      * except the alignment happens before the access.
4031      */
4032     clean_addr = clean_data_tbi(s, addr);
4033     tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4034     gen_helper_dc_zva(tcg_env, clean_addr);
4035     return true;
4036 }
4037 
4038 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
4039 {
4040     TCGv_i64 addr, clean_addr, tcg_rt;
4041 
4042     if (!dc_isar_feature(aa64_mte, s)) {
4043         return false;
4044     }
4045     if (s->current_el == 0) {
4046         return false;
4047     }
4048 
4049     if (a->rn == 31) {
4050         gen_check_sp_alignment(s);
4051     }
4052 
4053     addr = read_cpu_reg_sp(s, a->rn, true);
4054     tcg_gen_addi_i64(addr, addr, a->imm);
4055     tcg_rt = cpu_reg(s, a->rt);
4056 
4057     if (s->ata[0]) {
4058         gen_helper_stgm(tcg_env, addr, tcg_rt);
4059     } else {
4060         MMUAccessType acc = MMU_DATA_STORE;
4061         int size = 4 << s->gm_blocksize;
4062 
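             /*
              * No tags are written, but the access must still fault
              * where a store would, so probe the naturally-aligned
              * block.
              */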
4063         clean_addr = clean_data_tbi(s, addr);
4064         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4065         gen_probe_access(s, clean_addr, acc, size);
4066     }
4067     return true;
4068 }
4069 
4070 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
4071 {
4072     TCGv_i64 addr, clean_addr, tcg_rt;
4073 
4074     if (!dc_isar_feature(aa64_mte, s)) {
4075         return false;
4076     }
4077     if (s->current_el == 0) {
4078         return false;
4079     }
4080 
4081     if (a->rn == 31) {
4082         gen_check_sp_alignment(s);
4083     }
4084 
4085     addr = read_cpu_reg_sp(s, a->rn, true);
4086     tcg_gen_addi_i64(addr, addr, a->imm);
4087     tcg_rt = cpu_reg(s, a->rt);
4088 
4089     if (s->ata[0]) {
4090         gen_helper_ldgm(tcg_rt, tcg_env, addr);
4091     } else {
4092         MMUAccessType acc = MMU_DATA_LOAD;
4093         int size = 4 << s->gm_blocksize;
4094 
4095         clean_addr = clean_data_tbi(s, addr);
4096         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4097         gen_probe_access(s, clean_addr, acc, size);
4098         /* The result tags are zeros.  */
4099         tcg_gen_movi_i64(tcg_rt, 0);
4100     }
4101     return true;
4102 }
4103 
4104 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
4105 {
4106     TCGv_i64 addr, clean_addr, tcg_rt;
4107 
4108     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
4109         return false;
4110     }
4111 
4112     if (a->rn == 31) {
4113         gen_check_sp_alignment(s);
4114     }
4115 
4116     addr = read_cpu_reg_sp(s, a->rn, true);
4117     if (!a->p) {
4118         /* pre-index or signed offset */
4119         tcg_gen_addi_i64(addr, addr, a->imm);
4120     }
4121 
4122     tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4123     tcg_rt = cpu_reg(s, a->rt);
4124     if (s->ata[0]) {
4125         gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
4126     } else {
4127         /*
4128          * Tag access disabled: we must check for aborts on the load
4129          * load from [rn+offset], and then insert a 0 tag into rt.
4130          */
4131         clean_addr = clean_data_tbi(s, addr);
4132         gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4133         gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
4134     }
4135 
4136     if (a->w) {
4137         /* pre-index or post-index */
4138         if (a->p) {
4139             /* post-index */
4140             tcg_gen_addi_i64(addr, addr, a->imm);
4141         }
4142         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4143     }
4144     return true;
4145 }
4146 
4147 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
4148 {
4149     TCGv_i64 addr, tcg_rt;
4150 
4151     if (a->rn == 31) {
4152         gen_check_sp_alignment(s);
4153     }
4154 
4155     addr = read_cpu_reg_sp(s, a->rn, true);
4156     if (!a->p) {
4157         /* pre-index or signed offset */
4158         tcg_gen_addi_i64(addr, addr, a->imm);
4159     }
4160     tcg_rt = cpu_reg_sp(s, a->rt);
4161     if (!s->ata[0]) {
4162         /*
4163          * For STG and ST2G, we need to check alignment and probe memory.
4164          * TODO: For STZG and STZ2G, we could rely on the stores below,
4165          * at least for system mode; user-only won't enforce alignment.
4166          */
4167         if (is_pair) {
4168             gen_helper_st2g_stub(tcg_env, addr);
4169         } else {
4170             gen_helper_stg_stub(tcg_env, addr);
4171         }
4172     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4173         if (is_pair) {
4174             gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4175         } else {
4176             gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4177         }
4178     } else {
4179         if (is_pair) {
4180             gen_helper_st2g(tcg_env, addr, tcg_rt);
4181         } else {
4182             gen_helper_stg(tcg_env, addr, tcg_rt);
4183         }
4184     }
4185 
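         /* For STZG/STZ2G, also zero the data of each 16-byte granule. */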
4186     if (is_zero) {
4187         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4188         TCGv_i64 zero64 = tcg_constant_i64(0);
4189         TCGv_i128 zero128 = tcg_temp_new_i128();
4190         int mem_index = get_mem_index(s);
4191         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4192 
4193         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4194 
4195         /* This is 1 or 2 atomic 16-byte operations. */
4196         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4197         if (is_pair) {
4198             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4199             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4200         }
4201     }
4202 
4203     if (a->w) {
4204         /* pre-index or post-index */
4205         if (a->p) {
4206             /* post-index */
4207             tcg_gen_addi_i64(addr, addr, a->imm);
4208         }
4209         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4210     }
4211     return true;
4212 }
4213 
4214 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
4215 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
4216 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
4217 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
4218 
4219 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4220 
4221 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4222                    bool is_setg, SetFn fn)
4223 {
4224     int memidx;
4225     uint32_t syndrome, desc = 0;
4226 
4227     if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4228         return false;
4229     }
4230 
4231     /*
4232      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4233      * us to pull this check before the CheckMOPSEnabled() test
4234      * (which we do in the helper function)
4235      */
4236     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4237         a->rd == 31 || a->rn == 31) {
4238         return false;
4239     }
4240 
4241     memidx = get_a64_user_mem_index(s, a->unpriv);
4242 
4243     /*
4244      * We pass option_a == true, matching our implementation;
4245      * we pass wrong_option == false: the helper function may set that bit.
4246      */
4247     syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4248                        is_epilogue, false, true, a->rd, a->rs, a->rn);
4249 
4250     if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4251         /* We may need to do MTE tag checking, so assemble the descriptor */
4252         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4253         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4254         desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4255         /* SIZEM1 and ALIGN we leave 0 (byte write) */
4256     }
4257     /* The helper function always needs the memidx even with MTE disabled */
4258     desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4259 
4260     /*
4261      * The helper needs the register numbers, but since they're in
4262      * the syndrome anyway, we let it extract them from there rather
4263      * than passing in an extra three integer arguments.
4264      */
4265     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4266     return true;
4267 }
4268 
4269 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
4270 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
4271 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
4272 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
4273 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
4274 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
4275 
4276 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4277 
4278 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4279 {
4280     int rmemidx, wmemidx;
4281     uint32_t syndrome, rdesc = 0, wdesc = 0;
4282     bool wunpriv = extract32(a->options, 0, 1);
4283     bool runpriv = extract32(a->options, 1, 1);
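         /* options bit 0 flags unprivileged writes, bit 1 unprivileged
          * reads, per the CPY* encoding. */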
4284 
4285     /*
4286      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4287      * us to pull this check before the CheckMOPSEnabled() test
4288      * (which we do in the helper function)
4289      */
4290     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4291         a->rd == 31 || a->rs == 31 || a->rn == 31) {
4292         return false;
4293     }
4294 
4295     rmemidx = get_a64_user_mem_index(s, runpriv);
4296     wmemidx = get_a64_user_mem_index(s, wunpriv);
4297 
4298     /*
4299      * We pass option_a == true, matching our implementation;
4300      * we pass wrong_option == false: the helper function may set that bit.
4301      */
4302     syndrome = syn_mop(false, false, a->options, is_epilogue,
4303                        false, true, a->rd, a->rs, a->rn);
4304 
4305     /* If we need to do MTE tag checking, assemble the descriptors */
4306     if (s->mte_active[runpriv]) {
4307         rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4308         rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4309     }
4310     if (s->mte_active[wunpriv]) {
4311         wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4312         wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4313         wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4314     }
4315     /* The helper function needs these parts of the descriptor regardless */
4316     rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4317     wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4318 
4319     /*
4320      * The helper needs the register numbers, but since they're in
4321      * the syndrome anyway, we let it extract them from there rather
4322      * than passing in an extra three integer arguments.
4323      */
4324     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4325        tcg_constant_i32(rdesc));
4326     return true;
4327 }
4328 
4329 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4330 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4331 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4332 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4333 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4334 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
4335 
4336 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4337 
4338 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4339                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4340 {
4341     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4342     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4343     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4344 
4345     fn(tcg_rd, tcg_rn, tcg_imm);
4346     if (!a->sf) {
4347         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4348     }
4349     return true;
4350 }
4351 
4352 /*
4353  * PC-rel. addressing
4354  */
4355 
4356 static bool trans_ADR(DisasContext *s, arg_ri *a)
4357 {
4358     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4359     return true;
4360 }
4361 
4362 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4363 {
4364     int64_t offset = (int64_t)a->imm << 12;
4365 
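         /*
          * ADRP computes (PC & ~0xfff) + (imm << 12); subtracting PC's
          * page offset below lets gen_pc_plus_diff treat the result as
          * plain PC-relative.
          */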
4366     /* The page offset is ok for CF_PCREL. */
4367     offset -= s->pc_curr & 0xfff;
4368     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4369     return true;
4370 }
4371 
4372 /*
4373  * Add/subtract (immediate)
4374  */
4375 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4376 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4377 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4378 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4379 
4380 /*
4381  * Add/subtract (immediate, with tags)
4382  */
4383 
4384 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4385                                       bool sub_op)
4386 {
4387     TCGv_i64 tcg_rn, tcg_rd;
4388     int imm;
4389 
4390     imm = a->uimm6 << LOG2_TAG_GRANULE;
4391     if (sub_op) {
4392         imm = -imm;
4393     }
4394 
4395     tcg_rn = cpu_reg_sp(s, a->rn);
4396     tcg_rd = cpu_reg_sp(s, a->rd);
4397 
4398     if (s->ata[0]) {
4399         gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4400                            tcg_constant_i32(imm),
4401                            tcg_constant_i32(a->uimm4));
4402     } else {
4403         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4404         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4405     }
4406     return true;
4407 }
4408 
4409 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4410 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4411 
4412 /* The input should be a value in the bottom e bits (with higher
4413  * bits zero); returns that value replicated into every element
4414  * of size e in a 64 bit integer.
4415  */
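     /* e.g. bitfield_replicate(0x3, 4) == 0x3333333333333333ull. */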
4416 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4417 {
4418     assert(e != 0);
4419     while (e < 64) {
4420         mask |= mask << e;
4421         e *= 2;
4422     }
4423     return mask;
4424 }
4425 
4426 /*
4427  * Logical (immediate)
4428  */
4429 
4430 /*
4431  * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4432  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4433  * value (ie should cause a guest UNDEF exception), and true if they are
4434  * valid, in which case the decoded bit pattern is written to result.
4435  */
4436 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4437                             unsigned int imms, unsigned int immr)
4438 {
4439     uint64_t mask;
4440     unsigned e, levels, s, r;
4441     int len;
4442 
4443     assert(immn < 2 && imms < 64 && immr < 64);
4444 
4445     /* The bit patterns we create here are 64 bit patterns which
4446      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4447      * 64 bits each. Each element contains the same value: a run
4448      * of between 1 and e-1 non-zero bits, rotated within the
4449      * element by between 0 and e-1 bits.
4450      *
4451      * The element size and run length are encoded into immn (1 bit)
4452      * and imms (6 bits) as follows:
4453      * 64 bit elements: immn = 1, imms = <length of run - 1>
4454      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4455      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4456      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4457      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4458      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4459      * Notice that immn = 0, imms = 11111x is the only combination
4460      * not covered by one of the above options; this is reserved.
4461      * Further, <length of run - 1> all-ones is a reserved pattern.
4462      *
4463      * In all cases the rotation is by immr % e (and immr is 6 bits).
4464      */
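         /*
          * Worked example: immn = 0, imms = 0b000011, immr = 0b000001
          * gives e = 32 and a run of 4 ones rotated right by 1, so each
          * element is 0x80000007 and the result 0x8000000780000007.
          */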
4465 
4466     /* First determine the element size */
4467     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4468     if (len < 1) {
4469         /* This is the immn == 0, imms == 0b11111x case */
4470         return false;
4471     }
4472     e = 1 << len;
4473 
4474     levels = e - 1;
4475     s = imms & levels;
4476     r = immr & levels;
4477 
4478     if (s == levels) {
4479         /* <length of run - 1> mustn't be all-ones. */
4480         return false;
4481     }
4482 
4483     /* Create the value of one element: s+1 set bits rotated
4484      * by r within the element (which is e bits wide)...
4485      */
4486     mask = MAKE_64BIT_MASK(0, s + 1);
4487     if (r) {
4488         mask = (mask >> r) | (mask << (e - r));
4489         mask &= MAKE_64BIT_MASK(0, e);
4490     }
4491     /* ...then replicate the element over the whole 64 bit value */
4492     mask = bitfield_replicate(mask, e);
4493     *result = mask;
4494     return true;
4495 }
4496 
4497 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4498                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4499 {
4500     TCGv_i64 tcg_rd, tcg_rn;
4501     uint64_t imm;
4502 
4503     /* Some immediate field values are reserved. */
4504     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4505                                 extract32(a->dbm, 0, 6),
4506                                 extract32(a->dbm, 6, 6))) {
4507         return false;
4508     }
4509     if (!a->sf) {
4510         imm &= 0xffffffffull;
4511     }
4512 
4513     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4514     tcg_rn = cpu_reg(s, a->rn);
4515 
4516     fn(tcg_rd, tcg_rn, imm);
4517     if (set_cc) {
4518         gen_logic_CC(a->sf, tcg_rd);
4519     }
4520     if (!a->sf) {
4521         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4522     }
4523     return true;
4524 }
4525 
4526 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4527 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4528 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4529 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4530 
4531 /*
4532  * Move wide (immediate)
4533  */
4534 
4535 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4536 {
4537     int pos = a->hw << 4;
4538     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4539     return true;
4540 }
4541 
4542 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4543 {
4544     int pos = a->hw << 4;
4545     uint64_t imm = a->imm;
4546 
4547     imm = ~(imm << pos);
4548     if (!a->sf) {
4549         imm = (uint32_t)imm;
4550     }
4551     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4552     return true;
4553 }
4554 
4555 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4556 {
4557     int pos = a->hw << 4;
4558     TCGv_i64 tcg_rd, tcg_im;
4559 
4560     tcg_rd = cpu_reg(s, a->rd);
4561     tcg_im = tcg_constant_i64(a->imm);
4562     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4563     if (!a->sf) {
4564         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4565     }
4566     return true;
4567 }
4568 
4569 /*
4570  * Bitfield
4571  */
4572 
4573 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4574 {
4575     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4576     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4577     unsigned int bitsize = a->sf ? 64 : 32;
4578     unsigned int ri = a->immr;
4579     unsigned int si = a->imms;
4580     unsigned int pos, len;
4581 
4582     if (si >= ri) {
4583         /* Wd<s-r:0> = Wn<s:r> */
4584         len = (si - ri) + 1;
4585         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4586         if (!a->sf) {
4587             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4588         }
4589     } else {
4590         /* Wd<32+s-r,32-r> = Wn<s:0> */
4591         len = si + 1;
4592         pos = (bitsize - ri) & (bitsize - 1);
4593 
4594         if (len < ri) {
4595             /*
4596              * Sign extend the destination field from len to fill the
4597              * balance of the word.  Let the deposit below insert all
4598              * of those sign bits.
4599              */
4600             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4601             len = ri;
4602         }
4603 
4604         /*
4605          * We start with zero, and we haven't modified any bits outside
4606          * bitsize, therefore no final zero-extension is needed for !sf.
4607          */
4608         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4609     }
4610     return true;
4611 }
4612 
4613 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4614 {
4615     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4616     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4617     unsigned int bitsize = a->sf ? 64 : 32;
4618     unsigned int ri = a->immr;
4619     unsigned int si = a->imms;
4620     unsigned int pos, len;
4621
4625     if (si >= ri) {
4626         /* Wd<s-r:0> = Wn<s:r> */
4627         len = (si - ri) + 1;
4628         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4629     } else {
4630         /* Wd<32+s-r,32-r> = Wn<s:0> */
4631         len = si + 1;
4632         pos = (bitsize - ri) & (bitsize - 1);
4633         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4634     }
4635     return true;
4636 }
4637 
4638 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4639 {
4640     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4641     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4642     unsigned int bitsize = a->sf ? 64 : 32;
4643     unsigned int ri = a->immr;
4644     unsigned int si = a->imms;
4645     unsigned int pos, len;
4646
4650     if (si >= ri) {
4651         /* Wd<s-r:0> = Wn<s:r> */
4652         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4653         len = (si - ri) + 1;
4654         pos = 0;
4655     } else {
4656         /* Wd<32+s-r,32-r> = Wn<s:0> */
4657         len = si + 1;
4658         pos = (bitsize - ri) & (bitsize - 1);
4659     }
4660 
4661     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4662     if (!a->sf) {
4663         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4664     }
4665     return true;
4666 }
4667 
4668 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4669 {
4670     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4671 
4672     tcg_rd = cpu_reg(s, a->rd);
4673 
4674     if (unlikely(a->imm == 0)) {
4675         /*
4676          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4677          * so an extract from bit 0 is a special case.
4678          */
4679         if (a->sf) {
4680             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4681         } else {
4682             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4683         }
4684     } else {
4685         tcg_rm = cpu_reg(s, a->rm);
4686         tcg_rn = cpu_reg(s, a->rn);
4687 
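             /*
              * extract2(rd, rm, rn, imm) computes
              * (rn << (64 - imm)) | (rm >> imm), i.e. bits
              * [imm+63:imm] of Rn:Rm; rm == rn degenerates to ROR.
              */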
4688         if (a->sf) {
4689             /* Specialization to ROR happens in EXTRACT2.  */
4690             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4691         } else {
4692             TCGv_i32 t0 = tcg_temp_new_i32();
4693 
4694             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4695             if (a->rm == a->rn) {
4696                 tcg_gen_rotri_i32(t0, t0, a->imm);
4697             } else {
4698                 TCGv_i32 t1 = tcg_temp_new_i32();
4699                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4700                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4701             }
4702             tcg_gen_extu_i32_i64(tcg_rd, t0);
4703         }
4704     }
4705     return true;
4706 }
4707 
4708 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a)
4709 {
4710     if (fp_access_check(s)) {
4711         int len = (a->len + 1) * 16;
4712 
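             /* Pack the table length in bytes, the TBX flag and the
              * first table register number into simd_data. */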
4713         tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4714                            vec_full_reg_offset(s, a->rm), tcg_env,
4715                            a->q ? 16 : 8, vec_full_reg_size(s),
4716                            (len << 6) | (a->tbx << 5) | a->rn,
4717                            gen_helper_simd_tblx);
4718     }
4719     return true;
4720 }
4721 
4722 typedef int simd_permute_idx_fn(int i, int part, int elements);
4723 
4724 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
4725                             simd_permute_idx_fn *fn, int part)
4726 {
4727     MemOp esz = a->esz;
4728     int datasize = a->q ? 16 : 8;
4729     int elements = datasize >> esz;
4730     TCGv_i64 tcg_res[2], tcg_ele;
4731 
4732     if (esz == MO_64 && !a->q) {
4733         return false;
4734     }
4735     if (!fp_access_check(s)) {
4736         return true;
4737     }
4738 
4739     tcg_res[0] = tcg_temp_new_i64();
4740     tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
4741     tcg_ele = tcg_temp_new_i64();
4742 
4743     for (int i = 0; i < elements; i++) {
4744         int o, w, idx;
4745 
4746         idx = fn(i, part, elements);
4747         read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn),
4748                          idx & (elements - 1), esz);
4749 
4750         w = (i << (esz + 3)) / 64;
4751         o = (i << (esz + 3)) % 64;
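             /* Result element i lands in bits [o + esize - 1 : o] of
              * result word w. */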
4752         if (o == 0) {
4753             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
4754         } else {
4755             tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz);
4756         }
4757     }
4758 
4759     for (int i = a->q; i >= 0; --i) {
4760         write_vec_element(s, tcg_res[i], a->rd, i, MO_64);
4761     }
4762     clear_vec_high(s, a->q, a->rd);
4763     return true;
4764 }
4765 
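     /*
      * UZPn selects the even (part 0) or odd (part 1) elements of the
      * sequence formed by Rn then Rm; index bit 'elements' selects Rm.
      */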
4766 static int permute_load_uzp(int i, int part, int elements)
4767 {
4768     return 2 * i + part;
4769 }
4770 
4771 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0)
4772 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1)
4773 
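     /*
      * TRNn interleaves pairs: result element i comes from source
      * element (i & ~1) + part, taken from Rn for even i and Rm for
      * odd i.
      */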
4774 static int permute_load_trn(int i, int part, int elements)
4775 {
4776     return (i & 1) * elements + (i & ~1) + part;
4777 }
4778 
4779 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0)
4780 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1)
4781 
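     /*
      * ZIPn interleaves the low (part 0) or high (part 1) halves of Rn
      * and Rm, alternating sources and advancing the source index every
      * second result element.
      */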
4782 static int permute_load_zip(int i, int part, int elements)
4783 {
4784     return (i & 1) * elements + ((part * elements + i) >> 1);
4785 }
4786 
4787 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0)
4788 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1)
4789 
4790 /*
4791  * Cryptographic AES, SHA, SHA512
4792  */
4793 
4794 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
4795 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
4796 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
4797 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
4798 
4799 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
4800 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
4801 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
4802 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)
4803 
4804 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
4805 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
4806 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
4807 
4808 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
4809 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
4810 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
4811 
4812 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
4813 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
4814 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
4815 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
4816 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
4817 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
4818 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
4819 
4820 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
4821 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
4822 
4823 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
4824 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
4825 
4826 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
4827 {
4828     if (!dc_isar_feature(aa64_sm3, s)) {
4829         return false;
4830     }
4831     if (fp_access_check(s)) {
4832         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
4833         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
4834         TCGv_i32 tcg_op3 = tcg_temp_new_i32();
4835         TCGv_i32 tcg_res = tcg_temp_new_i32();
4836 
4837         read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
4838         read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
4839         read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
4840 
4841         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
4842         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
4843         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
4844         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
4845 
4846         /* Clear the whole register first, then store bits [127:96]. */
4847         clear_vec(s, a->rd);
4848         write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
4849     }
4850     return true;
4851 }
4852 
4853 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
4854 {
4855     if (fp_access_check(s)) {
4856         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
4857     }
4858     return true;
4859 }
4860 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
4861 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
4862 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
4863 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
4864 
4865 static bool trans_XAR(DisasContext *s, arg_XAR *a)
4866 {
4867     if (!dc_isar_feature(aa64_sha3, s)) {
4868         return false;
4869     }
4870     if (fp_access_check(s)) {
4871         gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
4872                      vec_full_reg_offset(s, a->rn),
4873                      vec_full_reg_offset(s, a->rm), a->imm, 16,
4874                      vec_full_reg_size(s));
4875     }
4876     return true;
4877 }
4878 
4879 /*
4880  * Advanced SIMD copy
4881  */
4882 
4883 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
4884 {
4885     unsigned esz = ctz32(imm);
4886     if (esz <= MO_64) {
4887         *pesz = esz;
4888         *pidx = imm >> (esz + 1);
4889         return true;
4890     }
4891     return false;
4892 }
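/*
 * The imm field is a ones-hot merge of size and index: the lowest set
 * bit gives the element size, the bits above it the index.  E.g.
 * imm = 0b01010 decodes to esz = MO_16, idx = 2.  Values with no bit
 * set in [3:0] fail (including imm = 0, since ctz32(0) == 32).
 */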
4893 
4894 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
4895 {
4896     MemOp esz;
4897     unsigned idx;
4898 
4899     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4900         return false;
4901     }
4902     if (fp_access_check(s)) {
4903         /*
4904          * This instruction just extracts the specified element and
4905          * zero-extends it into the bottom of the destination register.
4906          */
4907         TCGv_i64 tmp = tcg_temp_new_i64();
4908         read_vec_element(s, tmp, a->rn, idx, esz);
4909         write_fp_dreg(s, a->rd, tmp);
4910     }
4911     return true;
4912 }
4913 
4914 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
4915 {
4916     MemOp esz;
4917     unsigned idx;
4918 
4919     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4920         return false;
4921     }
4922     if (esz == MO_64 && !a->q) {
4923         return false;
4924     }
4925     if (fp_access_check(s)) {
4926         tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
4927                              vec_reg_offset(s, a->rn, idx, esz),
4928                              a->q ? 16 : 8, vec_full_reg_size(s));
4929     }
4930     return true;
4931 }
4932 
4933 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
4934 {
4935     MemOp esz;
4936     unsigned idx;
4937 
4938     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4939         return false;
4940     }
4941     if (esz == MO_64 && !a->q) {
4942         return false;
4943     }
4944     if (fp_access_check(s)) {
4945         tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4946                              a->q ? 16 : 8, vec_full_reg_size(s),
4947                              cpu_reg(s, a->rn));
4948     }
4949     return true;
4950 }
4951 
4952 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
4953 {
4954     MemOp esz;
4955     unsigned idx;
4956 
4957     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4958         return false;
4959     }
4960     if (is_signed) {
4961         if (esz == MO_64 || (esz == MO_32 && !a->q)) {
4962             return false;
4963         }
4964     } else {
4965         if (esz == MO_64 ? !a->q : a->q) {
4966             return false;
4967         }
4968     }
4969     if (fp_access_check(s)) {
4970         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4971         read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
4972         if (is_signed && !a->q) {
4973             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4974         }
4975     }
4976     return true;
4977 }
4978 
4979 TRANS(SMOV, do_smov_umov, a, MO_SIGN)
4980 TRANS(UMOV, do_smov_umov, a, 0)
4981 
4982 static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
4983 {
4984     MemOp esz;
4985     unsigned idx;
4986 
4987     if (!decode_esz_idx(a->imm, &esz, &idx)) {
4988         return false;
4989     }
4990     if (fp_access_check(s)) {
4991         write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
4992         clear_vec_high(s, true, a->rd);
4993     }
4994     return true;
4995 }
4996 
4997 static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
4998 {
4999     MemOp esz;
5000     unsigned didx, sidx;
5001 
5002     if (!decode_esz_idx(a->di, &esz, &didx)) {
5003         return false;
5004     }
5005     sidx = a->si >> esz;
5006     if (fp_access_check(s)) {
5007         TCGv_i64 tmp = tcg_temp_new_i64();
5008 
5009         read_vec_element(s, tmp, a->rn, sidx, esz);
5010         write_vec_element(s, tmp, a->rd, didx, esz);
5011 
5012         /* INS is considered a 128-bit write for SVE. */
5013         clear_vec_high(s, true, a->rd);
5014     }
5015     return true;
5016 }
5017 
5018 /*
5019  * Advanced SIMD three same
5020  */
5021 
5022 typedef struct FPScalar {
5023     void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5024     void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5025     void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
5026 } FPScalar;
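/*
 * One generator per precision: gen_h/gen_s/gen_d cover half, single
 * and double.  do_fp3_scalar dispatches on a->esz and selects the
 * FP16 float_status only for the half-precision case.
 */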
5027 
5028 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
5029 {
5030     switch (a->esz) {
5031     case MO_64:
5032         if (fp_access_check(s)) {
5033             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5034             TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5035             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
5036             write_fp_dreg(s, a->rd, t0);
5037         }
5038         break;
5039     case MO_32:
5040         if (fp_access_check(s)) {
5041             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5042             TCGv_i32 t1 = read_fp_sreg(s, a->rm);
5043             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
5044             write_fp_sreg(s, a->rd, t0);
5045         }
5046         break;
5047     case MO_16:
5048         if (!dc_isar_feature(aa64_fp16, s)) {
5049             return false;
5050         }
5051         if (fp_access_check(s)) {
5052             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5053             TCGv_i32 t1 = read_fp_hreg(s, a->rm);
5054             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
5055             write_fp_sreg(s, a->rd, t0);
5056         }
5057         break;
5058     default:
5059         return false;
5060     }
5061     return true;
5062 }
5063 
5064 static const FPScalar f_scalar_fadd = {
5065     gen_helper_vfp_addh,
5066     gen_helper_vfp_adds,
5067     gen_helper_vfp_addd,
5068 };
5069 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd)
5070 
5071 static const FPScalar f_scalar_fsub = {
5072     gen_helper_vfp_subh,
5073     gen_helper_vfp_subs,
5074     gen_helper_vfp_subd,
5075 };
5076 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub)
5077 
5078 static const FPScalar f_scalar_fdiv = {
5079     gen_helper_vfp_divh,
5080     gen_helper_vfp_divs,
5081     gen_helper_vfp_divd,
5082 };
5083 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv)
5084 
5085 static const FPScalar f_scalar_fmul = {
5086     gen_helper_vfp_mulh,
5087     gen_helper_vfp_muls,
5088     gen_helper_vfp_muld,
5089 };
5090 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul)
5091 
5092 static const FPScalar f_scalar_fmax = {
5093     gen_helper_vfp_maxh,
5094     gen_helper_vfp_maxs,
5095     gen_helper_vfp_maxd,
5096 };
5097 TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax)
5098 
5099 static const FPScalar f_scalar_fmin = {
5100     gen_helper_vfp_minh,
5101     gen_helper_vfp_mins,
5102     gen_helper_vfp_mind,
5103 };
5104 TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin)
5105 
5106 static const FPScalar f_scalar_fmaxnm = {
5107     gen_helper_vfp_maxnumh,
5108     gen_helper_vfp_maxnums,
5109     gen_helper_vfp_maxnumd,
5110 };
5111 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm)
5112 
5113 static const FPScalar f_scalar_fminnm = {
5114     gen_helper_vfp_minnumh,
5115     gen_helper_vfp_minnums,
5116     gen_helper_vfp_minnumd,
5117 };
5118 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm)
5119 
5120 static const FPScalar f_scalar_fmulx = {
5121     gen_helper_advsimd_mulxh,
5122     gen_helper_vfp_mulxs,
5123     gen_helper_vfp_mulxd,
5124 };
5125 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx)
5126 
5127 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5128 {
5129     gen_helper_vfp_mulh(d, n, m, s);
5130     gen_vfp_negh(d, d);
5131 }
5132 
5133 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5134 {
5135     gen_helper_vfp_muls(d, n, m, s);
5136     gen_vfp_negs(d, d);
5137 }
5138 
5139 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5140 {
5141     gen_helper_vfp_muld(d, n, m, s);
5142     gen_vfp_negd(d, d);
5143 }
5144 
5145 static const FPScalar f_scalar_fnmul = {
5146     gen_fnmul_h,
5147     gen_fnmul_s,
5148     gen_fnmul_d,
5149 };
5150 TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul)
5151 
5152 static const FPScalar f_scalar_fcmeq = {
5153     gen_helper_advsimd_ceq_f16,
5154     gen_helper_neon_ceq_f32,
5155     gen_helper_neon_ceq_f64,
5156 };
5157 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq)
5158 
5159 static const FPScalar f_scalar_fcmge = {
5160     gen_helper_advsimd_cge_f16,
5161     gen_helper_neon_cge_f32,
5162     gen_helper_neon_cge_f64,
5163 };
5164 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge)
5165 
5166 static const FPScalar f_scalar_fcmgt = {
5167     gen_helper_advsimd_cgt_f16,
5168     gen_helper_neon_cgt_f32,
5169     gen_helper_neon_cgt_f64,
5170 };
5171 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt)
5172 
5173 static const FPScalar f_scalar_facge = {
5174     gen_helper_advsimd_acge_f16,
5175     gen_helper_neon_acge_f32,
5176     gen_helper_neon_acge_f64,
5177 };
5178 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge)
5179 
5180 static const FPScalar f_scalar_facgt = {
5181     gen_helper_advsimd_acgt_f16,
5182     gen_helper_neon_acgt_f32,
5183     gen_helper_neon_acgt_f64,
5184 };
5185 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt)
5186 
5187 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5188 {
5189     gen_helper_vfp_subh(d, n, m, s);
5190     gen_vfp_absh(d, d);
5191 }
5192 
5193 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5194 {
5195     gen_helper_vfp_subs(d, n, m, s);
5196     gen_vfp_abss(d, d);
5197 }
5198 
5199 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5200 {
5201     gen_helper_vfp_subd(d, n, m, s);
5202     gen_vfp_absd(d, d);
5203 }
5204 
5205 static const FPScalar f_scalar_fabd = {
5206     gen_fabd_h,
5207     gen_fabd_s,
5208     gen_fabd_d,
5209 };
5210 TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)
5211 
5212 static const FPScalar f_scalar_frecps = {
5213     gen_helper_recpsf_f16,
5214     gen_helper_recpsf_f32,
5215     gen_helper_recpsf_f64,
5216 };
5217 TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps)
5218 
5219 static const FPScalar f_scalar_frsqrts = {
5220     gen_helper_rsqrtsf_f16,
5221     gen_helper_rsqrtsf_f32,
5222     gen_helper_rsqrtsf_f64,
5223 };
5224 TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts)
5225 
5226 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
5227                        const FPScalar *f, bool swap)
5228 {
5229     switch (a->esz) {
5230     case MO_64:
5231         if (fp_access_check(s)) {
5232             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5233             TCGv_i64 t1 = tcg_constant_i64(0);
5234             if (swap) {
5235                 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64));
5236             } else {
5237                 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
5238             }
5239             write_fp_dreg(s, a->rd, t0);
5240         }
5241         break;
5242     case MO_32:
5243         if (fp_access_check(s)) {
5244             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5245             TCGv_i32 t1 = tcg_constant_i32(0);
5246             if (swap) {
5247                 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64));
5248             } else {
5249                 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
5250             }
5251             write_fp_sreg(s, a->rd, t0);
5252         }
5253         break;
5254     case MO_16:
5255         if (!dc_isar_feature(aa64_fp16, s)) {
5256             return false;
5257         }
5258         if (fp_access_check(s)) {
5259             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5260             TCGv_i32 t1 = tcg_constant_i32(0);
5261             if (swap) {
5262                 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16));
5263             } else {
5264                 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
5265             }
5266             write_fp_sreg(s, a->rd, t0);
5267         }
5268         break;
5269     default:
5270         return false;
5271     }
5272     return true;
5273 }
5274 
5275 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false)
5276 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false)
5277 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false)
5278 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true)
5279 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true)
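/*
 * There are no dedicated LT/LE zero-compare helpers: FCMLT0/FCMLE0
 * reuse the GT/GE generators with the operands swapped, since
 * (n < 0) == (0 > n).
 */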
5280 
5281 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a,
5282                 MemOp sgn_n, MemOp sgn_m,
5283                 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp),
5284                 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
5285 {
5286     TCGv_i64 t0, t1, t2, qc;
5287     MemOp esz = a->esz;
5288 
5289     if (!fp_access_check(s)) {
5290         return true;
5291     }
5292 
5293     t0 = tcg_temp_new_i64();
5294     t1 = tcg_temp_new_i64();
5295     t2 = tcg_temp_new_i64();
5296     qc = tcg_temp_new_i64();
5297     read_vec_element(s, t1, a->rn, 0, esz | sgn_n);
5298     read_vec_element(s, t2, a->rm, 0, esz | sgn_m);
5299     tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5300 
5301     if (esz == MO_64) {
5302         gen_d(t0, qc, t1, t2);
5303     } else {
5304         gen_bhs(t0, qc, t1, t2, esz);
5305         tcg_gen_ext_i64(t0, t0, esz);
5306     }
5307 
5308     write_fp_dreg(s, a->rd, t0);
5309     tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5310     return true;
5311 }
5312 
5313 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d)
5314 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d)
5315 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d)
5316 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d)
5317 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d)
5318 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d)
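/*
 * SUQADD and USQADD accumulate operands of mixed signedness, which is
 * why the two sign flags are passed separately rather than as one
 * "is_signed" bit.
 */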
5319 
5320 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a,
5321                              void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
5322 {
5323     if (fp_access_check(s)) {
5324         TCGv_i64 t0 = tcg_temp_new_i64();
5325         TCGv_i64 t1 = tcg_temp_new_i64();
5326 
5327         read_vec_element(s, t0, a->rn, 0, MO_64);
5328         read_vec_element(s, t1, a->rm, 0, MO_64);
5329         fn(t0, t0, t1);
5330         write_fp_dreg(s, a->rd, t0);
5331     }
5332     return true;
5333 }
5334 
5335 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64)
5336 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64)
5337 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64)
5338 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64)
5339 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64)
5340 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64)
5341 
5342 typedef struct ENVScalar2 {
5343     NeonGenTwoOpEnvFn *gen_bhs[3];
5344     NeonGenTwo64OpEnvFn *gen_d;
5345 } ENVScalar2;
5346 
5347 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f)
5348 {
5349     if (!fp_access_check(s)) {
5350         return true;
5351     }
5352     if (a->esz == MO_64) {
5353         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5354         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5355         f->gen_d(t0, tcg_env, t0, t1);
5356         write_fp_dreg(s, a->rd, t0);
5357     } else {
5358         TCGv_i32 t0 = tcg_temp_new_i32();
5359         TCGv_i32 t1 = tcg_temp_new_i32();
5360 
5361         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5362         read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5363         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
5364         write_fp_sreg(s, a->rd, t0);
5365     }
5366     return true;
5367 }
5368 
5369 static const ENVScalar2 f_scalar_sqshl = {
5370     { gen_helper_neon_qshl_s8,
5371       gen_helper_neon_qshl_s16,
5372       gen_helper_neon_qshl_s32 },
5373     gen_helper_neon_qshl_s64,
5374 };
5375 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl)
5376 
5377 static const ENVScalar2 f_scalar_uqshl = {
5378     { gen_helper_neon_qshl_u8,
5379       gen_helper_neon_qshl_u16,
5380       gen_helper_neon_qshl_u32 },
5381     gen_helper_neon_qshl_u64,
5382 };
5383 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl)
5384 
5385 static const ENVScalar2 f_scalar_sqrshl = {
5386     { gen_helper_neon_qrshl_s8,
5387       gen_helper_neon_qrshl_s16,
5388       gen_helper_neon_qrshl_s32 },
5389     gen_helper_neon_qrshl_s64,
5390 };
5391 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl)
5392 
5393 static const ENVScalar2 f_scalar_uqrshl = {
5394     { gen_helper_neon_qrshl_u8,
5395       gen_helper_neon_qrshl_u16,
5396       gen_helper_neon_qrshl_u32 },
5397     gen_helper_neon_qrshl_u64,
5398 };
5399 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl)
5400 
5401 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a,
5402                               const ENVScalar2 *f)
5403 {
5404     if (a->esz == MO_16 || a->esz == MO_32) {
5405         return do_env_scalar2(s, a, f);
5406     }
5407     return false;
5408 }
5409 
5410 static const ENVScalar2 f_scalar_sqdmulh = {
5411     { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 }
5412 };
5413 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh)
5414 
5415 static const ENVScalar2 f_scalar_sqrdmulh = {
5416     { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 }
5417 };
5418 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
5419 
5420 typedef struct ENVScalar3 {
5421     NeonGenThreeOpEnvFn *gen_hs[2];
5422 } ENVScalar3;
5423 
5424 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
5425                               const ENVScalar3 *f)
5426 {
5427     TCGv_i32 t0, t1, t2;
5428 
5429     if (a->esz != MO_16 && a->esz != MO_32) {
5430         return false;
5431     }
5432     if (!fp_access_check(s)) {
5433         return true;
5434     }
5435 
5436     t0 = tcg_temp_new_i32();
5437     t1 = tcg_temp_new_i32();
5438     t2 = tcg_temp_new_i32();
5439     read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5440     read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5441     read_vec_element_i32(s, t2, a->rd, 0, a->esz);
5442     f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
5443     write_fp_sreg(s, a->rd, t0);
5444     return true;
5445 }
5446 
5447 static const ENVScalar3 f_scalar_sqrdmlah = {
5448     { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
5449 };
5450 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
5451 
5452 static const ENVScalar3 f_scalar_sqrdmlsh = {
5453     { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
5454 };
5455 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
5456 
5457 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
5458 {
5459     if (fp_access_check(s)) {
5460         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5461         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5462         tcg_gen_negsetcond_i64(cond, t0, t0, t1);
5463         write_fp_dreg(s, a->rd, t0);
5464     }
5465     return true;
5466 }
5467 
5468 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT)
5469 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU)
5470 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE)
5471 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
5472 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
5473 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
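/*
 * tcg_gen_negsetcond yields 0 or -1, giving the all-zeros/all-ones
 * result the SIMD compares require; CMTST uses TCG_COND_TSTNE,
 * i.e. (n & m) != 0.
 */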
5474 
5475 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
5476                           gen_helper_gvec_3_ptr * const fns[3])
5477 {
5478     MemOp esz = a->esz;
5479     int check = fp_access_check_vector_hsd(s, a->q, esz);
5480 
5481     if (check <= 0) {
5482         return check == 0;
5483     }
5484 
5485     gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
5486                       esz == MO_16, data, fns[esz - 1]);
5487     return true;
5488 }
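/*
 * The fns[] tables below are indexed by esz - 1, so { MO_16, MO_32,
 * MO_64 } map to slots 0..2; fp_access_check_vector_hsd should
 * already have rejected MO_8 and the !q MO_64 case.
 */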
5489 
5490 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
5491     gen_helper_gvec_fadd_h,
5492     gen_helper_gvec_fadd_s,
5493     gen_helper_gvec_fadd_d,
5494 };
5495 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)
5496 
5497 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
5498     gen_helper_gvec_fsub_h,
5499     gen_helper_gvec_fsub_s,
5500     gen_helper_gvec_fsub_d,
5501 };
5502 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)
5503 
5504 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
5505     gen_helper_gvec_fdiv_h,
5506     gen_helper_gvec_fdiv_s,
5507     gen_helper_gvec_fdiv_d,
5508 };
5509 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)
5510 
5511 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
5512     gen_helper_gvec_fmul_h,
5513     gen_helper_gvec_fmul_s,
5514     gen_helper_gvec_fmul_d,
5515 };
5516 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)
5517 
5518 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
5519     gen_helper_gvec_fmax_h,
5520     gen_helper_gvec_fmax_s,
5521     gen_helper_gvec_fmax_d,
5522 };
5523 TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax)
5524 
5525 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
5526     gen_helper_gvec_fmin_h,
5527     gen_helper_gvec_fmin_s,
5528     gen_helper_gvec_fmin_d,
5529 };
5530 TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin)
5531 
5532 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
5533     gen_helper_gvec_fmaxnum_h,
5534     gen_helper_gvec_fmaxnum_s,
5535     gen_helper_gvec_fmaxnum_d,
5536 };
5537 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)
5538 
5539 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
5540     gen_helper_gvec_fminnum_h,
5541     gen_helper_gvec_fminnum_s,
5542     gen_helper_gvec_fminnum_d,
5543 };
5544 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)
5545 
5546 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
5547     gen_helper_gvec_fmulx_h,
5548     gen_helper_gvec_fmulx_s,
5549     gen_helper_gvec_fmulx_d,
5550 };
5551 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)
5552 
5553 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
5554     gen_helper_gvec_vfma_h,
5555     gen_helper_gvec_vfma_s,
5556     gen_helper_gvec_vfma_d,
5557 };
5558 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)
5559 
5560 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
5561     gen_helper_gvec_vfms_h,
5562     gen_helper_gvec_vfms_s,
5563     gen_helper_gvec_vfms_d,
5564 };
5565 TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls)
5566 
5567 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
5568     gen_helper_gvec_fceq_h,
5569     gen_helper_gvec_fceq_s,
5570     gen_helper_gvec_fceq_d,
5571 };
5572 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)
5573 
5574 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
5575     gen_helper_gvec_fcge_h,
5576     gen_helper_gvec_fcge_s,
5577     gen_helper_gvec_fcge_d,
5578 };
5579 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)
5580 
5581 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
5582     gen_helper_gvec_fcgt_h,
5583     gen_helper_gvec_fcgt_s,
5584     gen_helper_gvec_fcgt_d,
5585 };
5586 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)
5587 
5588 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
5589     gen_helper_gvec_facge_h,
5590     gen_helper_gvec_facge_s,
5591     gen_helper_gvec_facge_d,
5592 };
5593 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)
5594 
5595 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
5596     gen_helper_gvec_facgt_h,
5597     gen_helper_gvec_facgt_s,
5598     gen_helper_gvec_facgt_d,
5599 };
5600 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt)
5601 
5602 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
5603     gen_helper_gvec_fabd_h,
5604     gen_helper_gvec_fabd_s,
5605     gen_helper_gvec_fabd_d,
5606 };
5607 TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd)
5608 
5609 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
5610     gen_helper_gvec_recps_h,
5611     gen_helper_gvec_recps_s,
5612     gen_helper_gvec_recps_d,
5613 };
5614 TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps)
5615 
5616 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
5617     gen_helper_gvec_rsqrts_h,
5618     gen_helper_gvec_rsqrts_s,
5619     gen_helper_gvec_rsqrts_d,
5620 };
5621 TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts)
5622 
5623 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
5624     gen_helper_gvec_faddp_h,
5625     gen_helper_gvec_faddp_s,
5626     gen_helper_gvec_faddp_d,
5627 };
5628 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp)
5629 
5630 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
5631     gen_helper_gvec_fmaxp_h,
5632     gen_helper_gvec_fmaxp_s,
5633     gen_helper_gvec_fmaxp_d,
5634 };
5635 TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp)
5636 
5637 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
5638     gen_helper_gvec_fminp_h,
5639     gen_helper_gvec_fminp_s,
5640     gen_helper_gvec_fminp_d,
5641 };
5642 TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp)
5643 
5644 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
5645     gen_helper_gvec_fmaxnump_h,
5646     gen_helper_gvec_fmaxnump_s,
5647     gen_helper_gvec_fmaxnump_d,
5648 };
5649 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp)
5650 
5651 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
5652     gen_helper_gvec_fminnump_h,
5653     gen_helper_gvec_fminnump_s,
5654     gen_helper_gvec_fminnump_d,
5655 };
5656 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
5657 
5658 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
5659 {
5660     if (fp_access_check(s)) {
5661         int data = (is_2 << 1) | is_s;
5662         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
5663                            vec_full_reg_offset(s, a->rn),
5664                            vec_full_reg_offset(s, a->rm), tcg_env,
5665                            a->q ? 16 : 8, vec_full_reg_size(s),
5666                            data, gen_helper_gvec_fmlal_a64);
5667     }
5668     return true;
5669 }
5670 
5671 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
5672 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
5673 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
5674 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
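/*
 * data is (is_2 << 1) | is_s: bit 0 selects the subtracting (FMLSL*)
 * forms and bit 1 the "2" (top-half) forms, for the helper to decode.
 */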
5675 
5676 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
5677 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
5678 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
5679 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
5680 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)
5681 
5682 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
5683 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
5684 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
5685 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
5686 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
5687 
5688 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
5689 {
5690     if (fp_access_check(s)) {
5691         gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
5692     }
5693     return true;
5694 }
5695 
5696 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
5697 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
5698 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
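/*
 * All three are one bitwise select with operands permuted.  With
 * tcg_gen_gvec_bitsel(d, sel, t, f) computing (t & sel) | (f & ~sel):
 *   BSL: rd = (rn & rd) | (rm & ~rd)
 *   BIT: rd = (rn & rm) | (rd & ~rm)
 *   BIF: rd = (rd & rm) | (rn & ~rm)
 */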
5699 
5700 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc)
5701 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc)
5702 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc)
5703 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc)
5704 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc)
5705 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc)
5706 
5707 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl)
5708 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl)
5709 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl)
5710 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl)
5711 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl)
5712 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl)
5713 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl)
5714 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl)
5715 
5716 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add)
5717 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub)
5718 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd)
5719 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd)
5720 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub)
5721 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub)
5722 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd)
5723 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd)
5724 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax)
5725 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax)
5726 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin)
5727 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin)
5728 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba)
5729 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba)
5730 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd)
5731 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd)
5732 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul)
5733 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b)
5734 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla)
5735 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls)
5736 
5737 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond)
5738 {
5739     if (a->esz == MO_64 && !a->q) {
5740         return false;
5741     }
5742     if (fp_access_check(s)) {
5743         tcg_gen_gvec_cmp(cond, a->esz,
5744                          vec_full_reg_offset(s, a->rd),
5745                          vec_full_reg_offset(s, a->rn),
5746                          vec_full_reg_offset(s, a->rm),
5747                          a->q ? 16 : 8, vec_full_reg_size(s));
5748     }
5749     return true;
5750 }
5751 
5752 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT)
5753 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU)
5754 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE)
5755 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
5756 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
5757 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
5758 
5759 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
5760 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
5761 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
5762 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
5763 
5764 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
5765                           gen_helper_gvec_4 *fn)
5766 {
5767     if (fp_access_check(s)) {
5768         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
5769     }
5770     return true;
5771 }
5772 
5773 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
5774                               gen_helper_gvec_4_ptr *fn)
5775 {
5776     if (fp_access_check(s)) {
5777         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
5778     }
5779     return true;
5780 }
5781 
5782 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
5783 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
5784 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
5785 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
5786 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
5787 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
5788 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
5789 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
5790 
5791 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
5792 {
5793     if (!dc_isar_feature(aa64_bf16, s)) {
5794         return false;
5795     }
5796     if (fp_access_check(s)) {
5797         /* Q bit selects BFMLALB vs BFMLALT. */
5798         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q,
5799                           gen_helper_gvec_bfmlal);
5800     }
5801     return true;
5802 }
5803 
5804 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
5805     gen_helper_gvec_fcaddh,
5806     gen_helper_gvec_fcadds,
5807     gen_helper_gvec_fcaddd,
5808 };
5809 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd)
5810 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd)
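/*
 * The data argument (0 or 1) selects the 90 vs 270 degree rotation
 * inside the fcadd helpers.
 */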
5811 
5812 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
5813 {
5814     static gen_helper_gvec_4_ptr * const fn[] = {
5815         [MO_16] = gen_helper_gvec_fcmlah,
5816         [MO_32] = gen_helper_gvec_fcmlas,
5817         [MO_64] = gen_helper_gvec_fcmlad,
5818     };
5819     int check;
5820 
5821     if (!dc_isar_feature(aa64_fcma, s)) {
5822         return false;
5823     }
5824 
5825     check = fp_access_check_vector_hsd(s, a->q, a->esz);
5826     if (check <= 0) {
5827         return check == 0;
5828     }
5829 
5830     gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
5831                       a->esz == MO_16, a->rot, fn[a->esz]);
5832     return true;
5833 }
5834 
5835 /*
5836  * Widening vector x vector/indexed.
5837  *
5838  * These read from the top or bottom half of a 128-bit vector.
5839  * After widening, optionally accumulate with a 128-bit vector.
5840  * Implement these inline, as the number of elements is limited
5841  * and the related SVE and SME operations on larger vectors use
5842  * even/odd elements instead of top/bottom half.
5843  *
5844  * If idx >= 0, operand 2 is indexed, otherwise vector.
5845  * If acc, operand 0 is loaded with rd.
5846  */
5847 
5848 /* Iteration order is chosen below so that overlap of rd with rn/rm is safe. */
5849 static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
5850                             int rd, int rn, int rm, int idx,
5851                             NeonGenTwo64OpFn *fn, bool acc)
5852 {
5853     TCGv_i64 tcg_op0 = tcg_temp_new_i64();
5854     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
5855     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
5856     MemOp esz = memop & MO_SIZE;
5857     int half = 8 >> esz;
5858     int top_swap, top_half;
5859 
5860     /* There are no 64x64->128 bit operations. */
5861     if (esz >= MO_64) {
5862         return false;
5863     }
5864     if (!fp_access_check(s)) {
5865         return true;
5866     }
5867 
5868     if (idx >= 0) {
5869         read_vec_element(s, tcg_op2, rm, idx, memop);
5870     }
5871 
5872     /*
5873      * For top half inputs, iterate forward; backward for bottom half.
5874      * This means the store to the destination will not occur until
5875      * overlapping inputs are consumed.
5876      * Use top_swap to conditionally invert the forward iteration index.
5877      */
5878     top_swap = top ? 0 : half - 1;
5879     top_half = top ? half : 0;
5880 
5881     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
5882         int elt = elt_fwd ^ top_swap;
5883 
5884         read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
5885         if (idx < 0) {
5886             read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
5887         }
5888         if (acc) {
5889             read_vec_element(s, tcg_op0, rd, elt, memop + 1);
5890         }
5891         fn(tcg_op0, tcg_op1, tcg_op2);
5892         write_vec_element(s, tcg_op0, rd, elt, esz + 1);
5893     }
5894     clear_vec_high(s, 1, rd);
5895     return true;
5896 }
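/*
 * Example: SMLAL2 V0.4S, V1.8H, V2.8H arrives here as
 * memop = MO_16 | MO_SIGN, top = 1, acc = true: each pass widens one
 * pair of top-half halfwords, accumulates into a 32-bit element
 * (esz + 1), and clear_vec_high zeroes any SVE bits above 128.
 */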
5897 
5898 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5899 {
5900     TCGv_i64 t = tcg_temp_new_i64();
5901     tcg_gen_mul_i64(t, n, m);
5902     tcg_gen_add_i64(d, d, t);
5903 }
5904 
5905 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5906 {
5907     TCGv_i64 t = tcg_temp_new_i64();
5908     tcg_gen_mul_i64(t, n, m);
5909     tcg_gen_sub_i64(d, d, t);
5910 }
5911 
5912 TRANS(SMULL_v, do_3op_widening,
5913       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5914       tcg_gen_mul_i64, false)
5915 TRANS(UMULL_v, do_3op_widening,
5916       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5917       tcg_gen_mul_i64, false)
5918 TRANS(SMLAL_v, do_3op_widening,
5919       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5920       gen_muladd_i64, true)
5921 TRANS(UMLAL_v, do_3op_widening,
5922       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5923       gen_muladd_i64, true)
5924 TRANS(SMLSL_v, do_3op_widening,
5925       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5926       gen_mulsub_i64, true)
5927 TRANS(UMLSL_v, do_3op_widening,
5928       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5929       gen_mulsub_i64, true)
5930 
5931 TRANS(SMULL_vi, do_3op_widening,
5932       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5933       tcg_gen_mul_i64, false)
5934 TRANS(UMULL_vi, do_3op_widening,
5935       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5936       tcg_gen_mul_i64, false)
5937 TRANS(SMLAL_vi, do_3op_widening,
5938       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5939       gen_muladd_i64, true)
5940 TRANS(UMLAL_vi, do_3op_widening,
5941       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5942       gen_muladd_i64, true)
5943 TRANS(SMLSL_vi, do_3op_widening,
5944       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5945       gen_mulsub_i64, true)
5946 TRANS(UMLSL_vi, do_3op_widening,
5947       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5948       gen_mulsub_i64, true)
5949 
5950 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5951 {
5952     TCGv_i64 t1 = tcg_temp_new_i64();
5953     TCGv_i64 t2 = tcg_temp_new_i64();
5954 
5955     tcg_gen_sub_i64(t1, n, m);
5956     tcg_gen_sub_i64(t2, m, n);
5957     tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
5958 }
5959 
5960 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5961 {
5962     TCGv_i64 t1 = tcg_temp_new_i64();
5963     TCGv_i64 t2 = tcg_temp_new_i64();
5964 
5965     tcg_gen_sub_i64(t1, n, m);
5966     tcg_gen_sub_i64(t2, m, n);
5967     tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
5968 }
5969 
5970 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5971 {
5972     TCGv_i64 t = tcg_temp_new_i64();
5973     gen_sabd_i64(t, n, m);
5974     tcg_gen_add_i64(d, d, t);
5975 }
5976 
5977 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5978 {
5979     TCGv_i64 t = tcg_temp_new_i64();
5980     gen_uabd_i64(t, n, m);
5981     tcg_gen_add_i64(d, d, t);
5982 }
5983 
5984 TRANS(SADDL_v, do_3op_widening,
5985       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5986       tcg_gen_add_i64, false)
5987 TRANS(UADDL_v, do_3op_widening,
5988       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5989       tcg_gen_add_i64, false)
5990 TRANS(SSUBL_v, do_3op_widening,
5991       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5992       tcg_gen_sub_i64, false)
5993 TRANS(USUBL_v, do_3op_widening,
5994       a->esz, a->q, a->rd, a->rn, a->rm, -1,
5995       tcg_gen_sub_i64, false)
5996 TRANS(SABDL_v, do_3op_widening,
5997       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5998       gen_sabd_i64, false)
5999 TRANS(UABDL_v, do_3op_widening,
6000       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6001       gen_uabd_i64, false)
6002 TRANS(SABAL_v, do_3op_widening,
6003       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6004       gen_saba_i64, true)
6005 TRANS(UABAL_v, do_3op_widening,
6006       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6007       gen_uaba_i64, true)
6008 
6009 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6010 {
6011     tcg_gen_mul_i64(d, n, m);
6012     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
6013 }
6014 
6015 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6016 {
6017     tcg_gen_mul_i64(d, n, m);
6018     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
6019 }
6020 
6021 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6022 {
6023     TCGv_i64 t = tcg_temp_new_i64();
6024 
6025     tcg_gen_mul_i64(t, n, m);
6026     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6027     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6028 }
6029 
6030 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6031 {
6032     TCGv_i64 t = tcg_temp_new_i64();
6033 
6034     tcg_gen_mul_i64(t, n, m);
6035     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6036     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6037 }
6038 
6039 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6040 {
6041     TCGv_i64 t = tcg_temp_new_i64();
6042 
6043     tcg_gen_mul_i64(t, n, m);
6044     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6045     tcg_gen_neg_i64(t, t);
6046     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6047 }
6048 
6049 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6050 {
6051     TCGv_i64 t = tcg_temp_new_i64();
6052 
6053     tcg_gen_mul_i64(t, n, m);
6054     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6055     tcg_gen_neg_i64(t, t);
6056     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6057 }
6058 
6059 TRANS(SQDMULL_v, do_3op_widening,
6060       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6061       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6062 TRANS(SQDMLAL_v, do_3op_widening,
6063       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6064       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6065 TRANS(SQDMLSL_v, do_3op_widening,
6066       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6067       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6068 
6069 TRANS(SQDMULL_vi, do_3op_widening,
6070       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6071       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6072 TRANS(SQDMLAL_vi, do_3op_widening,
6073       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6074       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6075 TRANS(SQDMLSL_vi, do_3op_widening,
6076       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6077       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6078 
6079 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
6080                            MemOp sign, bool sub)
6081 {
6082     TCGv_i64 tcg_op0, tcg_op1;
6083     MemOp esz = a->esz;
6084     int half = 8 >> esz;
6085     bool top = a->q;
6086     int top_swap = top ? 0 : half - 1;
6087     int top_half = top ? half : 0;
6088 
6089     /* There are no 64x64->128 bit operations. */
6090     if (esz >= MO_64) {
6091         return false;
6092     }
6093     if (!fp_access_check(s)) {
6094         return true;
6095     }
6096     tcg_op0 = tcg_temp_new_i64();
6097     tcg_op1 = tcg_temp_new_i64();
6098 
6099     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6100         int elt = elt_fwd ^ top_swap;
6101 
6102         read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
6103         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6104         if (sub) {
6105             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6106         } else {
6107             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6108         }
6109         write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
6110     }
6111     clear_vec_high(s, 1, a->rd);
6112     return true;
6113 }
6114 
6115 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
6116 TRANS(UADDW, do_addsub_wide, a, 0, false)
6117 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
6118 TRANS(USUBW, do_addsub_wide, a, 0, true)
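/*
 * Only the rm elements are narrow here; rn is read at the wide size
 * directly.  E.g. SADDW V0.4S, V1.4S, V2.4H sign-extends each
 * halfword of V2 and adds it to the corresponding word of V1.
 */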
6119 
6120 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
6121                                  bool sub, bool round)
6122 {
6123     TCGv_i64 tcg_op0, tcg_op1;
6124     MemOp esz = a->esz;
6125     int half = 8 >> esz;
6126     bool top = a->q;
6127     int ebits = 8 << esz;
6128     uint64_t rbit = 1ull << (ebits - 1);
6129     int top_swap, top_half;
6130 
6131     /* There are no 128x128->64 bit operations. */
6132     if (esz >= MO_64) {
6133         return false;
6134     }
6135     if (!fp_access_check(s)) {
6136         return true;
6137     }
6138     tcg_op0 = tcg_temp_new_i64();
6139     tcg_op1 = tcg_temp_new_i64();
6140 
6141     /*
6142      * For top half inputs, iterate backward; forward for bottom half.
6143      * This means the store to the destination will not occur until
6144      * overlapping inputs are consumed.
6145      */
6146     top_swap = top ? half - 1 : 0;
6147     top_half = top ? half : 0;
6148 
6149     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6150         int elt = elt_fwd ^ top_swap;
6151 
6152         read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
6153         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6154         if (sub) {
6155             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6156         } else {
6157             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6158         }
6159         if (round) {
6160             tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
6161         }
6162         tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
6163         write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
6164     }
6165     clear_vec_high(s, top, a->rd);
6166     return true;
6167 }
6168 
6169 TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
6170 TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
6171 TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
6172 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
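/*
 * The rounded forms simply add the rounding bit before narrowing:
 * e.g. for RADDHN with esz == MO_8 each result byte is
 * (Vn.h[i] + Vm.h[i] + 0x80) >> 8.
 */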
6173 
6174 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
6175 {
6176     if (fp_access_check(s)) {
6177         /* The Q field specifies lo/hi half input for these insns.  */
6178         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
6179     }
6180     return true;
6181 }
6182 
6183 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
6184 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
6185 
6186 /*
6187  * Advanced SIMD scalar/vector x indexed element
6188  */
6189 
6190 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
6191 {
6192     switch (a->esz) {
6193     case MO_64:
6194         if (fp_access_check(s)) {
6195             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
6196             TCGv_i64 t1 = tcg_temp_new_i64();
6197 
6198             read_vec_element(s, t1, a->rm, a->idx, MO_64);
6199             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6200             write_fp_dreg(s, a->rd, t0);
6201         }
6202         break;
6203     case MO_32:
6204         if (fp_access_check(s)) {
6205             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
6206             TCGv_i32 t1 = tcg_temp_new_i32();
6207 
6208             read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
6209             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6210             write_fp_sreg(s, a->rd, t0);
6211         }
6212         break;
6213     case MO_16:
6214         if (!dc_isar_feature(aa64_fp16, s)) {
6215             return false;
6216         }
6217         if (fp_access_check(s)) {
6218             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
6219             TCGv_i32 t1 = tcg_temp_new_i32();
6220 
6221             read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
6222             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6223             write_fp_sreg(s, a->rd, t0);
6224         }
6225         break;
6226     default:
6227         g_assert_not_reached();
6228     }
6229     return true;
6230 }
6231 
6232 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
6233 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
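/*
 * Unlike do_fp3_scalar, an unhandled esz asserts here: the indexed
 * scalar decode should only produce MO_16/MO_32/MO_64, with MO_16
 * still gated on FEAT_FP16 above.
 */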
6234 
6235 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
6236 {
6237     switch (a->esz) {
6238     case MO_64:
6239         if (fp_access_check(s)) {
6240             TCGv_i64 t0 = read_fp_dreg(s, a->rd);
6241             TCGv_i64 t1 = read_fp_dreg(s, a->rn);
6242             TCGv_i64 t2 = tcg_temp_new_i64();
6243 
6244             read_vec_element(s, t2, a->rm, a->idx, MO_64);
6245             if (neg) {
6246                 gen_vfp_negd(t1, t1);
6247             }
6248             gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6249             write_fp_dreg(s, a->rd, t0);
6250         }
6251         break;
6252     case MO_32:
6253         if (fp_access_check(s)) {
6254             TCGv_i32 t0 = read_fp_sreg(s, a->rd);
6255             TCGv_i32 t1 = read_fp_sreg(s, a->rn);
6256             TCGv_i32 t2 = tcg_temp_new_i32();
6257 
6258             read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
6259             if (neg) {
6260                 gen_vfp_negs(t1, t1);
6261             }
6262             gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6263             write_fp_sreg(s, a->rd, t0);
6264         }
6265         break;
6266     case MO_16:
6267         if (!dc_isar_feature(aa64_fp16, s)) {
6268             return false;
6269         }
6270         if (fp_access_check(s)) {
6271             TCGv_i32 t0 = read_fp_hreg(s, a->rd);
6272             TCGv_i32 t1 = read_fp_hreg(s, a->rn);
6273             TCGv_i32 t2 = tcg_temp_new_i32();
6274 
6275             read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
6276             if (neg) {
6277                 gen_vfp_negh(t1, t1);
6278             }
6279             gen_helper_advsimd_muladdh(t0, t1, t2, t0,
6280                                        fpstatus_ptr(FPST_A64_F16));
6281             write_fp_sreg(s, a->rd, t0);
6282         }
6283         break;
6284     default:
6285         g_assert_not_reached();
6286     }
6287     return true;
6288 }
6289 
6290 TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
6291 TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
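/*
 * FMLS negates rn before the fused multiply-add, i.e.
 * rd = rd + (-rn) * rm[idx], matching the architecture's fused
 * negate rather than negating the finished product.
 */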
6292 
6293 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
6294                                   const ENVScalar2 *f)
6295 {
6296     if (a->esz < MO_16 || a->esz > MO_32) {
6297         return false;
6298     }
6299     if (fp_access_check(s)) {
6300         TCGv_i32 t0 = tcg_temp_new_i32();
6301         TCGv_i32 t1 = tcg_temp_new_i32();
6302 
6303         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6304         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6305         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
6306         write_fp_sreg(s, a->rd, t0);
6307     }
6308     return true;
6309 }
6310 
6311 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
6312 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
6313 
6314 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
6315                                   const ENVScalar3 *f)
6316 {
6317     if (a->esz < MO_16 || a->esz > MO_32) {
6318         return false;
6319     }
6320     if (fp_access_check(s)) {
6321         TCGv_i32 t0 = tcg_temp_new_i32();
6322         TCGv_i32 t1 = tcg_temp_new_i32();
6323         TCGv_i32 t2 = tcg_temp_new_i32();
6324 
6325         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6326         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6327         read_vec_element_i32(s, t2, a->rd, 0, a->esz);
6328         f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
6329         write_fp_sreg(s, a->rd, t0);
6330     }
6331     return true;
6332 }
6333 
6334 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
6335 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
6336 
6337 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
6338                                           NeonGenTwo64OpFn *fn, bool acc)
6339 {
6340     if (fp_access_check(s)) {
6341         TCGv_i64 t0 = tcg_temp_new_i64();
6342         TCGv_i64 t1 = tcg_temp_new_i64();
6343         TCGv_i64 t2 = tcg_temp_new_i64();
6344 
6345         if (acc) {
6346             read_vec_element(s, t0, a->rd, 0, a->esz + 1);
6347         }
6348         read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
6349         read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
6350         fn(t0, t1, t2);
6351 
6352         /* Clear the whole register first, then store scalar. */
6353         clear_vec(s, a->rd);
6354         write_vec_element(s, t0, a->rd, 0, a->esz + 1);
6355     }
6356     return true;
6357 }
6358 
6359 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
6360       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6361 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
6362       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6363 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
6364       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6365 
6366 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6367                               gen_helper_gvec_3_ptr * const fns[3])
6368 {
6369     MemOp esz = a->esz;
6370     int check = fp_access_check_vector_hsd(s, a->q, esz);
6371 
6372     if (check <= 0) {
6373         return check == 0;
6374     }
6375 
6376     gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
6377                       esz == MO_16, a->idx, fns[esz - 1]);
6378     return true;
6379 }
6380 
6381 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
6382     gen_helper_gvec_fmul_idx_h,
6383     gen_helper_gvec_fmul_idx_s,
6384     gen_helper_gvec_fmul_idx_d,
6385 };
6386 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
6387 
6388 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
6389     gen_helper_gvec_fmulx_idx_h,
6390     gen_helper_gvec_fmulx_idx_s,
6391     gen_helper_gvec_fmulx_idx_d,
6392 };
6393 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
6394 
6395 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
6396 {
6397     static gen_helper_gvec_4_ptr * const fns[3] = {
6398         gen_helper_gvec_fmla_idx_h,
6399         gen_helper_gvec_fmla_idx_s,
6400         gen_helper_gvec_fmla_idx_d,
6401     };
6402     MemOp esz = a->esz;
6403     int check = fp_access_check_vector_hsd(s, a->q, esz);
6404 
6405     if (check <= 0) {
6406         return check == 0;
6407     }
6408 
6409     gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6410                       esz == MO_16, (a->idx << 1) | neg,
6411                       fns[esz - 1]);
6412     return true;
6413 }
6414 
6415 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
6416 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
6417 
6418 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
6419 {
6420     if (fp_access_check(s)) {
6421         int data = (a->idx << 2) | (is_2 << 1) | is_s;
6422         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
6423                            vec_full_reg_offset(s, a->rn),
6424                            vec_full_reg_offset(s, a->rm), tcg_env,
6425                            a->q ? 16 : 8, vec_full_reg_size(s),
6426                            data, gen_helper_gvec_fmlal_idx_a64);
6427     }
6428     return true;
6429 }
6430 
6431 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
6432 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
6433 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
6434 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
6435 
6436 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6437                                gen_helper_gvec_3 * const fns[2])
6438 {
6439     assert(a->esz == MO_16 || a->esz == MO_32);
6440     if (fp_access_check(s)) {
6441         gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]);
6442     }
6443     return true;
6444 }
6445 
6446 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = {
6447     gen_helper_gvec_mul_idx_h,
6448     gen_helper_gvec_mul_idx_s,
6449 };
6450 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul)
6451 
6452 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub)
6453 {
6454     static gen_helper_gvec_4 * const fns[2][2] = {
6455         { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h },
6456         { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s },
6457     };
6458 
6459     assert(a->esz == MO_16 || a->esz == MO_32);
6460     if (fp_access_check(s)) {
6461         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd,
6462                          a->idx, fns[a->esz - 1][sub]);
6463     }
6464     return true;
6465 }
6466 
6467 TRANS(MLA_vi, do_mla_vector_idx, a, false)
6468 TRANS(MLS_vi, do_mla_vector_idx, a, true)
6469 
6470 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a,
6471                                   gen_helper_gvec_4 * const fns[2])
6472 {
6473     assert(a->esz == MO_16 || a->esz == MO_32);
6474     if (fp_access_check(s)) {
6475         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
6476                            vec_full_reg_offset(s, a->rn),
6477                            vec_full_reg_offset(s, a->rm),
6478                            offsetof(CPUARMState, vfp.qc),
6479                            a->q ? 16 : 8, vec_full_reg_size(s),
6480                            a->idx, fns[a->esz - 1]);
6481     }
6482     return true;
6483 }
6484 
6485 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = {
6486     gen_helper_neon_sqdmulh_idx_h,
6487     gen_helper_neon_sqdmulh_idx_s,
6488 };
6489 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh)
6490 
6491 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
6492     gen_helper_neon_sqrdmulh_idx_h,
6493     gen_helper_neon_sqrdmulh_idx_s,
6494 };
6495 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
6496 
6497 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = {
6498     gen_helper_neon_sqrdmlah_idx_h,
6499     gen_helper_neon_sqrdmlah_idx_s,
6500 };
6501 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6502            f_vector_idx_sqrdmlah)
6503 
6504 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = {
6505     gen_helper_neon_sqrdmlsh_idx_h,
6506     gen_helper_neon_sqrdmlsh_idx_s,
6507 };
6508 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6509            f_vector_idx_sqrdmlsh)
6510 
6511 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
6512                               gen_helper_gvec_4 *fn)
6513 {
6514     if (fp_access_check(s)) {
6515         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6516     }
6517     return true;
6518 }
6519 
6520 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
6521                                   gen_helper_gvec_4_ptr *fn)
6522 {
6523     if (fp_access_check(s)) {
6524         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6525     }
6526     return true;
6527 }
6528 
6529 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
6530 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
6531 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6532            gen_helper_gvec_sudot_idx_b)
6533 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6534            gen_helper_gvec_usdot_idx_b)
6535 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
6536            gen_helper_gvec_bfdot_idx)
6537 
6538 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
6539 {
6540     if (!dc_isar_feature(aa64_bf16, s)) {
6541         return false;
6542     }
6543     if (fp_access_check(s)) {
6544         /* Q bit selects BFMLALB vs BFMLALT. */
6545         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0,
6546                           (a->idx << 1) | a->q,
6547                           gen_helper_gvec_bfmlal_idx);
6548     }
6549     return true;
6550 }
6551 
6552 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
6553 {
6554     gen_helper_gvec_4_ptr *fn;
6555 
6556     if (!dc_isar_feature(aa64_fcma, s)) {
6557         return false;
6558     }
6559     switch (a->esz) {
6560     case MO_16:
6561         if (!dc_isar_feature(aa64_fp16, s)) {
6562             return false;
6563         }
6564         fn = gen_helper_gvec_fcmlah_idx;
6565         break;
6566     case MO_32:
6567         fn = gen_helper_gvec_fcmlas_idx;
6568         break;
6569     default:
6570         g_assert_not_reached();
6571     }
6572     if (fp_access_check(s)) {
6573         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6574                           a->esz == MO_16, (a->idx << 2) | a->rot, fn);
6575     }
6576     return true;
6577 }
6578 
6579 /*
6580  * Advanced SIMD scalar pairwise
6581  */
6582 
6583 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
6584 {
6585     switch (a->esz) {
6586     case MO_64:
6587         if (fp_access_check(s)) {
6588             TCGv_i64 t0 = tcg_temp_new_i64();
6589             TCGv_i64 t1 = tcg_temp_new_i64();
6590 
6591             read_vec_element(s, t0, a->rn, 0, MO_64);
6592             read_vec_element(s, t1, a->rn, 1, MO_64);
6593             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6594             write_fp_dreg(s, a->rd, t0);
6595         }
6596         break;
6597     case MO_32:
6598         if (fp_access_check(s)) {
6599             TCGv_i32 t0 = tcg_temp_new_i32();
6600             TCGv_i32 t1 = tcg_temp_new_i32();
6601 
6602             read_vec_element_i32(s, t0, a->rn, 0, MO_32);
6603             read_vec_element_i32(s, t1, a->rn, 1, MO_32);
6604             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6605             write_fp_sreg(s, a->rd, t0);
6606         }
6607         break;
6608     case MO_16:
6609         if (!dc_isar_feature(aa64_fp16, s)) {
6610             return false;
6611         }
6612         if (fp_access_check(s)) {
6613             TCGv_i32 t0 = tcg_temp_new_i32();
6614             TCGv_i32 t1 = tcg_temp_new_i32();
6615 
6616             read_vec_element_i32(s, t0, a->rn, 0, MO_16);
6617             read_vec_element_i32(s, t1, a->rn, 1, MO_16);
6618             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6619             write_fp_sreg(s, a->rd, t0);
6620         }
6621         break;
6622     default:
6623         g_assert_not_reached();
6624     }
6625     return true;
6626 }
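
/*
 * A scalar pairwise op reduces the two lowest elements of Vn to a
 * scalar, e.g. FADDP D0, V1.2D computes V1.D[0] + V1.D[1]: the code
 * above reads elements 0 and 1 at the operand size and writes the
 * result back with the high bits of the Q register zeroed.
 */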
6627 
6628 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
6629 TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax)
6630 TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin)
6631 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
6632 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
6633 
6634 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
6635 {
6636     if (fp_access_check(s)) {
6637         TCGv_i64 t0 = tcg_temp_new_i64();
6638         TCGv_i64 t1 = tcg_temp_new_i64();
6639 
6640         read_vec_element(s, t0, a->rn, 0, MO_64);
6641         read_vec_element(s, t1, a->rn, 1, MO_64);
6642         tcg_gen_add_i64(t0, t0, t1);
6643         write_fp_dreg(s, a->rd, t0);
6644     }
6645     return true;
6646 }
6647 
6648 /*
6649  * Floating-point conditional select
6650  */
6651 
6652 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
6653 {
6654     TCGv_i64 t_true, t_false;
6655     DisasCompare64 c;
6656     int check = fp_access_check_scalar_hsd(s, a->esz);
6657 
6658     if (check <= 0) {
6659         return check == 0;
6660     }
6661 
6662     /* Zero extend sreg & hreg inputs to 64 bits now.  */
6663     t_true = tcg_temp_new_i64();
6664     t_false = tcg_temp_new_i64();
6665     read_vec_element(s, t_true, a->rn, 0, a->esz);
6666     read_vec_element(s, t_false, a->rm, 0, a->esz);
6667 
6668     a64_test_cc(&c, a->cond);
6669     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
6670                         t_true, t_false);
6671 
6672     /*
6673      * Note that sregs & hregs write back zeros to the high bits,
6674      * and we've already done the zero-extension.
6675      */
6676     write_fp_dreg(s, a->rd, t_true);
6677     return true;
6678 }
6679 
6680 /*
6681  * Advanced SIMD Extract
6682  */
6683 
6684 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a)
6685 {
6686     if (fp_access_check(s)) {
6687         TCGv_i64 lo = read_fp_dreg(s, a->rn);
6688         if (a->imm != 0) {
6689             TCGv_i64 hi = read_fp_dreg(s, a->rm);
6690             tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8);
6691         }
6692         write_fp_dreg(s, a->rd, lo);
6693     }
6694     return true;
6695 }
6696 
6697 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a)
6698 {
6699     TCGv_i64 lo, hi;
6700     int pos = (a->imm & 7) * 8;
6701     int elt = a->imm >> 3;
6702 
6703     if (!fp_access_check(s)) {
6704         return true;
6705     }
6706 
6707     lo = tcg_temp_new_i64();
6708     hi = tcg_temp_new_i64();
6709 
6710     read_vec_element(s, lo, a->rn, elt, MO_64);
6711     elt++;
6712     read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64);
6713     elt++;
6714 
6715     if (pos != 0) {
6716         TCGv_i64 hh = tcg_temp_new_i64();
6717         tcg_gen_extract2_i64(lo, lo, hi, pos);
6718         read_vec_element(s, hh, a->rm, elt & 1, MO_64);
6719         tcg_gen_extract2_i64(hi, hi, hh, pos);
6720     }
6721 
6722     write_vec_element(s, lo, a->rd, 0, MO_64);
6723     write_vec_element(s, hi, a->rd, 1, MO_64);
6724     clear_vec_high(s, true, a->rd);
6725     return true;
6726 }
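
/*
 * Worked example for the extraction above, with imm == 3 (so
 * pos == 24 and elt starts at 0): lo = Vn.D[0], hi = Vn.D[1],
 * hh = Vm.D[0].  The two extract2 ops leave bytes [10:3] of Vn
 * in lo, and bytes [15:11] of Vn followed by bytes [2:0] of Vm
 * in hi -- i.e. bytes [18:3] of the Vm:Vn concatenation, which
 * is the architected EXT result of (Vm:Vn) >> (imm * 8).
 */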
6727 
6728 /*
6729  * Floating-point data-processing (3 source)
6730  */
6731 
6732 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
6733 {
6734     TCGv_ptr fpst;
6735 
6736     /*
6737      * These are fused multiply-add.  Note that doing the negations here
6738      * as separate steps is correct: an input NaN should come out with
6739      * its sign bit flipped if it is a negated-input.
6740      */
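    /*
     * With (neg_a, neg_n) as passed by the TRANS() expansions that
     * follow this function, the computation is:
     *   FMADD:  ra + rn * rm    (false, false)
     *   FMSUB:  ra - rn * rm    (false, true)
     *   FNMADD: -ra - rn * rm   (true,  true)
     *   FNMSUB: -ra + rn * rm   (true,  false)
     */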
6741     switch (a->esz) {
6742     case MO_64:
6743         if (fp_access_check(s)) {
6744             TCGv_i64 tn = read_fp_dreg(s, a->rn);
6745             TCGv_i64 tm = read_fp_dreg(s, a->rm);
6746             TCGv_i64 ta = read_fp_dreg(s, a->ra);
6747 
6748             if (neg_a) {
6749                 gen_vfp_negd(ta, ta);
6750             }
6751             if (neg_n) {
6752                 gen_vfp_negd(tn, tn);
6753             }
6754             fpst = fpstatus_ptr(FPST_A64);
6755             gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
6756             write_fp_dreg(s, a->rd, ta);
6757         }
6758         break;
6759 
6760     case MO_32:
6761         if (fp_access_check(s)) {
6762             TCGv_i32 tn = read_fp_sreg(s, a->rn);
6763             TCGv_i32 tm = read_fp_sreg(s, a->rm);
6764             TCGv_i32 ta = read_fp_sreg(s, a->ra);
6765 
6766             if (neg_a) {
6767                 gen_vfp_negs(ta, ta);
6768             }
6769             if (neg_n) {
6770                 gen_vfp_negs(tn, tn);
6771             }
6772             fpst = fpstatus_ptr(FPST_A64);
6773             gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
6774             write_fp_sreg(s, a->rd, ta);
6775         }
6776         break;
6777 
6778     case MO_16:
6779         if (!dc_isar_feature(aa64_fp16, s)) {
6780             return false;
6781         }
6782         if (fp_access_check(s)) {
6783             TCGv_i32 tn = read_fp_hreg(s, a->rn);
6784             TCGv_i32 tm = read_fp_hreg(s, a->rm);
6785             TCGv_i32 ta = read_fp_hreg(s, a->ra);
6786 
6787             if (neg_a) {
6788                 gen_vfp_negh(ta, ta);
6789             }
6790             if (neg_n) {
6791                 gen_vfp_negh(tn, tn);
6792             }
6793             fpst = fpstatus_ptr(FPST_A64_F16);
6794             gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
6795             write_fp_sreg(s, a->rd, ta);
6796         }
6797         break;
6798 
6799     default:
6800         return false;
6801     }
6802     return true;
6803 }
6804 
6805 TRANS(FMADD, do_fmadd, a, false, false)
6806 TRANS(FNMADD, do_fmadd, a, true, true)
6807 TRANS(FMSUB, do_fmadd, a, false, true)
6808 TRANS(FNMSUB, do_fmadd, a, true, false)
6809 
6810 /*
6811  * Advanced SIMD Across Lanes
6812  */
6813 
6814 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
6815                              MemOp src_sign, NeonGenTwo64OpFn *fn)
6816 {
6817     TCGv_i64 tcg_res, tcg_elt;
6818     MemOp src_mop = a->esz | src_sign;
6819     int elements = (a->q ? 16 : 8) >> a->esz;
6820 
6821     /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
6822     if (elements < 4) {
6823         return false;
6824     }
6825     if (!fp_access_check(s)) {
6826         return true;
6827     }
6828 
6829     tcg_res = tcg_temp_new_i64();
6830     tcg_elt = tcg_temp_new_i64();
6831 
6832     read_vec_element(s, tcg_res, a->rn, 0, src_mop);
6833     for (int i = 1; i < elements; i++) {
6834         read_vec_element(s, tcg_elt, a->rn, i, src_mop);
6835         fn(tcg_res, tcg_res, tcg_elt);
6836     }
6837 
6838     tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
6839     write_fp_dreg(s, a->rd, tcg_res);
6840     return true;
6841 }
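
/*
 * For the long forms (SADDLV/UADDLV) 'widen' is true: elements are
 * read at the source size (sign-extended for SADDLV), accumulated
 * in 64 bits, and the tcg_gen_ext_i64(..., esz + widen) above then
 * narrows to twice the element size, e.g. UADDLV H0, V1.8B sums
 * eight bytes into a 16-bit scalar.
 */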
6842 
6843 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
6844 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
6845 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
6846 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
6847 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
6848 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
6849 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
6850 
6851 /*
6852  * do_reduction_op, the helper used by do_fp_reduction below
6853  *
6854  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
6855  * important for correct NaN propagation that we do these
6856  * operations in exactly the order specified by the pseudocode.
6857  *
6858  * This is a recursive function.
6859  */
6860 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
6861                                 int ebase, int ecount, TCGv_ptr fpst,
6862                                 NeonGenTwoSingleOpFn *fn)
6863 {
6864     if (ecount == 1) {
6865         TCGv_i32 tcg_elem = tcg_temp_new_i32();
6866         read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
6867         return tcg_elem;
6868     } else {
6869         int half = ecount >> 1;
6870         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
6871 
6872         tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
6873         tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
6874         tcg_res = tcg_temp_new_i32();
6875 
6876         fn(tcg_res, tcg_lo, tcg_hi, fpst);
6877         return tcg_res;
6878     }
6879 }
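
/*
 * Note the recursion reduces pairwise over halves rather than
 * linearly: for four elements it computes
 *   fn(fn(e0, e1), fn(e2, e3))
 * and not fn(fn(fn(e0, e1), e2), e3), matching the evaluation
 * order of the Reduce() pseudocode.
 */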
6880 
6881 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
6882                               NeonGenTwoSingleOpFn *fn)
6883 {
6884     if (fp_access_check(s)) {
6885         MemOp esz = a->esz;
6886         int elts = (a->q ? 16 : 8) >> esz;
6887         TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
6888         TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn);
6889         write_fp_sreg(s, a->rd, res);
6890     }
6891     return true;
6892 }
6893 
6894 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxnumh)
6895 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minnumh)
6896 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxh)
6897 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minh)
6898 
6899 TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums)
6900 TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums)
6901 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs)
6902 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins)
6903 
6904 /*
6905  * Floating-point Immediate
6906  */
6907 
6908 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a)
6909 {
6910     int check = fp_access_check_scalar_hsd(s, a->esz);
6911     uint64_t imm;
6912 
6913     if (check <= 0) {
6914         return check == 0;
6915     }
6916 
6917     imm = vfp_expand_imm(a->esz, a->imm);
6918     write_fp_dreg(s, a->rd, tcg_constant_i64(imm));
6919     return true;
6920 }
6921 
6922 /*
6923  * Floating point compare, conditional compare
6924  */
6925 
6926 static void handle_fp_compare(DisasContext *s, int size,
6927                               unsigned int rn, unsigned int rm,
6928                               bool cmp_with_zero, bool signal_all_nans)
6929 {
6930     TCGv_i64 tcg_flags = tcg_temp_new_i64();
6931     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64);
6932 
6933     if (size == MO_64) {
6934         TCGv_i64 tcg_vn, tcg_vm;
6935 
6936         tcg_vn = read_fp_dreg(s, rn);
6937         if (cmp_with_zero) {
6938             tcg_vm = tcg_constant_i64(0);
6939         } else {
6940             tcg_vm = read_fp_dreg(s, rm);
6941         }
6942         if (signal_all_nans) {
6943             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6944         } else {
6945             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6946         }
6947     } else {
6948         TCGv_i32 tcg_vn = tcg_temp_new_i32();
6949         TCGv_i32 tcg_vm = tcg_temp_new_i32();
6950 
6951         read_vec_element_i32(s, tcg_vn, rn, 0, size);
6952         if (cmp_with_zero) {
6953             tcg_gen_movi_i32(tcg_vm, 0);
6954         } else {
6955             read_vec_element_i32(s, tcg_vm, rm, 0, size);
6956         }
6957 
6958         switch (size) {
6959         case MO_32:
6960             if (signal_all_nans) {
6961                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6962             } else {
6963                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6964             }
6965             break;
6966         case MO_16:
6967             if (signal_all_nans) {
6968                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6969             } else {
6970                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
6971             }
6972             break;
6973         default:
6974             g_assert_not_reached();
6975         }
6976     }
6977 
6978     gen_set_nzcv(tcg_flags);
6979 }
6980 
6981 /* FCMP, FCMPE */
6982 static bool trans_FCMP(DisasContext *s, arg_FCMP *a)
6983 {
6984     int check = fp_access_check_scalar_hsd(s, a->esz);
6985 
6986     if (check <= 0) {
6987         return check == 0;
6988     }
6989 
6990     handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e);
6991     return true;
6992 }
6993 
6994 /* FCCMP, FCCMPE */
6995 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a)
6996 {
6997     TCGLabel *label_continue = NULL;
6998     int check = fp_access_check_scalar_hsd(s, a->esz);
6999 
7000     if (check <= 0) {
7001         return check == 0;
7002     }
7003 
7004     if (a->cond < 0x0e) { /* not always */
7005         TCGLabel *label_match = gen_new_label();
7006         label_continue = gen_new_label();
7007         arm_gen_test_cc(a->cond, label_match);
7008         /* nomatch: */
7009         gen_set_nzcv(tcg_constant_i64(a->nzcv << 28));
7010         tcg_gen_br(label_continue);
7011         gen_set_label(label_match);
7012     }
7013 
7014     handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e);
7015 
7016     if (label_continue) {
7017         gen_set_label(label_continue);
7018     }
7019     return true;
7020 }
7021 
7022 /*
7023  * Advanced SIMD Modified Immediate
7024  */
7025 
7026 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a)
7027 {
7028     if (!dc_isar_feature(aa64_fp16, s)) {
7029         return false;
7030     }
7031     if (fp_access_check(s)) {
7032         tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd),
7033                              a->q ? 16 : 8, vec_full_reg_size(s),
7034                              vfp_expand_imm(MO_16, a->abcdefgh));
7035     }
7036     return true;
7037 }
7038 
7039 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs,
7040                      int64_t c, uint32_t oprsz, uint32_t maxsz)
7041 {
7042     tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
7043 }
7044 
7045 static bool trans_Vimm(DisasContext *s, arg_Vimm *a)
7046 {
7047     GVecGen2iFn *fn;
7048 
7049     /* Handle decode of cmode/op here between ORR/BIC/MOVI */
7050     if ((a->cmode & 1) && a->cmode < 12) {
7051         /* For op=1, the imm will be inverted, so BIC becomes AND. */
7052         fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
7053     } else {
7054         /* There is one unallocated cmode/op combination in this space */
7055         if (a->cmode == 15 && a->op == 1 && a->q == 0) {
7056             return false;
7057         }
7058         fn = gen_movi;
7059     }
7060 
7061     if (fp_access_check(s)) {
7062         uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
7063         gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
7064     }
7065     return true;
7066 }
7067 
7068 /*
7069  * Advanced SIMD Shift by Immediate
7070  */
7071 
7072 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
7073 {
7074     if (fp_access_check(s)) {
7075         gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
7076     }
7077     return true;
7078 }
7079 
7080 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
7081 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
7082 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
7083 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
7084 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
7085 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
7086 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
7087 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
7088 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
7089 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
7090 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli)
7091 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
7092 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
7093 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)
7094 
7095 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
7096 {
7097     TCGv_i64 tcg_rn, tcg_rd;
7098     int esz = a->esz;
7099     int esize;
7100 
7101     if (!fp_access_check(s)) {
7102         return true;
7103     }
7104 
7105     /*
7106      * For the LL variants the store is larger than the load,
7107      * so if rd == rn we would overwrite parts of our input.
7108      * So load everything right now and use shifts in the main loop.
7109      */
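    /*
     * For example, USHLL V0.8H, V0.8B, #3 reads the low doubleword
     * of V0 once, then extracts each of its 8 bytes, shifts left by
     * 3, and stores the result as a 16-bit element; a->q selects
     * the high doubleword instead for the SSHLL2/USHLL2 forms.
     */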
7110     tcg_rd = tcg_temp_new_i64();
7111     tcg_rn = tcg_temp_new_i64();
7112     read_vec_element(s, tcg_rn, a->rn, a->q, MO_64);
7113 
7114     esize = 8 << esz;
7115     for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7116         if (is_u) {
7117             tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize);
7118         } else {
7119             tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize);
7120         }
7121         tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm);
7122         write_vec_element(s, tcg_rd, a->rd, i, esz + 1);
7123     }
7124     clear_vec_high(s, true, a->rd);
7125     return true;
7126 }
7127 
7128 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false)
7129 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true)
7130 
7131 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7132 {
7133     assert(shift >= 0 && shift <= 64);
7134     tcg_gen_sari_i64(dst, src, MIN(shift, 63));
7135 }
7136 
7137 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7138 {
7139     assert(shift >= 0 && shift <= 64);
7140     if (shift == 64) {
7141         tcg_gen_movi_i64(dst, 0);
7142     } else {
7143         tcg_gen_shri_i64(dst, src, shift);
7144     }
7145 }
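
/*
 * TCG shifts are only defined for counts 0..63, but the immediate
 * forms of SSHR/USHR architecturally allow a shift of 64.  That
 * case is a pure sign replication (folded into MIN(shift, 63)
 * above) or a zero result, so it is resolved at translate time.
 */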
7146 
7147 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7148 {
7149     gen_sshr_d(src, src, shift);
7150     tcg_gen_add_i64(dst, dst, src);
7151 }
7152 
7153 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7154 {
7155     gen_ushr_d(src, src, shift);
7156     tcg_gen_add_i64(dst, dst, src);
7157 }
7158 
7159 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7160 {
7161     assert(shift >= 0 && shift <= 32);
7162     if (shift) {
7163         TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7164         tcg_gen_add_i64(dst, src, rnd);
7165         tcg_gen_sari_i64(dst, dst, shift);
7166     } else {
7167         tcg_gen_mov_i64(dst, src);
7168     }
7169 }
7170 
7171 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7172 {
7173     assert(shift >= 0 && shift <= 32);
7174     if (shift) {
7175         TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7176         tcg_gen_add_i64(dst, src, rnd);
7177         tcg_gen_shri_i64(dst, dst, shift);
7178     } else {
7179         tcg_gen_mov_i64(dst, src);
7180     }
7181 }
7182 
7183 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7184 {
7185     assert(shift >= 0 && shift <= 64);
7186     if (shift == 0) {
7187         tcg_gen_mov_i64(dst, src);
7188     } else if (shift == 64) {
7189         /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */
7190         tcg_gen_movi_i64(dst, 0);
7191     } else {
7192         TCGv_i64 rnd = tcg_temp_new_i64();
7193         tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7194         tcg_gen_sari_i64(dst, src, shift);
7195         tcg_gen_add_i64(dst, dst, rnd);
7196     }
7197 }
7198 
7199 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7200 {
7201     assert(shift >= 0 && shift <= 64);
7202     if (shift == 0) {
7203         tcg_gen_mov_i64(dst, src);
7204     } else if (shift == 64) {
7205         /* Rounding will propagate bit 63 into bit 64. */
7206         tcg_gen_shri_i64(dst, src, 63);
7207     } else {
7208         TCGv_i64 rnd = tcg_temp_new_i64();
7209         tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7210         tcg_gen_shri_i64(dst, src, shift);
7211         tcg_gen_add_i64(dst, dst, rnd);
7212     }
7213 }
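
/*
 * All the rounding shifts compute (src + (1 << (shift - 1))) >> shift.
 * The bhs forms add the rounding constant before shifting, which
 * cannot overflow because the value occupies at most 32 of the 64
 * bits; the _d forms shift first and then add the extracted rounding
 * bit, avoiding a 64-bit overflow of the intermediate sum.
 */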
7214 
7215 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7216 {
7217     gen_srshr_d(src, src, shift);
7218     tcg_gen_add_i64(dst, dst, src);
7219 }
7220 
7221 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7222 {
7223     gen_urshr_d(src, src, shift);
7224     tcg_gen_add_i64(dst, dst, src);
7225 }
7226 
7227 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7228 {
7229     /* If shift is 64, dst is unchanged. */
7230     if (shift != 64) {
7231         tcg_gen_shri_i64(src, src, shift);
7232         tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift);
7233     }
7234 }
7235 
7236 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7237 {
7238     tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift);
7239 }
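
/*
 * Both insert ops are a deposit into an otherwise unchanged
 * destination: SRI writes src >> shift into bits [63-shift:0] of
 * dst, keeping the top 'shift' bits, while SLI writes src into
 * bits [63:shift], keeping the low 'shift' bits.  E.g. SLI with
 * shift == 8 preserves dst[7:0] and sets dst[63:8] = src[55:0].
 */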
7240 
7241 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a,
7242                                     WideShiftImmFn * const fns[3], MemOp sign)
7243 {
7244     TCGv_i64 tcg_rn, tcg_rd;
7245     int esz = a->esz;
7246     int esize;
7247     WideShiftImmFn *fn;
7248 
7249     tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7250 
7251     if (!fp_access_check(s)) {
7252         return true;
7253     }
7254 
7255     tcg_rn = tcg_temp_new_i64();
7256     tcg_rd = tcg_temp_new_i64();
7257     tcg_gen_movi_i64(tcg_rd, 0);
7258 
7259     fn = fns[esz];
7260     esize = 8 << esz;
7261     for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7262         read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign);
7263         fn(tcg_rn, tcg_rn, a->imm);
7264         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize);
7265     }
7266 
7267     write_vec_element(s, tcg_rd, a->rd, a->q, MO_64);
7268     clear_vec_high(s, a->q, a->rd);
7269     return true;
7270 }
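
/*
 * Each narrowed result is half the source element size, so all of
 * them fit in a single doubleword.  a->q selects which half of Vd
 * receives it: q == 0 for the SHRN-style forms, which also zero
 * the high half via clear_vec_high(), and q == 1 for the "2"
 * forms, which leave the low half of Vd intact.
 */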
7271 
7272 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7273 {
7274     tcg_gen_sari_i64(d, s, i);
7275     tcg_gen_ext16u_i64(d, d);
7276     gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7277 }
7278 
7279 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7280 {
7281     tcg_gen_sari_i64(d, s, i);
7282     tcg_gen_ext32u_i64(d, d);
7283     gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7284 }
7285 
7286 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7287 {
7288     gen_sshr_d(d, s, i);
7289     gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7290 }
7291 
7292 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7293 {
7294     tcg_gen_shri_i64(d, s, i);
7295     gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7296 }
7297 
7298 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7299 {
7300     tcg_gen_shri_i64(d, s, i);
7301     gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7302 }
7303 
7304 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7305 {
7306     gen_ushr_d(d, s, i);
7307     gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7308 }
7309 
7310 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7311 {
7312     tcg_gen_sari_i64(d, s, i);
7313     tcg_gen_ext16u_i64(d, d);
7314     gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7315 }
7316 
7317 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7318 {
7319     tcg_gen_sari_i64(d, s, i);
7320     tcg_gen_ext32u_i64(d, d);
7321     gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7322 }
7323 
7324 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7325 {
7326     gen_sshr_d(d, s, i);
7327     gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7328 }
7329 
7330 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7331 {
7332     gen_srshr_bhs(d, s, i);
7333     tcg_gen_ext16u_i64(d, d);
7334     gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7335 }
7336 
7337 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7338 {
7339     gen_srshr_bhs(d, s, i);
7340     tcg_gen_ext32u_i64(d, d);
7341     gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7342 }
7343 
7344 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7345 {
7346     gen_srshr_d(d, s, i);
7347     gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7348 }
7349 
7350 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7351 {
7352     gen_urshr_bhs(d, s, i);
7353     gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7354 }
7355 
7356 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7357 {
7358     gen_urshr_bhs(d, s, i);
7359     gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7360 }
7361 
7362 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7363 {
7364     gen_urshr_d(d, s, i);
7365     gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7366 }
7367 
7368 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7369 {
7370     gen_srshr_bhs(d, s, i);
7371     tcg_gen_ext16u_i64(d, d);
7372     gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7373 }
7374 
7375 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7376 {
7377     gen_srshr_bhs(d, s, i);
7378     tcg_gen_ext32u_i64(d, d);
7379     gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7380 }
7381 
7382 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7383 {
7384     gen_srshr_d(d, s, i);
7385     gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7386 }
7387 
7388 static WideShiftImmFn * const shrn_fns[] = {
7389     tcg_gen_shri_i64,
7390     tcg_gen_shri_i64,
7391     gen_ushr_d,
7392 };
7393 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0)
7394 
7395 static WideShiftImmFn * const rshrn_fns[] = {
7396     gen_urshr_bhs,
7397     gen_urshr_bhs,
7398     gen_urshr_d,
7399 };
7400 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0)
7401 
7402 static WideShiftImmFn * const sqshrn_fns[] = {
7403     gen_sqshrn_b,
7404     gen_sqshrn_h,
7405     gen_sqshrn_s,
7406 };
7407 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN)
7408 
7409 static WideShiftImmFn * const uqshrn_fns[] = {
7410     gen_uqshrn_b,
7411     gen_uqshrn_h,
7412     gen_uqshrn_s,
7413 };
7414 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0)
7415 
7416 static WideShiftImmFn * const sqshrun_fns[] = {
7417     gen_sqshrun_b,
7418     gen_sqshrun_h,
7419     gen_sqshrun_s,
7420 };
7421 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN)
7422 
7423 static WideShiftImmFn * const sqrshrn_fns[] = {
7424     gen_sqrshrn_b,
7425     gen_sqrshrn_h,
7426     gen_sqrshrn_s,
7427 };
7428 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN)
7429 
7430 static WideShiftImmFn * const uqrshrn_fns[] = {
7431     gen_uqrshrn_b,
7432     gen_uqrshrn_h,
7433     gen_uqrshrn_s,
7434 };
7435 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0)
7436 
7437 static WideShiftImmFn * const sqrshrun_fns[] = {
7438     gen_sqrshrun_b,
7439     gen_sqrshrun_h,
7440     gen_sqrshrun_s,
7441 };
7442 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN)
7443 
7444 /*
7445  * Advanced SIMD Scalar Shift by Immediate
7446  */
7447 
7448 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a,
7449                                 WideShiftImmFn *fn, bool accumulate,
7450                                 MemOp sign)
7451 {
7452     if (fp_access_check(s)) {
7453         TCGv_i64 rd = tcg_temp_new_i64();
7454         TCGv_i64 rn = tcg_temp_new_i64();
7455 
7456         read_vec_element(s, rn, a->rn, 0, a->esz | sign);
7457         if (accumulate) {
7458             read_vec_element(s, rd, a->rd, 0, a->esz | sign);
7459         }
7460         fn(rd, rn, a->imm);
7461         write_fp_dreg(s, a->rd, rd);
7462     }
7463     return true;
7464 }
7465 
7466 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0)
7467 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0)
7468 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0)
7469 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0)
7470 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0)
7471 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0)
7472 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0)
7473 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0)
7474 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0)
7475 
7476 TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0)
7477 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0)
7478 
7479 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i,
7480                               NeonGenTwoOpEnvFn *fn)
7481 {
7482     TCGv_i32 t = tcg_temp_new_i32();
7483     tcg_gen_extrl_i64_i32(t, s);
7484     fn(t, tcg_env, t, tcg_constant_i32(i));
7485     tcg_gen_extu_i32_i64(d, t);
7486 }
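
/*
 * The 32-bit saturating-shift helpers used below operate on values
 * in the low bits of an i32 and record saturation in env->vfp.qc,
 * so the 64-bit scalar is truncated, shifted via the helper, and
 * zero-extended back into the 64-bit destination.
 */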
7487 
7488 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7489 {
7490     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8);
7491 }
7492 
7493 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7494 {
7495     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16);
7496 }
7497 
7498 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7499 {
7500     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32);
7501 }
7502 
7503 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7504 {
7505     gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i));
7506 }
7507 
7508 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7509 {
7510     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8);
7511 }
7512 
7513 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7514 {
7515     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16);
7516 }
7517 
7518 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7519 {
7520     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32);
7521 }
7522 
7523 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7524 {
7525     gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i));
7526 }
7527 
7528 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7529 {
7530     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8);
7531 }
7532 
7533 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7534 {
7535     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16);
7536 }
7537 
7538 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7539 {
7540     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32);
7541 }
7542 
7543 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7544 {
7545     gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i));
7546 }
7547 
7548 static WideShiftImmFn * const f_scalar_sqshli[] = {
7549     gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d
7550 };
7551 
7552 static WideShiftImmFn * const f_scalar_uqshli[] = {
7553     gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d
7554 };
7555 
7556 static WideShiftImmFn * const f_scalar_sqshlui[] = {
7557     gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d
7558 };
7559 
7560 /* Note that the helpers sign-extend their inputs, so don't do it here. */
7561 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0)
7562 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0)
7563 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0)
7564 
7565 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a,
7566                                        WideShiftImmFn * const fns[3],
7567                                        MemOp sign, bool zext)
7568 {
7569     MemOp esz = a->esz;
7570 
7571     tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7572 
7573     if (fp_access_check(s)) {
7574         TCGv_i64 rd = tcg_temp_new_i64();
7575         TCGv_i64 rn = tcg_temp_new_i64();
7576 
7577         read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign);
7578         fns[esz](rd, rn, a->imm);
7579         if (zext) {
7580             tcg_gen_ext_i64(rd, rd, esz);
7581         }
7582         write_fp_dreg(s, a->rd, rd);
7583     }
7584     return true;
7585 }
7586 
7587 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true)
7588 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true)
7589 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false)
7590 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false)
7591 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false)
7592 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false)
7593 
7594 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed)
7595 {
7596     TCGv_i64 tcg_n, tcg_m, tcg_rd;
7597     tcg_rd = cpu_reg(s, a->rd);
7598 
7599     if (!a->sf && is_signed) {
7600         tcg_n = tcg_temp_new_i64();
7601         tcg_m = tcg_temp_new_i64();
7602         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn));
7603         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm));
7604     } else {
7605         tcg_n = read_cpu_reg(s, a->rn, a->sf);
7606         tcg_m = read_cpu_reg(s, a->rm, a->sf);
7607     }
7608 
7609     if (is_signed) {
7610         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
7611     } else {
7612         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
7613     }
7614 
7615     if (!a->sf) { /* zero extend final result */
7616         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7617     }
7618     return true;
7619 }
7620 
7621 TRANS(SDIV, do_div, a, true)
7622 TRANS(UDIV, do_div, a, false)
7623 
7624 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
7625  * Note that it is the caller's responsibility to ensure that the
7626  * shift amount is in range (i.e. 0..31 or 0..63) and provide the ARM
7627  * mandated semantics for out of range shifts.
7628  */
7629 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
7630                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
7631 {
7632     switch (shift_type) {
7633     case A64_SHIFT_TYPE_LSL:
7634         tcg_gen_shl_i64(dst, src, shift_amount);
7635         break;
7636     case A64_SHIFT_TYPE_LSR:
7637         tcg_gen_shr_i64(dst, src, shift_amount);
7638         break;
7639     case A64_SHIFT_TYPE_ASR:
7640         if (!sf) {
7641             tcg_gen_ext32s_i64(dst, src);
7642         }
7643         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
7644         break;
7645     case A64_SHIFT_TYPE_ROR:
7646         if (sf) {
7647             tcg_gen_rotr_i64(dst, src, shift_amount);
7648         } else {
7649             TCGv_i32 t0, t1;
7650             t0 = tcg_temp_new_i32();
7651             t1 = tcg_temp_new_i32();
7652             tcg_gen_extrl_i64_i32(t0, src);
7653             tcg_gen_extrl_i64_i32(t1, shift_amount);
7654             tcg_gen_rotr_i32(t0, t0, t1);
7655             tcg_gen_extu_i32_i64(dst, t0);
7656         }
7657         break;
7658     default:
7659         g_assert_not_reached(); /* all shift types should be handled */
7660         break;
7661     }
7662 
7663     if (!sf) { /* zero extend final result */
7664         tcg_gen_ext32u_i64(dst, dst);
7665     }
7666 }
7667 
7668 /* Shift a TCGv src by immediate, put result in dst.
7669  * The shift amount must be in range (this should always be true as the
7670  * relevant instructions will UNDEF on bad shift immediates).
7671  */
7672 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
7673                           enum a64_shift_type shift_type, unsigned int shift_i)
7674 {
7675     assert(shift_i < (sf ? 64 : 32));
7676 
7677     if (shift_i == 0) {
7678         tcg_gen_mov_i64(dst, src);
7679     } else {
7680         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
7681     }
7682 }
7683 
7684 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a,
7685                          enum a64_shift_type shift_type)
7686 {
7687     TCGv_i64 tcg_shift = tcg_temp_new_i64();
7688     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
7689     TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf);
7690 
7691     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31);
7692     shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift);
7693     return true;
7694 }
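
/*
 * Masking Rm with 63 (or 31) gives the architected behaviour for
 * the register-controlled shifts: the shift amount is taken modulo
 * the data size, so e.g. LSLV of a 64-bit value with Rm == 65
 * shifts left by 1 rather than producing zero.
 */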
7695 
7696 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL)
7697 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR)
7698 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR)
7699 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR)
7700 
7701 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c)
7702 {
7703     TCGv_i64 tcg_acc, tcg_val, tcg_rd;
7704     TCGv_i32 tcg_bytes;
7705 
7706     switch (a->esz) {
7707     case MO_8:
7708     case MO_16:
7709     case MO_32:
7710         tcg_val = tcg_temp_new_i64();
7711         tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz);
7712         break;
7713     case MO_64:
7714         tcg_val = cpu_reg(s, a->rm);
7715         break;
7716     default:
7717         g_assert_not_reached();
7718     }
7719     tcg_acc = cpu_reg(s, a->rn);
7720     tcg_bytes = tcg_constant_i32(1 << a->esz);
7721     tcg_rd = cpu_reg(s, a->rd);
7722 
7723     if (crc32c) {
7724         gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
7725     } else {
7726         gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
7727     }
7728     return true;
7729 }
7730 
7731 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false)
7732 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true)
7733 
7734 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag)
7735 {
7736     TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true);
7737     TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true);
7738     TCGv_i64 tcg_d = cpu_reg(s, a->rd);
7739 
7740     tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
7741     tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
7742 
7743     if (setflag) {
7744         gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
7745     } else {
7746         tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
7747     }
7748     return true;
7749 }
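
/*
 * SUBP/SUBPS subtract only the 56-bit virtual-address portion of
 * the pointers: the sextract calls above sign-extend bits [55:0]
 * of each operand first, so differing tag bits in [63:56] do not
 * affect the result.
 */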
7750 
7751 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false)
7752 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true)
7753 
7754 static bool trans_IRG(DisasContext *s, arg_rrr *a)
7755 {
7756     if (dc_isar_feature(aa64_mte_insn_reg, s)) {
7757         TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd);
7758         TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn);
7759 
7760         if (s->ata[0]) {
7761             gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm));
7762         } else {
7763             gen_address_with_allocation_tag0(tcg_rd, tcg_rn);
7764         }
7765         return true;
7766     }
7767     return false;
7768 }
7769 
7770 static bool trans_GMI(DisasContext *s, arg_rrr *a)
7771 {
7772     if (dc_isar_feature(aa64_mte_insn_reg, s)) {
7773         TCGv_i64 t = tcg_temp_new_i64();
7774 
7775         tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4);
7776         tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
7777         tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t);
7778         return true;
7779     }
7780     return false;
7781 }
7782 
7783 static bool trans_PACGA(DisasContext *s, arg_rrr *a)
7784 {
7785     if (dc_isar_feature(aa64_pauth, s)) {
7786         gen_helper_pacga(cpu_reg(s, a->rd), tcg_env,
7787                          cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm));
7788         return true;
7789     }
7790     return false;
7791 }
7792 
7793 typedef void ArithOneOp(TCGv_i64, TCGv_i64);
7794 
7795 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn)
7796 {
7797     fn(cpu_reg(s, rd), cpu_reg(s, rn));
7798     return true;
7799 }
7800 
7801 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7802 {
7803     TCGv_i32 t32 = tcg_temp_new_i32();
7804 
7805     tcg_gen_extrl_i64_i32(t32, tcg_rn);
7806     gen_helper_rbit(t32, t32);
7807     tcg_gen_extu_i32_i64(tcg_rd, t32);
7808 }
7809 
7810 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask)
7811 {
7812     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7813 
7814     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
7815     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
7816     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
7817     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
7818     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
7819 }
7820 
7821 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7822 {
7823     gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff));
7824 }
7825 
7826 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7827 {
7828     gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull));
7829 }
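
/*
 * gen_rev16_xx swaps the bytes within each halfword with one shift
 * and two masks: rd = ((rn & mask) << 8) | ((rn >> 8) & mask) for
 * mask = 0x00ff00ff..., so e.g. 0xAABBCCDD becomes 0xBBAADDCC in
 * the 32-bit form.
 */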
7830 
7831 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7832 {
7833     tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
7834 }
7835 
7836 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7837 {
7838     tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
7839     tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
7840 }
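
/*
 * Note the similarly named functions: gen_rev_32 implements REV on
 * a 32-bit register (byte-reverse the whole word), while gen_rev32
 * implements REV32 on a 64-bit register (byte-reverse each 32-bit
 * half, done as a full bswap64 followed by a 32-bit rotate).
 */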
7841 
7842 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32)
7843 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32)
7844 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32)
7845 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64)
7846 
7847 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7848 {
7849     TCGv_i32 t32 = tcg_temp_new_i32();
7850 
7851     tcg_gen_extrl_i64_i32(t32, tcg_rn);
7852     tcg_gen_clzi_i32(t32, t32, 32);
7853     tcg_gen_extu_i32_i64(tcg_rd, t32);
7854 }
7855 
7856 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7857 {
7858     tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
7859 }
7860 
7861 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
7862 {
7863     TCGv_i32 t32 = tcg_temp_new_i32();
7864 
7865     tcg_gen_extrl_i64_i32(t32, tcg_rn);
7866     tcg_gen_clrsb_i32(t32, t32);
7867     tcg_gen_extu_i32_i64(tcg_rd, t32);
7868 }
7869 
7870 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32)
7871 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32)
7872 
7873 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn)
7874 {
7875     TCGv_i64 tcg_rd, tcg_rn;
7876 
7877     if (a->z) {
7878         if (a->rn != 31) {
7879             return false;
7880         }
7881         tcg_rn = tcg_constant_i64(0);
7882     } else {
7883         tcg_rn = cpu_reg_sp(s, a->rn);
7884     }
7885     if (s->pauth_active) {
7886         tcg_rd = cpu_reg(s, a->rd);
7887         fn(tcg_rd, tcg_env, tcg_rd, tcg_rn);
7888     }
7889     return true;
7890 }
7891 
7892 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia)
7893 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib)
7894 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda)
7895 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb)
7896 
7897 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia)
7898 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib)
7899 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda)
7900 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb)
7901 
7902 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn)
7903 {
7904     if (s->pauth_active) {
7905         TCGv_i64 tcg_rd = cpu_reg(s, rd);
7906         fn(tcg_rd, tcg_env, tcg_rd);
7907     }
7908     return true;
7909 }
7910 
7911 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci)
7912 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd)
7913 
7914 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a,
7915                          ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags)
7916 {
7917     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
7918 
7919     if (!a->sf && (a->sa & (1 << 5))) {
7920         return false;
7921     }
7922 
7923     tcg_rd = cpu_reg(s, a->rd);
7924     tcg_rn = cpu_reg(s, a->rn);
7925 
7926     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
7927     if (a->sa) {
7928         shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
7929     }
7930 
7931     (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm);
7932     if (!a->sf) {
7933         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7934     }
7935     if (setflags) {
7936         gen_logic_CC(a->sf, tcg_rd);
7937     }
7938     return true;
7939 }
7940 
7941 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a)
7942 {
7943     /*
7944      * Unshifted ORR and ORN with WZR/XZR is the standard encoding for
7945      * register-register MOV and MVN, so it is worth special casing.
7946      */
7947     if (a->sa == 0 && a->st == 0 && a->rn == 31) {
7948         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
7949         TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
7950 
7951         if (a->n) {
7952             tcg_gen_not_i64(tcg_rd, tcg_rm);
7953             if (!a->sf) {
7954                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7955             }
7956         } else {
7957             if (a->sf) {
7958                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
7959             } else {
7960                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
7961             }
7962         }
7963         return true;
7964     }
7965 
7966     return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false);
7967 }
7968 
7969 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false)
7970 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true)
7971 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false)
7972 
7973 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a,
7974                           bool sub_op, bool setflags)
7975 {
7976     TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result;
7977 
7978     if (a->sa > 4) {
7979         return false;
7980     }
7981 
7982     /* non-flag setting ops may use SP */
7983     if (!setflags) {
7984         tcg_rd = cpu_reg_sp(s, a->rd);
7985     } else {
7986         tcg_rd = cpu_reg(s, a->rd);
7987     }
7988     tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf);
7989 
7990     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
7991     ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa);
7992 
7993     tcg_result = tcg_temp_new_i64();
7994     if (!setflags) {
7995         if (sub_op) {
7996             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
7997         } else {
7998             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
7999         }
8000     } else {
8001         if (sub_op) {
8002             gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8003         } else {
8004             gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8005         }
8006     }
8007 
8008     if (a->sf) {
8009         tcg_gen_mov_i64(tcg_rd, tcg_result);
8010     } else {
8011         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8012     }
8013     return true;
8014 }
8015 
8016 TRANS(ADD_ext, do_addsub_ext, a, false, false)
8017 TRANS(SUB_ext, do_addsub_ext, a, true, false)
8018 TRANS(ADDS_ext, do_addsub_ext, a, false, true)
8019 TRANS(SUBS_ext, do_addsub_ext, a, true, true)
8020 
8021 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a,
8022                           bool sub_op, bool setflags)
8023 {
8024     TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result;
8025 
8026     if (a->st == 3 || (!a->sf && (a->sa & 32))) {
8027         return false;
8028     }
8029 
8030     tcg_rd = cpu_reg(s, a->rd);
8031     tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8032     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8033 
8034     shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8035 
8036     tcg_result = tcg_temp_new_i64();
8037     if (!setflags) {
8038         if (sub_op) {
8039             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8040         } else {
8041             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8042         }
8043     } else {
8044         if (sub_op) {
8045             gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8046         } else {
8047             gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8048         }
8049     }
8050 
8051     if (a->sf) {
8052         tcg_gen_mov_i64(tcg_rd, tcg_result);
8053     } else {
8054         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8055     }
8056     return true;
8057 }
8058 
8059 TRANS(ADD_r, do_addsub_reg, a, false, false)
8060 TRANS(SUB_r, do_addsub_reg, a, true, false)
8061 TRANS(ADDS_r, do_addsub_reg, a, false, true)
8062 TRANS(SUBS_r, do_addsub_reg, a, true, true)
8063 
8064 static bool do_mulh(DisasContext *s, arg_rrr *a,
8065                     void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
8066 {
8067     TCGv_i64 discard = tcg_temp_new_i64();
8068     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8069     TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8070     TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8071 
8072     fn(discard, tcg_rd, tcg_rn, tcg_rm);
8073     return true;
8074 }
8075 
8076 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64)
8077 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64)
8078 
8079 static bool do_muladd(DisasContext *s, arg_rrrr *a,
8080                       bool sf, bool is_sub, MemOp mop)
8081 {
8082     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8083     TCGv_i64 tcg_op1, tcg_op2;
8084 
8085     if (mop == MO_64) {
8086         tcg_op1 = cpu_reg(s, a->rn);
8087         tcg_op2 = cpu_reg(s, a->rm);
8088     } else {
8089         tcg_op1 = tcg_temp_new_i64();
8090         tcg_op2 = tcg_temp_new_i64();
8091         tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop);
8092         tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop);
8093     }
8094 
8095     if (a->ra == 31 && !is_sub) {
8096         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
8097         tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2);
8098     } else {
8099         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8100         TCGv_i64 tcg_ra = cpu_reg(s, a->ra);
8101 
8102         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
8103         if (is_sub) {
8104             tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp);
8105         } else {
8106             tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp);
8107         }
8108     }
8109 
8110     if (!sf) {
8111         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8112     }
8113     return true;
8114 }
8115 
8116 TRANS(MADD_w, do_muladd, a, false, false, MO_64)
8117 TRANS(MSUB_w, do_muladd, a, false, true, MO_64)
8118 TRANS(MADD_x, do_muladd, a, true, false, MO_64)
8119 TRANS(MSUB_x, do_muladd, a, true, true, MO_64)
8120 
8121 TRANS(SMADDL, do_muladd, a, true, false, MO_SL)
8122 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL)
8123 TRANS(UMADDL, do_muladd, a, true, false, MO_UL)
8124 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL)
8125 
8126 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a,
8127                        bool is_sub, bool setflags)
8128 {
8129     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
8130 
8131     tcg_rd = cpu_reg(s, a->rd);
8132     tcg_rn = cpu_reg(s, a->rn);
8133 
8134     if (is_sub) {
8135         tcg_y = tcg_temp_new_i64();
8136         tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm));
8137     } else {
8138         tcg_y = cpu_reg(s, a->rm);
8139     }
8140 
8141     if (setflags) {
8142         gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y);
8143     } else {
8144         gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y);
8145     }
8146     return true;
8147 }
8148 
8149 TRANS(ADC, do_adc_sbc, a, false, false)
8150 TRANS(SBC, do_adc_sbc, a, true, false)
8151 TRANS(ADCS, do_adc_sbc, a, false, true)
8152 TRANS(SBCS, do_adc_sbc, a, true, true)
8153 
8154 static bool trans_RMIF(DisasContext *s, arg_RMIF *a)
8155 {
8156     int mask = a->mask;
8157     TCGv_i64 tcg_rn;
8158     TCGv_i32 nzcv;
8159 
8160     if (!dc_isar_feature(aa64_condm_4, s)) {
8161         return false;
8162     }
8163 
8164     tcg_rn = read_cpu_reg(s, a->rn, 1);
8165     tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm);
8166 
8167     nzcv = tcg_temp_new_i32();
8168     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
8169 
8170     if (mask & 8) { /* N */
8171         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
8172     }
8173     if (mask & 4) { /* Z */
8174         tcg_gen_not_i32(cpu_ZF, nzcv);
8175         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
8176     }
8177     if (mask & 2) { /* C */
8178         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
8179     }
8180     if (mask & 1) { /* V */
8181         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
8182     }
8183     return true;
8184 }
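
/*
 * Illustrative note on the flag representation used above: QEMU keeps
 * N and V in the sign bit of cpu_NF/cpu_VF, C as 0 or 1 in cpu_CF, and
 * Z inverted (cpu_ZF == 0 means Z is set).  Hence bit 3 of the rotated
 * value is shifted up to bit 31 for N, bit 2 is inverted and masked so
 * cpu_ZF is zero exactly when the guest Z bit is 1, bit 1 is extracted
 * as 0/1 for C, and bit 0 is shifted up to bit 31 for V.
 */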
8185 
8186 static bool do_setf(DisasContext *s, int rn, int shift)
8187 {
8188     TCGv_i32 tmp = tcg_temp_new_i32();
8189 
8190     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
8191     tcg_gen_shli_i32(cpu_NF, tmp, shift);
8192     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
8193     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
8194     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
8195     return true;
8196 }
8197 
8198 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24)
8199 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16)
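
/*
 * Worked example for SETF8 (shift == 24), with tmp = Wn:
 *     NF = tmp << 24            sign bit is Wn[7]; NF == 0 iff Wn[7:0] == 0
 *     ZF = NF                   so Z is set exactly when the low byte is 0
 *     VF = (tmp << 23) ^ NF     sign bit is Wn[8] ^ Wn[7]
 * matching the architectural N = x[7], Z = IsZero(x[7:0]), V = x[8]^x[7],
 * with C unchanged.  SETF16 is the same with shift == 16.
 */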
8200 
8201 /* CCMP, CCMN */
8202 static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
8203 {
8204     TCGv_i32 tcg_t0 = tcg_temp_new_i32();
8205     TCGv_i32 tcg_t1 = tcg_temp_new_i32();
8206     TCGv_i32 tcg_t2 = tcg_temp_new_i32();
8207     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8208     TCGv_i64 tcg_rn, tcg_y;
8209     DisasCompare c;
8210     unsigned nzcv;
8211     bool has_andc;
8212 
8213     /* Set T0 = !COND.  */
8214     arm_test_cc(&c, a->cond);
8215     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
8216 
8217     /* Load the arguments for the new comparison.  */
8218     if (a->imm) {
8219         tcg_y = tcg_constant_i64(a->y);
8220     } else {
8221         tcg_y = cpu_reg(s, a->y);
8222     }
8223     tcg_rn = cpu_reg(s, a->rn);
8224 
8225     /* Set the flags for the new comparison.  */
8226     if (a->op) {
8227         gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8228     } else {
8229         gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8230     }
8231 
8232     /*
8233      * If COND was false, force the flags to #nzcv.  Compute two masks
8234      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
8235      * For tcg hosts that support ANDC, we can make do with just T1.
8236      * In either case, allow the tcg optimizer to delete any unused mask.
8237      */
8238     tcg_gen_neg_i32(tcg_t1, tcg_t0);
8239     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
8240 
8241     nzcv = a->nzcv;
8242     has_andc = tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0);
8243     if (nzcv & 8) { /* N */
8244         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
8245     } else {
8246         if (has_andc) {
8247             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
8248         } else {
8249             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
8250         }
8251     }
8252     if (nzcv & 4) { /* Z */
8253         if (has_andc) {
8254             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
8255         } else {
8256             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
8257         }
8258     } else {
8259         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
8260     }
8261     if (nzcv & 2) { /* C */
8262         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
8263     } else {
8264         if (has_andc) {
8265             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
8266         } else {
8267             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
8268         }
8269     }
8270     if (nzcv & 1) { /* V */
8271         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
8272     } else {
8273         if (has_andc) {
8274             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
8275         } else {
8276             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
8277         }
8278     }
8279     return true;
8280 }
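
/*
 * Worked example of the masking above, for the C flag with nzcv.C == 1:
 * when COND held, T0 == 0 and the OR leaves the freshly computed carry
 * alone; when COND failed, T0 == 1 and the OR forces C to 1.  The
 * sign-bit flags N and V use T1 == (COND ? 0 : -1) instead, since only
 * OR-ing all-ones is guaranteed to set bit 31.
 */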
8281 
8282 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8283 {
8284     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8285     TCGv_i64 zero = tcg_constant_i64(0);
8286     DisasCompare64 c;
8287 
8288     a64_test_cc(&c, a->cond);
8289 
8290     if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) {
8291         /* CSET & CSETM.  */
8292         if (a->else_inv) {
8293             tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
8294                                    tcg_rd, c.value, zero);
8295         } else {
8296             tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
8297                                 tcg_rd, c.value, zero);
8298         }
8299     } else {
8300         TCGv_i64 t_true = cpu_reg(s, a->rn);
8301         TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1);
8302 
8303         if (a->else_inv && a->else_inc) {
8304             tcg_gen_neg_i64(t_false, t_false);
8305         } else if (a->else_inv) {
8306             tcg_gen_not_i64(t_false, t_false);
8307         } else if (a->else_inc) {
8308             tcg_gen_addi_i64(t_false, t_false, 1);
8309         }
8310         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
8311     }
8312 
8313     if (!a->sf) {
8314         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8315     }
8316     return true;
8317 }
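
/*
 * Illustrative note: this one translator covers CSEL, CSINC (else_inc),
 * CSINV (else_inv) and CSNEG (both), plus the CSET/CSETM aliases when
 * rn == rm == XZR; e.g. CSET Xd, cond is CSINC Xd, XZR, XZR, invert(cond),
 * which reduces to the setcond fast path above.
 */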
8318 
8319 typedef struct FPScalar1Int {
8320     void (*gen_h)(TCGv_i32, TCGv_i32);
8321     void (*gen_s)(TCGv_i32, TCGv_i32);
8322     void (*gen_d)(TCGv_i64, TCGv_i64);
8323 } FPScalar1Int;
8324 
8325 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
8326                               const FPScalar1Int *f)
8327 {
8328     switch (a->esz) {
8329     case MO_64:
8330         if (fp_access_check(s)) {
8331             TCGv_i64 t = read_fp_dreg(s, a->rn);
8332             f->gen_d(t, t);
8333             write_fp_dreg(s, a->rd, t);
8334         }
8335         break;
8336     case MO_32:
8337         if (fp_access_check(s)) {
8338             TCGv_i32 t = read_fp_sreg(s, a->rn);
8339             f->gen_s(t, t);
8340             write_fp_sreg(s, a->rd, t);
8341         }
8342         break;
8343     case MO_16:
8344         if (!dc_isar_feature(aa64_fp16, s)) {
8345             return false;
8346         }
8347         if (fp_access_check(s)) {
8348             TCGv_i32 t = read_fp_hreg(s, a->rn);
8349             f->gen_h(t, t);
8350             write_fp_sreg(s, a->rd, t);
8351         }
8352         break;
8353     default:
8354         return false;
8355     }
8356     return true;
8357 }
8358 
8359 static const FPScalar1Int f_scalar_fmov = {
8360     tcg_gen_mov_i32,
8361     tcg_gen_mov_i32,
8362     tcg_gen_mov_i64,
8363 };
8364 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov)
8365 
8366 static const FPScalar1Int f_scalar_fabs = {
8367     gen_vfp_absh,
8368     gen_vfp_abss,
8369     gen_vfp_absd,
8370 };
8371 TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs)
8372 
8373 static const FPScalar1Int f_scalar_fneg = {
8374     gen_vfp_negh,
8375     gen_vfp_negs,
8376     gen_vfp_negd,
8377 };
8378 TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg)
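
/*
 * Illustrative note: FMOV, FABS and FNEG are pure bit operations on the
 * register value (FABS and FNEG clear or flip the sign bit), so they
 * take no float_status and cannot raise FP exceptions; hence the "Int"
 * variants with two-operand gen functions and no fpst argument.
 */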
8379 
8380 typedef struct FPScalar1 {
8381     void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);
8382     void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr);
8383     void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr);
8384 } FPScalar1;
8385 
8386 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
8387                           const FPScalar1 *f, int rmode)
8388 {
8389     TCGv_i32 tcg_rmode = NULL;
8390     TCGv_ptr fpst;
8391     TCGv_i64 t64;
8392     TCGv_i32 t32;
8393     int check = fp_access_check_scalar_hsd(s, a->esz);
8394 
8395     if (check <= 0) {
8396         return check == 0;
8397     }
8398 
8399     fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
8400     if (rmode >= 0) {
8401         tcg_rmode = gen_set_rmode(rmode, fpst);
8402     }
8403 
8404     switch (a->esz) {
8405     case MO_64:
8406         t64 = read_fp_dreg(s, a->rn);
8407         f->gen_d(t64, t64, fpst);
8408         write_fp_dreg(s, a->rd, t64);
8409         break;
8410     case MO_32:
8411         t32 = read_fp_sreg(s, a->rn);
8412         f->gen_s(t32, t32, fpst);
8413         write_fp_sreg(s, a->rd, t32);
8414         break;
8415     case MO_16:
8416         t32 = read_fp_hreg(s, a->rn);
8417         f->gen_h(t32, t32, fpst);
8418         write_fp_sreg(s, a->rd, t32);
8419         break;
8420     default:
8421         g_assert_not_reached();
8422     }
8423 
8424     if (rmode >= 0) {
8425         gen_restore_rmode(tcg_rmode, fpst);
8426     }
8427     return true;
8428 }
8429 
8430 static const FPScalar1 f_scalar_fsqrt = {
8431     gen_helper_vfp_sqrth,
8432     gen_helper_vfp_sqrts,
8433     gen_helper_vfp_sqrtd,
8434 };
8435 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1)
8436 
8437 static const FPScalar1 f_scalar_frint = {
8438     gen_helper_advsimd_rinth,
8439     gen_helper_rints,
8440     gen_helper_rintd,
8441 };
8442 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
8443 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF)
8444 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF)
8445 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO)
8446 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
8447 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1)
8448 
8449 static const FPScalar1 f_scalar_frintx = {
8450     gen_helper_advsimd_rinth_exact,
8451     gen_helper_rints_exact,
8452     gen_helper_rintd_exact,
8453 };
8454 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
8455 
8456 static const FPScalar1 f_scalar_bfcvt = {
8457     .gen_s = gen_helper_bfcvt,
8458 };
8459 TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar, a, &f_scalar_bfcvt, -1)
8460 
8461 static const FPScalar1 f_scalar_frint32 = {
8462     NULL,
8463     gen_helper_frint32_s,
8464     gen_helper_frint32_d,
8465 };
8466 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a,
8467            &f_scalar_frint32, FPROUNDING_ZERO)
8468 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1)
8469 
8470 static const FPScalar1 f_scalar_frint64 = {
8471     NULL,
8472     gen_helper_frint64_s,
8473     gen_helper_frint64_d,
8474 };
8475 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a,
8476            &f_scalar_frint64, FPROUNDING_ZERO)
8477 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1)
8478 
8479 static const FPScalar1 f_scalar_frecpe = {
8480     gen_helper_recpe_f16,
8481     gen_helper_recpe_f32,
8482     gen_helper_recpe_f64,
8483 };
8484 TRANS(FRECPE_s, do_fp1_scalar, a, &f_scalar_frecpe, -1)
8485 
8486 static const FPScalar1 f_scalar_frecpx = {
8487     gen_helper_frecpx_f16,
8488     gen_helper_frecpx_f32,
8489     gen_helper_frecpx_f64,
8490 };
8491 TRANS(FRECPX_s, do_fp1_scalar, a, &f_scalar_frecpx, -1)
8492 
8493 static const FPScalar1 f_scalar_frsqrte = {
8494     gen_helper_rsqrte_f16,
8495     gen_helper_rsqrte_f32,
8496     gen_helper_rsqrte_f64,
8497 };
8498 TRANS(FRSQRTE_s, do_fp1_scalar, a, &f_scalar_frsqrte, -1)
8499 
8500 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
8501 {
8502     if (fp_access_check(s)) {
8503         TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn);
8504         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8505         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8506 
8507         gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst);
8508         write_fp_dreg(s, a->rd, tcg_rd);
8509     }
8510     return true;
8511 }
8512 
8513 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a)
8514 {
8515     if (fp_access_check(s)) {
8516         TCGv_i32 tmp = read_fp_sreg(s, a->rn);
8517         TCGv_i32 ahp = get_ahp_flag();
8518         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8519 
8520         gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
8521         /* write_fp_sreg is OK here because top half of result is zero */
8522         write_fp_sreg(s, a->rd, tmp);
8523     }
8524     return true;
8525 }
8526 
8527 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
8528 {
8529     if (fp_access_check(s)) {
8530         TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8531         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8532         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8533 
8534         gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst);
8535         write_fp_sreg(s, a->rd, tcg_rd);
8536     }
8537     return true;
8538 }
8539 
8540 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
8541 {
8542     if (fp_access_check(s)) {
8543         TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8544         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8545         TCGv_i32 ahp = get_ahp_flag();
8546         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8547 
8548         gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
8549         /* write_fp_sreg is OK here because top half of tcg_rd is zero */
8550         write_fp_sreg(s, a->rd, tcg_rd);
8551     }
8552     return true;
8553 }
8554 
8555 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
8556 {
8557     if (fp_access_check(s)) {
8558         TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
8559         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8560         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
8561         TCGv_i32 tcg_ahp = get_ahp_flag();
8562 
8563         gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
8564         write_fp_sreg(s, a->rd, tcg_rd);
8565     }
8566     return true;
8567 }
8568 
8569 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
8570 {
8571     if (fp_access_check(s)) {
8572         TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
8573         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8574         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
8575         TCGv_i32 tcg_ahp = get_ahp_flag();
8576 
8577         gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
8578         write_fp_dreg(s, a->rd, tcg_rd);
8579     }
8580     return true;
8581 }
8582 
8583 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
8584                            TCGv_i64 tcg_int, bool is_signed)
8585 {
8586     TCGv_ptr tcg_fpstatus;
8587     TCGv_i32 tcg_shift, tcg_single;
8588     TCGv_i64 tcg_double;
8589 
8590     tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
8591     tcg_shift = tcg_constant_i32(shift);
8592 
8593     switch (esz) {
8594     case MO_64:
8595         tcg_double = tcg_temp_new_i64();
8596         if (is_signed) {
8597             gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
8598         } else {
8599             gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
8600         }
8601         write_fp_dreg(s, rd, tcg_double);
8602         break;
8603 
8604     case MO_32:
8605         tcg_single = tcg_temp_new_i32();
8606         if (is_signed) {
8607             gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
8608         } else {
8609             gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
8610         }
8611         write_fp_sreg(s, rd, tcg_single);
8612         break;
8613 
8614     case MO_16:
8615         tcg_single = tcg_temp_new_i32();
8616         if (is_signed) {
8617             gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
8618         } else {
8619             gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
8620         }
8621         write_fp_sreg(s, rd, tcg_single);
8622         break;
8623 
8624     default:
8625         g_assert_not_reached();
8626     }
8627     return true;
8628 }
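
/*
 * Illustrative note: the shift operand makes these helpers double as
 * the fixed-point converts, computing  fp = int / 2^shift; the plain
 * integer SCVTF/UCVTF forms pass shift == 0.  E.g. with shift == 8 the
 * integer 0x180 (384) converts to 1.5.
 */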
8629 
8630 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed)
8631 {
8632     TCGv_i64 tcg_int;
8633     int check = fp_access_check_scalar_hsd(s, a->esz);
8634 
8635     if (check <= 0) {
8636         return check == 0;
8637     }
8638 
8639     if (a->sf) {
8640         tcg_int = cpu_reg(s, a->rn);
8641     } else {
8642         tcg_int = read_cpu_reg(s, a->rn, true);
8643         if (is_signed) {
8644             tcg_gen_ext32s_i64(tcg_int, tcg_int);
8645         } else {
8646             tcg_gen_ext32u_i64(tcg_int, tcg_int);
8647         }
8648     }
8649     return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
8650 }
8651 
8652 TRANS(SCVTF_g, do_cvtf_g, a, true)
8653 TRANS(UCVTF_g, do_cvtf_g, a, false)
8654 
8655 /*
8656  * [US]CVTF (vector), scalar version.
8657  * Which sounds weird, but really just means input from fp register
8658  * instead of input from general register.  Input and output element
8659  * size are always equal.
8660  */
8661 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed)
8662 {
8663     TCGv_i64 tcg_int;
8664     int check = fp_access_check_scalar_hsd(s, a->esz);
8665 
8666     if (check <= 0) {
8667         return check == 0;
8668     }
8669 
8670     tcg_int = tcg_temp_new_i64();
8671     read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0));
8672     return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
8673 }
8674 
8675 TRANS(SCVTF_f, do_cvtf_f, a, true)
8676 TRANS(UCVTF_f, do_cvtf_f, a, false)
8677 
8678 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz,
8679                            TCGv_i64 tcg_out, int shift, int rn,
8680                            ARMFPRounding rmode)
8681 {
8682     TCGv_ptr tcg_fpstatus;
8683     TCGv_i32 tcg_shift, tcg_rmode, tcg_single;
8684 
8685     tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
8686     tcg_shift = tcg_constant_i32(shift);
8687     tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
8688 
8689     switch (esz) {
8690     case MO_64:
8691         read_vec_element(s, tcg_out, rn, 0, MO_64);
8692         switch (out) {
8693         case MO_64 | MO_SIGN:
8694             gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
8695             break;
8696         case MO_64:
8697             gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
8698             break;
8699         case MO_32 | MO_SIGN:
8700             gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
8701             break;
8702         case MO_32:
8703             gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
8704             break;
8705         default:
8706             g_assert_not_reached();
8707         }
8708         break;
8709 
8710     case MO_32:
8711         tcg_single = read_fp_sreg(s, rn);
8712         switch (out) {
8713         case MO_64 | MO_SIGN:
8714             gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
8715             break;
8716         case MO_64:
8717             gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
8718             break;
8719         case MO_32 | MO_SIGN:
8720             gen_helper_vfp_tosls(tcg_single, tcg_single,
8721                                  tcg_shift, tcg_fpstatus);
8722             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
8723             break;
8724         case MO_32:
8725             gen_helper_vfp_touls(tcg_single, tcg_single,
8726                                  tcg_shift, tcg_fpstatus);
8727             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
8728             break;
8729         default:
8730             g_assert_not_reached();
8731         }
8732         break;
8733 
8734     case MO_16:
8735         tcg_single = read_fp_hreg(s, rn);
8736         switch (out) {
8737         case MO_64 | MO_SIGN:
8738             gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
8739             break;
8740         case MO_64:
8741             gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
8742             break;
8743         case MO_32 | MO_SIGN:
8744             gen_helper_vfp_toslh(tcg_single, tcg_single,
8745                                  tcg_shift, tcg_fpstatus);
8746             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
8747             break;
8748         case MO_32:
8749             gen_helper_vfp_toulh(tcg_single, tcg_single,
8750                                  tcg_shift, tcg_fpstatus);
8751             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
8752             break;
8753         case MO_16 | MO_SIGN:
8754             gen_helper_vfp_toshh(tcg_single, tcg_single,
8755                                  tcg_shift, tcg_fpstatus);
8756             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
8757             break;
8758         case MO_16:
8759             gen_helper_vfp_touhh(tcg_single, tcg_single,
8760                                  tcg_shift, tcg_fpstatus);
8761             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
8762             break;
8763         default:
8764             g_assert_not_reached();
8765         }
8766         break;
8767 
8768     default:
8769         g_assert_not_reached();
8770     }
8771 
8772     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
8773 }
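
/*
 * Illustrative note: the "out" MemOp packs both the destination width
 * and its signedness, e.g. MO_32 | MO_SIGN selects a to-signed-word
 * helper (vfp_tosl*) while plain MO_64 selects to-unsigned-quad
 * (vfp_touq*).  Results narrower than 64 bits are zero-extended into
 * tcg_out so that callers always see a canonical i64.
 */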
8774 
8775 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a,
8776                       ARMFPRounding rmode, bool is_signed)
8777 {
8778     TCGv_i64 tcg_int;
8779     int check = fp_access_check_scalar_hsd(s, a->esz);
8780 
8781     if (check <= 0) {
8782         return check == 0;
8783     }
8784 
8785     tcg_int = cpu_reg(s, a->rd);
8786     do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? MO_SIGN : 0),
8787                    a->esz, tcg_int, a->shift, a->rn, rmode);
8788 
8789     if (!a->sf) {
8790         tcg_gen_ext32u_i64(tcg_int, tcg_int);
8791     }
8792     return true;
8793 }
8794 
8795 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true)
8796 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false)
8797 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true)
8798 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false)
8799 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true)
8800 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false)
8801 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true)
8802 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false)
8803 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true)
8804 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false)
8805 
8806 /*
8807  * FCVT* (vector), scalar version.
8808  * Which sounds weird, but really just means output to fp register
8809  * instead of output to a general register.  Input and output element
8810  * sizes are always equal.
8811  */
8812 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a,
8813                       ARMFPRounding rmode, bool is_signed)
8814 {
8815     TCGv_i64 tcg_int;
8816     int check = fp_access_check_scalar_hsd(s, a->esz);
8817 
8818     if (check <= 0) {
8819         return check == 0;
8820     }
8821 
8822     tcg_int = tcg_temp_new_i64();
8823     do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0),
8824                    a->esz, tcg_int, a->shift, a->rn, rmode);
8825 
8826     clear_vec(s, a->rd);
8827     write_vec_element(s, tcg_int, a->rd, 0, a->esz);
8828     return true;
8829 }
8830 
8831 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true)
8832 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false)
8833 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true)
8834 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false)
8835 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true)
8836 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false)
8837 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true)
8838 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false)
8839 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true)
8840 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false)
8841 
8842 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a)
8843 {
8844     if (!dc_isar_feature(aa64_jscvt, s)) {
8845         return false;
8846     }
8847     if (fp_access_check(s)) {
8848         TCGv_i64 t = read_fp_dreg(s, a->rn);
8849         TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64);
8850 
8851         gen_helper_fjcvtzs(t, t, fpstatus);
8852 
8853         tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t);
8854         tcg_gen_extrh_i64_i32(cpu_ZF, t);
8855         tcg_gen_movi_i32(cpu_CF, 0);
8856         tcg_gen_movi_i32(cpu_NF, 0);
8857         tcg_gen_movi_i32(cpu_VF, 0);
8858     }
8859     return true;
8860 }
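
/*
 * Illustrative note: FJCVTZS implements the JavaScript-style conversion
 * to a 32-bit integer.  The helper returns the result in the low half
 * of t and an exactness indication in the high half, which lands in ZF:
 * Z is set (cpu_ZF == 0) exactly when the conversion was exact and in
 * range, and N, C and V are cleared.
 */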
8861 
8862 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a)
8863 {
8864     if (!dc_isar_feature(aa64_fp16, s)) {
8865         return false;
8866     }
8867     if (fp_access_check(s)) {
8868         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8869         TCGv_i64 tmp = tcg_temp_new_i64();
8870         tcg_gen_ext16u_i64(tmp, tcg_rn);
8871         write_fp_dreg(s, a->rd, tmp);
8872     }
8873     return true;
8874 }
8875 
8876 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a)
8877 {
8878     if (fp_access_check(s)) {
8879         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8880         TCGv_i64 tmp = tcg_temp_new_i64();
8881         tcg_gen_ext32u_i64(tmp, tcg_rn);
8882         write_fp_dreg(s, a->rd, tmp);
8883     }
8884     return true;
8885 }
8886 
8887 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a)
8888 {
8889     if (fp_access_check(s)) {
8890         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8891         write_fp_dreg(s, a->rd, tcg_rn);
8892     }
8893     return true;
8894 }
8895 
8896 static bool trans_FMOV_ux(DisasContext *s, arg_rr *a)
8897 {
8898     if (fp_access_check(s)) {
8899         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8900         tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd));
8901         clear_vec_high(s, true, a->rd);
8902     }
8903     return true;
8904 }
8905 
8906 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a)
8907 {
8908     if (!dc_isar_feature(aa64_fp16, s)) {
8909         return false;
8910     }
8911     if (fp_access_check(s)) {
8912         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8913         tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16));
8914     }
8915     return true;
8916 }
8917 
8918 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a)
8919 {
8920     if (fp_access_check(s)) {
8921         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8922         tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32));
8923     }
8924     return true;
8925 }
8926 
8927 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a)
8928 {
8929     if (fp_access_check(s)) {
8930         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8931         tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64));
8932     }
8933     return true;
8934 }
8935 
8936 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a)
8937 {
8938     if (fp_access_check(s)) {
8939         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8940         tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn));
8941     }
8942     return true;
8943 }
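
/*
 * Illustrative note: the _ux/_xu forms are FMOV Vd.D[1], Xn and
 * FMOV Xd, Vn.D[1], i.e. they move the upper 64 bits of the 128-bit
 * vector register, complementing the _dx/_xd forms so a full quadword
 * can be transferred through general registers.
 */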
8944 
8945 typedef struct ENVScalar1 {
8946     NeonGenOneOpEnvFn *gen_bhs[3];
8947     NeonGenOne64OpEnvFn *gen_d;
8948 } ENVScalar1;
8949 
8950 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f)
8951 {
8952     if (!fp_access_check(s)) {
8953         return true;
8954     }
8955     if (a->esz == MO_64) {
8956         TCGv_i64 t = read_fp_dreg(s, a->rn);
8957         f->gen_d(t, tcg_env, t);
8958         write_fp_dreg(s, a->rd, t);
8959     } else {
8960         TCGv_i32 t = tcg_temp_new_i32();
8961 
8962         read_vec_element_i32(s, t, a->rn, 0, a->esz);
8963         f->gen_bhs[a->esz](t, tcg_env, t);
8964         write_fp_sreg(s, a->rd, t);
8965     }
8966     return true;
8967 }
8968 
8969 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f)
8970 {
8971     if (a->esz == MO_64 && !a->q) {
8972         return false;
8973     }
8974     if (!fp_access_check(s)) {
8975         return true;
8976     }
8977     if (a->esz == MO_64) {
8978         TCGv_i64 t = tcg_temp_new_i64();
8979 
8980         for (int i = 0; i < 2; ++i) {
8981             read_vec_element(s, t, a->rn, i, MO_64);
8982             f->gen_d(t, tcg_env, t);
8983             write_vec_element(s, t, a->rd, i, MO_64);
8984         }
8985     } else {
8986         TCGv_i32 t = tcg_temp_new_i32();
8987         int n = (a->q ? 16 : 8) >> a->esz;
8988 
8989         for (int i = 0; i < n; ++i) {
8990             read_vec_element_i32(s, t, a->rn, i, a->esz);
8991             f->gen_bhs[a->esz](t, tcg_env, t);
8992             write_vec_element_i32(s, t, a->rd, i, a->esz);
8993         }
8994     }
8995     clear_vec_high(s, a->q, a->rd);
8996     return true;
8997 }
8998 
8999 static const ENVScalar1 f_scalar_sqabs = {
9000     { gen_helper_neon_qabs_s8,
9001       gen_helper_neon_qabs_s16,
9002       gen_helper_neon_qabs_s32 },
9003     gen_helper_neon_qabs_s64,
9004 };
9005 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs)
9006 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs)
9007 
9008 static const ENVScalar1 f_scalar_sqneg = {
9009     { gen_helper_neon_qneg_s8,
9010       gen_helper_neon_qneg_s16,
9011       gen_helper_neon_qneg_s32 },
9012     gen_helper_neon_qneg_s64,
9013 };
9014 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg)
9015 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg)
9016 
9017 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f)
9018 {
9019     if (fp_access_check(s)) {
9020         TCGv_i64 t = read_fp_dreg(s, a->rn);
9021         f(t, t);
9022         write_fp_dreg(s, a->rd, t);
9023     }
9024     return true;
9025 }
9026 
9027 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64)
9028 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64)
9029 
9030 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond)
9031 {
9032     if (fp_access_check(s)) {
9033         TCGv_i64 t = read_fp_dreg(s, a->rn);
9034         tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0));
9035         write_fp_dreg(s, a->rd, t);
9036     }
9037     return true;
9038 }
9039 
9040 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT)
9041 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE)
9042 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE)
9043 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT)
9044 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ)
9045 
9046 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a,
9047                                    ArithOneOp * const fn[3])
9048 {
9049     if (a->esz == MO_64) {
9050         return false;
9051     }
9052     if (fp_access_check(s)) {
9053         TCGv_i64 t = tcg_temp_new_i64();
9054 
9055         read_vec_element(s, t, a->rn, 0, a->esz + 1);
9056         fn[a->esz](t, t);
9057         clear_vec(s, a->rd);
9058         write_vec_element(s, t, a->rd, 0, a->esz);
9059     }
9060     return true;
9061 }
9062 
9063 #define WRAP_ENV(NAME) \
9064     static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \
9065     { gen_helper_##NAME(d, tcg_env, n); }
9066 
9067 WRAP_ENV(neon_unarrow_sat8)
9068 WRAP_ENV(neon_unarrow_sat16)
9069 WRAP_ENV(neon_unarrow_sat32)
9070 
9071 static ArithOneOp * const f_scalar_sqxtun[] = {
9072     gen_neon_unarrow_sat8,
9073     gen_neon_unarrow_sat16,
9074     gen_neon_unarrow_sat32,
9075 };
9076 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun)
9077 
9078 WRAP_ENV(neon_narrow_sat_s8)
9079 WRAP_ENV(neon_narrow_sat_s16)
9080 WRAP_ENV(neon_narrow_sat_s32)
9081 
9082 static ArithOneOp * const f_scalar_sqxtn[] = {
9083     gen_neon_narrow_sat_s8,
9084     gen_neon_narrow_sat_s16,
9085     gen_neon_narrow_sat_s32,
9086 };
9087 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn)
9088 
9089 WRAP_ENV(neon_narrow_sat_u8)
9090 WRAP_ENV(neon_narrow_sat_u16)
9091 WRAP_ENV(neon_narrow_sat_u32)
9092 
9093 static ArithOneOp * const f_scalar_uqxtn[] = {
9094     gen_neon_narrow_sat_u8,
9095     gen_neon_narrow_sat_u16,
9096     gen_neon_narrow_sat_u32,
9097 };
9098 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn)
9099 
9100 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
9101 {
9102     /*
9103      * 64-bit to 32-bit float conversion
9104      * with von Neumann rounding (round to odd)
9105      */
9106     TCGv_i32 tmp = tcg_temp_new_i32();
9107     gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
9108     tcg_gen_extu_i32_i64(d, tmp);
9109 }
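
/*
 * Illustrative note on round-to-odd: when the f64 value is not exactly
 * representable as f32, the result's least-significant mantissa bit is
 * forced to 1.  A later rounding to an even narrower format then behaves
 * as if done in one step, which is the point of FCVTXN: it allows
 * f64 -> f32 -> f16 without double-rounding error.
 */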
9110 
9111 static ArithOneOp * const f_scalar_fcvtxn[] = {
9112     NULL,
9113     NULL,
9114     gen_fcvtxn_sd,
9115 };
9116 TRANS(FCVTXN_s, do_2misc_narrow_scalar, a, f_scalar_fcvtxn)
9117 
9118 #undef WRAP_ENV
9119 
9120 static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9121 {
9122     if (!a->q && a->esz == MO_64) {
9123         return false;
9124     }
9125     if (fp_access_check(s)) {
9126         gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9127     }
9128     return true;
9129 }
9130 
9131 TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs)
9132 TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg)
9133 TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not)
9134 TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt)
9135 TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit)
9136 TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0)
9137 TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0)
9138 TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0)
9139 TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0)
9140 TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0)
9141 TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16)
9142 TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32)
9143 TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe)
9144 TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte)
9145 
9146 static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9147 {
9148     if (a->esz == MO_64) {
9149         return false;
9150     }
9151     if (fp_access_check(s)) {
9152         gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9153     }
9154     return true;
9155 }
9156 
9157 TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
9158 TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
9159 TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
9160 TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
9161 TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
9162 TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
9163 TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)
9164 
9165 static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a,
9166                                    ArithOneOp * const fn[3])
9167 {
9168     if (a->esz == MO_64) {
9169         return false;
9170     }
9171     if (fp_access_check(s)) {
9172         TCGv_i64 t0 = tcg_temp_new_i64();
9173         TCGv_i64 t1 = tcg_temp_new_i64();
9174 
9175         read_vec_element(s, t0, a->rn, 0, MO_64);
9176         read_vec_element(s, t1, a->rn, 1, MO_64);
9177         fn[a->esz](t0, t0);
9178         fn[a->esz](t1, t1);
9179         write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32);
9180         write_vec_element(s, t1, a->rd, a->q ? 3 : 1, MO_32);
9181         clear_vec_high(s, a->q, a->rd);
9182     }
9183     return true;
9184 }
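
/*
 * Illustrative note: each fn narrows one 64-bit input to at most 32 bits
 * of packed results, so the two halves of the source always produce two
 * MO_32 writes.  With q == 0 (e.g. XTN) they land in the low half of Vd;
 * with q == 1 (e.g. XTN2) in elements 2 and 3, i.e. the high half,
 * leaving the low half intact.
 */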
9185 
9186 static ArithOneOp * const f_scalar_xtn[] = {
9187     gen_helper_neon_narrow_u8,
9188     gen_helper_neon_narrow_u16,
9189     tcg_gen_ext32u_i64,
9190 };
9191 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn)
9192 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun)
9193 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn)
9194 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn)
9195 
9196 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9197 {
9198     TCGv_i32 tcg_lo = tcg_temp_new_i32();
9199     TCGv_i32 tcg_hi = tcg_temp_new_i32();
9200     TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9201     TCGv_i32 ahp = get_ahp_flag();
9202 
9203     tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n);
9204     gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9205     gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9206     tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16);
9207     tcg_gen_extu_i32_i64(d, tcg_lo);
9208 }
9209 
9210 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n)
9211 {
9212     TCGv_i32 tmp = tcg_temp_new_i32();
9213     TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9214 
9215     gen_helper_vfp_fcvtsd(tmp, n, fpst);
9216     tcg_gen_extu_i32_i64(d, tmp);
9217 }
9218 
9219 static ArithOneOp * const f_vector_fcvtn[] = {
9220     NULL,
9221     gen_fcvtn_hs,
9222     gen_fcvtn_sd,
9223 };
9224 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn)
9225 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn)
9226 
9227 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9228 {
9229     TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9230     TCGv_i32 tmp = tcg_temp_new_i32();
9231     gen_helper_bfcvt_pair(tmp, n, fpst);
9232     tcg_gen_extu_i32_i64(d, tmp);
9233 }
9234 
9235 static ArithOneOp * const f_vector_bfcvtn[] = {
9236     NULL,
9237     gen_bfcvtn_hs,
9238     NULL,
9239 };
9240 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, f_vector_bfcvtn)
9241 
9242 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
9243 {
9244     static NeonGenWidenFn * const widenfns[3] = {
9245         gen_helper_neon_widen_u8,
9246         gen_helper_neon_widen_u16,
9247         tcg_gen_extu_i32_i64,
9248     };
9249     NeonGenWidenFn *widenfn;
9250     TCGv_i64 tcg_res[2];
9251     TCGv_i32 tcg_op;
9252     int part, pass;
9253 
9254     if (a->esz == MO_64) {
9255         return false;
9256     }
9257     if (!fp_access_check(s)) {
9258         return true;
9259     }
9260 
9261     tcg_op = tcg_temp_new_i32();
9262     widenfn = widenfns[a->esz];
9263     part = a->q ? 2 : 0;
9264 
9265     for (pass = 0; pass < 2; pass++) {
9266         read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32);
9267         tcg_res[pass] = tcg_temp_new_i64();
9268         widenfn(tcg_res[pass], tcg_op);
9269         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz);
9270     }
9271 
9272     for (pass = 0; pass < 2; pass++) {
9273         write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
9274     }
9275     return true;
9276 }
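
/*
 * Illustrative note: SHLL's shift amount is implied by the element type
 * rather than encoded: each element is widened to double width and then
 * shifted left by exactly the original element size (8 << esz bits),
 * e.g. SHLL Vd.8H, Vn.8B, #8.
 */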
9277 
9278 static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9279 {
9280     int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9281 
9282     if (check <= 0) {
9283         return check == 0;
9284     }
9285 
9286     gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9287     return true;
9288 }
9289 
9290 TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs)
9291 TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg)
9292 
9293 static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
9294                           const FPScalar1 *f, int rmode)
9295 {
9296     TCGv_i32 tcg_rmode = NULL;
9297     TCGv_ptr fpst;
9298     int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9299 
9300     if (check <= 0) {
9301         return check == 0;
9302     }
9303 
9304     fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9305     if (rmode >= 0) {
9306         tcg_rmode = gen_set_rmode(rmode, fpst);
9307     }
9308 
9309     if (a->esz == MO_64) {
9310         TCGv_i64 t64 = tcg_temp_new_i64();
9311 
9312         for (int pass = 0; pass < 2; ++pass) {
9313             read_vec_element(s, t64, a->rn, pass, MO_64);
9314             f->gen_d(t64, t64, fpst);
9315             write_vec_element(s, t64, a->rd, pass, MO_64);
9316         }
9317     } else {
9318         TCGv_i32 t32 = tcg_temp_new_i32();
9319         void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr)
9320             = (a->esz == MO_16 ? f->gen_h : f->gen_s);
9321 
9322         for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) {
9323             read_vec_element_i32(s, t32, a->rn, pass, a->esz);
9324             gen(t32, t32, fpst);
9325             write_vec_element_i32(s, t32, a->rd, pass, a->esz);
9326         }
9327     }
9328     clear_vec_high(s, a->q, a->rd);
9329 
9330     if (rmode >= 0) {
9331         gen_restore_rmode(tcg_rmode, fpst);
9332     }
9333     return true;
9334 }
9335 
9336 TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1)
9337 
9338 TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
9339 TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF)
9340 TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF)
9341 TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO)
9342 TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
9343 TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1)
9344 TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1)
9345 
9346 TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a,
9347            &f_scalar_frint32, FPROUNDING_ZERO)
9348 TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1)
9349 TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
9350            &f_scalar_frint64, FPROUNDING_ZERO)
9351 TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)
9352 
9353 static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
9354                              int rd, int rn, int data,
9355                              gen_helper_gvec_2_ptr * const fns[3])
9356 {
9357     int check = fp_access_check_vector_hsd(s, is_q, esz);
9358     TCGv_ptr fpst;
9359 
9360     if (check <= 0) {
9361         return check == 0;
9362     }
9363 
9364     fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9365     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
9366                        vec_full_reg_offset(s, rn), fpst,
9367                        is_q ? 16 : 8, vec_full_reg_size(s),
9368                        data, fns[esz - 1]);
9369     return true;
9370 }
9371 
9372 static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
9373     gen_helper_gvec_vcvt_sh,
9374     gen_helper_gvec_vcvt_sf,
9375     gen_helper_gvec_vcvt_sd,
9376 };
9377 TRANS(SCVTF_vi, do_gvec_op2_fpst,
9378       a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v)
9379 TRANS(SCVTF_vf, do_gvec_op2_fpst,
9380       a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v)
9381 
9382 static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = {
9383     gen_helper_gvec_vcvt_uh,
9384     gen_helper_gvec_vcvt_uf,
9385     gen_helper_gvec_vcvt_ud,
9386 };
9387 TRANS(UCVTF_vi, do_gvec_op2_fpst,
9388       a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v)
9389 TRANS(UCVTF_vf, do_gvec_op2_fpst,
9390       a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v)
9391 
9392 static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = {
9393     gen_helper_gvec_vcvt_rz_hs,
9394     gen_helper_gvec_vcvt_rz_fs,
9395     gen_helper_gvec_vcvt_rz_ds,
9396 };
9397 TRANS(FCVTZS_vf, do_gvec_op2_fpst,
9398       a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf)
9399 
9400 static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = {
9401     gen_helper_gvec_vcvt_rz_hu,
9402     gen_helper_gvec_vcvt_rz_fu,
9403     gen_helper_gvec_vcvt_rz_du,
9404 };
9405 TRANS(FCVTZU_vf, do_gvec_op2_fpst,
9406       a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf)
9407 
9408 static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = {
9409     gen_helper_gvec_vcvt_rm_sh,
9410     gen_helper_gvec_vcvt_rm_ss,
9411     gen_helper_gvec_vcvt_rm_sd,
9412 };
9413 
9414 static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = {
9415     gen_helper_gvec_vcvt_rm_uh,
9416     gen_helper_gvec_vcvt_rm_us,
9417     gen_helper_gvec_vcvt_rm_ud,
9418 };
9419 
9420 TRANS(FCVTNS_vi, do_gvec_op2_fpst,
9421       a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi)
9422 TRANS(FCVTNU_vi, do_gvec_op2_fpst,
9423       a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi)
9424 TRANS(FCVTPS_vi, do_gvec_op2_fpst,
9425       a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi)
9426 TRANS(FCVTPU_vi, do_gvec_op2_fpst,
9427       a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi)
9428 TRANS(FCVTMS_vi, do_gvec_op2_fpst,
9429       a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi)
9430 TRANS(FCVTMU_vi, do_gvec_op2_fpst,
9431       a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi)
9432 TRANS(FCVTZS_vi, do_gvec_op2_fpst,
9433       a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi)
9434 TRANS(FCVTZU_vi, do_gvec_op2_fpst,
9435       a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi)
9436 TRANS(FCVTAS_vi, do_gvec_op2_fpst,
9437       a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi)
9438 TRANS(FCVTAU_vi, do_gvec_op2_fpst,
9439       a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi)
9440 
9441 static gen_helper_gvec_2_ptr * const f_fceq0[] = {
9442     gen_helper_gvec_fceq0_h,
9443     gen_helper_gvec_fceq0_s,
9444     gen_helper_gvec_fceq0_d,
9445 };
9446 TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0)
9447 
9448 static gen_helper_gvec_2_ptr * const f_fcgt0[] = {
9449     gen_helper_gvec_fcgt0_h,
9450     gen_helper_gvec_fcgt0_s,
9451     gen_helper_gvec_fcgt0_d,
9452 };
9453 TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0)
9454 
9455 static gen_helper_gvec_2_ptr * const f_fcge0[] = {
9456     gen_helper_gvec_fcge0_h,
9457     gen_helper_gvec_fcge0_s,
9458     gen_helper_gvec_fcge0_d,
9459 };
9460 TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0)
9461 
9462 static gen_helper_gvec_2_ptr * const f_fclt0[] = {
9463     gen_helper_gvec_fclt0_h,
9464     gen_helper_gvec_fclt0_s,
9465     gen_helper_gvec_fclt0_d,
9466 };
9467 TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0)
9468 
9469 static gen_helper_gvec_2_ptr * const f_fcle0[] = {
9470     gen_helper_gvec_fcle0_h,
9471     gen_helper_gvec_fcle0_s,
9472     gen_helper_gvec_fcle0_d,
9473 };
9474 TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0)
9475 
9476 static gen_helper_gvec_2_ptr * const f_frecpe[] = {
9477     gen_helper_gvec_frecpe_h,
9478     gen_helper_gvec_frecpe_s,
9479     gen_helper_gvec_frecpe_d,
9480 };
9481 TRANS(FRECPE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe)
9482 
9483 static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
9484     gen_helper_gvec_frsqrte_h,
9485     gen_helper_gvec_frsqrte_s,
9486     gen_helper_gvec_frsqrte_d,
9487 };
9488 TRANS(FRSQRTE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte)
9489 
9490 static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
9491 {
9492     /* Handle 2-reg-misc ops which are widening (so each size element
9493      * in the source becomes a 2*size element in the destination).
9494      * The only instruction like this is FCVTL.
9495      */
9496     int pass;
9497     TCGv_ptr fpst;
9498 
9499     if (!fp_access_check(s)) {
9500         return true;
9501     }
9502 
9503     if (a->esz == MO_64) {
9504         /* 32 -> 64 bit fp conversion */
9505         TCGv_i64 tcg_res[2];
9506         TCGv_i32 tcg_op = tcg_temp_new_i32();
9507         int srcelt = a->q ? 2 : 0;
9508 
9509         fpst = fpstatus_ptr(FPST_A64);
9510 
9511         for (pass = 0; pass < 2; pass++) {
9512             tcg_res[pass] = tcg_temp_new_i64();
9513             read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
9514             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst);
9515         }
9516         for (pass = 0; pass < 2; pass++) {
9517             write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
9518         }
9519     } else {
9520         /* 16 -> 32 bit fp conversion */
9521         int srcelt = a->q ? 4 : 0;
9522         TCGv_i32 tcg_res[4];
9523         TCGv_i32 ahp = get_ahp_flag();
9524 
9525         fpst = fpstatus_ptr(FPST_A64_F16);
9526 
9527         for (pass = 0; pass < 4; pass++) {
9528             tcg_res[pass] = tcg_temp_new_i32();
9529             read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
9530             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9531                                            fpst, ahp);
9532         }
9533         for (pass = 0; pass < 4; pass++) {
9534             write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32);
9535         }
9536     }
9537     clear_vec_high(s, true, a->rd);
9538     return true;
9539 }
9540 
9541 static bool trans_OK(DisasContext *s, arg_OK *a)
9542 {
9543     return true;
9544 }
9545 
9546 static bool trans_FAIL(DisasContext *s, arg_OK *a)
9547 {
9548     s->is_nonstreaming = true;
9549     return true;
9550 }
9551 
9552 /**
9553  * btype_destination_ok:
9554  * @insn: The instruction at the branch destination
9555  * @bt: SCTLR_ELx.BT
9556  * @btype: PSTATE.BTYPE, known to be non-zero on entry
9557  *
9558  * On a guarded page, there are a limited number of insns
9559  * that may be present at the branch target:
9560  *   - branch target identifiers,
9561  *   - paciasp, pacibsp,
9562  *   - BRK insn,
9563  *   - HLT insn.
9564  * Anything else causes a Branch Target Exception.
9565  *
9566  * Return true if the branch is compatible, false to raise BTITRAP.
9567  */
9568 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
9569 {
9570     if ((insn & 0xfffff01fu) == 0xd503201fu) {
9571         /* HINT space */
9572         switch (extract32(insn, 5, 7)) {
9573         case 0b011001: /* PACIASP */
9574         case 0b011011: /* PACIBSP */
9575             /*
9576              * If SCTLR_ELx.BT, then PACI*SP are not compatible
9577              * with btype == 3.  Otherwise all btype are ok.
9578              */
9579             return !bt || btype != 3;
9580         case 0b100000: /* BTI */
9581             /* Not compatible with any btype.  */
9582             return false;
9583         case 0b100010: /* BTI c */
9584             /* Not compatible with btype == 3 */
9585             return btype != 3;
9586         case 0b100100: /* BTI j */
9587             /* Not compatible with btype == 2 */
9588             return btype != 2;
9589         case 0b100110: /* BTI jc */
9590             /* Compatible with any btype.  */
9591             return true;
9592         }
9593     } else {
9594         switch (insn & 0xffe0001fu) {
9595         case 0xd4200000u: /* BRK */
9596         case 0xd4400000u: /* HLT */
9597             /* Give priority to the breakpoint exception.  */
9598             return true;
9599         }
9600     }
9601     return false;
9602 }
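
/*
 * Worked example of the HINT matching above: 0xd503201f is HINT #0
 * (NOP), and masking with 0xfffff01f keeps everything except CRm:op2,
 * so extract32(insn, 5, 7) recovers the hint number.  BTI c encodes as
 * 0xd503245f: (0xd503245f & 0xfffff01f) == 0xd503201f, and bits [11:5]
 * are 34 == 0b0100010.
 */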
9603 
9604 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
9605                                           CPUState *cpu)
9606 {
9607     DisasContext *dc = container_of(dcbase, DisasContext, base);
9608     CPUARMState *env = cpu_env(cpu);
9609     ARMCPU *arm_cpu = env_archcpu(env);
9610     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
9611     int bound, core_mmu_idx;
9612 
9613     dc->isar = &arm_cpu->isar;
9614     dc->condjmp = 0;
9615     dc->pc_save = dc->base.pc_first;
9616     dc->aarch64 = true;
9617     dc->thumb = false;
9618     dc->sctlr_b = 0;
9619     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
9620     dc->condexec_mask = 0;
9621     dc->condexec_cond = 0;
9622     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
9623     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
9624     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
9625     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
9626     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
9627     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
9628 #if !defined(CONFIG_USER_ONLY)
9629     dc->user = (dc->current_el == 0);
9630 #endif
9631     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
9632     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
9633     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
9634     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
9635     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
9636     dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
9637     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
9638     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
9639     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
9640     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
9641     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
9642     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
9643     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
9644     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
9645     dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
9646     dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
9647     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
9648     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
9649     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
9650     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
9651     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
9652     dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
9653     dc->nv = EX_TBFLAG_A64(tb_flags, NV);
9654     dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
9655     dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
9656     dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
9657     dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
9658     dc->vec_len = 0;
9659     dc->vec_stride = 0;
9660     dc->cp_regs = arm_cpu->cp_regs;
9661     dc->features = env->features;
9662     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
9663     dc->gm_blocksize = arm_cpu->gm_blocksize;
9664 
9665 #ifdef CONFIG_USER_ONLY
9666     /* In sve_probe_page, we assume TBI is enabled. */
9667     tcg_debug_assert(dc->tbid & 1);
9668 #endif
9669 
9670     dc->lse2 = dc_isar_feature(aa64_lse2, dc);
9671 
9672     /* Single step state. The code-generation logic here is:
9673      *  SS_ACTIVE == 0:
9674      *   generate code with no special handling for single-stepping (except
9675      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
9676      *   this happens anyway because those changes are all system register or
9677      *   PSTATE writes).
9678      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
9679      *   emit code for one insn
9680      *   emit code to clear PSTATE.SS
9681      *   emit code to generate software step exception for completed step
9682      *   end TB (as usual for having generated an exception)
9683      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
9684      *   emit code to generate a software step exception
9685      *   end the TB
9686      */
9687     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
9688     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
9689     dc->is_ldex = false;
9690 
9691     /* Bound the number of insns to execute to those left on the page.  */
9692     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
9693 
9694     /* If architectural single step active, limit to 1.  */
9695     if (dc->ss_active) {
9696         bound = 1;
9697     }
9698     dc->base.max_insns = MIN(dc->base.max_insns, bound);
9699 }
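
/*
 * Worked example for the page bound above: -(pc | TARGET_PAGE_MASK) is
 * the number of bytes left on the current page.  With 4K pages
 * (TARGET_PAGE_MASK == ~0xfff) and pc ending in 0xf80, pc | MASK is
 * ...ffffff80, whose negation is 0x80, i.e. 32 remaining 4-byte insns.
 */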
9700 
9701 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
9702 {
9703 }
9704 
9705 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
9706 {
9707     DisasContext *dc = container_of(dcbase, DisasContext, base);
9708     target_ulong pc_arg = dc->base.pc_next;
9709 
9710     if (tb_cflags(dcbase->tb) & CF_PCREL) {
9711         pc_arg &= ~TARGET_PAGE_MASK;
9712     }
9713     tcg_gen_insn_start(pc_arg, 0, 0);
9714     dc->insn_start_updated = false;
9715 }
9716 
9717 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
9718 {
9719     DisasContext *s = container_of(dcbase, DisasContext, base);
9720     CPUARMState *env = cpu_env(cpu);
9721     uint64_t pc = s->base.pc_next;
9722     uint32_t insn;
9723 
9724     /* Singlestep exceptions have the highest priority. */
9725     if (s->ss_active && !s->pstate_ss) {
9726         /* Singlestep state is Active-pending.
9727          * If we're in this state at the start of a TB then either
9728          *  a) we just took an exception to an EL which is being debugged
9729          *     and this is the first insn in the exception handler
9730          *  b) debug exceptions were masked and we just unmasked them
9731          *     without changing EL (eg by clearing PSTATE.D)
9732          * In either case we're going to take a swstep exception in the
9733          * "did not step an insn" case, and so the syndrome ISV and EX
9734          * bits should be zero.
9735          */
9736         assert(s->base.num_insns == 1);
9737         gen_swstep_exception(s, 0, 0);
9738         s->base.is_jmp = DISAS_NORETURN;
9739         s->base.pc_next = pc + 4;
9740         return;
9741     }
9742 
9743     if (pc & 3) {
9744         /*
9745          * PC alignment fault.  This has priority over the instruction abort
9746          * that we would receive from a translation fault via arm_ldl_code.
9747          * This should only be possible after an indirect branch, at the
9748          * start of the TB.
9749          */
9750         assert(s->base.num_insns == 1);
9751         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
9752         s->base.is_jmp = DISAS_NORETURN;
9753         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
9754         return;
9755     }
9756 
    s->pc_curr = pc;
    insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next = pc + 4;

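    /*
     * Each insn must perform its own FP/SVE access check before it
     * touches vector state; these flags track whether that check has
     * already been emitted for the insn being decoded.
     */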
    s->fp_access_checked = false;
    s->sve_access_checked = false;

    if (s->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /* First insn can have btype set to non-zero.  */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above almost
             * everything else.  This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             *
             * We can check all but the guarded page check here;
             * defer the latter to a helper.
             */
            if (s->btype != 0
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_helper_guarded_page_check(tcg_env);
            }
        } else {
            /* Not the first insn: btype must be 0.  */
            tcg_debug_assert(s->btype == 0);
        }
    }

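    /*
     * When streaming-SVE mode can trap, run the generated SME FA64
     * lookup first: it flags insns that are illegal in Streaming SVE
     * mode by setting s->is_nonstreaming, which the later access
     * checks use to raise the SME trap instead of executing the insn.
     */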
    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }

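    /*
     * Try each generated decodetree decoder in turn; an insn that
     * none of them claims is an unallocated encoding.
     */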
    if (!disas_a64(s, insn) &&
        !disas_sme(s, insn) &&
        !disas_sve(s, insn)) {
        unallocated_encoding(s);
    }

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}

static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->ss_active)) {
        /*
         * Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            gen_step_complete_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
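        /*
         * Straight-line end of the TB: chain directly to the TB
         * for the next insn, at pc_curr + 4.
         */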
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, 4);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_update_pc(dc, 4);
            gen_helper_wfe(tcg_env);
            break;
        case DISAS_YIELD:
            gen_a64_update_pc(dc, 4);
            gen_helper_yield(tcg_env);
            break;
        case DISAS_WFI:
            /*
             * This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            gen_a64_update_pc(dc, 4);
            gen_helper_wfi(tcg_env, tcg_constant_i32(4));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
    }
}

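/* Hooks invoked by the generic translator_loop() for each AArch64 TB. */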
const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
};