xref: /openbmc/qemu/target/arm/tcg/translate-a64.c (revision 66ec38b6fa593afcc26a2dfb5d1f9871dd15f527)
1 /*
2  *  AArch64 translation
3  *
4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 #include "exec/target_page.h"
21 #include "translate.h"
22 #include "translate-a64.h"
23 #include "qemu/log.h"
24 #include "arm_ldst.h"
25 #include "semihosting/semihost.h"
26 #include "cpregs.h"
27 
28 static TCGv_i64 cpu_X[32];
29 static TCGv_i64 cpu_gcspr[4];
30 static TCGv_i64 cpu_pc;
31 
32 /* Load/store exclusive handling */
33 static TCGv_i64 cpu_exclusive_high;
34 
35 static const char *regnames[] = {
36     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
37     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
38     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
39     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
40 };
41 
42 enum a64_shift_type {
43     A64_SHIFT_TYPE_LSL = 0,
44     A64_SHIFT_TYPE_LSR = 1,
45     A64_SHIFT_TYPE_ASR = 2,
46     A64_SHIFT_TYPE_ROR = 3
47 };
48 
49 /*
50  * Helpers for extracting complex instruction fields
51  */
52 
53 /*
54  * For load/store with an unsigned 12 bit immediate scaled by the element
55  * size. The input has the immediate field in bits [14:3] and the element
56  * size in [2:0].
57  */
58 static int uimm_scaled(DisasContext *s, int x)
59 {
60     unsigned imm = x >> 3;
61     unsigned scale = extract32(x, 0, 3);
62     return imm << scale;
63 }
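/*
 * Worked example (illustrative): for x = 0x2b the immediate field is 5
 * and the element-size field is 3 (8-byte elements), so the returned
 * byte offset is 5 << 3 = 40.
 */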
64 
65 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
66 static int scale_by_log2_tag_granule(DisasContext *s, int x)
67 {
68     return x << LOG2_TAG_GRANULE;
69 }
70 
71 /*
72  * Include the generated decoders.
73  */
74 
75 #include "decode-sme-fa64.c.inc"
76 #include "decode-a64.c.inc"
77 
78 /* initialize TCG globals.  */
79 void a64_translate_init(void)
80 {
81     static const char gcspr_names[4][12] = {
82         "gcspr_el0", "gcspr_el1", "gcspr_el2", "gcspr_el3"
83     };
84 
85     int i;
86 
87     cpu_pc = tcg_global_mem_new_i64(tcg_env,
88                                     offsetof(CPUARMState, pc),
89                                     "pc");
90     for (i = 0; i < 32; i++) {
91         cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
92                                           offsetof(CPUARMState, xregs[i]),
93                                           regnames[i]);
94     }
95 
96     cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
97         offsetof(CPUARMState, exclusive_high), "exclusive_high");
98 
99     for (i = 0; i < 4; i++) {
100         cpu_gcspr[i] =
101             tcg_global_mem_new_i64(tcg_env,
102                                    offsetof(CPUARMState, cp15.gcspr_el[i]),
103                                    gcspr_names[i]);
104     }
105 }
106 
107 /*
108  * Return the full arm mmu_idx to use for A64 load/store insns which
109  * have a "unprivileged load/store" variant. Those insns access
110  * EL0 if executed from an EL which has control over EL0 (usually
111  * EL1) but behave like normal loads and stores if executed from
112  * elsewhere (e.g. EL3).
113  *
114  * @unpriv : true for the unprivileged encoding; false for the
115  *           normal encoding (in which case we will return the same
116  *           thing as get_mem_index()).
117  */
118 static ARMMMUIdx full_a64_user_mem_index(DisasContext *s, bool unpriv)
119 {
120     /*
121      * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
122      * which is the usual mmu_idx for this cpu state.
123      */
124     ARMMMUIdx useridx = s->mmu_idx;
125 
126     if (unpriv && s->unpriv) {
127         /*
128          * We have pre-computed the condition for AccType_UNPRIV.
129          * Therefore we should never get here with a mmu_idx for
130          * which we do not know the corresponding user mmu_idx.
131          */
132         switch (useridx) {
133         case ARMMMUIdx_E10_1:
134         case ARMMMUIdx_E10_1_PAN:
135             useridx = ARMMMUIdx_E10_0;
136             break;
137         case ARMMMUIdx_E20_2:
138         case ARMMMUIdx_E20_2_PAN:
139             useridx = ARMMMUIdx_E20_0;
140             break;
141         default:
142             g_assert_not_reached();
143         }
144     }
145     return useridx;
146 }
147 
148 /* Return the core mmu_idx per above. */
149 static int core_a64_user_mem_index(DisasContext *s, bool unpriv)
150 {
151     return arm_to_core_mmu_idx(full_a64_user_mem_index(s, unpriv));
152 }
153 
154 /* For a given translation regime, return the core mmu_idx for gcs access. */
155 static int core_gcs_mem_index(ARMMMUIdx armidx)
156 {
157     return arm_to_core_mmu_idx(regime_to_gcs(armidx));
158 }
159 
160 static void set_btype_raw(int val)
161 {
162     tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
163                    offsetof(CPUARMState, btype));
164 }
165 
166 static void set_btype(DisasContext *s, int val)
167 {
168     /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
169     tcg_debug_assert(val >= 1 && val <= 3);
170     set_btype_raw(val);
171     s->btype = -1;
172 }
173 
174 static void reset_btype(DisasContext *s)
175 {
176     if (s->btype != 0) {
177         set_btype_raw(0);
178         s->btype = 0;
179     }
180 }
181 
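/*
 * Compute dest = pc_curr + diff.  When CF_PCREL is set the absolute PC
 * is not known at translate time, so derive it from cpu_pc, which holds
 * the value corresponding to s->pc_save, plus a known delta.
 */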
182 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
183 {
184     assert(s->pc_save != -1);
185     if (tb_cflags(s->base.tb) & CF_PCREL) {
186         tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
187     } else {
188         tcg_gen_movi_i64(dest, s->pc_curr + diff);
189     }
190 }
191 
192 void gen_a64_update_pc(DisasContext *s, target_long diff)
193 {
194     gen_pc_plus_diff(s, cpu_pc, diff);
195     s->pc_save = s->pc_curr + diff;
196 }
197 
198 /*
199  * Handle Top Byte Ignore (TBI) bits.
200  *
201  * If address tagging is enabled via the TCR TBI bits:
202  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
203  *    then the address is zero-extended, clearing bits [63:56]
204  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
205  *    and TBI1 controls addresses with bit 55 == 1.
206  *    If the appropriate TBI bit is set for the address then
207  *    the address is sign-extended from bit 55 into bits [63:56]
208  *
209  * Here we have concatenated TBI{1,0} into tbi.
210  */
211 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
212                                 TCGv_i64 src, int tbi)
213 {
214     if (tbi == 0) {
215         /* Load unmodified address */
216         tcg_gen_mov_i64(dst, src);
217     } else if (!regime_has_2_ranges(s->mmu_idx)) {
218         /* Force tag byte to all zero */
219         tcg_gen_extract_i64(dst, src, 0, 56);
220     } else {
221         /* Sign-extend from bit 55.  */
222         tcg_gen_sextract_i64(dst, src, 0, 56);
223 
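        /*
         * dst now has bits [63:56] equal to bit 55 of src.  ANDing with
         * src keeps that extension only for addresses with bit 55 == 0;
         * ORing keeps it only for bit 55 == 1.  This selects the half of
         * the address space to which the single TBI bit applies.
         */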
224         switch (tbi) {
225         case 1:
226             /* tbi0 but !tbi1: only use the extension if positive */
227             tcg_gen_and_i64(dst, dst, src);
228             break;
229         case 2:
230             /* !tbi0 but tbi1: only use the extension if negative */
231             tcg_gen_or_i64(dst, dst, src);
232             break;
233         case 3:
234             /* tbi0 and tbi1: always use the extension */
235             break;
236         default:
237             g_assert_not_reached();
238         }
239     }
240 }
241 
242 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
243 {
244     /*
245      * If address tagging is enabled for instructions via the TCR TBI bits,
246      * then loading an address into the PC will clear out any tag.
247      */
248     gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
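    /* After an indirect branch the PC is no longer known at translate time. */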
249     s->pc_save = -1;
250 }
251 
252 /*
253  * Handle MTE and/or TBI.
254  *
255  * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
256  * for the tag to be present in the FAR_ELx register.  But for user-only
257  * mode we do not have a TLB with which to implement this, so we must
258  * remove the top byte now.
259  *
260  * Always return a fresh temporary that we can increment independently
261  * of the write-back address.
262  */
263 
264 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
265 {
266     TCGv_i64 clean = tcg_temp_new_i64();
267 #ifdef CONFIG_USER_ONLY
268     gen_top_byte_ignore(s, clean, addr, s->tbid);
269 #else
270     tcg_gen_mov_i64(clean, addr);
271 #endif
272     return clean;
273 }
274 
275 /* Insert a zero tag into src, with the result at dst. */
276 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
277 {
278     tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
279 }
280 
281 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
282                              MMUAccessType acc, int log2_size)
283 {
284     gen_helper_probe_access(tcg_env, ptr,
285                             tcg_constant_i32(acc),
286                             tcg_constant_i32(get_mem_index(s)),
287                             tcg_constant_i32(1 << log2_size));
288 }
289 
290 /*
291  * For MTE, check a single logical or atomic access.  This probes a single
292  * address, the exact one specified.  The size and alignment of the access
293  * is not relevant to MTE, per se, but watchpoints do require the size,
294  * and we want to recognize those before making any other changes to state.
295  */
296 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
297                                       bool is_write, bool tag_checked,
298                                       MemOp memop, bool is_unpriv,
299                                       int core_idx)
300 {
301     if (tag_checked && s->mte_active[is_unpriv]) {
302         TCGv_i64 ret;
303         int desc = 0;
304 
305         desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
306         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
307         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
308         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
309         desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
310         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);
311 
312         ret = tcg_temp_new_i64();
313         gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
314 
315         return ret;
316     }
317     return clean_data_tbi(s, addr);
318 }
319 
320 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
321                         bool tag_checked, MemOp memop)
322 {
323     return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
324                                  false, get_mem_index(s));
325 }
326 
327 /*
328  * For MTE, check multiple logical sequential accesses.
329  */
330 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
331                         bool tag_checked, int total_size, MemOp single_mop)
332 {
333     if (tag_checked && s->mte_active[0]) {
334         TCGv_i64 ret;
335         int desc = 0;
336 
337         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
338         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
339         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
340         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
341         desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
342         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
343 
344         ret = tcg_temp_new_i64();
345         gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
346 
347         return ret;
348     }
349     return clean_data_tbi(s, addr);
350 }
351 
352 /*
353  * Generate the special alignment check that applies to AccType_ATOMIC
354  * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
355  * naturally aligned, but it must not cross a 16-byte boundary.
356  * See AArch64.CheckAlignment().
357  */
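/*
 * For example (illustrative): an 8-byte access to an address with
 * addr % 16 == 12 gives ((addr + imm) & 15) + size == 20 > 16, so it
 * crosses a 16-byte boundary and the unaligned-access helper is called.
 */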
358 static void check_lse2_align(DisasContext *s, int rn, int imm,
359                              bool is_write, MemOp mop)
360 {
361     TCGv_i32 tmp;
362     TCGv_i64 addr;
363     TCGLabel *over_label;
364     MMUAccessType type;
365     int mmu_idx;
366 
367     tmp = tcg_temp_new_i32();
368     tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
369     tcg_gen_addi_i32(tmp, tmp, imm & 15);
370     tcg_gen_andi_i32(tmp, tmp, 15);
371     tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
372 
373     over_label = gen_new_label();
374     tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
375 
376     addr = tcg_temp_new_i64();
377     tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
378 
379     type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
380     mmu_idx = get_mem_index(s);
381     gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
382                                 tcg_constant_i32(mmu_idx));
383 
384     gen_set_label(over_label);
385 
386 }
387 
388 /* Handle the alignment check for AccType_ATOMIC instructions. */
389 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
390 {
391     MemOp size = mop & MO_SIZE;
392 
393     if (size == MO_8) {
394         return mop;
395     }
396 
397     /*
398      * If size == MO_128, this is a LDXP, and the operation is single-copy
399      * atomic for each doubleword, not the entire quadword; it still must
400      * be quadword aligned.
401      */
402     if (size == MO_128) {
403         return finalize_memop_atom(s, MO_128 | MO_ALIGN,
404                                    MO_ATOM_IFALIGN_PAIR);
405     }
406     if (dc_isar_feature(aa64_lse2, s)) {
407         check_lse2_align(s, rn, 0, true, mop);
408     } else {
409         mop |= MO_ALIGN;
410     }
411     return finalize_memop(s, mop);
412 }
413 
414 /* Handle the alignment check for AccType_ORDERED instructions. */
415 static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
416                                  bool is_write, MemOp mop)
417 {
418     MemOp size = mop & MO_SIZE;
419 
420     if (size == MO_8) {
421         return mop;
422     }
423     if (size == MO_128) {
424         return finalize_memop_atom(s, MO_128 | MO_ALIGN,
425                                    MO_ATOM_IFALIGN_PAIR);
426     }
427     if (!dc_isar_feature(aa64_lse2, s)) {
428         mop |= MO_ALIGN;
429     } else if (!s->naa) {
430         check_lse2_align(s, rn, imm, is_write, mop);
431     }
432     return finalize_memop(s, mop);
433 }
434 
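/*
 * Push @value (e.g. a return address) onto the Guarded Control Stack:
 * pre-decrement GCSPR_ELx for the current EL by 8 and store the value
 * at the new top of stack.
 */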
435 static void gen_add_gcs_record(DisasContext *s, TCGv_i64 value)
436 {
437     TCGv_i64 addr = tcg_temp_new_i64();
438     TCGv_i64 gcspr = cpu_gcspr[s->current_el];
439     int mmuidx = core_gcs_mem_index(s->mmu_idx);
440     MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN);
441 
442     tcg_gen_addi_i64(addr, gcspr, -8);
443     tcg_gen_qemu_st_i64(value, clean_data_tbi(s, addr), mmuidx, mop);
444     tcg_gen_mov_i64(gcspr, addr);
445 }
446 
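/*
 * Pop a Guarded Control Stack record and branch to it: load the record
 * from GCSPR_ELx; if return-value checking is enabled and it does not
 * match @target, raise a GCS data check exception.  Otherwise set the
 * PC from the record and advance GCSPR_ELx past it.
 */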
447 static void gen_load_check_gcs_record(DisasContext *s, TCGv_i64 target,
448                                       GCSInstructionType it, int rt)
449 {
450     TCGv_i64 gcspr = cpu_gcspr[s->current_el];
451     int mmuidx = core_gcs_mem_index(s->mmu_idx);
452     MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN);
453     TCGv_i64 rec_va = tcg_temp_new_i64();
454 
455     tcg_gen_qemu_ld_i64(rec_va, clean_data_tbi(s, gcspr), mmuidx, mop);
456 
457     if (s->gcs_rvcen) {
458         TCGLabel *fail_label =
459             delay_exception(s, EXCP_UDEF, syn_gcs_data_check(it, rt));
460 
461         tcg_gen_brcond_i64(TCG_COND_NE, rec_va, target, fail_label);
462     }
463 
464     gen_a64_set_pc(s, rec_va);
465     tcg_gen_addi_i64(gcspr, gcspr, 8);
466 }
467 
468 typedef struct DisasCompare64 {
469     TCGCond cond;
470     TCGv_i64 value;
471 } DisasCompare64;
472 
473 static void a64_test_cc(DisasCompare64 *c64, int cc)
474 {
475     DisasCompare c32;
476 
477     arm_test_cc(&c32, cc);
478 
479     /*
480      * Sign-extend the 32-bit value so that the GE/LT comparisons work
481      * properly.  The NE/EQ comparisons are also fine with this choice.
482      */
483     c64->cond = c32.cond;
484     c64->value = tcg_temp_new_i64();
485     tcg_gen_ext_i32_i64(c64->value, c32.value);
486 }
487 
488 static void gen_rebuild_hflags(DisasContext *s)
489 {
490     gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
491 }
492 
493 static void gen_exception_internal_insn(DisasContext *s, int excp)
494 {
495     gen_a64_update_pc(s, 0);
496     gen_exception_internal(excp);
497     s->base.is_jmp = DISAS_NORETURN;
498 }
499 
500 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
501 {
502     gen_a64_update_pc(s, 0);
503     gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
504     s->base.is_jmp = DISAS_NORETURN;
505 }
506 
507 static void gen_step_complete_exception(DisasContext *s)
508 {
509     /* We just completed a step of an insn. Move from Active-not-pending
510      * to Active-pending, and then also take the swstep exception.
511      * This corresponds to making the (IMPDEF) choice to prioritize
512      * swstep exceptions over asynchronous exceptions taken to an exception
513      * level where debug is disabled. This choice has the advantage that
514      * we do not need to maintain internal state corresponding to the
515      * ISV/EX syndrome bits between completion of the step and generation
516      * of the exception, and our syndrome information is always correct.
517      */
518     gen_ss_advance(s);
519     gen_swstep_exception(s, 1, s->is_ldex);
520     s->base.is_jmp = DISAS_NORETURN;
521 }
522 
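/*
 * Direct block chaining is not used while single-stepping, so that
 * gen_goto_tb() can raise the step-completion exception instead of
 * jumping straight into the next TB.
 */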
523 static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
524 {
525     if (s->ss_active) {
526         return false;
527     }
528     return translator_use_goto_tb(&s->base, dest);
529 }
530 
531 static void gen_goto_tb(DisasContext *s, unsigned tb_slot_idx, int64_t diff)
532 {
533     if (use_goto_tb(s, s->pc_curr + diff)) {
534         /*
535          * For pcrel, the pc must always be up-to-date on entry to
536          * the linked TB, so that it can use simple additions for all
537          * further adjustments.  For !pcrel, the linked TB is compiled
538          * to know its full virtual address, so we can delay the
539          * update to pc to the unlinked path.  A long chain of links
540          * can thus avoid many updates to the PC.
541          */
542         if (tb_cflags(s->base.tb) & CF_PCREL) {
543             gen_a64_update_pc(s, diff);
544             tcg_gen_goto_tb(tb_slot_idx);
545         } else {
546             tcg_gen_goto_tb(tb_slot_idx);
547             gen_a64_update_pc(s, diff);
548         }
549         tcg_gen_exit_tb(s->base.tb, tb_slot_idx);
550         s->base.is_jmp = DISAS_NORETURN;
551     } else {
552         gen_a64_update_pc(s, diff);
553         if (s->ss_active) {
554             gen_step_complete_exception(s);
555         } else {
556             tcg_gen_lookup_and_goto_ptr();
557             s->base.is_jmp = DISAS_NORETURN;
558         }
559     }
560 }
561 
562 /*
563  * Register access functions
564  *
565  * These functions are used for directly accessing a register where
566  * changes to the final register value are likely to be made. If you
567  * need to use a register for temporary calculation (e.g. index type
568  * operations) use the read_* form.
569  *
570  * B1.2.1 Register mappings
571  *
572  * In instruction register encoding 31 can refer to ZR (zero register) or
573  * the SP (stack pointer) depending on context. In QEMU's case we map SP
574  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
575  * This is the point of the _sp forms.
576  */
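/*
 * Illustrative example: a data-processing insn would read its sources
 * with read_cpu_reg(s, rn, sf) (register 31 reads as zero) and write the
 * result to cpu_reg(s, rd), where a write to register 31 goes to a
 * discarded temporary, while address-generation code uses the _sp forms
 * so that register 31 means SP.
 */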
577 TCGv_i64 cpu_reg(DisasContext *s, int reg)
578 {
579     if (reg == 31) {
580         TCGv_i64 t = tcg_temp_new_i64();
581         tcg_gen_movi_i64(t, 0);
582         return t;
583     } else {
584         return cpu_X[reg];
585     }
586 }
587 
588 /* register access for when 31 == SP */
589 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
590 {
591     return cpu_X[reg];
592 }
593 
594 /* Read a cpu register in 32-bit/64-bit mode. Returns a TCGv_i64
595  * representing the register contents. This TCGv is an auto-freed
596  * temporary so it need not be explicitly freed, and may be modified.
597  */
598 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
599 {
600     TCGv_i64 v = tcg_temp_new_i64();
601     if (reg != 31) {
602         if (sf) {
603             tcg_gen_mov_i64(v, cpu_X[reg]);
604         } else {
605             tcg_gen_ext32u_i64(v, cpu_X[reg]);
606         }
607     } else {
608         tcg_gen_movi_i64(v, 0);
609     }
610     return v;
611 }
612 
613 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
614 {
615     TCGv_i64 v = tcg_temp_new_i64();
616     if (sf) {
617         tcg_gen_mov_i64(v, cpu_X[reg]);
618     } else {
619         tcg_gen_ext32u_i64(v, cpu_X[reg]);
620     }
621     return v;
622 }
623 
624 /* Return the offset into CPUARMState of a slice (from
625  * the least significant end) of FP register Qn (ie
626  * Dn, Sn, Hn or Bn).
627  * (Note that this is not the same mapping as for A32; see cpu.h)
628  */
629 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
630 {
631     return vec_reg_offset(s, regno, 0, size);
632 }
633 
634 /* Offset of the high half of the 128 bit vector Qn */
635 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
636 {
637     return vec_reg_offset(s, regno, 1, MO_64);
638 }
639 
640 /* Convenience accessors for reading and writing single and double
641  * FP registers. Writing clears the upper parts of the associated
642  * 128 bit vector register, as required by the architecture.
643  * Note that the values returned by the read functions are fresh
644  * temporaries; as elsewhere, they need not be explicitly freed.
645  */
646 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
647 {
648     TCGv_i64 v = tcg_temp_new_i64();
649 
650     tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
651     return v;
652 }
653 
654 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
655 {
656     TCGv_i32 v = tcg_temp_new_i32();
657 
658     tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
659     return v;
660 }
661 
662 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
663 {
664     TCGv_i32 v = tcg_temp_new_i32();
665 
666     tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
667     return v;
668 }
669 
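/* Clear the whole of vector register Vd/Zd (all vec_full_reg_size bytes). */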
670 static void clear_vec(DisasContext *s, int rd)
671 {
672     unsigned ofs = fp_reg_offset(s, rd, MO_64);
673     unsigned vsz = vec_full_reg_size(s);
674 
675     tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0);
676 }
677 
678 /*
679  * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
680  * If SVE is not enabled, then there are only 128 bits in the vector.
681  */
682 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
683 {
684     unsigned ofs = fp_reg_offset(s, rd, MO_64);
685     unsigned vsz = vec_full_reg_size(s);
686 
687     /* Nop move, with side effect of clearing the tail. */
688     tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
689 }
690 
691 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
692 {
693     unsigned ofs = fp_reg_offset(s, reg, MO_64);
694 
695     tcg_gen_st_i64(v, tcg_env, ofs);
696     clear_vec_high(s, false, reg);
697 }
698 
699 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
700 {
701     TCGv_i64 tmp = tcg_temp_new_i64();
702 
703     tcg_gen_extu_i32_i64(tmp, v);
704     write_fp_dreg(s, reg, tmp);
705 }
706 
707 /*
708  * Write a double result to 128 bit vector register reg, honouring FPCR.NEP:
709  * - if FPCR.NEP == 0, clear the high elements of reg
710  * - if FPCR.NEP == 1, set the high elements of reg from mergereg
711  *   (i.e. merge the result with those high elements)
712  * In either case, SVE register bits above 128 are zeroed (per R_WKYLB).
713  */
714 static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg,
715                                   TCGv_i64 v)
716 {
717     if (!s->fpcr_nep) {
718         write_fp_dreg(s, reg, v);
719         return;
720     }
721 
722     /*
723      * Move from mergereg to reg; this sets the high elements and
724      * clears the bits above 128 as a side effect.
725      */
726     tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
727                      vec_full_reg_offset(s, mergereg),
728                      16, vec_full_reg_size(s));
729     tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg));
730 }
731 
732 /*
733  * Write a single-prec result, but only clear the higher elements
734  * of the destination register if FPCR.NEP is 0; otherwise preserve them.
735  */
736 static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg,
737                                   TCGv_i32 v)
738 {
739     if (!s->fpcr_nep) {
740         write_fp_sreg(s, reg, v);
741         return;
742     }
743 
744     tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
745                      vec_full_reg_offset(s, mergereg),
746                      16, vec_full_reg_size(s));
747     tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
748 }
749 
750 /*
751  * Write a half-prec result, but only clear the higher elements
752  * of the destination register if FPCR.NEP is 0; otherwise preserve them.
753  * The caller must ensure that the top 16 bits of v are zero.
754  */
755 static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg,
756                                   TCGv_i32 v)
757 {
758     if (!s->fpcr_nep) {
759         write_fp_sreg(s, reg, v);
760         return;
761     }
762 
763     tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
764                      vec_full_reg_offset(s, mergereg),
765                      16, vec_full_reg_size(s));
766     tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
767 }
768 
769 /* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
770 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
771                          GVecGen2Fn *gvec_fn, int vece)
772 {
773     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
774             is_q ? 16 : 8, vec_full_reg_size(s));
775 }
776 
777 /* Expand a 2-operand + immediate AdvSIMD vector operation using
778  * an expander function.
779  */
780 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
781                           int64_t imm, GVecGen2iFn *gvec_fn, int vece)
782 {
783     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
784             imm, is_q ? 16 : 8, vec_full_reg_size(s));
785 }
786 
787 /* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
788 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
789                          GVecGen3Fn *gvec_fn, int vece)
790 {
791     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
792             vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
793 }
794 
795 /* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
796 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
797                          int rx, GVecGen4Fn *gvec_fn, int vece)
798 {
799     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
800             vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
801             is_q ? 16 : 8, vec_full_reg_size(s));
802 }
803 
804 /* Expand a 2-operand operation using an out-of-line helper.  */
805 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
806                              int rn, int data, gen_helper_gvec_2 *fn)
807 {
808     tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
809                        vec_full_reg_offset(s, rn),
810                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
811 }
812 
813 /* Expand a 3-operand operation using an out-of-line helper.  */
814 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
815                              int rn, int rm, int data, gen_helper_gvec_3 *fn)
816 {
817     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
818                        vec_full_reg_offset(s, rn),
819                        vec_full_reg_offset(s, rm),
820                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
821 }
822 
823 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
824  * an out-of-line helper.
825  */
826 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
827                               int rm, ARMFPStatusFlavour fpsttype, int data,
828                               gen_helper_gvec_3_ptr *fn)
829 {
830     TCGv_ptr fpst = fpstatus_ptr(fpsttype);
831     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
832                        vec_full_reg_offset(s, rn),
833                        vec_full_reg_offset(s, rm), fpst,
834                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
835 }
836 
837 /* Expand a 4-operand operation using an out-of-line helper.  */
838 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
839                              int rm, int ra, int data, gen_helper_gvec_4 *fn)
840 {
841     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
842                        vec_full_reg_offset(s, rn),
843                        vec_full_reg_offset(s, rm),
844                        vec_full_reg_offset(s, ra),
845                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
846 }
847 
848 /*
849  * Expand a 4-operand operation using an out-of-line helper that takes
850  * a pointer to the CPU env.
851  */
852 static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
853                              int rm, int ra, int data,
854                              gen_helper_gvec_4_ptr *fn)
855 {
856     tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
857                        vec_full_reg_offset(s, rn),
858                        vec_full_reg_offset(s, rm),
859                        vec_full_reg_offset(s, ra),
860                        tcg_env,
861                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
862 }
863 
864 /*
865  * Expand a 4-operand + fpstatus pointer + simd data value operation using
866  * an out-of-line helper.
867  */
868 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
869                               int rm, int ra, ARMFPStatusFlavour fpsttype,
870                               int data,
871                               gen_helper_gvec_4_ptr *fn)
872 {
873     TCGv_ptr fpst = fpstatus_ptr(fpsttype);
874     tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
875                        vec_full_reg_offset(s, rn),
876                        vec_full_reg_offset(s, rm),
877                        vec_full_reg_offset(s, ra), fpst,
878                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
879 }
880 
881 /*
882  * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN.
883  * These functions implement
884  *   d = floatN_is_any_nan(s) ? s : floatN_chs(s)
885  * which for float32 is
886  *   d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
887  * and similarly for the other float sizes.
888  */
889 static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s)
890 {
891     TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
892 
893     gen_vfp_negh(chs_s, s);
894     gen_vfp_absh(abs_s, s);
895     tcg_gen_movcond_i32(TCG_COND_GTU, d,
896                         abs_s, tcg_constant_i32(0x7c00),
897                         s, chs_s);
898 }
899 
900 static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s)
901 {
902     TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
903 
904     gen_vfp_negs(chs_s, s);
905     gen_vfp_abss(abs_s, s);
906     tcg_gen_movcond_i32(TCG_COND_GTU, d,
907                         abs_s, tcg_constant_i32(0x7f800000UL),
908                         s, chs_s);
909 }
910 
911 static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
912 {
913     TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64();
914 
915     gen_vfp_negd(chs_s, s);
916     gen_vfp_absd(abs_s, s);
917     tcg_gen_movcond_i64(TCG_COND_GTU, d,
918                         abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
919                         s, chs_s);
920 }
921 
922 /*
923  * These functions implement
924  *  d = floatN_is_any_nan(s) ? s : floatN_abs(s)
925  * which for float32 is
926  *  d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31))
927  * and similarly for the other float sizes.
928  */
929 static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s)
930 {
931     TCGv_i32 abs_s = tcg_temp_new_i32();
932 
933     gen_vfp_absh(abs_s, s);
934     tcg_gen_movcond_i32(TCG_COND_GTU, d,
935                         abs_s, tcg_constant_i32(0x7c00),
936                         s, abs_s);
937 }
938 
939 static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s)
940 {
941     TCGv_i32 abs_s = tcg_temp_new_i32();
942 
943     gen_vfp_abss(abs_s, s);
944     tcg_gen_movcond_i32(TCG_COND_GTU, d,
945                         abs_s, tcg_constant_i32(0x7f800000UL),
946                         s, abs_s);
947 }
948 
949 static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s)
950 {
951     TCGv_i64 abs_s = tcg_temp_new_i64();
952 
953     gen_vfp_absd(abs_s, s);
954     tcg_gen_movcond_i64(TCG_COND_GTU, d,
955                         abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
956                         s, abs_s);
957 }
958 
959 static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
960 {
961     if (dc->fpcr_ah) {
962         gen_vfp_ah_negh(d, s);
963     } else {
964         gen_vfp_negh(d, s);
965     }
966 }
967 
968 static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
969 {
970     if (dc->fpcr_ah) {
971         gen_vfp_ah_negs(d, s);
972     } else {
973         gen_vfp_negs(d, s);
974     }
975 }
976 
977 static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s)
978 {
979     if (dc->fpcr_ah) {
980         gen_vfp_ah_negd(d, s);
981     } else {
982         gen_vfp_negd(d, s);
983     }
984 }
985 
986 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
987  * than the 32 bit equivalent.
988  */
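/*
 * cpu_NF gets the high 32 bits of the result, so its sign bit is the
 * result's sign; cpu_ZF gets a value that is zero exactly when the full
 * 64-bit result is zero (the OR of both halves).
 */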
989 static inline void gen_set_NZ64(TCGv_i64 result)
990 {
991     tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
992     tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
993 }
994 
995 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
996 static inline void gen_logic_CC(int sf, TCGv_i64 result)
997 {
998     if (sf) {
999         gen_set_NZ64(result);
1000     } else {
1001         tcg_gen_extrl_i64_i32(cpu_ZF, result);
1002         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
1003     }
1004     tcg_gen_movi_i32(cpu_CF, 0);
1005     tcg_gen_movi_i32(cpu_VF, 0);
1006 }
1007 
1008 /* dest = T0 + T1; compute C, N, V and Z flags */
1009 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1010 {
1011     TCGv_i64 result, flag, tmp;
1012     result = tcg_temp_new_i64();
1013     flag = tcg_temp_new_i64();
1014     tmp = tcg_temp_new_i64();
1015 
1016     tcg_gen_movi_i64(tmp, 0);
1017     tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
1018 
1019     tcg_gen_extrl_i64_i32(cpu_CF, flag);
1020 
1021     gen_set_NZ64(result);
1022 
1023     tcg_gen_xor_i64(flag, result, t0);
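    /*
     * Signed overflow occurred iff the operands have the same sign but
     * the result's sign differs: bit 63 of (result ^ t0) & ~(t0 ^ t1).
     * cpu_VF's flag is its bit 31, so take the high half.
     */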
1024     tcg_gen_xor_i64(tmp, t0, t1);
1025     tcg_gen_andc_i64(flag, flag, tmp);
1026     tcg_gen_extrh_i64_i32(cpu_VF, flag);
1027 
1028     tcg_gen_mov_i64(dest, result);
1029 }
1030 
1031 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1032 {
1033     TCGv_i32 t0_32 = tcg_temp_new_i32();
1034     TCGv_i32 t1_32 = tcg_temp_new_i32();
1035     TCGv_i32 tmp = tcg_temp_new_i32();
1036 
1037     tcg_gen_movi_i32(tmp, 0);
1038     tcg_gen_extrl_i64_i32(t0_32, t0);
1039     tcg_gen_extrl_i64_i32(t1_32, t1);
1040     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
1041     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1042     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
1043     tcg_gen_xor_i32(tmp, t0_32, t1_32);
1044     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
1045     tcg_gen_extu_i32_i64(dest, cpu_NF);
1046 }
1047 
1048 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1049 {
1050     if (sf) {
1051         gen_add64_CC(dest, t0, t1);
1052     } else {
1053         gen_add32_CC(dest, t0, t1);
1054     }
1055 }
1056 
1057 /* dest = T0 - T1; compute C, N, V and Z flags */
1058 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1059 {
1060     /* 64 bit arithmetic */
1061     TCGv_i64 result, flag, tmp;
1062 
1063     result = tcg_temp_new_i64();
1064     flag = tcg_temp_new_i64();
1065     tcg_gen_sub_i64(result, t0, t1);
1066 
1067     gen_set_NZ64(result);
1068 
1069     tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
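    /* For subtraction the ARM C flag means "no borrow": C = (t0 >= t1) unsigned. */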
1070     tcg_gen_extrl_i64_i32(cpu_CF, flag);
1071 
1072     tcg_gen_xor_i64(flag, result, t0);
1073     tmp = tcg_temp_new_i64();
1074     tcg_gen_xor_i64(tmp, t0, t1);
1075     tcg_gen_and_i64(flag, flag, tmp);
1076     tcg_gen_extrh_i64_i32(cpu_VF, flag);
1077     tcg_gen_mov_i64(dest, result);
1078 }
1079 
1080 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1081 {
1082     /* 32 bit arithmetic */
1083     TCGv_i32 t0_32 = tcg_temp_new_i32();
1084     TCGv_i32 t1_32 = tcg_temp_new_i32();
1085     TCGv_i32 tmp;
1086 
1087     tcg_gen_extrl_i64_i32(t0_32, t0);
1088     tcg_gen_extrl_i64_i32(t1_32, t1);
1089     tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
1090     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1091     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
1092     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
1093     tmp = tcg_temp_new_i32();
1094     tcg_gen_xor_i32(tmp, t0_32, t1_32);
1095     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
1096     tcg_gen_extu_i32_i64(dest, cpu_NF);
1097 }
1098 
1099 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1100 {
1101     if (sf) {
1102         gen_sub64_CC(dest, t0, t1);
1103     } else {
1104         gen_sub32_CC(dest, t0, t1);
1105     }
1106 }
1107 
1108 /* dest = T0 + T1 + CF; do not compute flags. */
1109 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1110 {
1111     TCGv_i64 flag = tcg_temp_new_i64();
1112     tcg_gen_extu_i32_i64(flag, cpu_CF);
1113     tcg_gen_add_i64(dest, t0, t1);
1114     tcg_gen_add_i64(dest, dest, flag);
1115 
1116     if (!sf) {
1117         tcg_gen_ext32u_i64(dest, dest);
1118     }
1119 }
1120 
1121 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
1122 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1123 {
1124     if (sf) {
1125         TCGv_i64 result = tcg_temp_new_i64();
1126         TCGv_i64 cf_64 = tcg_temp_new_i64();
1127         TCGv_i64 vf_64 = tcg_temp_new_i64();
1128         TCGv_i64 tmp = tcg_temp_new_i64();
1129 
1130         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
1131         tcg_gen_addcio_i64(result, cf_64, t0, t1, cf_64);
1132         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
1133         gen_set_NZ64(result);
1134 
1135         tcg_gen_xor_i64(vf_64, result, t0);
1136         tcg_gen_xor_i64(tmp, t0, t1);
1137         tcg_gen_andc_i64(vf_64, vf_64, tmp);
1138         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
1139 
1140         tcg_gen_mov_i64(dest, result);
1141     } else {
1142         TCGv_i32 t0_32 = tcg_temp_new_i32();
1143         TCGv_i32 t1_32 = tcg_temp_new_i32();
1144         TCGv_i32 tmp = tcg_temp_new_i32();
1145 
1146         tcg_gen_extrl_i64_i32(t0_32, t0);
1147         tcg_gen_extrl_i64_i32(t1_32, t1);
1148         tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0_32, t1_32, cpu_CF);
1149 
1150         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1151         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
1152         tcg_gen_xor_i32(tmp, t0_32, t1_32);
1153         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
1154         tcg_gen_extu_i32_i64(dest, cpu_NF);
1155     }
1156 }
1157 
1158 /*
1159  * Load/Store generators
1160  */
1161 
1162 /*
1163  * Store from GPR register to memory.
1164  */
1165 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
1166                              TCGv_i64 tcg_addr, MemOp memop, int memidx,
1167                              bool iss_valid,
1168                              unsigned int iss_srt,
1169                              bool iss_sf, bool iss_ar)
1170 {
1171     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
1172 
1173     if (iss_valid) {
1174         uint32_t syn;
1175 
1176         syn = syn_data_abort_with_iss(0,
1177                                       (memop & MO_SIZE),
1178                                       false,
1179                                       iss_srt,
1180                                       iss_sf,
1181                                       iss_ar,
1182                                       0, 0, 0, 0, 0, false);
1183         disas_set_insn_syndrome(s, syn);
1184     }
1185 }
1186 
1187 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
1188                       TCGv_i64 tcg_addr, MemOp memop,
1189                       bool iss_valid,
1190                       unsigned int iss_srt,
1191                       bool iss_sf, bool iss_ar)
1192 {
1193     do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
1194                      iss_valid, iss_srt, iss_sf, iss_ar);
1195 }
1196 
1197 /*
1198  * Load from memory to GPR register
1199  */
1200 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1201                              MemOp memop, bool extend, int memidx,
1202                              bool iss_valid, unsigned int iss_srt,
1203                              bool iss_sf, bool iss_ar)
1204 {
1205     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
1206 
1207     if (extend && (memop & MO_SIGN)) {
1208         g_assert((memop & MO_SIZE) <= MO_32);
1209         tcg_gen_ext32u_i64(dest, dest);
1210     }
1211 
1212     if (iss_valid) {
1213         uint32_t syn;
1214 
1215         syn = syn_data_abort_with_iss(0,
1216                                       (memop & MO_SIZE),
1217                                       (memop & MO_SIGN) != 0,
1218                                       iss_srt,
1219                                       iss_sf,
1220                                       iss_ar,
1221                                       0, 0, 0, 0, 0, false);
1222         disas_set_insn_syndrome(s, syn);
1223     }
1224 }
1225 
1226 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1227                       MemOp memop, bool extend,
1228                       bool iss_valid, unsigned int iss_srt,
1229                       bool iss_sf, bool iss_ar)
1230 {
1231     do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
1232                      iss_valid, iss_srt, iss_sf, iss_ar);
1233 }
1234 
1235 /*
1236  * Store from FP register to memory
1237  */
1238 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
1239 {
1240     /* This writes the bottom N bits of a 128 bit wide vector to memory */
1241     TCGv_i64 tmplo = tcg_temp_new_i64();
1242 
1243     tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));
1244 
1245     if ((mop & MO_SIZE) < MO_128) {
1246         tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1247     } else {
1248         TCGv_i64 tmphi = tcg_temp_new_i64();
1249         TCGv_i128 t16 = tcg_temp_new_i128();
1250 
1251         tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
1252         tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
1253 
1254         tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
1255     }
1256 }
1257 
1258 /*
1259  * Load from memory to FP register
1260  */
1261 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
1262 {
1263     /* This always zero-extends and writes to a full 128 bit wide vector */
1264     TCGv_i64 tmplo = tcg_temp_new_i64();
1265     TCGv_i64 tmphi = NULL;
1266 
1267     if ((mop & MO_SIZE) < MO_128) {
1268         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1269     } else {
1270         TCGv_i128 t16 = tcg_temp_new_i128();
1271 
1272         tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
1273 
1274         tmphi = tcg_temp_new_i64();
1275         tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
1276     }
1277 
1278     tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));
1279 
1280     if (tmphi) {
1281         tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
1282     }
1283     clear_vec_high(s, tmphi != NULL, destidx);
1284 }
1285 
1286 /*
1287  * Vector load/store helpers.
1288  *
1289  * The principal difference between this and a FP load is that we don't
1290  * zero extend as we are filling a partial chunk of the vector register.
1291  * These functions don't support 128 bit loads/stores, which would be
1292  * normal load/store operations.
1293  *
1294  * The _i32 versions are useful when operating on 32 bit quantities
1295  * (eg for floating point single or using Neon helper functions).
1296  */
1297 
1298 /* Get value of an element within a vector register */
1299 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1300                              int element, MemOp memop)
1301 {
1302     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1303     switch ((unsigned)memop) {
1304     case MO_8:
1305         tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
1306         break;
1307     case MO_16:
1308         tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
1309         break;
1310     case MO_32:
1311         tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
1312         break;
1313     case MO_8|MO_SIGN:
1314         tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
1315         break;
1316     case MO_16|MO_SIGN:
1317         tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
1318         break;
1319     case MO_32|MO_SIGN:
1320         tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
1321         break;
1322     case MO_64:
1323     case MO_64|MO_SIGN:
1324         tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
1325         break;
1326     default:
1327         g_assert_not_reached();
1328     }
1329 }
1330 
1331 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1332                                  int element, MemOp memop)
1333 {
1334     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1335     switch (memop) {
1336     case MO_8:
1337         tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
1338         break;
1339     case MO_16:
1340         tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
1341         break;
1342     case MO_8|MO_SIGN:
1343         tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
1344         break;
1345     case MO_16|MO_SIGN:
1346         tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
1347         break;
1348     case MO_32:
1349     case MO_32|MO_SIGN:
1350         tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
1351         break;
1352     default:
1353         g_assert_not_reached();
1354     }
1355 }
1356 
1357 /* Set value of an element within a vector register */
1358 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1359                               int element, MemOp memop)
1360 {
1361     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1362     switch (memop) {
1363     case MO_8:
1364         tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
1365         break;
1366     case MO_16:
1367         tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
1368         break;
1369     case MO_32:
1370         tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
1371         break;
1372     case MO_64:
1373         tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
1374         break;
1375     default:
1376         g_assert_not_reached();
1377     }
1378 }
1379 
1380 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1381                                   int destidx, int element, MemOp memop)
1382 {
1383     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1384     switch (memop) {
1385     case MO_8:
1386         tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
1387         break;
1388     case MO_16:
1389         tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
1390         break;
1391     case MO_32:
1392         tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
1393         break;
1394     default:
1395         g_assert_not_reached();
1396     }
1397 }
1398 
1399 /* Store from vector register to memory */
1400 static void do_vec_st(DisasContext *s, int srcidx, int element,
1401                       TCGv_i64 tcg_addr, MemOp mop)
1402 {
1403     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1404 
1405     read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
1406     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1407 }
1408 
1409 /* Load from memory to vector register */
1410 static void do_vec_ld(DisasContext *s, int destidx, int element,
1411                       TCGv_i64 tcg_addr, MemOp mop)
1412 {
1413     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1414 
1415     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1416     write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
1417 }
1418 
1419 /* Check that FP/Neon access is enabled. If it is, return
1420  * true. If not, emit code to generate an appropriate exception,
1421  * and return false; the caller should not emit any code for
1422  * the instruction. Note that this check must happen after all
1423  * unallocated-encoding checks (otherwise the syndrome information
1424  * for the resulting exception will be incorrect).
1425  */
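/*
 * s->fp_access_checked is used as a tristate here: 0 means not yet
 * checked for this insn, 1 means checked and permitted, -1 means
 * checked and an exception was generated.
 */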
1426 static bool fp_access_check_only(DisasContext *s)
1427 {
1428     if (s->fp_excp_el) {
1429         assert(!s->fp_access_checked);
1430         s->fp_access_checked = -1;
1431 
1432         gen_exception_insn_el(s, 0, EXCP_UDEF,
1433                               syn_fp_access_trap(1, 0xe, false, 0),
1434                               s->fp_excp_el);
1435         return false;
1436     }
1437     s->fp_access_checked = 1;
1438     return true;
1439 }
1440 
1441 static bool nonstreaming_check(DisasContext *s)
1442 {
1443     if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
1444         gen_exception_insn(s, 0, EXCP_UDEF,
1445                            syn_smetrap(SME_ET_Streaming, false));
1446         return false;
1447     }
1448     return true;
1449 }
1450 
1451 static bool fp_access_check(DisasContext *s)
1452 {
1453     return fp_access_check_only(s) && nonstreaming_check(s);
1454 }
1455 
1456 /*
1457  * Return <0 for non-supported element sizes, with MO_16 controlled by
1458  * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
1459  */
1460 static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz)
1461 {
1462     switch (esz) {
1463     case MO_64:
1464     case MO_32:
1465         break;
1466     case MO_16:
1467         if (!dc_isar_feature(aa64_fp16, s)) {
1468             return -1;
1469         }
1470         break;
1471     default:
1472         return -1;
1473     }
1474     return fp_access_check(s);
1475 }
1476 
1477 /* Likewise, but vector MO_64 must have two elements. */
1478 static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
1479 {
1480     switch (esz) {
1481     case MO_64:
1482         if (!is_q) {
1483             return -1;
1484         }
1485         break;
1486     case MO_32:
1487         break;
1488     case MO_16:
1489         if (!dc_isar_feature(aa64_fp16, s)) {
1490             return -1;
1491         }
1492         break;
1493     default:
1494         return -1;
1495     }
1496     return fp_access_check(s);
1497 }
1498 
1499 /*
1500  * Check that SVE access is enabled.  If it is, return true.
1501  * If not, emit code to generate an appropriate exception and return false.
1502  * This function corresponds to CheckSVEEnabled().
1503  */
1504 bool sve_access_check(DisasContext *s)
1505 {
1506     if (dc_isar_feature(aa64_sme, s)) {
1507         bool ret;
1508 
1509         if (s->pstate_sm) {
1510             ret = sme_enabled_check(s);
1511         } else if (dc_isar_feature(aa64_sve, s)) {
1512             goto continue_sve;
1513         } else {
1514             ret = sme_sm_enabled_check(s);
1515         }
1516         if (ret) {
1517             ret = nonstreaming_check(s);
1518         }
1519         s->sve_access_checked = (ret ? 1 : -1);
1520         return ret;
1521     }
1522 
1523  continue_sve:
1524     if (s->sve_excp_el) {
1525         /* Assert that we only raise one exception per instruction. */
1526         assert(!s->sve_access_checked);
1527         gen_exception_insn_el(s, 0, EXCP_UDEF,
1528                               syn_sve_access_trap(), s->sve_excp_el);
1529         s->sve_access_checked = -1;
1530         return false;
1531     }
1532     s->sve_access_checked = 1;
1533     return fp_access_check(s);
1534 }
1535 
1536 /*
1537  * Check that SME access is enabled, raise an exception if not.
1538  * Note that this function corresponds to CheckSMEAccess and is
1539  * only used directly for cpregs.
1540  */
1541 static bool sme_access_check(DisasContext *s)
1542 {
1543     if (s->sme_excp_el) {
1544         gen_exception_insn_el(s, 0, EXCP_UDEF,
1545                               syn_smetrap(SME_ET_AccessTrap, false),
1546                               s->sme_excp_el);
1547         return false;
1548     }
1549     return true;
1550 }
1551 
1552 /* This function corresponds to CheckSMEEnabled. */
1553 bool sme_enabled_check(DisasContext *s)
1554 {
1555     /*
1556      * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1557      * to be zero when fp_excp_el has priority.  This is because we need
1558      * sme_excp_el by itself for cpregs access checks.
1559      */
1560     if (s->sme_excp_el
1561         && (!s->fp_excp_el || s->sme_excp_el <= s->fp_excp_el)) {
1562         bool ret = sme_access_check(s);
1563         s->fp_access_checked = (ret ? 1 : -1);
1564         return ret;
1565     }
1566     return fp_access_check_only(s);
1567 }
1568 
1569 /* Common subroutine for CheckSMEAnd*Enabled. */
1570 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
1571 {
1572     if (!sme_enabled_check(s)) {
1573         return false;
1574     }
1575     if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
1576         gen_exception_insn(s, 0, EXCP_UDEF,
1577                            syn_smetrap(SME_ET_NotStreaming, false));
1578         return false;
1579     }
1580     if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
1581         gen_exception_insn(s, 0, EXCP_UDEF,
1582                            syn_smetrap(SME_ET_InactiveZA, false));
1583         return false;
1584     }
1585     return true;
1586 }
1587 
1588 /*
1589  * Expanders for AdvSIMD translation functions.
1590  */
1591 
1592 static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
1593                             gen_helper_gvec_2 *fn)
1594 {
1595     if (!a->q && a->esz == MO_64) {
1596         return false;
1597     }
1598     if (fp_access_check(s)) {
1599         gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
1600     }
1601     return true;
1602 }
1603 
1604 static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
1605                             gen_helper_gvec_3 *fn)
1606 {
1607     if (!a->q && a->esz == MO_64) {
1608         return false;
1609     }
1610     if (fp_access_check(s)) {
1611         gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
1612     }
1613     return true;
1614 }
1615 
1616 static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1617 {
1618     if (!a->q && a->esz == MO_64) {
1619         return false;
1620     }
1621     if (fp_access_check(s)) {
1622         gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1623     }
1624     return true;
1625 }
1626 
1627 static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1628 {
1629     if (a->esz == MO_64) {
1630         return false;
1631     }
1632     if (fp_access_check(s)) {
1633         gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1634     }
1635     return true;
1636 }
1637 
1638 static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1639 {
1640     if (a->esz == MO_8) {
1641         return false;
1642     }
1643     return do_gvec_fn3_no64(s, a, fn);
1644 }
1645 
1646 static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
1647 {
1648     if (!a->q && a->esz == MO_64) {
1649         return false;
1650     }
1651     if (fp_access_check(s)) {
1652         gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
1653     }
1654     return true;
1655 }
1656 
1657 /*
1658  * This utility function is for doing register extension with an
1659  * optional shift. You will likely want to pass a temporary for the
1660  * destination register. See DecodeRegExtend() in the ARM ARM.
1661  */
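     /*
      * For example, option == 0b010 (UXTW) with shift == 2 places the
      * zero-extended low 32 bits of tcg_in, shifted left by 2, in tcg_out.
      */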
1662 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1663                               int option, unsigned int shift)
1664 {
1665     int extsize = extract32(option, 0, 2);
1666     bool is_signed = extract32(option, 2, 1);
1667 
1668     tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
1669     tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1670 }
1671 
1672 static inline void gen_check_sp_alignment(DisasContext *s)
1673 {
1674     /* The AArch64 architecture mandates that (if enabled via PSTATE
1675      * or SCTLR bits) there is a check that SP is 16-aligned on every
1676      * SP-relative load or store (with an exception generated if it is not).
1677      * In line with general QEMU practice regarding misaligned accesses,
1678      * we omit these checks for the sake of guest program performance.
1679      * This function is provided as a hook so we can more easily add these
1680      * checks in future (possibly as a "favour catching guest program bugs
1681      * over speed" user selectable option).
1682      */
1683 }
1684 
1685 /*
1686  * The instruction disassembly implemented here matches
1687  * the instruction encoding classifications in chapter C4
1688  * of the ARM Architecture Reference Manual (DDI0487B_a);
1689  * classification names and decode diagrams here should generally
1690  * match up with those in the manual.
1691  */
1692 
1693 static bool trans_B(DisasContext *s, arg_i *a)
1694 {
1695     reset_btype(s);
1696     gen_goto_tb(s, 0, a->imm);
1697     return true;
1698 }
1699 
1700 static bool trans_BL(DisasContext *s, arg_i *a)
1701 {
1702     TCGv_i64 link = tcg_temp_new_i64();
1703 
1704     gen_pc_plus_diff(s, link, 4);
1705     if (s->gcs_en) {
1706         gen_add_gcs_record(s, link);
1707     }
1708     tcg_gen_mov_i64(cpu_reg(s, 30), link);
1709 
1710     reset_btype(s);
1711     gen_goto_tb(s, 0, a->imm);
1712     return true;
1713 }
1714 
1715 
1716 static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1717 {
1718     DisasLabel match;
1719     TCGv_i64 tcg_cmp;
1720 
1721     tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1722     reset_btype(s);
1723 
1724     match = gen_disas_label(s);
1725     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1726                         tcg_cmp, 0, match.label);
1727     gen_goto_tb(s, 0, 4);
1728     set_disas_label(s, match);
1729     gen_goto_tb(s, 1, a->imm);
1730     return true;
1731 }
1732 
1733 static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1734 {
1735     DisasLabel match;
1736     TCGv_i64 tcg_cmp;
1737 
1738     tcg_cmp = tcg_temp_new_i64();
1739     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1740 
1741     reset_btype(s);
1742 
1743     match = gen_disas_label(s);
1744     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1745                         tcg_cmp, 0, match.label);
1746     gen_goto_tb(s, 0, 4);
1747     set_disas_label(s, match);
1748     gen_goto_tb(s, 1, a->imm);
1749     return true;
1750 }
1751 
1752 static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1753 {
1754     /* BC.cond is only present with FEAT_HBC */
1755     if (a->c && !dc_isar_feature(aa64_hbc, s)) {
1756         return false;
1757     }
1758     reset_btype(s);
1759     if (a->cond < 0x0e) {
1760         /* genuinely conditional branches */
1761         DisasLabel match = gen_disas_label(s);
1762         arm_gen_test_cc(a->cond, match.label);
1763         gen_goto_tb(s, 0, 4);
1764         set_disas_label(s, match);
1765         gen_goto_tb(s, 1, a->imm);
1766     } else {
1767         /* 0xe and 0xf are both "always" conditions */
1768         gen_goto_tb(s, 0, a->imm);
1769     }
1770     return true;
1771 }
1772 
1773 static void set_btype_for_br(DisasContext *s, int rn)
1774 {
1775     if (dc_isar_feature(aa64_bti, s)) {
1776         /* BR to {x16,x17} or !guard -> 1, else 3.  */
1777         if (rn == 16 || rn == 17) {
1778             set_btype(s, 1);
1779         } else {
1780             TCGv_i64 pc = tcg_temp_new_i64();
1781             gen_pc_plus_diff(s, pc, 0);
1782             gen_helper_guarded_page_br(tcg_env, pc);
1783             s->btype = -1;
1784         }
1785     }
1786 }
1787 
1788 static void set_btype_for_blr(DisasContext *s)
1789 {
1790     if (dc_isar_feature(aa64_bti, s)) {
1791         /* BLR sets BTYPE to 2, regardless of source guarded page.  */
1792         set_btype(s, 2);
1793     }
1794 }
1795 
1796 static bool trans_BR(DisasContext *s, arg_r *a)
1797 {
1798     set_btype_for_br(s, a->rn);
1799     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1800     s->base.is_jmp = DISAS_JUMP;
1801     return true;
1802 }
1803 
1804 static bool trans_BLR(DisasContext *s, arg_r *a)
1805 {
1806     TCGv_i64 link = tcg_temp_new_i64();
1807 
1808     gen_pc_plus_diff(s, link, 4);
1809     if (s->gcs_en) {
1810         gen_add_gcs_record(s, link);
1811     }
1812     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1813     tcg_gen_mov_i64(cpu_reg(s, 30), link);
1814 
1815     set_btype_for_blr(s);
1816     s->base.is_jmp = DISAS_JUMP;
1817     return true;
1818 }
1819 
1820 static bool trans_RET(DisasContext *s, arg_r *a)
1821 {
1822     TCGv_i64 target = cpu_reg(s, a->rn);
1823 
1824     if (s->gcs_en) {
1825         gen_load_check_gcs_record(s, target, GCS_IT_RET_nPauth, a->rn);
1826     } else {
1827         gen_a64_set_pc(s, target);
1828     }
1829     s->base.is_jmp = DISAS_JUMP;
1830     return true;
1831 }
1832 
1833 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1834                                    TCGv_i64 modifier, bool use_key_a)
1835 {
1836     TCGv_i64 truedst;
1837     /*
1838      * Return the branch target for a BRAA/RETA/etc, which is either
1839      * just the destination dst, or that value with the pauth check
1840      * done and the code removed from the high bits.
1841      */
1842     if (!s->pauth_active) {
1843         return dst;
1844     }
1845 
1846     truedst = tcg_temp_new_i64();
1847     if (use_key_a) {
1848         gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
1849     } else {
1850         gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
1851     }
1852     return truedst;
1853 }
1854 
1855 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1856 {
1857     TCGv_i64 dst;
1858 
1859     if (!dc_isar_feature(aa64_pauth, s)) {
1860         return false;
1861     }
1862 
1863     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1864     set_btype_for_br(s, a->rn);
1865     gen_a64_set_pc(s, dst);
1866     s->base.is_jmp = DISAS_JUMP;
1867     return true;
1868 }
1869 
1870 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1871 {
1872     TCGv_i64 dst, link;
1873 
1874     if (!dc_isar_feature(aa64_pauth, s)) {
1875         return false;
1876     }
1877     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1878 
1879     link = tcg_temp_new_i64();
1880     gen_pc_plus_diff(s, link, 4);
1881     if (s->gcs_en) {
1882         gen_add_gcs_record(s, link);
1883     }
1884     gen_a64_set_pc(s, dst);
1885     tcg_gen_mov_i64(cpu_reg(s, 30), link);
1886 
1887     set_btype_for_blr(s);
1888     s->base.is_jmp = DISAS_JUMP;
1889     return true;
1890 }
1891 
1892 static bool trans_RETA(DisasContext *s, arg_reta *a)
1893 {
1894     TCGv_i64 dst;
1895 
1896     if (!dc_isar_feature(aa64_pauth, s)) {
1897         return false;
1898     }
1899 
1900     dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1901     if (s->gcs_en) {
1902         GCSInstructionType it = a->m ? GCS_IT_RET_PauthB : GCS_IT_RET_PauthA;
1903         gen_load_check_gcs_record(s, dst, it, 30);
1904     } else {
1905         gen_a64_set_pc(s, dst);
1906     }
1907     s->base.is_jmp = DISAS_JUMP;
1908     return true;
1909 }
1910 
1911 static bool trans_BRA(DisasContext *s, arg_bra *a)
1912 {
1913     TCGv_i64 dst;
1914 
1915     if (!dc_isar_feature(aa64_pauth, s)) {
1916         return false;
1917     }
1918     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1919     set_btype_for_br(s, a->rn);
1920     gen_a64_set_pc(s, dst);
1921     s->base.is_jmp = DISAS_JUMP;
1922     return true;
1923 }
1924 
1925 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1926 {
1927     TCGv_i64 dst, link;
1928 
1929     if (!dc_isar_feature(aa64_pauth, s)) {
1930         return false;
1931     }
1932     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1933 
1934     link = tcg_temp_new_i64();
1935     gen_pc_plus_diff(s, link, 4);
1936     if (s->gcs_en) {
1937         gen_add_gcs_record(s, link);
1938     }
1939     gen_a64_set_pc(s, dst);
1940     tcg_gen_mov_i64(cpu_reg(s, 30), link);
1941 
1942     set_btype_for_blr(s);
1943     s->base.is_jmp = DISAS_JUMP;
1944     return true;
1945 }
1946 
1947 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1948 {
1949 #ifdef CONFIG_USER_ONLY
1950     return false;
1951 #else
1952     TCGv_i64 dst;
1953 
1954     if (s->current_el == 0) {
1955         return false;
1956     }
1957     if (s->trap_eret) {
1958         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1959         return true;
1960     }
1961     dst = tcg_temp_new_i64();
1962     tcg_gen_ld_i64(dst, tcg_env,
1963                    offsetof(CPUARMState, elr_el[s->current_el]));
1964 
1965     translator_io_start(&s->base);
1966 
1967     gen_helper_exception_return(tcg_env, dst);
1968     /* Must exit loop to check un-masked IRQs */
1969     s->base.is_jmp = DISAS_EXIT;
1970     return true;
1971 #endif
1972 }
1973 
1974 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1975 {
1976 #ifdef CONFIG_USER_ONLY
1977     return false;
1978 #else
1979     TCGv_i64 dst;
1980 
1981     if (!dc_isar_feature(aa64_pauth, s)) {
1982         return false;
1983     }
1984     if (s->current_el == 0) {
1985         return false;
1986     }
1987     /* The FGT trap takes precedence over an auth trap. */
1988     if (s->trap_eret) {
1989         gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1990         return true;
1991     }
1992     dst = tcg_temp_new_i64();
1993     tcg_gen_ld_i64(dst, tcg_env,
1994                    offsetof(CPUARMState, elr_el[s->current_el]));
1995 
1996     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1997 
1998     translator_io_start(&s->base);
1999 
2000     gen_helper_exception_return(tcg_env, dst);
2001     /* Must exit loop to check un-masked IRQs */
2002     s->base.is_jmp = DISAS_EXIT;
2003     return true;
2004 #endif
2005 }
2006 
2007 static bool trans_NOP(DisasContext *s, arg_NOP *a)
2008 {
2009     return true;
2010 }
2011 
2012 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
2013 {
2014     /*
2015      * When running in MTTCG we don't generate jumps to the yield and
2016      * WFE helpers as it won't affect the scheduling of other vCPUs.
2017      * If we wanted to more completely model WFE/SEV so we don't busy
2018      * spin unnecessarily we would need to do something more involved.
2019      */
2020     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
2021         s->base.is_jmp = DISAS_YIELD;
2022     }
2023     return true;
2024 }
2025 
2026 static bool trans_WFI(DisasContext *s, arg_WFI *a)
2027 {
2028     s->base.is_jmp = DISAS_WFI;
2029     return true;
2030 }
2031 
2032 static bool trans_WFE(DisasContext *s, arg_WFI *a)
2033 {
2034     /*
2035      * When running in MTTCG we don't generate jumps to the yield and
2036      * WFE helpers as it won't affect the scheduling of other vCPUs.
2037      * If we wanted to more completely model WFE/SEV so we don't busy
2038      * spin unnecessarily we would need to do something more involved.
2039      */
2040     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
2041         s->base.is_jmp = DISAS_WFE;
2042     }
2043     return true;
2044 }
2045 
2046 static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
2047 {
2048     if (!dc_isar_feature(aa64_wfxt, s)) {
2049         return false;
2050     }
2051 
2052     /*
2053      * Because we need to pass the register value to the helper,
2054      * it's easier to emit the code now, unlike trans_WFI which
2055      * defers it to aarch64_tr_tb_stop(). That means we need to
2056      * check ss_active so that single-stepping a WFIT doesn't halt.
2057      */
2058     if (s->ss_active) {
2059         /* Act like a NOP under architectural singlestep */
2060         return true;
2061     }
2062 
2063     gen_a64_update_pc(s, 4);
2064     gen_helper_wfit(tcg_env, cpu_reg(s, a->rd));
2065     /* Go back to the main loop to check for interrupts */
2066     s->base.is_jmp = DISAS_EXIT;
2067     return true;
2068 }
2069 
2070 static bool trans_WFET(DisasContext *s, arg_WFET *a)
2071 {
2072     if (!dc_isar_feature(aa64_wfxt, s)) {
2073         return false;
2074     }
2075 
2076     /*
2077      * We rely here on our WFE implementation being a NOP, so we
2078      * don't need to do anything different to handle the WFET timeout
2079      * from what trans_WFE does.
2080      */
2081     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
2082         s->base.is_jmp = DISAS_WFE;
2083     }
2084     return true;
2085 }
2086 
2087 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
2088 {
2089     if (s->pauth_active) {
2090         gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
2091     }
2092     return true;
2093 }
2094 
2095 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
2096 {
2097     if (s->pauth_active) {
2098         gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2099     }
2100     return true;
2101 }
2102 
2103 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
2104 {
2105     if (s->pauth_active) {
2106         gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2107     }
2108     return true;
2109 }
2110 
2111 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
2112 {
2113     if (s->pauth_active) {
2114         gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2115     }
2116     return true;
2117 }
2118 
2119 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
2120 {
2121     if (s->pauth_active) {
2122         gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2123     }
2124     return true;
2125 }
2126 
2127 static bool trans_ESB(DisasContext *s, arg_ESB *a)
2128 {
2129     /* Without RAS, we must implement this as a NOP. */
2130     if (dc_isar_feature(aa64_ras, s)) {
2131         /*
2132          * QEMU does not have a source of physical SErrors,
2133          * so we are only concerned with virtual SErrors.
2134          * The pseudocode in the ARM for this case is
2135          *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
2136          *      AArch64.vESBOperation();
2137          * Most of the condition can be evaluated at translation time.
2138          * Test for EL2 present, and defer test for SEL2 to runtime.
2139          */
2140         if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
2141             gen_helper_vesb(tcg_env);
2142         }
2143     }
2144     return true;
2145 }
2146 
2147 static bool trans_GCSB(DisasContext *s, arg_GCSB *a)
2148 {
2149     if (dc_isar_feature(aa64_gcs, s)) {
2150         tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
2151     }
2152     return true;
2153 }
2154 
2155 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
2156 {
2157     if (s->pauth_active) {
2158         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2159     }
2160     return true;
2161 }
2162 
2163 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
2164 {
2165     if (s->pauth_active) {
2166         gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2167     }
2168     return true;
2169 }
2170 
2171 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
2172 {
2173     if (s->pauth_active) {
2174         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2175     }
2176     return true;
2177 }
2178 
2179 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
2180 {
2181     if (s->pauth_active) {
2182         gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2183     }
2184     return true;
2185 }
2186 
2187 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
2188 {
2189     if (s->pauth_active) {
2190         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2191     }
2192     return true;
2193 }
2194 
2195 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
2196 {
2197     if (s->pauth_active) {
2198         gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2199     }
2200     return true;
2201 }
2202 
2203 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
2204 {
2205     if (s->pauth_active) {
2206         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2207     }
2208     return true;
2209 }
2210 
2211 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
2212 {
2213     if (s->pauth_active) {
2214         gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2215     }
2216     return true;
2217 }
2218 
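     /*
      * CHKFEAT: each bit set in X16 queries whether the corresponding
      * feature is currently enabled, and is cleared if so.  Only GCS
      * (bit 0) is reported here; all other bits are left set.
      */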
2219 static bool trans_CHKFEAT(DisasContext *s, arg_CHKFEAT *a)
2220 {
2221     uint64_t feat_en = 0;
2222 
2223     if (s->gcs_en) {
2224         feat_en |= 1 << 0;
2225     }
2226     if (feat_en) {
2227         TCGv_i64 x16 = cpu_reg(s, 16);
2228         tcg_gen_andi_i64(x16, x16, ~feat_en);
2229     }
2230     return true;
2231 }
2232 
2233 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
2234 {
2235     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2236     return true;
2237 }
2238 
2239 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
2240 {
2241     /* We handle DSB and DMB the same way */
2242     TCGBar bar;
2243 
2244     switch (a->types) {
2245     case 1: /* MBReqTypes_Reads */
2246         bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
2247         break;
2248     case 2: /* MBReqTypes_Writes */
2249         bar = TCG_BAR_SC | TCG_MO_ST_ST;
2250         break;
2251     default: /* MBReqTypes_All */
2252         bar = TCG_BAR_SC | TCG_MO_ALL;
2253         break;
2254     }
2255     tcg_gen_mb(bar);
2256     return true;
2257 }
2258 
2259 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a)
2260 {
2261     if (!dc_isar_feature(aa64_xs, s)) {
2262         return false;
2263     }
2264     tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
2265     return true;
2266 }
2267 
2268 static bool trans_ISB(DisasContext *s, arg_ISB *a)
2269 {
2270     /*
2271      * We need to break the TB after this insn to execute
2272      * self-modifying code correctly and also to take
2273      * any pending interrupts immediately.
2274      */
2275     reset_btype(s);
2276     gen_goto_tb(s, 0, 4);
2277     return true;
2278 }
2279 
2280 static bool trans_SB(DisasContext *s, arg_SB *a)
2281 {
2282     if (!dc_isar_feature(aa64_sb, s)) {
2283         return false;
2284     }
2285     /*
2286      * TODO: There is no speculation barrier opcode for TCG;
2287      * MB and end the TB instead.
2288      */
2289     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
2290     gen_goto_tb(s, 0, 4);
2291     return true;
2292 }
2293 
2294 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
2295 {
2296     if (!dc_isar_feature(aa64_condm_4, s)) {
2297         return false;
2298     }
2299     tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
2300     return true;
2301 }
2302 
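     /*
      * XAFLAG (FEAT_FlagM2): convert a floating-point comparison result
      * from the external flag format back into the Arm NZCV encoding.
      */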
2303 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
2304 {
2305     TCGv_i32 z;
2306 
2307     if (!dc_isar_feature(aa64_condm_5, s)) {
2308         return false;
2309     }
2310 
2311     z = tcg_temp_new_i32();
2312 
2313     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
2314 
2315     /*
2316      * (!C & !Z) << 31
2317      * (!(C | Z)) << 31
2318      * ~((C | Z) << 31)
2319      * ~-(C | Z)
2320      * (C | Z) - 1
2321      */
2322     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
2323     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
2324 
2325     /* !(Z & C) */
2326     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
2327     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
2328 
2329     /* (!C & Z) << 31 -> -(Z & ~C) */
2330     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
2331     tcg_gen_neg_i32(cpu_VF, cpu_VF);
2332 
2333     /* C | Z */
2334     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
2335 
2336     return true;
2337 }
2338 
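     /*
      * AXFLAG (FEAT_FlagM2): convert Arm NZCV flags from a floating-point
      * comparison into the external flag format, zeroing N and V.
      */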
2339 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
2340 {
2341     if (!dc_isar_feature(aa64_condm_5, s)) {
2342         return false;
2343     }
2344 
2345     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
2346     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
2347 
2348     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
2349     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
2350 
2351     tcg_gen_movi_i32(cpu_NF, 0);
2352     tcg_gen_movi_i32(cpu_VF, 0);
2353 
2354     return true;
2355 }
2356 
2357 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
2358 {
2359     if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
2360         return false;
2361     }
2362     if (a->imm & 1) {
2363         set_pstate_bits(PSTATE_UAO);
2364     } else {
2365         clear_pstate_bits(PSTATE_UAO);
2366     }
2367     gen_rebuild_hflags(s);
2368     s->base.is_jmp = DISAS_TOO_MANY;
2369     return true;
2370 }
2371 
2372 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
2373 {
2374     if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
2375         return false;
2376     }
2377     if (a->imm & 1) {
2378         set_pstate_bits(PSTATE_PAN);
2379     } else {
2380         clear_pstate_bits(PSTATE_PAN);
2381     }
2382     gen_rebuild_hflags(s);
2383     s->base.is_jmp = DISAS_TOO_MANY;
2384     return true;
2385 }
2386 
2387 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
2388 {
2389     if (s->current_el == 0) {
2390         return false;
2391     }
2392     gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
2393     s->base.is_jmp = DISAS_TOO_MANY;
2394     return true;
2395 }
2396 
2397 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
2398 {
2399     if (!dc_isar_feature(aa64_ssbs, s)) {
2400         return false;
2401     }
2402     if (a->imm & 1) {
2403         set_pstate_bits(PSTATE_SSBS);
2404     } else {
2405         clear_pstate_bits(PSTATE_SSBS);
2406     }
2407     /* Don't need to rebuild hflags since SSBS is a nop */
2408     s->base.is_jmp = DISAS_TOO_MANY;
2409     return true;
2410 }
2411 
2412 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
2413 {
2414     if (!dc_isar_feature(aa64_dit, s)) {
2415         return false;
2416     }
2417     if (a->imm & 1) {
2418         set_pstate_bits(PSTATE_DIT);
2419     } else {
2420         clear_pstate_bits(PSTATE_DIT);
2421     }
2422     /* There's no need to rebuild hflags because DIT is a nop */
2423     s->base.is_jmp = DISAS_TOO_MANY;
2424     return true;
2425 }
2426 
2427 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2428 {
2429     if (dc_isar_feature(aa64_mte, s)) {
2430         /* Full MTE is enabled -- set the TCO bit as directed. */
2431         if (a->imm & 1) {
2432             set_pstate_bits(PSTATE_TCO);
2433         } else {
2434             clear_pstate_bits(PSTATE_TCO);
2435         }
2436         gen_rebuild_hflags(s);
2437         /* Many factors, including TCO, go into MTE_ACTIVE. */
2438         s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2439         return true;
2440     } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2441         /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
2442         return true;
2443     } else {
2444         /* Insn not present */
2445         return false;
2446     }
2447 }
2448 
2449 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2450 {
2451     gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2452     s->base.is_jmp = DISAS_TOO_MANY;
2453     return true;
2454 }
2455 
2456 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2457 {
2458     gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2459     /* Exit the cpu loop to re-evaluate pending IRQs. */
2460     s->base.is_jmp = DISAS_UPDATE_EXIT;
2461     return true;
2462 }
2463 
2464 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a)
2465 {
2466     if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) {
2467         return false;
2468     }
2469 
2470     if (a->imm == 0) {
2471         clear_pstate_bits(PSTATE_ALLINT);
2472     } else if (s->current_el > 1) {
2473         set_pstate_bits(PSTATE_ALLINT);
2474     } else {
2475         gen_helper_msr_set_allint_el1(tcg_env);
2476     }
2477 
2478     /* Exit the cpu loop to re-evaluate pending IRQs. */
2479     s->base.is_jmp = DISAS_UPDATE_EXIT;
2480     return true;
2481 }
2482 
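     /*
      * MSR (immediate) to SVCR: a->mask selects which of PSTATE.SM and
      * PSTATE.ZA are written.  The helper is only invoked when a selected
      * bit actually changes, since changing SM or ZA has wide side effects
      * on the SVE/SME register state.
      */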
2483 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2484 {
2485     if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2486         return false;
2487     }
2488     if (sme_access_check(s)) {
2489         int old = s->pstate_sm | (s->pstate_za << 1);
2490         int new = a->imm * 3;
2491 
2492         if ((old ^ new) & a->mask) {
2493             /* At least one bit changes. */
2494             gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2495                                 tcg_constant_i32(a->mask));
2496             s->base.is_jmp = DISAS_TOO_MANY;
2497         }
2498     }
2499     return true;
2500 }
2501 
2502 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2503 {
2504     TCGv_i32 tmp = tcg_temp_new_i32();
2505     TCGv_i32 nzcv = tcg_temp_new_i32();
2506 
2507     /* build bit 31, N */
2508     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2509     /* build bit 30, Z */
2510     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2511     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2512     /* build bit 29, C */
2513     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2514     /* build bit 28, V */
2515     tcg_gen_shri_i32(tmp, cpu_VF, 31);
2516     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2517     /* generate result */
2518     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2519 }
2520 
2521 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2522 {
2523     TCGv_i32 nzcv = tcg_temp_new_i32();
2524 
2525     /* take NZCV from R[t] */
2526     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2527 
2528     /* bit 31, N */
2529     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2530     /* bit 30, Z */
2531     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2532     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2533     /* bit 29, C */
2534     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2535     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2536     /* bit 28, V */
2537     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2538     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2539 }
2540 
2541 static void gen_sysreg_undef(DisasContext *s, bool isread,
2542                              uint8_t op0, uint8_t op1, uint8_t op2,
2543                              uint8_t crn, uint8_t crm, uint8_t rt)
2544 {
2545     /*
2546      * Generate code to emit an UNDEF with correct syndrome
2547      * information for a failed system register access.
2548      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2549      * but if FEAT_IDST is implemented then read accesses to registers
2550      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2551      * syndrome.
2552      */
2553     uint32_t syndrome;
2554 
2555     if (isread && dc_isar_feature(aa64_ids, s) &&
2556         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2557         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2558     } else {
2559         syndrome = syn_uncategorized();
2560     }
2561     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2562 }
2563 
2564 static void gen_gcspopm(DisasContext *s, int rt)
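     /*
      * GCSPOPM: pop a single entry from the Guarded Control Stack into
      * X[rt].  The entry must look like a return address (low 2 bits
      * clear), otherwise a GCS data check exception is raised.
      */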
2565 {
2566     TCGv_i64 gcspr = cpu_gcspr[s->current_el];
2567     int mmuidx = core_gcs_mem_index(s->mmu_idx);
2568     MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN);
2569     TCGv_i64 value = tcg_temp_new_i64();
2570     TCGLabel *fail_label =
2571         delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSPOPM, rt));
2572 
2573     /* The value at top-of-stack must have low 2 bits clear. */
2574     tcg_gen_qemu_ld_i64(value, clean_data_tbi(s, gcspr), mmuidx, mop);
2575     tcg_gen_brcondi_i64(TCG_COND_TSTNE, value, 3, fail_label);
2576 
2577     /* Complete the pop and return the value. */
2578     tcg_gen_addi_i64(gcspr, gcspr, 8);
2579     tcg_gen_mov_i64(cpu_reg(s, rt), value);
2580 }
2581 
2582 static void gen_gcspushx(DisasContext *s)
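     /*
      * GCSPUSHX: push an exception return record (LR, SPSR_ELx, ELR_ELx
      * and an exception token) onto the Guarded Control Stack, then
      * clear PSTATE.EXLOCK.
      */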
2583 {
2584     TCGv_i64 gcspr = cpu_gcspr[s->current_el];
2585     int spsr_idx = aarch64_banked_spsr_index(s->current_el);
2586     int spsr_off = offsetof(CPUARMState, banked_spsr[spsr_idx]);
2587     int elr_off = offsetof(CPUARMState, elr_el[s->current_el]);
2588     int mmuidx = core_gcs_mem_index(s->mmu_idx);
2589     MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN);
2590     TCGv_i64 addr = tcg_temp_new_i64();
2591     TCGv_i64 tmp = tcg_temp_new_i64();
2592 
2593     tcg_gen_addi_i64(addr, gcspr, -8);
2594     tcg_gen_qemu_st_i64(cpu_reg(s, 30), addr, mmuidx, mop);
2595 
2596     tcg_gen_ld_i64(tmp, tcg_env, spsr_off);
2597     tcg_gen_addi_i64(addr, addr, -8);
2598     tcg_gen_qemu_st_i64(tmp, addr, mmuidx, mop);
2599 
2600     tcg_gen_ld_i64(tmp, tcg_env, elr_off);
2601     tcg_gen_addi_i64(addr, addr, -8);
2602     tcg_gen_qemu_st_i64(tmp, addr, mmuidx, mop);
2603 
2604     tcg_gen_addi_i64(addr, addr, -8);
2605     tcg_gen_qemu_st_i64(tcg_constant_i64(0b1001), addr, mmuidx, mop);
2606 
2607     tcg_gen_mov_i64(gcspr, addr);
2608     clear_pstate_bits(PSTATE_EXLOCK);
2609 }
2610 
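     /*
      * GCSPOPCX: pop an exception return record and check that it matches
      * the current ELR_ELx, SPSR_ELx and LR, raising a GCS data check
      * exception on any mismatch, then set PSTATE.EXLOCK from
      * GCSCR_ELx.EXLOCKEN.
      */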
2611 static void gen_gcspopcx(DisasContext *s)
2612 {
2613     TCGv_i64 gcspr = cpu_gcspr[s->current_el];
2614     int spsr_idx = aarch64_banked_spsr_index(s->current_el);
2615     int spsr_off = offsetof(CPUARMState, banked_spsr[spsr_idx]);
2616     int elr_off = offsetof(CPUARMState, elr_el[s->current_el]);
2617     int gcscr_off = offsetof(CPUARMState, cp15.gcscr_el[s->current_el]);
2618     int pstate_off = offsetof(CPUARMState, pstate);
2619     int mmuidx = core_gcs_mem_index(s->mmu_idx);
2620     MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN);
2621     TCGv_i64 addr = tcg_temp_new_i64();
2622     TCGv_i64 tmp1 = tcg_temp_new_i64();
2623     TCGv_i64 tmp2 = tcg_temp_new_i64();
2624     TCGLabel *fail_label =
2625         delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSPOPCX, 31));
2626 
2627     /* The value at top-of-stack must be an exception token. */
2628     tcg_gen_qemu_ld_i64(tmp1, gcspr, mmuidx, mop);
2629     tcg_gen_brcondi_i64(TCG_COND_NE, tmp1, 0b1001, fail_label);
2630 
2631     /* Validate in turn, ELR ... */
2632     tcg_gen_addi_i64(addr, gcspr, 8);
2633     tcg_gen_qemu_ld_i64(tmp1, addr, mmuidx, mop);
2634     tcg_gen_ld_i64(tmp2, tcg_env, elr_off);
2635     tcg_gen_brcond_i64(TCG_COND_NE, tmp1, tmp2, fail_label);
2636 
2637     /* ... SPSR ... */
2638     tcg_gen_addi_i64(addr, addr, 8);
2639     tcg_gen_qemu_ld_i64(tmp1, addr, mmuidx, mop);
2640     tcg_gen_ld_i64(tmp2, tcg_env, spsr_off);
2641     tcg_gen_brcond_i64(TCG_COND_NE, tmp1, tmp2, fail_label);
2642 
2643     /* ... and LR. */
2644     tcg_gen_addi_i64(addr, addr, 8);
2645     tcg_gen_qemu_ld_i64(tmp1, addr, mmuidx, mop);
2646     tcg_gen_brcond_i64(TCG_COND_NE, tmp1, cpu_reg(s, 30), fail_label);
2647 
2648     /* Writeback stack pointer after pop. */
2649     tcg_gen_addi_i64(gcspr, addr, 8);
2650 
2651     /* PSTATE.EXLOCK = GetCurrentEXLOCKEN(). */
2652     tcg_gen_ld_i64(tmp1, tcg_env, gcscr_off);
2653     tcg_gen_ld_i64(tmp2, tcg_env, pstate_off);
2654     tcg_gen_shri_i64(tmp1, tmp1, ctz64(GCSCR_EXLOCKEN));
2655     tcg_gen_deposit_i64(tmp2, tmp2, tmp1, ctz64(PSTATE_EXLOCK), 1);
2656     tcg_gen_st_i64(tmp2, tcg_env, pstate_off);
2657 }
2658 
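     /*
      * GCSPOPX: pop an exception return record, checking only that the
      * top of stack holds an exception token.
      */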
2659 static void gen_gcspopx(DisasContext *s)
2660 {
2661     TCGv_i64 gcspr = cpu_gcspr[s->current_el];
2662     int mmuidx = core_gcs_mem_index(s->mmu_idx);
2663     MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN);
2664     TCGv_i64 addr = tcg_temp_new_i64();
2665     TCGv_i64 tmp = tcg_temp_new_i64();
2666     TCGLabel *fail_label =
2667         delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSPOPX, 31));
2668 
2669     /* The value at top-of-stack must be an exception token. */
2670     tcg_gen_qemu_ld_i64(tmp, gcspr, mmuidx, mop);
2671     tcg_gen_brcondi_i64(TCG_COND_NE, tmp, 0b1001, fail_label);
2672 
2673     /*
2674      * The other three values in the exception return record
2675      * are ignored, but are loaded anyway to raise faults.
2676      */
2677     tcg_gen_addi_i64(addr, gcspr, 8);
2678     tcg_gen_qemu_ld_i64(tmp, addr, mmuidx, mop);
2679     tcg_gen_addi_i64(addr, addr, 8);
2680     tcg_gen_qemu_ld_i64(tmp, addr, mmuidx, mop);
2681     tcg_gen_addi_i64(addr, addr, 8);
2682     tcg_gen_qemu_ld_i64(tmp, addr, mmuidx, mop);
2683     tcg_gen_addi_i64(gcspr, addr, 8);
2684 }
2685 
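     /*
      * GCSSS1: first half of a GCS stack switch.  Atomically replace the
      * valid cap at the top of the new stack (addressed by X[rt]) with an
      * in-progress cap recording the old stack pointer, then point GCSPR
      * at the new stack.
      */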
2686 static void gen_gcsss1(DisasContext *s, int rt)
2687 {
2688     TCGv_i64 gcspr = cpu_gcspr[s->current_el];
2689     int mmuidx = core_gcs_mem_index(s->mmu_idx);
2690     MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN);
2691     TCGv_i64 inptr = cpu_reg(s, rt);
2692     TCGv_i64 cmp = tcg_temp_new_i64();
2693     TCGv_i64 new = tcg_temp_new_i64();
2694     TCGv_i64 old = tcg_temp_new_i64();
2695     TCGLabel *fail_label =
2696         delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSSS1, rt));
2697 
2698     /* Compute the valid cap entry that the new stack must have. */
2699     tcg_gen_deposit_i64(cmp, inptr, tcg_constant_i64(1), 0, 12);
2700     /* Compute the in-progress cap entry for the old stack. */
2701     tcg_gen_deposit_i64(new, gcspr, tcg_constant_i64(5), 0, 3);
2702 
2703     /* Swap the valid cap with the in-progress cap. */
2704     tcg_gen_atomic_cmpxchg_i64(old, inptr, cmp, new, mmuidx, mop);
2705     tcg_gen_brcond_i64(TCG_COND_NE, old, cmp, fail_label);
2706 
2707     /* The new stack had a valid cap: change gcspr. */
2708     tcg_gen_andi_i64(gcspr, inptr, ~7);
2709 }
2710 
2711 static void gen_gcsss2(DisasContext *s, int rt)
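     /*
      * GCSSS2: second half of a GCS stack switch.  Consume the in-progress
      * cap left on the new stack by GCSSS1, push a valid cap onto the old
      * stack, and return a pointer to that old-stack cap in X[rt].
      */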
2712 {
2713     TCGv_i64 gcspr = cpu_gcspr[s->current_el];
2714     int mmuidx = core_gcs_mem_index(s->mmu_idx);
2715     MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN);
2716     TCGv_i64 outptr = tcg_temp_new_i64();
2717     TCGv_i64 tmp = tcg_temp_new_i64();
2718     TCGLabel *fail_label =
2719         delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSSS2, rt));
2720 
2721     /* Validate that the new stack has an in-progress cap. */
2722     tcg_gen_qemu_ld_i64(outptr, gcspr, mmuidx, mop);
2723     tcg_gen_andi_i64(tmp, outptr, 7);
2724     tcg_gen_brcondi_i64(TCG_COND_NE, tmp, 5, fail_label);
2725 
2726     /* Push a valid cap to the old stack. */
2727     tcg_gen_andi_i64(outptr, outptr, ~7);
2728     tcg_gen_addi_i64(outptr, outptr, -8);
2729     tcg_gen_deposit_i64(tmp, outptr, tcg_constant_i64(1), 0, 12);
2730     tcg_gen_qemu_st_i64(tmp, outptr, mmuidx, mop);
2731     tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
2732 
2733     /* Pop the in-progress cap from the new stack. */
2734     tcg_gen_addi_i64(gcspr, gcspr, 8);
2735 
2736     /* Return a pointer to the old stack cap. */
2737     tcg_gen_mov_i64(cpu_reg(s, rt), outptr);
2738 }
2739 
2740 /*
2741  * Look up @key, returning the cpreg, which must exist.
2742  * Additionally, the new cpreg must also be accessible.
2743  */
2744 static const ARMCPRegInfo *
2745 redirect_cpreg(DisasContext *s, uint32_t key, bool isread)
2746 {
2747     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2748     assert(ri);
2749     assert(cp_access_ok(s->current_el, ri, isread));
2750     return ri;
2751 }
2752 
2753 /* MRS - move from system register
2754  * MSR (register) - move to system register
2755  * SYS
2756  * SYSL
2757  * These are all essentially the same insn in 'read' and 'write'
2758  * versions, with varying op0 fields.
2759  */
2760 static void handle_sys(DisasContext *s, bool isread,
2761                        unsigned int op0, unsigned int op1, unsigned int op2,
2762                        unsigned int crn, unsigned int crm, unsigned int rt)
2763 {
2764     uint32_t key = ENCODE_AA64_CP_REG(op0, op1, crn, crm, op2);
2765     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2766     bool need_exit_tb = false;
2767     bool nv_trap_to_el2 = false;
2768     bool nv_redirect_reg = false;
2769     bool skip_fp_access_checks = false;
2770     bool nv2_mem_redirect = false;
2771     TCGv_ptr tcg_ri = NULL;
2772     TCGv_i64 tcg_rt;
2773     uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2774 
2775     if (crn == 11 || crn == 15) {
2776         /*
2777          * Check for TIDCP trap, which must take precedence over
2778          * the UNDEF for "no such register" etc.
2779          */
2780         switch (s->current_el) {
2781         case 0:
2782             if (dc_isar_feature(aa64_tidcp1, s)) {
2783                 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
2784             }
2785             break;
2786         case 1:
2787             gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
2788             break;
2789         }
2790     }
2791 
2792     if (!ri) {
2793         /* Unknown register; this might be a guest error or a QEMU
2794          * unimplemented feature.
2795          */
2796         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2797                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2798                       isread ? "read" : "write", op0, op1, crn, crm, op2);
2799         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2800         return;
2801     }
2802 
2803     if (s->nv2 && ri->nv2_redirect_offset) {
2804         /*
2805          * Some registers always redirect to memory; some only do so if
2806          * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
2807          * pairs which share an offset; see the table in R_CSRPQ).
2808          */
2809         if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
2810             nv2_mem_redirect = s->nv1;
2811         } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
2812             nv2_mem_redirect = !s->nv1;
2813         } else {
2814             nv2_mem_redirect = true;
2815         }
2816     }
2817 
2818     /* Check access permissions */
2819     if (!cp_access_ok(s->current_el, ri, isread)) {
2820         /*
2821          * FEAT_NV/NV2 handling does not do the usual FP access checks
2822          * for registers only accessible at EL2 (though it *does* do them
2823          * for registers accessible at EL1).
2824          */
2825         skip_fp_access_checks = true;
2826         if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
2827             /*
2828              * This is one of the few EL2 registers which should redirect
2829              * to the equivalent EL1 register. We do that after running
2830              * the EL2 register's accessfn.
2831              */
2832             nv_redirect_reg = true;
2833             assert(!nv2_mem_redirect);
2834         } else if (nv2_mem_redirect) {
2835             /*
2836              * NV2 redirect-to-memory takes precedence over trap to EL2 or
2837              * UNDEF to EL1.
2838              */
2839         } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
2840             /*
2841              * This register / instruction exists and is an EL2 register, so
2842              * we must trap to EL2 if accessed in nested virtualization EL1
2843              * instead of UNDEFing. We'll do that after the usual access checks.
2844              * (This makes a difference only for a couple of registers like
2845              * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
2846              * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
2847              * an accessfn which does nothing when called from EL1, because
2848              * the trap-to-EL3 controls which would apply to that register
2849              * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
2850              */
2851             nv_trap_to_el2 = true;
2852         } else {
2853             gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2854             return;
2855         }
2856     }
2857 
2858     if (ri->vhe_redir_to_el2 && s->current_el == 2 && s->e2h) {
2859         /*
2860          * This is one of the FOO_EL1 registers which redirect to FOO_EL2
2861          * from EL2 when HCR_EL2.E2H is set.
2862          */
2863         key = ri->vhe_redir_to_el2;
2864         ri = redirect_cpreg(s, key, isread);
2865     } else if (ri->vhe_redir_to_el01 && s->current_el >= 2) {
2866         /*
2867          * This is one of the FOO_EL12 or FOO_EL02 registers.
2868          * With !E2H, they all UNDEF.
2869          * With E2H, from EL2 or EL3, they redirect to FOO_EL1/FOO_EL0.
2870          */
2871         if (!s->e2h) {
2872             gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2873             return;
2874         }
2875         key = ri->vhe_redir_to_el01;
2876         ri = redirect_cpreg(s, key, isread);
2877     }
2878 
2879     if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2880         /* Emit code to perform further access permissions checks at
2881          * runtime; this may result in an exception.
2882          */
2883         gen_a64_update_pc(s, 0);
2884         tcg_ri = tcg_temp_new_ptr();
2885         gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
2886                                        tcg_constant_i32(key),
2887                                        tcg_constant_i32(syndrome),
2888                                        tcg_constant_i32(isread));
2889     } else if (ri->type & ARM_CP_RAISES_EXC) {
2890         /*
2891          * The readfn or writefn might raise an exception;
2892          * synchronize the CPU state in case it does.
2893          */
2894         gen_a64_update_pc(s, 0);
2895     }
2896 
2897     if (!skip_fp_access_checks) {
2898         if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2899             return;
2900         } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2901             return;
2902         } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2903             return;
2904         }
2905     }
2906 
2907     if (nv_trap_to_el2) {
2908         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2909         return;
2910     }
2911 
2912     if (nv_redirect_reg) {
2913         /*
2914          * FEAT_NV2 redirection of an EL2 register to an EL1 register.
2915          * Conveniently in all cases the encoding of the EL1 register is
2916          * identical to the EL2 register except that opc1 is 0.
2917          * Get the reginfo for the EL1 register to use for the actual access.
2918          * We don't use the EL1 register's access function, and
2919          * fine-grained-traps on EL1 also do not apply here.
2920          */
2921         key = ENCODE_AA64_CP_REG(op0, 0, crn, crm, op2);
2922         ri = redirect_cpreg(s, key, isread);
2923         /*
2924          * We might not have done an update_pc earlier, so check we don't
2925          * need it. We could support this in future if necessary.
2926          */
2927         assert(!(ri->type & ARM_CP_RAISES_EXC));
2928     }
2929 
2930     if (nv2_mem_redirect) {
2931         /*
2932          * This system register is being redirected into an EL2 memory access.
2933          * This means it is not an IO operation, doesn't change hflags,
2934          * and need not end the TB, because it has no side effects.
2935          *
2936          * The access is 64-bit single copy atomic, guaranteed aligned because
2937          * of the definition of VCNR_EL2. Its endianness depends on
2938          * SCTLR_EL2.EE, not on the data endianness of EL1.
2939          * It is done under either the EL2 translation regime or the EL2&0
2940          * translation regime, depending on HCR_EL2.E2H. It behaves as if
2941          * PSTATE.PAN is 0.
2942          */
2943         TCGv_i64 ptr = tcg_temp_new_i64();
2944         MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
2945         ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
2946         int memidx = arm_to_core_mmu_idx(armmemidx);
2947         uint32_t syn;
2948 
2949         mop |= (s->nv2_mem_be ? MO_BE : MO_LE);
2950 
2951         tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
2952         tcg_gen_addi_i64(ptr, ptr,
2953                          (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
2954         tcg_rt = cpu_reg(s, rt);
2955 
2956         syn = syn_data_abort_vncr(0, !isread, 0);
2957         disas_set_insn_syndrome(s, syn);
2958         if (isread) {
2959             tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
2960         } else {
2961             tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
2962         }
2963         return;
2964     }
2965 
2966     /* Handle special cases first */
2967     switch (ri->type & ARM_CP_SPECIAL_MASK) {
2968     case 0:
2969         break;
2970     case ARM_CP_NOP:
2971         return;
2972     case ARM_CP_NZCV:
2973         tcg_rt = cpu_reg(s, rt);
2974         if (isread) {
2975             gen_get_nzcv(tcg_rt);
2976         } else {
2977             gen_set_nzcv(tcg_rt);
2978         }
2979         return;
2980     case ARM_CP_CURRENTEL:
2981     {
2982         /*
2983          * Reads as current EL value from pstate, which is
2984          * guaranteed to be constant by the tb flags.
2985          * For nested virt we should report EL2.
2986          */
2987         int el = s->nv ? 2 : s->current_el;
2988         tcg_rt = cpu_reg(s, rt);
2989         tcg_gen_movi_i64(tcg_rt, el << 2);
2990         return;
2991     }
2992     case ARM_CP_DC_ZVA:
2993         /* Writes clear the aligned block of memory which rt points into. */
2994         if (s->mte_active[0]) {
2995             int desc = 0;
2996 
2997             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2998             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2999             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
3000 
3001             tcg_rt = tcg_temp_new_i64();
3002             gen_helper_mte_check_zva(tcg_rt, tcg_env,
3003                                      tcg_constant_i32(desc), cpu_reg(s, rt));
3004         } else {
3005             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
3006         }
3007         gen_helper_dc_zva(tcg_env, tcg_rt);
3008         return;
3009     case ARM_CP_DC_GVA:
3010         {
3011             TCGv_i64 clean_addr, tag;
3012 
3013             /*
3014              * DC_GVA, like DC_ZVA, requires that we supply the original
3015              * pointer for an invalid page.  Probe that address first.
3016              */
3017             tcg_rt = cpu_reg(s, rt);
3018             clean_addr = clean_data_tbi(s, tcg_rt);
3019             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
3020 
3021             if (s->ata[0]) {
3022                 /* Extract the tag from the register to match STZGM.  */
3023                 tag = tcg_temp_new_i64();
3024                 tcg_gen_shri_i64(tag, tcg_rt, 56);
3025                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
3026             }
3027         }
3028         return;
3029     case ARM_CP_DC_GZVA:
3030         {
3031             TCGv_i64 clean_addr, tag;
3032 
3033             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
3034             tcg_rt = cpu_reg(s, rt);
3035             clean_addr = clean_data_tbi(s, tcg_rt);
3036             gen_helper_dc_zva(tcg_env, clean_addr);
3037 
3038             if (s->ata[0]) {
3039                 /* Extract the tag from the register to match STZGM.  */
3040                 tag = tcg_temp_new_i64();
3041                 tcg_gen_shri_i64(tag, tcg_rt, 56);
3042                 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
3043             }
3044         }
3045         return;
3046     case ARM_CP_GCSPUSHM:
3047         if (s->gcs_en) {
3048             gen_add_gcs_record(s, cpu_reg(s, rt));
3049         }
3050         return;
3051     case ARM_CP_GCSPOPM:
3052         /* Note that X[rt] is unchanged if !GCSEnabled. */
3053         if (s->gcs_en) {
3054             gen_gcspopm(s, rt);
3055         }
3056         return;
3057     case ARM_CP_GCSPUSHX:
3058         /* Choose the CONSTRAINED UNPREDICTABLE option of UNDEF. */
3059         if (rt != 31) {
3060             unallocated_encoding(s);
3061         } else if (s->gcs_en) {
3062             gen_gcspushx(s);
3063         }
3064         return;
3065     case ARM_CP_GCSPOPCX:
3066         /* Choose the CONSTRAINED UNPREDICTABLE option of UNDEF. */
3067         if (rt != 31) {
3068             unallocated_encoding(s);
3069         } else if (s->gcs_en) {
3070             gen_gcspopcx(s);
3071         }
3072         return;
3073     case ARM_CP_GCSPOPX:
3074         /* Choose the CONSTRAINED UNPREDICTABLE option of UNDEF. */
3075         if (rt != 31) {
3076             unallocated_encoding(s);
3077         } else if (s->gcs_en) {
3078             gen_gcspopx(s);
3079         }
3080         return;
3081     case ARM_CP_GCSSS1:
3082         if (s->gcs_en) {
3083             gen_gcsss1(s, rt);
3084         }
3085         return;
3086     case ARM_CP_GCSSS2:
3087         if (s->gcs_en) {
3088             gen_gcsss2(s, rt);
3089         }
3090         return;
3091     default:
3092         g_assert_not_reached();
3093     }
3094 
3095     if (ri->type & ARM_CP_IO) {
3096         /* I/O operations must end the TB here (whether read or write) */
3097         need_exit_tb = translator_io_start(&s->base);
3098     }
3099 
3100     tcg_rt = cpu_reg(s, rt);
3101 
3102     if (isread) {
3103         if (ri->type & ARM_CP_CONST) {
3104             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
3105         } else if (ri->readfn) {
3106             if (!tcg_ri) {
3107                 tcg_ri = gen_lookup_cp_reg(key);
3108             }
3109             gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
3110         } else {
3111             tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
3112         }
3113     } else {
3114         if (ri->type & ARM_CP_CONST) {
3115             /* If not forbidden by access permissions, treat as WI */
3116             return;
3117         } else if (ri->writefn) {
3118             if (!tcg_ri) {
3119                 tcg_ri = gen_lookup_cp_reg(key);
3120             }
3121             gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
3122         } else {
3123             tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
3124         }
3125     }
3126 
3127     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
3128         /*
3129          * A write to any coprocessor register that ends a TB
3130          * must rebuild the hflags for the next TB.
3131          */
3132         gen_rebuild_hflags(s);
3133         /*
3134          * We default to ending the TB on a coprocessor register write,
3135          * but allow this to be suppressed by the register definition
3136          * (usually only necessary to work around guest bugs).
3137          */
3138         need_exit_tb = true;
3139     }
3140     if (need_exit_tb) {
3141         s->base.is_jmp = DISAS_UPDATE_EXIT;
3142     }
3143 }
3144 
3145 static bool trans_SYS(DisasContext *s, arg_SYS *a)
3146 {
3147     handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
3148     return true;
3149 }
3150 
3151 static bool trans_SVC(DisasContext *s, arg_i *a)
3152 {
3153     /*
3154      * For SVC, HVC and SMC we advance the single-step state
3155      * machine before taking the exception. This is architecturally
3156      * mandated, to ensure that single-stepping a system call
3157      * instruction works properly.
3158      */
3159     uint32_t syndrome = syn_aa64_svc(a->imm);
3160     if (s->fgt_svc) {
3161         gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
3162         return true;
3163     }
3164     gen_ss_advance(s);
3165     gen_exception_insn(s, 4, EXCP_SWI, syndrome);
3166     return true;
3167 }
3168 
3169 static bool trans_HVC(DisasContext *s, arg_i *a)
3170 {
3171     int target_el = s->current_el == 3 ? 3 : 2;
3172 
3173     if (s->current_el == 0) {
3174         unallocated_encoding(s);
3175         return true;
3176     }
3177     /*
3178      * The pre HVC helper handles cases when HVC gets trapped
3179      * as an undefined insn by runtime configuration.
3180      */
3181     gen_a64_update_pc(s, 0);
3182     gen_helper_pre_hvc(tcg_env);
3183     /* Architecture requires ss advance before we do the actual work */
3184     gen_ss_advance(s);
3185     gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
3186     return true;
3187 }
3188 
3189 static bool trans_SMC(DisasContext *s, arg_i *a)
3190 {
3191     if (s->current_el == 0) {
3192         unallocated_encoding(s);
3193         return true;
3194     }
3195     gen_a64_update_pc(s, 0);
3196     gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
3197     /* Architecture requires ss advance before we do the actual work */
3198     gen_ss_advance(s);
3199     gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
3200     return true;
3201 }
3202 
3203 static bool trans_BRK(DisasContext *s, arg_i *a)
3204 {
3205     gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
3206     return true;
3207 }
3208 
3209 static bool trans_HLT(DisasContext *s, arg_i *a)
3210 {
3211     /*
3212      * HLT. This has two purposes.
3213      * Architecturally, it is an external halting debug instruction.
3214      * Since QEMU doesn't implement external debug, we behave as the
3215      * architecture requires when halting debug is disabled: it will UNDEF.
3216      * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
3217      */
3218     if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
3219         gen_exception_internal_insn(s, EXCP_SEMIHOST);
3220     } else {
3221         unallocated_encoding(s);
3222     }
3223     return true;
3224 }
3225 
3226 /*
3227  * Load/Store exclusive instructions are implemented by remembering
3228  * the value/address loaded, and seeing if these are the same
3229  * when the store is performed. This is not actually the architecturally
3230  * mandated semantics, but it works for typical guest code sequences
3231  * and avoids having to monitor regular stores.
3232  *
3233  * The store exclusive uses the atomic cmpxchg primitives to avoid
3234  * races in multi-threaded linux-user and when MTTCG softmmu is
3235  * enabled.
3236  */
3237 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
3238                                int size, bool is_pair)
3239 {
3240     int idx = get_mem_index(s);
3241     TCGv_i64 dirty_addr, clean_addr;
3242     MemOp memop = check_atomic_align(s, rn, size + is_pair);
3243 
3244     s->is_ldex = true;
3245     dirty_addr = cpu_reg_sp(s, rn);
3246     clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
3247 
3248     g_assert(size <= 3);
3249     if (is_pair) {
3250         g_assert(size >= 2);
3251         if (size == 2) {
3252             tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
3253             if (s->be_data == MO_LE) {
3254                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
3255                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
3256             } else {
3257                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
3258                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
3259             }
3260         } else {
3261             TCGv_i128 t16 = tcg_temp_new_i128();
3262 
3263             tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
3264 
3265             if (s->be_data == MO_LE) {
3266                 tcg_gen_extr_i128_i64(cpu_exclusive_val,
3267                                       cpu_exclusive_high, t16);
3268             } else {
3269                 tcg_gen_extr_i128_i64(cpu_exclusive_high,
3270                                       cpu_exclusive_val, t16);
3271             }
3272             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
3273             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
3274         }
3275     } else {
3276         tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
3277         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
3278     }
3279     tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
3280 }
3281 
3282 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
3283                                 int rn, int size, int is_pair)
3284 {
3285     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
3286      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
3287      *     [addr] = {Rt};
3288      *     if (is_pair) {
3289      *         [addr + datasize] = {Rt2};
3290      *     }
3291      *     {Rd} = 0;
3292      * } else {
3293      *     {Rd} = 1;
3294      * }
3295      * env->exclusive_addr = -1;
3296      */
3297     TCGLabel *fail_label = gen_new_label();
3298     TCGLabel *done_label = gen_new_label();
3299     TCGv_i64 tmp, clean_addr;
3300     MemOp memop;
3301 
3302     /*
3303      * FIXME: We are out of spec here.  We have recorded only the address
3304      * from load_exclusive, not the entire range, and we assume that the
3305      * size of the access on both sides match.  The architecture allows the
3306      * store to be smaller than the load, so long as the stored bytes are
3307      * within the range recorded by the load.
3308      */
3309 
3310     /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
3311     clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
3312     tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
3313 
3314     /*
3315      * The write, and any associated faults, only happen if the virtual
3316      * and physical addresses pass the exclusive monitor check.  These
3317      * faults are exceedingly unlikely, because normally the guest uses
3318      * the exact same address register for the load_exclusive, and we
3319      * would have recognized these faults there.
3320      *
3321      * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
3322      * unaligned 4-byte write within the range of an aligned 8-byte load.
3323      * With LSE2, the store would need to cross a 16-byte boundary when the
3324      * load did not, which would mean the store is outside the range
3325      * recorded for the monitor, and so would have failed the monitor check
3326      * above had the full range been recorded.  For now, we assume no size
3327      * change and retain MO_ALIGN to let tcg know what load_exclusive checked.
3328      *
3329      * It is possible to trigger an MTE fault, by performing the load with
3330      * a virtual address with a valid tag and performing the store with the
3331      * same virtual address and a different invalid tag.
3332      */
3333     memop = size + is_pair;
3334     if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
3335         memop |= MO_ALIGN;
3336     }
3337     memop = finalize_memop(s, memop);
3338     gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3339 
3340     tmp = tcg_temp_new_i64();
3341     if (is_pair) {
3342         if (size == 2) {
3343             if (s->be_data == MO_LE) {
3344                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
3345             } else {
3346                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
3347             }
3348             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
3349                                        cpu_exclusive_val, tmp,
3350                                        get_mem_index(s), memop);
3351             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
3352         } else {
3353             TCGv_i128 t16 = tcg_temp_new_i128();
3354             TCGv_i128 c16 = tcg_temp_new_i128();
3355             TCGv_i64 a, b;
3356 
3357             if (s->be_data == MO_LE) {
3358                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
3359                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
3360                                         cpu_exclusive_high);
3361             } else {
3362                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
3363                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
3364                                         cpu_exclusive_val);
3365             }
3366 
3367             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
3368                                         get_mem_index(s), memop);
3369 
3370             a = tcg_temp_new_i64();
3371             b = tcg_temp_new_i64();
3372             if (s->be_data == MO_LE) {
3373                 tcg_gen_extr_i128_i64(a, b, t16);
3374             } else {
3375                 tcg_gen_extr_i128_i64(b, a, t16);
3376             }
3377 
3378             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
3379             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
3380             tcg_gen_or_i64(tmp, a, b);
3381 
3382             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
3383         }
3384     } else {
3385         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
3386                                    cpu_reg(s, rt), get_mem_index(s), memop);
3387         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
3388     }
3389     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
3390     tcg_gen_br(done_label);
3391 
3392     gen_set_label(fail_label);
3393     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
3394     gen_set_label(done_label);
3395     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
3396 }
3397 
3398 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
3399                                  int rn, int size)
3400 {
3401     TCGv_i64 tcg_rs = cpu_reg(s, rs);
3402     TCGv_i64 tcg_rt = cpu_reg(s, rt);
3403     int memidx = get_mem_index(s);
3404     TCGv_i64 clean_addr;
3405     MemOp memop;
3406 
3407     if (rn == 31) {
3408         gen_check_sp_alignment(s);
3409     }
3410     memop = check_atomic_align(s, rn, size);
3411     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3412     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
3413                                memidx, memop);
3414 }
3415 
3416 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
3417                                       int rn, int size)
3418 {
3419     TCGv_i64 s1 = cpu_reg(s, rs);
3420     TCGv_i64 s2 = cpu_reg(s, rs + 1);
3421     TCGv_i64 t1 = cpu_reg(s, rt);
3422     TCGv_i64 t2 = cpu_reg(s, rt + 1);
3423     TCGv_i64 clean_addr;
3424     int memidx = get_mem_index(s);
3425     MemOp memop;
3426 
3427     if (rn == 31) {
3428         gen_check_sp_alignment(s);
3429     }
3430 
3431     /* This is a single atomic access, despite the "pair". */
3432     memop = check_atomic_align(s, rn, size + 1);
3433     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3434 
3435     if (size == 2) {
3436         TCGv_i64 cmp = tcg_temp_new_i64();
3437         TCGv_i64 val = tcg_temp_new_i64();
3438 
3439         if (s->be_data == MO_LE) {
3440             tcg_gen_concat32_i64(val, t1, t2);
3441             tcg_gen_concat32_i64(cmp, s1, s2);
3442         } else {
3443             tcg_gen_concat32_i64(val, t2, t1);
3444             tcg_gen_concat32_i64(cmp, s2, s1);
3445         }
3446 
3447         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
3448 
3449         if (s->be_data == MO_LE) {
3450             tcg_gen_extr32_i64(s1, s2, cmp);
3451         } else {
3452             tcg_gen_extr32_i64(s2, s1, cmp);
3453         }
3454     } else {
3455         TCGv_i128 cmp = tcg_temp_new_i128();
3456         TCGv_i128 val = tcg_temp_new_i128();
3457 
3458         if (s->be_data == MO_LE) {
3459             tcg_gen_concat_i64_i128(val, t1, t2);
3460             tcg_gen_concat_i64_i128(cmp, s1, s2);
3461         } else {
3462             tcg_gen_concat_i64_i128(val, t2, t1);
3463             tcg_gen_concat_i64_i128(cmp, s2, s1);
3464         }
3465 
3466         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
3467 
3468         if (s->be_data == MO_LE) {
3469             tcg_gen_extr_i128_i64(s1, s2, cmp);
3470         } else {
3471             tcg_gen_extr_i128_i64(s2, s1, cmp);
3472         }
3473     }
3474 }
3475 
3476 /*
3477  * Compute the ISS.SF bit for syndrome information if an exception
3478  * is taken on a load or store. This indicates whether the instruction
3479  * is accessing a 32-bit or 64-bit register. This logic is derived
3480  * from the ARMv8 specs for LDR (Shared decode for all encodings).
3481  */
3482 static bool ldst_iss_sf(int size, bool sign, bool ext)
3483 {
3485     if (sign) {
3486         /*
3487          * Signed loads are 64 bit results if we are not going to
3488          * do a zero-extend from 32 to 64 after the load.
3489          * (For a store, sign and ext are always false.)
3490          */
3491         return !ext;
3492     } else {
3493         /* Unsigned loads/stores work at the specified size */
3494         return size == MO_64;
3495     }
3496 }
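/*
 * For example (illustrative, derived from the rule above): LDR Xt reports
 * SF = 1 and LDR Wt reports SF = 0, since unsigned accesses follow the
 * access size; LDRSB Xt reports SF = 1 while LDRSB Wt reports SF = 0,
 * since for signed loads SF follows the destination register width.
 */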
3497 
3498 static bool trans_STXR(DisasContext *s, arg_stxr *a)
3499 {
3500     if (a->rn == 31) {
3501         gen_check_sp_alignment(s);
3502     }
3503     if (a->lasr) {
3504         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3505     }
3506     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
3507     return true;
3508 }
3509 
3510 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
3511 {
3512     if (a->rn == 31) {
3513         gen_check_sp_alignment(s);
3514     }
3515     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
3516     if (a->lasr) {
3517         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3518     }
3519     return true;
3520 }
3521 
3522 static bool trans_STLR(DisasContext *s, arg_stlr *a)
3523 {
3524     TCGv_i64 clean_addr;
3525     MemOp memop;
3526     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3527 
3528     /*
3529      * StoreLORelease is the same as Store-Release for QEMU, but
3530      * needs the feature-test.
3531      */
3532     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3533         return false;
3534     }
3535     /* Generate ISS for non-exclusive accesses including LASR.  */
3536     if (a->rn == 31) {
3537         gen_check_sp_alignment(s);
3538     }
3539     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3540     memop = check_ordered_align(s, a->rn, 0, true, a->sz);
3541     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3542                                 true, a->rn != 31, memop);
3543     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
3544               iss_sf, a->lasr);
3545     return true;
3546 }
3547 
3548 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
3549 {
3550     TCGv_i64 clean_addr;
3551     MemOp memop;
3552     bool iss_sf = ldst_iss_sf(a->sz, false, false);
3553 
3554     /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
3555     if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3556         return false;
3557     }
3558     /* Generate ISS for non-exclusive accesses including LASR.  */
3559     if (a->rn == 31) {
3560         gen_check_sp_alignment(s);
3561     }
3562     memop = check_ordered_align(s, a->rn, 0, false, a->sz);
3563     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3564                                 false, a->rn != 31, memop);
3565     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
3566               a->rt, iss_sf, a->lasr);
3567     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3568     return true;
3569 }
3570 
3571 static bool trans_STXP(DisasContext *s, arg_stxr *a)
3572 {
3573     if (a->rn == 31) {
3574         gen_check_sp_alignment(s);
3575     }
3576     if (a->lasr) {
3577         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3578     }
3579     gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
3580     return true;
3581 }
3582 
3583 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
3584 {
3585     if (a->rn == 31) {
3586         gen_check_sp_alignment(s);
3587     }
3588     gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
3589     if (a->lasr) {
3590         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3591     }
3592     return true;
3593 }
3594 
3595 static bool trans_CASP(DisasContext *s, arg_CASP *a)
3596 {
3597     if (!dc_isar_feature(aa64_lse, s)) {
3598         return false;
3599     }
3600     if (((a->rt | a->rs) & 1) != 0) {
3601         return false;
3602     }
3603 
3604     gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
3605     return true;
3606 }
3607 
3608 static bool trans_CAS(DisasContext *s, arg_CAS *a)
3609 {
3610     if (!dc_isar_feature(aa64_lse, s)) {
3611         return false;
3612     }
3613     gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
3614     return true;
3615 }
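/*
 * Illustrative guest forms for the two translators above: CAS X0, X1, [X2]
 * compares X0 with the doubleword at [X2] and stores X1 on a match, while
 * CASP requires even/odd register pairs, e.g. CASP X0, X1, X2, X3, [X4];
 * odd-numbered Rs or Rt encodings are rejected as unallocated above.
 */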
3616 
3617 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
3618 {
3619     bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
3620     TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
3621     TCGv_i64 clean_addr = tcg_temp_new_i64();
3622     MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3623 
3624     gen_pc_plus_diff(s, clean_addr, a->imm);
3625     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3626               false, true, a->rt, iss_sf, false);
3627     return true;
3628 }
3629 
3630 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
3631 {
3632     /* Load register (literal), vector version */
3633     TCGv_i64 clean_addr;
3634     MemOp memop;
3635 
3636     if (!fp_access_check(s)) {
3637         return true;
3638     }
3639     memop = finalize_memop_asimd(s, a->sz);
3640     clean_addr = tcg_temp_new_i64();
3641     gen_pc_plus_diff(s, clean_addr, a->imm);
3642     do_fp_ld(s, a->rt, clean_addr, memop);
3643     return true;
3644 }
3645 
3646 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
3647                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3648                                  uint64_t offset, bool is_store, MemOp mop)
3649 {
3650     if (a->rn == 31) {
3651         gen_check_sp_alignment(s);
3652     }
3653 
3654     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3655     if (!a->p) {
3656         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3657     }
3658 
3659     *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
3660                                  (a->w || a->rn != 31), 2 << a->sz, mop);
3661 }
3662 
3663 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
3664                                   TCGv_i64 dirty_addr, uint64_t offset)
3665 {
3666     if (a->w) {
3667         if (a->p) {
3668             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3669         }
3670         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3671     }
3672 }
3673 
3674 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
3675 {
3676     uint64_t offset = a->imm << a->sz;
3677     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3678     MemOp mop = finalize_memop(s, a->sz);
3679 
3680     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3681     tcg_rt = cpu_reg(s, a->rt);
3682     tcg_rt2 = cpu_reg(s, a->rt2);
3683     /*
3684      * We built mop above for the single logical access -- rebuild it
3685      * now for the paired operation.
3686      *
3687      * With LSE2, non-sign-extending pairs are treated atomically if
3688      * aligned, and if unaligned one of the pair will be completely
3689      * within a 16-byte block and that element will be atomic.
3690      * Otherwise each element is separately atomic.
3691      * In all cases, issue one operation with the correct atomicity.
3692      */
3693     mop = a->sz + 1;
3694     mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3695     mop |= (s->align_mem ? 0 : MO_ALIGN_TLB_ONLY);
3696     mop = finalize_memop_pair(s, mop);
3697     if (a->sz == 2) {
3698         TCGv_i64 tmp = tcg_temp_new_i64();
3699 
3700         if (s->be_data == MO_LE) {
3701             tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3702         } else {
3703             tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3704         }
3705         tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3706     } else {
3707         TCGv_i128 tmp = tcg_temp_new_i128();
3708 
3709         if (s->be_data == MO_LE) {
3710             tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3711         } else {
3712             tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3713         }
3714         tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3715     }
3716     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3717     return true;
3718 }
3719 
3720 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3721 {
3722     uint64_t offset = a->imm << a->sz;
3723     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3724     MemOp mop = finalize_memop(s, a->sz);
3725 
3726     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3727     tcg_rt = cpu_reg(s, a->rt);
3728     tcg_rt2 = cpu_reg(s, a->rt2);
3729 
3730     /*
3731      * We built mop above for the single logical access -- rebuild it
3732      * now for the paired operation.
3733      *
3734      * With LSE2, non-sign-extending pairs are treated atomically if
3735      * aligned, and if unaligned one of the pair will be completely
3736      * within a 16-byte block and that element will be atomic.
3737      * Otherwise each element is separately atomic.
3738      * In all cases, issue one operation with the correct atomicity.
3739      *
3740      * This treats sign-extending loads like zero-extending loads,
3741      * since that reuses the most code below.
3742      */
3743     mop = a->sz + 1;
3744     mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3745     mop |= (s->align_mem ? 0 : MO_ALIGN_TLB_ONLY);
3746     mop = finalize_memop_pair(s, mop);
3747     if (a->sz == 2) {
3748         int o2 = s->be_data == MO_LE ? 32 : 0;
3749         int o1 = o2 ^ 32;
3750 
3751         tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3752         if (a->sign) {
3753             tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3754             tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3755         } else {
3756             tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3757             tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3758         }
3759     } else {
3760         TCGv_i128 tmp = tcg_temp_new_i128();
3761 
3762         tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3763         if (s->be_data == MO_LE) {
3764             tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3765         } else {
3766             tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3767         }
3768     }
3769     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3770     return true;
3771 }
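/*
 * Worked example for the pair handling above (little-endian data,
 * illustrative): STP X0, X1, [SP] concatenates X0 into the low half and
 * X1 into the high half of one 128-bit store, and the matching LDP does
 * the inverse extraction, so each register keeps 8-byte atomicity
 * whenever the LSE2 alignment conditions described above are met.
 */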
3772 
3773 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3774 {
3775     uint64_t offset = a->imm << a->sz;
3776     TCGv_i64 clean_addr, dirty_addr;
3777     MemOp mop;
3778 
3779     if (!fp_access_check(s)) {
3780         return true;
3781     }
3782 
3783     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3784     mop = finalize_memop_asimd(s, a->sz);
3785     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3786     do_fp_st(s, a->rt, clean_addr, mop);
3787     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3788     do_fp_st(s, a->rt2, clean_addr, mop);
3789     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3790     return true;
3791 }
3792 
3793 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3794 {
3795     uint64_t offset = a->imm << a->sz;
3796     TCGv_i64 clean_addr, dirty_addr;
3797     MemOp mop;
3798 
3799     if (!fp_access_check(s)) {
3800         return true;
3801     }
3802 
3803     /* LSE2 does not merge FP pairs; leave these as separate operations. */
3804     mop = finalize_memop_asimd(s, a->sz);
3805     op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3806     do_fp_ld(s, a->rt, clean_addr, mop);
3807     tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3808     do_fp_ld(s, a->rt2, clean_addr, mop);
3809     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3810     return true;
3811 }
3812 
3813 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3814 {
3815     TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3816     uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3817     MemOp mop;
3818     TCGv_i128 tmp;
3819 
3820     /* STGP only comes in one size. */
3821     tcg_debug_assert(a->sz == MO_64);
3822 
3823     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3824         return false;
3825     }
3826 
3827     if (a->rn == 31) {
3828         gen_check_sp_alignment(s);
3829     }
3830 
3831     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3832     if (!a->p) {
3833         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3834     }
3835 
3836     clean_addr = clean_data_tbi(s, dirty_addr);
3837     tcg_rt = cpu_reg(s, a->rt);
3838     tcg_rt2 = cpu_reg(s, a->rt2);
3839 
3840     /*
3841      * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3842      * and one tag operation.  We implement it as one single aligned 16-byte
3843      * memory operation for convenience.  Note that the alignment ensures
3844      * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3845      */
3846     mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3847 
3848     tmp = tcg_temp_new_i128();
3849     if (s->be_data == MO_LE) {
3850         tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3851     } else {
3852         tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3853     }
3854     tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3855 
3856     /* Perform the tag store, if tag access enabled. */
3857     if (s->ata[0]) {
3858         if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3859             gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3860         } else {
3861             gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3862         }
3863     }
3864 
3865     op_addr_ldstpair_post(s, a, dirty_addr, offset);
3866     return true;
3867 }
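/*
 * Illustrative guest form: STGP X0, X1, [X2] writes the 16 data bytes with
 * the single aligned store generated above and, when tag access is enabled,
 * also writes the allocation tag taken from the address in X2 to the
 * covered tag granule.
 */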
3868 
3869 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3870                                  TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3871                                  uint64_t offset, bool is_store, MemOp mop)
3872 {
3873     int memidx;
3874 
3875     if (a->rn == 31) {
3876         gen_check_sp_alignment(s);
3877     }
3878 
3879     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3880     if (!a->p) {
3881         tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3882     }
3883     memidx = core_a64_user_mem_index(s, a->unpriv);
3884     *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3885                                         a->w || a->rn != 31,
3886                                         mop, a->unpriv, memidx);
3887 }
3888 
3889 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3890                                   TCGv_i64 dirty_addr, uint64_t offset)
3891 {
3892     if (a->w) {
3893         if (a->p) {
3894             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3895         }
3896         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3897     }
3898 }
3899 
3900 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3901 {
3902     bool iss_sf, iss_valid = !a->w;
3903     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3904     int memidx = core_a64_user_mem_index(s, a->unpriv);
3905     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3906 
3907     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3908 
3909     tcg_rt = cpu_reg(s, a->rt);
3910     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3911 
3912     do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3913                      iss_valid, a->rt, iss_sf, false);
3914     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3915     return true;
3916 }
3917 
3918 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3919 {
3920     bool iss_sf, iss_valid = !a->w;
3921     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3922     int memidx = core_a64_user_mem_index(s, a->unpriv);
3923     MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3924 
3925     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3926 
3927     tcg_rt = cpu_reg(s, a->rt);
3928     iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3929 
3930     do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3931                      a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3932     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3933     return true;
3934 }
3935 
3936 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3937 {
3938     TCGv_i64 clean_addr, dirty_addr;
3939     MemOp mop;
3940 
3941     if (!fp_access_check(s)) {
3942         return true;
3943     }
3944     mop = finalize_memop_asimd(s, a->sz);
3945     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3946     do_fp_st(s, a->rt, clean_addr, mop);
3947     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3948     return true;
3949 }
3950 
3951 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3952 {
3953     TCGv_i64 clean_addr, dirty_addr;
3954     MemOp mop;
3955 
3956     if (!fp_access_check(s)) {
3957         return true;
3958     }
3959     mop = finalize_memop_asimd(s, a->sz);
3960     op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3961     do_fp_ld(s, a->rt, clean_addr, mop);
3962     op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3963     return true;
3964 }
3965 
3966 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3967                              TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3968                              bool is_store, MemOp memop)
3969 {
3970     TCGv_i64 tcg_rm;
3971 
3972     if (a->rn == 31) {
3973         gen_check_sp_alignment(s);
3974     }
3975     *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3976 
3977     tcg_rm = read_cpu_reg(s, a->rm, 1);
3978     ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3979 
3980     tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3981     *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3982 }
3983 
3984 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3985 {
3986     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3987     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3988     MemOp memop;
3989 
3990     if (extract32(a->opt, 1, 1) == 0) {
3991         return false;
3992     }
3993 
3994     memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3995     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3996     tcg_rt = cpu_reg(s, a->rt);
3997     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3998               a->ext, true, a->rt, iss_sf, false);
3999     return true;
4000 }
4001 
4002 static bool trans_STR(DisasContext *s, arg_ldst *a)
4003 {
4004     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
4005     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
4006     MemOp memop;
4007 
4008     if (extract32(a->opt, 1, 1) == 0) {
4009         return false;
4010     }
4011 
4012     memop = finalize_memop(s, a->sz);
4013     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
4014     tcg_rt = cpu_reg(s, a->rt);
4015     do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
4016     return true;
4017 }
4018 
4019 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
4020 {
4021     TCGv_i64 clean_addr, dirty_addr;
4022     MemOp memop;
4023 
4024     if (extract32(a->opt, 1, 1) == 0) {
4025         return false;
4026     }
4027 
4028     if (!fp_access_check(s)) {
4029         return true;
4030     }
4031 
4032     memop = finalize_memop_asimd(s, a->sz);
4033     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
4034     do_fp_ld(s, a->rt, clean_addr, memop);
4035     return true;
4036 }
4037 
4038 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
4039 {
4040     TCGv_i64 clean_addr, dirty_addr;
4041     MemOp memop;
4042 
4043     if (extract32(a->opt, 1, 1) == 0) {
4044         return false;
4045     }
4046 
4047     if (!fp_access_check(s)) {
4048         return true;
4049     }
4050 
4051     memop = finalize_memop_asimd(s, a->sz);
4052     op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
4053     do_fp_st(s, a->rt, clean_addr, memop);
4054     return true;
4055 }
4056 
4058 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
4059                          int sign, bool invert)
4060 {
4061     MemOp mop = a->sz | sign;
4062     TCGv_i64 clean_addr, tcg_rs, tcg_rt;
4063 
4064     if (a->rn == 31) {
4065         gen_check_sp_alignment(s);
4066     }
4067     mop = check_atomic_align(s, a->rn, mop);
4068     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
4069                                 a->rn != 31, mop);
4070     tcg_rs = read_cpu_reg(s, a->rs, true);
4071     tcg_rt = cpu_reg(s, a->rt);
4072     if (invert) {
4073         tcg_gen_not_i64(tcg_rs, tcg_rs);
4074     }
4075     /*
4076      * The tcg atomic primitives are all full barriers.  Therefore we
4077      * can ignore the Acquire and Release bits of this instruction.
4078      */
4079     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
4080 
4081     if (mop & MO_SIGN) {
4082         switch (a->sz) {
4083         case MO_8:
4084             tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
4085             break;
4086         case MO_16:
4087             tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
4088             break;
4089         case MO_32:
4090             tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
4091             break;
4092         case MO_64:
4093             break;
4094         default:
4095             g_assert_not_reached();
4096         }
4097     }
4098     return true;
4099 }
4100 
4101 TRANS_FEAT(LDADD, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
4102 TRANS_FEAT(LDCLR, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
4103 TRANS_FEAT(LDEOR, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
4104 TRANS_FEAT(LDSET, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
4105 TRANS_FEAT(LDSMAX, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
4106 TRANS_FEAT(LDSMIN, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
4107 TRANS_FEAT(LDUMAX, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
4108 TRANS_FEAT(LDUMIN, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
4109 TRANS_FEAT(SWP, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
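/*
 * As an example of the table above (illustrative): LDADD X1, X0, [X2] maps
 * to tcg_gen_atomic_fetch_add_i64 and returns the old memory value in X0;
 * the ST* aliases (e.g. STADD) are the same encodings with Rt == XZR, so
 * they take the same path and simply discard the result.
 */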
4110 
4111 typedef void Atomic128ThreeOpFn(TCGv_i128, TCGv_i64, TCGv_i128, TCGArg, MemOp);
4112 
4113 static bool do_atomic128_ld(DisasContext *s, arg_atomic128 *a,
4114                             Atomic128ThreeOpFn *fn, bool invert)
4115 {
4116     MemOp mop;
4117     int rlo, rhi;
4118     TCGv_i64 clean_addr, tlo, thi;
4119     TCGv_i128 t16;
4120 
4121     if (a->rt == 31 || a->rt2 == 31 || a->rt == a->rt2) {
4122         return false;
4123     }
4124     if (a->rn == 31) {
4125         gen_check_sp_alignment(s);
4126     }
4127     mop = check_atomic_align(s, a->rn, MO_128);
4128     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
4129                                 a->rn != 31, mop);
4130 
4131     rlo = (s->be_data == MO_LE ? a->rt : a->rt2);
4132     rhi = (s->be_data == MO_LE ? a->rt2 : a->rt);
4133 
4134     tlo = read_cpu_reg(s, rlo, true);
4135     thi = read_cpu_reg(s, rhi, true);
4136     if (invert) {
4137         tcg_gen_not_i64(tlo, tlo);
4138         tcg_gen_not_i64(thi, thi);
4139     }
4140     /*
4141      * The tcg atomic primitives are all full barriers.  Therefore we
4142      * can ignore the Acquire and Release bits of this instruction.
4143      */
4144     t16 = tcg_temp_new_i128();
4145     tcg_gen_concat_i64_i128(t16, tlo, thi);
4146 
4147     fn(t16, clean_addr, t16, get_mem_index(s), mop);
4148 
4149     tcg_gen_extr_i128_i64(cpu_reg(s, rlo), cpu_reg(s, rhi), t16);
4150     return true;
4151 }
4152 
4153 TRANS_FEAT(LDCLRP, aa64_lse128, do_atomic128_ld,
4154            a, tcg_gen_atomic_fetch_and_i128, true)
4155 TRANS_FEAT(LDSETP, aa64_lse128, do_atomic128_ld,
4156            a, tcg_gen_atomic_fetch_or_i128, false)
4157 TRANS_FEAT(SWPP, aa64_lse128, do_atomic128_ld,
4158            a, tcg_gen_atomic_xchg_i128, false)
4159 
4160 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
4161 {
4162     bool iss_sf = ldst_iss_sf(a->sz, false, false);
4163     TCGv_i64 clean_addr;
4164     MemOp mop;
4165 
4166     if (!dc_isar_feature(aa64_lse, s) ||
4167         !dc_isar_feature(aa64_rcpc_8_3, s)) {
4168         return false;
4169     }
4170     if (a->rn == 31) {
4171         gen_check_sp_alignment(s);
4172     }
4173     mop = check_ordered_align(s, a->rn, 0, false, a->sz);
4174     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
4175                                 a->rn != 31, mop);
4176     /*
4177      * LDAPR* are a special case because they are a simple load, not a
4178      * fetch-and-do-something op.
4179      * The architectural consistency requirements here are weaker than
4180      * full load-acquire (we only need "load-acquire processor consistent"),
4181      * but we choose to implement them as full LDAQ.
4182      */
4183     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
4184               true, a->rt, iss_sf, true);
4185     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
4186     return true;
4187 }
4188 
4189 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
4190 {
4191     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
4192     MemOp memop;
4193 
4194     /* Load with pointer authentication */
4195     if (!dc_isar_feature(aa64_pauth, s)) {
4196         return false;
4197     }
4198 
4199     if (a->rn == 31) {
4200         gen_check_sp_alignment(s);
4201     }
4202     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
4203 
4204     if (s->pauth_active) {
4205         if (!a->m) {
4206             gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
4207                                       tcg_constant_i64(0));
4208         } else {
4209             gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
4210                                       tcg_constant_i64(0));
4211         }
4212     }
4213 
4214     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
4215 
4216     memop = finalize_memop(s, MO_64);
4217 
4218     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
4219     clean_addr = gen_mte_check1(s, dirty_addr, false,
4220                                 a->w || a->rn != 31, memop);
4221 
4222     tcg_rt = cpu_reg(s, a->rt);
4223     do_gpr_ld(s, tcg_rt, clean_addr, memop,
4224               /* extend */ false, /* iss_valid */ !a->w,
4225               /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
4226 
4227     if (a->w) {
4228         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
4229     }
4230     return true;
4231 }
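/*
 * Illustrative guest forms: LDRAA X0, [X1] authenticates X1 with the DA
 * key (the AUTDA path above) before loading, and LDRAB uses the DB key;
 * the writeback variants additionally copy the authenticated address,
 * plus offset, back into the base register as handled at the end above.
 */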
4232 
4233 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
4234 {
4235     TCGv_i64 clean_addr, dirty_addr;
4236     MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
4237     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
4238 
4239     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
4240         return false;
4241     }
4242 
4243     if (a->rn == 31) {
4244         gen_check_sp_alignment(s);
4245     }
4246 
4247     mop = check_ordered_align(s, a->rn, a->imm, false, mop);
4248     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
4249     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
4250     clean_addr = clean_data_tbi(s, dirty_addr);
4251 
4252     /*
4253      * Load-AcquirePC semantics; we implement as the slightly more
4254      * restrictive Load-Acquire.
4255      */
4256     do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
4257               a->rt, iss_sf, true);
4258     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
4259     return true;
4260 }
4261 
4262 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
4263 {
4264     TCGv_i64 clean_addr, dirty_addr;
4265     MemOp mop = a->sz;
4266     bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
4267 
4268     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
4269         return false;
4270     }
4271 
4272     /* TODO: ARMv8.4-LSE SCTLR.nAA */
4273 
4274     if (a->rn == 31) {
4275         gen_check_sp_alignment(s);
4276     }
4277 
4278     mop = check_ordered_align(s, a->rn, a->imm, true, mop);
4279     dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
4280     tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
4281     clean_addr = clean_data_tbi(s, dirty_addr);
4282 
4283     /* Store-Release semantics */
4284     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
4285     do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
4286     return true;
4287 }
4288 
4289 static bool trans_GCSSTR(DisasContext *s, arg_GCSSTR *a)
4290 {
4291     ARMMMUIdx armidx;
4292 
4293     if (!dc_isar_feature(aa64_gcs, s)) {
4294         return false;
4295     }
4296 
4297     /*
4298      * The pseudocode for GCSSTTR is
4299      *
4300      *   effective_el = AArch64.IsUnprivAccessPriv() ? PSTATE.EL : EL0;
4301      *   if (effective_el == PSTATE.EL) CheckGCSSTREnabled();
4302      *
4303      * We have cached the result of IsUnprivAccessPriv in DisasContext,
4304      * but since we need the result of full_a64_user_mem_index anyway,
4305      * use the mmu_idx test as a proxy for the effective_el test.
4306      */
4307     armidx = full_a64_user_mem_index(s, a->unpriv);
4308     if (armidx == s->mmu_idx && s->gcsstr_el != 0) {
4309         gen_exception_insn_el(s, 0, EXCP_UDEF,
4310                               syn_gcs_gcsstr(a->rn, a->rt),
4311                               s->gcsstr_el);
4312         return true;
4313     }
4314 
4315     if (a->rn == 31) {
4316         gen_check_sp_alignment(s);
4317     }
4318     tcg_gen_qemu_st_i64(cpu_reg(s, a->rt),
4319                         clean_data_tbi(s, cpu_reg_sp(s, a->rn)),
4320                         core_gcs_mem_index(armidx),
4321                         finalize_memop(s, MO_64 | MO_ALIGN));
4322     return true;
4323 }
4324 
4325 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
4326 {
4327     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4328     MemOp endian, align, mop;
4329 
4330     int total;    /* total bytes */
4331     int elements; /* elements per vector */
4332     int r;
4333     int size = a->sz;
4334 
4335     if (!a->p && a->rm != 0) {
4336         /* For non-postindexed accesses the Rm field must be 0 */
4337         return false;
4338     }
4339     if (size == 3 && !a->q && a->selem != 1) {
4340         return false;
4341     }
4342     if (!fp_access_check(s)) {
4343         return true;
4344     }
4345 
4346     if (a->rn == 31) {
4347         gen_check_sp_alignment(s);
4348     }
4349 
4350     /* For our purposes, bytes are always little-endian.  */
4351     endian = s->be_data;
4352     if (size == 0) {
4353         endian = MO_LE;
4354     }
4355 
4356     total = a->rpt * a->selem * (a->q ? 16 : 8);
4357     tcg_rn = cpu_reg_sp(s, a->rn);
4358 
4359     /*
4360      * Issue the MTE check vs the logical repeat count, before we
4361      * promote consecutive little-endian elements below.
4362      */
4363     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
4364                                 finalize_memop_asimd(s, size));
4365 
4366     /*
4367      * Consecutive little-endian elements from a single register
4368      * can be promoted to a larger little-endian operation.
4369      */
4370     align = MO_ALIGN;
4371     if (a->selem == 1 && endian == MO_LE) {
4372         align = pow2_align(size);
4373         size = 3;
4374     }
4375     if (!s->align_mem) {
4376         align = 0;
4377     }
4378     mop = endian | size | align;
4379 
4380     elements = (a->q ? 16 : 8) >> size;
4381     tcg_ebytes = tcg_constant_i64(1 << size);
4382     for (r = 0; r < a->rpt; r++) {
4383         int e;
4384         for (e = 0; e < elements; e++) {
4385             int xs;
4386             for (xs = 0; xs < a->selem; xs++) {
4387                 int tt = (a->rt + r + xs) % 32;
4388                 do_vec_ld(s, tt, e, clean_addr, mop);
4389                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4390             }
4391         }
4392     }
4393 
4394     /*
4395      * For non-quad operations, setting a slice of the low 64 bits of
4396      * the register clears the high 64 bits (in the ARM ARM pseudocode
4397      * this is implicit in the fact that 'rval' is a 64 bit wide
4398      * variable).  For quad operations, we might still need to zero
4399      * the high bits of SVE.
4400      */
4401     for (r = 0; r < a->rpt * a->selem; r++) {
4402         int tt = (a->rt + r) % 32;
4403         clear_vec_high(s, a->q, tt);
4404     }
4405 
4406     if (a->p) {
4407         if (a->rm == 31) {
4408             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4409         } else {
4410             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4411         }
4412     }
4413     return true;
4414 }
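/*
 * Example of the little-endian promotion above (illustrative):
 * LD1 {V0.16B}, [X0] has selem == 1, so instead of sixteen byte loads it
 * is issued as two 8-byte little-endian loads, whereas
 * LD4 {V0.16B-V3.16B}, [X0] keeps per-element accesses because the
 * de-interleaving must scatter each byte into a different register.
 */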
4415 
4416 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
4417 {
4418     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4419     MemOp endian, align, mop;
4420 
4421     int total;    /* total bytes */
4422     int elements; /* elements per vector */
4423     int r;
4424     int size = a->sz;
4425 
4426     if (!a->p && a->rm != 0) {
4427         /* For non-postindexed accesses the Rm field must be 0 */
4428         return false;
4429     }
4430     if (size == 3 && !a->q && a->selem != 1) {
4431         return false;
4432     }
4433     if (!fp_access_check(s)) {
4434         return true;
4435     }
4436 
4437     if (a->rn == 31) {
4438         gen_check_sp_alignment(s);
4439     }
4440 
4441     /* For our purposes, bytes are always little-endian.  */
4442     endian = s->be_data;
4443     if (size == 0) {
4444         endian = MO_LE;
4445     }
4446 
4447     total = a->rpt * a->selem * (a->q ? 16 : 8);
4448     tcg_rn = cpu_reg_sp(s, a->rn);
4449 
4450     /*
4451      * Issue the MTE check vs the logical repeat count, before we
4452      * promote consecutive little-endian elements below.
4453      */
4454     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
4455                                 finalize_memop_asimd(s, size));
4456 
4457     /*
4458      * Consecutive little-endian elements from a single register
4459      * can be promoted to a larger little-endian operation.
4460      */
4461     align = MO_ALIGN;
4462     if (a->selem == 1 && endian == MO_LE) {
4463         align = pow2_align(size);
4464         size = 3;
4465     }
4466     if (!s->align_mem) {
4467         align = 0;
4468     }
4469     mop = endian | size | align;
4470 
4471     elements = (a->q ? 16 : 8) >> size;
4472     tcg_ebytes = tcg_constant_i64(1 << size);
4473     for (r = 0; r < a->rpt; r++) {
4474         int e;
4475         for (e = 0; e < elements; e++) {
4476             int xs;
4477             for (xs = 0; xs < a->selem; xs++) {
4478                 int tt = (a->rt + r + xs) % 32;
4479                 do_vec_st(s, tt, e, clean_addr, mop);
4480                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4481             }
4482         }
4483     }
4484 
4485     if (a->p) {
4486         if (a->rm == 31) {
4487             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4488         } else {
4489             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4490         }
4491     }
4492     return true;
4493 }
4494 
4495 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
4496 {
4497     int xs, total, rt;
4498     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4499     MemOp mop;
4500 
4501     if (!a->p && a->rm != 0) {
4502         return false;
4503     }
4504     if (!fp_access_check(s)) {
4505         return true;
4506     }
4507 
4508     if (a->rn == 31) {
4509         gen_check_sp_alignment(s);
4510     }
4511 
4512     total = a->selem << a->scale;
4513     tcg_rn = cpu_reg_sp(s, a->rn);
4514 
4515     mop = finalize_memop_asimd(s, a->scale);
4516     clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
4517                                 total, mop);
4518 
4519     tcg_ebytes = tcg_constant_i64(1 << a->scale);
4520     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4521         do_vec_st(s, rt, a->index, clean_addr, mop);
4522         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4523     }
4524 
4525     if (a->p) {
4526         if (a->rm == 31) {
4527             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4528         } else {
4529             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4530         }
4531     }
4532     return true;
4533 }
4534 
4535 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
4536 {
4537     int xs, total, rt;
4538     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4539     MemOp mop;
4540 
4541     if (!a->p && a->rm != 0) {
4542         return false;
4543     }
4544     if (!fp_access_check(s)) {
4545         return true;
4546     }
4547 
4548     if (a->rn == 31) {
4549         gen_check_sp_alignment(s);
4550     }
4551 
4552     total = a->selem << a->scale;
4553     tcg_rn = cpu_reg_sp(s, a->rn);
4554 
4555     mop = finalize_memop_asimd(s, a->scale);
4556     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
4557                                 total, mop);
4558 
4559     tcg_ebytes = tcg_constant_i64(1 << a->scale);
4560     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4561         do_vec_ld(s, rt, a->index, clean_addr, mop);
4562         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4563     }
4564 
4565     if (a->p) {
4566         if (a->rm == 31) {
4567             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4568         } else {
4569             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4570         }
4571     }
4572     return true;
4573 }
4574 
4575 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
4576 {
4577     int xs, total, rt;
4578     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4579     MemOp mop;
4580 
4581     if (!a->p && a->rm != 0) {
4582         return false;
4583     }
4584     if (!fp_access_check(s)) {
4585         return true;
4586     }
4587 
4588     if (a->rn == 31) {
4589         gen_check_sp_alignment(s);
4590     }
4591 
4592     total = a->selem << a->scale;
4593     tcg_rn = cpu_reg_sp(s, a->rn);
4594 
4595     mop = finalize_memop_asimd(s, a->scale);
4596     clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
4597                                 total, mop);
4598 
4599     tcg_ebytes = tcg_constant_i64(1 << a->scale);
4600     for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4601         /* Load and replicate to all elements */
4602         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4603 
4604         tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
4605         tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
4606                              (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
4607         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4608     }
4609 
4610     if (a->p) {
4611         if (a->rm == 31) {
4612             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4613         } else {
4614             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4615         }
4616     }
4617     return true;
4618 }
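/*
 * Illustrative guest form: LD1R {V0.4S}, [X0] performs a single 4-byte
 * load and the gvec dup above replicates it into all four 32-bit lanes
 * of V0; with Q == 0 only the low 64 bits are written and the high half
 * is cleared.
 */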
4619 
4620 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
4621 {
4622     TCGv_i64 addr, clean_addr, tcg_rt;
4623     int size = 4 << s->dcz_blocksize;
4624 
4625     if (!dc_isar_feature(aa64_mte, s)) {
4626         return false;
4627     }
4628     if (s->current_el == 0) {
4629         return false;
4630     }
4631 
4632     if (a->rn == 31) {
4633         gen_check_sp_alignment(s);
4634     }
4635 
4636     addr = read_cpu_reg_sp(s, a->rn, true);
4637     tcg_gen_addi_i64(addr, addr, a->imm);
4638     tcg_rt = cpu_reg(s, a->rt);
4639 
4640     if (s->ata[0]) {
4641         gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
4642     }
4643     /*
4644      * The non-tags portion of STZGM is mostly like DC_ZVA,
4645      * except the alignment happens before the access.
4646      */
4647     clean_addr = clean_data_tbi(s, addr);
4648     tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4649     gen_helper_dc_zva(tcg_env, clean_addr);
4650     return true;
4651 }
4652 
4653 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
4654 {
4655     TCGv_i64 addr, clean_addr, tcg_rt;
4656 
4657     if (!dc_isar_feature(aa64_mte, s)) {
4658         return false;
4659     }
4660     if (s->current_el == 0) {
4661         return false;
4662     }
4663 
4664     if (a->rn == 31) {
4665         gen_check_sp_alignment(s);
4666     }
4667 
4668     addr = read_cpu_reg_sp(s, a->rn, true);
4669     tcg_gen_addi_i64(addr, addr, a->imm);
4670     tcg_rt = cpu_reg(s, a->rt);
4671 
4672     if (s->ata[0]) {
4673         gen_helper_stgm(tcg_env, addr, tcg_rt);
4674     } else {
4675         MMUAccessType acc = MMU_DATA_STORE;
4676         int size = 4 << s->gm_blocksize;
4677 
4678         clean_addr = clean_data_tbi(s, addr);
4679         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4680         gen_probe_access(s, clean_addr, acc, size);
4681     }
4682     return true;
4683 }
4684 
4685 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
4686 {
4687     TCGv_i64 addr, clean_addr, tcg_rt;
4688 
4689     if (!dc_isar_feature(aa64_mte, s)) {
4690         return false;
4691     }
4692     if (s->current_el == 0) {
4693         return false;
4694     }
4695 
4696     if (a->rn == 31) {
4697         gen_check_sp_alignment(s);
4698     }
4699 
4700     addr = read_cpu_reg_sp(s, a->rn, true);
4701     tcg_gen_addi_i64(addr, addr, a->imm);
4702     tcg_rt = cpu_reg(s, a->rt);
4703 
4704     if (s->ata[0]) {
4705         gen_helper_ldgm(tcg_rt, tcg_env, addr);
4706     } else {
4707         MMUAccessType acc = MMU_DATA_LOAD;
4708         int size = 4 << s->gm_blocksize;
4709 
4710         clean_addr = clean_data_tbi(s, addr);
4711         tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4712         gen_probe_access(s, clean_addr, acc, size);
4713         /* The result tags are zeros.  */
4714         tcg_gen_movi_i64(tcg_rt, 0);
4715     }
4716     return true;
4717 }
4718 
4719 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
4720 {
4721     TCGv_i64 addr, clean_addr, tcg_rt;
4722 
4723     if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
4724         return false;
4725     }
4726 
4727     if (a->rn == 31) {
4728         gen_check_sp_alignment(s);
4729     }
4730 
4731     addr = read_cpu_reg_sp(s, a->rn, true);
4732     if (!a->p) {
4733         /* pre-index or signed offset */
4734         tcg_gen_addi_i64(addr, addr, a->imm);
4735     }
4736 
4737     tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4738     tcg_rt = cpu_reg(s, a->rt);
4739     if (s->ata[0]) {
4740         gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
4741     } else {
4742         /*
4743          * Tag access disabled: we must check for aborts on the load
4744          * from [rn+offset], and then insert a 0 tag into rt.
4745          */
4746         clean_addr = clean_data_tbi(s, addr);
4747         gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4748         gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
4749     }
4750 
4751     if (a->w) {
4752         /* pre-index or post-index */
4753         if (a->p) {
4754             /* post-index */
4755             tcg_gen_addi_i64(addr, addr, a->imm);
4756         }
4757         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4758     }
4759     return true;
4760 }
4761 
4762 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
4763 {
4764     TCGv_i64 addr, tcg_rt;
4765 
4766     if (a->rn == 31) {
4767         gen_check_sp_alignment(s);
4768     }
4769 
4770     addr = read_cpu_reg_sp(s, a->rn, true);
4771     if (!a->p) {
4772         /* pre-index or signed offset */
4773         tcg_gen_addi_i64(addr, addr, a->imm);
4774     }
4775     tcg_rt = cpu_reg_sp(s, a->rt);
4776     if (!s->ata[0]) {
4777         /*
4778          * For STG and ST2G, we need to check alignment and probe memory.
4779          * TODO: For STZG and STZ2G, we could rely on the stores below,
4780          * at least for system mode; user-only won't enforce alignment.
4781          */
4782         if (is_pair) {
4783             gen_helper_st2g_stub(tcg_env, addr);
4784         } else {
4785             gen_helper_stg_stub(tcg_env, addr);
4786         }
4787     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4788         if (is_pair) {
4789             gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4790         } else {
4791             gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4792         }
4793     } else {
4794         if (is_pair) {
4795             gen_helper_st2g(tcg_env, addr, tcg_rt);
4796         } else {
4797             gen_helper_stg(tcg_env, addr, tcg_rt);
4798         }
4799     }
4800 
4801     if (is_zero) {
4802         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4803         TCGv_i64 zero64 = tcg_constant_i64(0);
4804         TCGv_i128 zero128 = tcg_temp_new_i128();
4805         int mem_index = get_mem_index(s);
4806         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4807 
4808         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4809 
4810         /* This is 1 or 2 atomic 16-byte operations. */
4811         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4812         if (is_pair) {
4813             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4814             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4815         }
4816     }
4817 
4818     if (a->w) {
4819         /* pre-index or post-index */
4820         if (a->p) {
4821             /* post-index */
4822             tcg_gen_addi_i64(addr, addr, a->imm);
4823         }
4824         tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4825     }
4826     return true;
4827 }
4828 
4829 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
4830 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
4831 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
4832 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
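/*
 * Illustrative guest forms for the four translators above: STG X0, [X1]
 * writes the allocation tag held in X0 to the granule addressed by X1,
 * ST2G covers two consecutive granules, and the Z variants additionally
 * zero the data bytes with the aligned 16-byte stores generated above.
 */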
4833 
4834 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4835 
4836 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4837                    bool is_setg, SetFn fn)
4838 {
4839     int memidx;
4840     uint32_t syndrome, desc = 0;
4841 
4842     if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4843         return false;
4844     }
4845 
4846     /*
4847      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4848      * us to pull this check before the CheckMOPSEnabled() test
4849      * (which we do in the helper function)
4850      */
4851     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4852         a->rd == 31 || a->rn == 31) {
4853         return false;
4854     }
4855 
4856     memidx = core_a64_user_mem_index(s, a->unpriv);
4857 
4858     /*
4859      * We pass option_a == true, matching our implementation;
4860      * we pass wrong_option == false: helper function may set that bit.
4861      */
4862     syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4863                        is_epilogue, false, true, a->rd, a->rs, a->rn);
4864 
4865     if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4866         /* We may need to do MTE tag checking, so assemble the descriptor */
4867         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4868         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4869         desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4870         /* SIZEM1 and ALIGN we leave 0 (byte write) */
4871     }
4872     /* The helper function always needs the memidx even with MTE disabled */
4873     desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4874 
4875     /*
4876      * The helper needs the register numbers, but since they're in
4877      * the syndrome anyway, we let it extract them from there rather
4878      * than passing in an extra three integer arguments.
4879      */
4880     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4881     return true;
4882 }
4883 
4884 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
4885 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
4886 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
4887 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
4888 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
4889 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
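
/*
 * A typical FEAT_MOPS memset expansion issues the three instructions
 * back to back (illustrative; Xd = destination, Xn = remaining size,
 * Xs = the byte value to store):
 *   SETP [X0]!, X1!, X2
 *   SETM [X0]!, X1!, X2
 *   SETE [X0]!, X1!, X2
 * Each step calls the corresponding helper; the UNPREDICTABLE register
 * combinations are rejected above, before the helper's
 * CheckMOPSEnabled() test.
 */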
4890 
4891 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4892 
4893 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4894 {
4895     int rmemidx, wmemidx;
4896     uint32_t syndrome, rdesc = 0, wdesc = 0;
4897     bool wunpriv = extract32(a->options, 0, 1);
4898     bool runpriv = extract32(a->options, 1, 1);
4899 
4900     /*
4901      * UNPREDICTABLE cases: we choose to UNDEF, which allows
4902      * us to pull this check before the CheckMOPSEnabled() test
4903      * (which we do in the helper function)
4904      */
4905     if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4906         a->rd == 31 || a->rs == 31 || a->rn == 31) {
4907         return false;
4908     }
4909 
4910     rmemidx = core_a64_user_mem_index(s, runpriv);
4911     wmemidx = core_a64_user_mem_index(s, wunpriv);
4912 
4913     /*
4914      * We pass option_a == true, matching our implementation;
4915      * we pass wrong_option == false: helper function may set that bit.
4916      */
4917     syndrome = syn_mop(false, false, a->options, is_epilogue,
4918                        false, true, a->rd, a->rs, a->rn);
4919 
4920     /* If we need to do MTE tag checking, assemble the descriptors */
4921     if (s->mte_active[runpriv]) {
4922         rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4923         rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4924     }
4925     if (s->mte_active[wunpriv]) {
4926         wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4927         wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4928         wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4929     }
4930     /* The helper function needs these parts of the descriptor regardless */
4931     rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4932     wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4933 
4934     /*
4935      * The helper needs the register numbers, but since they're in
4936      * the syndrome anyway, we let it extract them from there rather
4937      * than passing in an extra three integer arguments.
4938      */
4939     fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4940        tcg_constant_i32(rdesc));
4941     return true;
4942 }
4943 
4944 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4945 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4946 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4947 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4948 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4949 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
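
/*
 * The memcpy sequence is analogous (illustrative; Xd = destination,
 * Xs = source, Xn = remaining size):
 *   CPYP [X0]!, [X1]!, X2!
 *   CPYM [X0]!, [X1]!, X2!
 *   CPYE [X0]!, [X1]!, X2!
 * The CPYF* forms are the forward-only variants.  The read and write
 * sides get separate descriptors so that the per-side unprivileged
 * option bits can differ.
 */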
4950 
4951 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4952 
4953 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4954                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4955 {
4956     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4957     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4958     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4959 
4960     fn(tcg_rd, tcg_rn, tcg_imm);
4961     if (!a->sf) {
4962         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4963     }
4964     return true;
4965 }
4966 
4967 /*
4968  * PC-rel. addressing
4969  */
4970 
4971 static bool trans_ADR(DisasContext *s, arg_ri *a)
4972 {
4973     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4974     return true;
4975 }
4976 
4977 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4978 {
4979     int64_t offset = (int64_t)a->imm << 12;
4980 
4981     /* The page offset is ok for CF_PCREL. */
4982     offset -= s->pc_curr & 0xfff;
4983     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4984     return true;
4985 }
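
/*
 * Worked example: with pc_curr = 0x40001234 and imm = 3 the offset is
 * (3 << 12) - 0x234, so gen_pc_plus_diff() yields
 * 0x40001234 + 0x3000 - 0x234 = 0x40004000, i.e. the architected
 * (PC & ~0xfff) + (imm << 12) result.  Only the in-page offset of the
 * PC is consumed here, which is why this is safe for CF_PCREL.
 */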
4986 
4987 /*
4988  * Add/subtract (immediate)
4989  */
4990 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4991 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4992 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4993 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4994 
4995 /*
4996  * Min/Max (immediate)
4997  */
4998 
4999 static void gen_wrap3_i32(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, NeonGenTwoOpFn fn)
5000 {
5001     TCGv_i32 t1 = tcg_temp_new_i32();
5002     TCGv_i32 t2 = tcg_temp_new_i32();
5003 
5004     tcg_gen_extrl_i64_i32(t1, n);
5005     tcg_gen_extrl_i64_i32(t2, m);
5006     fn(t1, t1, t2);
5007     tcg_gen_extu_i32_i64(d, t1);
5008 }
5009 
5010 static void gen_smax32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5011 {
5012     gen_wrap3_i32(d, n, m, tcg_gen_smax_i32);
5013 }
5014 
5015 static void gen_smin32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5016 {
5017     gen_wrap3_i32(d, n, m, tcg_gen_smin_i32);
5018 }
5019 
5020 static void gen_umax32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5021 {
5022     gen_wrap3_i32(d, n, m, tcg_gen_umax_i32);
5023 }
5024 
5025 static void gen_umin32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5026 {
5027     gen_wrap3_i32(d, n, m, tcg_gen_umin_i32);
5028 }
5029 
5030 TRANS_FEAT(SMAX_i, aa64_cssc, gen_rri, a, 0, 0,
5031            a->sf ? tcg_gen_smax_i64 : gen_smax32_i64)
5032 TRANS_FEAT(SMIN_i, aa64_cssc, gen_rri, a, 0, 0,
5033            a->sf ? tcg_gen_smin_i64 : gen_smin32_i64)
5034 TRANS_FEAT(UMAX_i, aa64_cssc, gen_rri, a, 0, 0,
5035            a->sf ? tcg_gen_umax_i64 : gen_umax32_i64)
5036 TRANS_FEAT(UMIN_i, aa64_cssc, gen_rri, a, 0, 0,
5037            a->sf ? tcg_gen_umin_i64 : gen_umin32_i64)
5038 
5039 /*
5040  * Add/subtract (immediate, with tags)
5041  */
5042 
5043 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
5044                                       bool sub_op)
5045 {
5046     TCGv_i64 tcg_rn, tcg_rd;
5047     int imm;
5048 
5049     imm = a->uimm6 << LOG2_TAG_GRANULE;
5050     if (sub_op) {
5051         imm = -imm;
5052     }
5053 
5054     tcg_rn = cpu_reg_sp(s, a->rn);
5055     tcg_rd = cpu_reg_sp(s, a->rd);
5056 
5057     if (s->ata[0]) {
5058         gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
5059                            tcg_constant_i32(imm),
5060                            tcg_constant_i32(a->uimm4));
5061     } else {
5062         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
5063         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
5064     }
5065     return true;
5066 }
5067 
5068 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
5069 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
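
/*
 * Example: ADDG X0, X1, #32, #2 encodes uimm6 = 32 / 16 = 2, which the
 * shift by LOG2_TAG_GRANULE above turns back into the byte offset 32,
 * and uimm4 = 2 as the tag offset passed to the helper.  Without MTE
 * tag generation the fallback path just adds 32 and forces the
 * allocation tag bits to zero.
 */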
5070 
5071 /* The input should be a value in the bottom e bits (with higher
5072  * bits zero); returns that value replicated into every element
5073  * of size e in a 64 bit integer.
5074  */
5075 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
5076 {
5077     assert(e != 0);
5078     while (e < 64) {
5079         mask |= mask << e;
5080         e *= 2;
5081     }
5082     return mask;
5083 }
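
/*
 * For example, bitfield_replicate(0x03, 8) doubles the element until it
 * covers 64 bits: 0x0303 -> 0x03030303 -> 0x0303030303030303.
 */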
5084 
5085 /*
5086  * Logical (immediate)
5087  */
5088 
5089 /*
5090  * Simplified variant of pseudocode DecodeBitMasks() for the case where we
5091  * only require the wmask. Returns false if the imms/immr/immn are a reserved
5092  * value (ie should cause a guest UNDEF exception), and true if they are
5093  * valid, in which case the decoded bit pattern is written to result.
5094  */
5095 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
5096                             unsigned int imms, unsigned int immr)
5097 {
5098     uint64_t mask;
5099     unsigned e, levels, s, r;
5100     int len;
5101 
5102     assert(immn < 2 && imms < 64 && immr < 64);
5103 
5104     /* The bit patterns we create here are 64 bit patterns which
5105      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
5106      * 64 bits each. Each element contains the same value: a run
5107      * of between 1 and e-1 non-zero bits, rotated within the
5108      * element by between 0 and e-1 bits.
5109      *
5110      * The element size and run length are encoded into immn (1 bit)
5111      * and imms (6 bits) as follows:
5112      * 64 bit elements: immn = 1, imms = <length of run - 1>
5113      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
5114      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
5115      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
5116      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
5117      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
5118      * Notice that immn = 0, imms = 11111x is the only combination
5119      * not covered by one of the above options; this is reserved.
5120      * Further, <length of run - 1> all-ones is a reserved pattern.
5121      *
5122      * In all cases the rotation is by immr % e (and immr is 6 bits).
5123      */
5124 
5125     /* First determine the element size */
5126     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
5127     if (len < 1) {
5128         /* This is the immn == 0, imms == 0b11111x case */
5129         return false;
5130     }
5131     e = 1 << len;
5132 
5133     levels = e - 1;
5134     s = imms & levels;
5135     r = immr & levels;
5136 
5137     if (s == levels) {
5138         /* <length of run - 1> mustn't be all-ones. */
5139         return false;
5140     }
5141 
5142     /* Create the value of one element: s+1 set bits rotated
5143      * by r within the element (which is e bits wide)...
5144      */
5145     mask = MAKE_64BIT_MASK(0, s + 1);
5146     if (r) {
5147         mask = (mask >> r) | (mask << (e - r));
5148         mask &= MAKE_64BIT_MASK(0, e);
5149     }
5150     /* ...then replicate the element over the whole 64 bit value */
5151     mask = bitfield_replicate(mask, e);
5152     *result = mask;
5153     return true;
5154 }
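
/*
 * Worked example: immn = 0, imms = 0b000111, immr = 0 selects 32-bit
 * elements (len = 5, e = 32) with a run of s + 1 = 8 set bits and no
 * rotation, so each element is 0xff and the replicated result is
 * 0x000000ff000000ff (masked down to 0x000000ff by the caller for !sf).
 */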
5155 
5156 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
5157                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
5158 {
5159     TCGv_i64 tcg_rd, tcg_rn;
5160     uint64_t imm;
5161 
5162     /* Some immediate field values are reserved. */
5163     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
5164                                 extract32(a->dbm, 0, 6),
5165                                 extract32(a->dbm, 6, 6))) {
5166         return false;
5167     }
5168     if (!a->sf) {
5169         imm &= 0xffffffffull;
5170     }
5171 
5172     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
5173     tcg_rn = cpu_reg(s, a->rn);
5174 
5175     fn(tcg_rd, tcg_rn, imm);
5176     if (set_cc) {
5177         gen_logic_CC(a->sf, tcg_rd);
5178     }
5179     if (!a->sf) {
5180         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5181     }
5182     return true;
5183 }
5184 
5185 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
5186 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
5187 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
5188 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
5189 
5190 /*
5191  * Move wide (immediate)
5192  */
5193 
5194 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
5195 {
5196     int pos = a->hw << 4;
5197     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
5198     return true;
5199 }
5200 
5201 static bool trans_MOVN(DisasContext *s, arg_movw *a)
5202 {
5203     int pos = a->hw << 4;
5204     uint64_t imm = a->imm;
5205 
5206     imm = ~(imm << pos);
5207     if (!a->sf) {
5208         imm = (uint32_t)imm;
5209     }
5210     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
5211     return true;
5212 }
5213 
5214 static bool trans_MOVK(DisasContext *s, arg_movw *a)
5215 {
5216     int pos = a->hw << 4;
5217     TCGv_i64 tcg_rd, tcg_im;
5218 
5219     tcg_rd = cpu_reg(s, a->rd);
5220     tcg_im = tcg_constant_i64(a->imm);
5221     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
5222     if (!a->sf) {
5223         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5224     }
5225     return true;
5226 }
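
/*
 * A full 64-bit constant is typically materialized with one MOVZ and up
 * to three MOVKs, e.g. for 0x123456789abcdef0:
 *   MOVZ X0, #0xdef0
 *   MOVK X0, #0x9abc, LSL #16
 *   MOVK X0, #0x5678, LSL #32
 *   MOVK X0, #0x1234, LSL #48
 * Each MOVK maps onto the 16-bit deposit at hw * 16 above.
 */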
5227 
5228 /*
5229  * Bitfield
5230  */
5231 
5232 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
5233 {
5234     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
5235     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
5236     unsigned int bitsize = a->sf ? 64 : 32;
5237     unsigned int ri = a->immr;
5238     unsigned int si = a->imms;
5239     unsigned int pos, len;
5240 
5241     if (si >= ri) {
5242         /* Wd<s-r:0> = Wn<s:r> */
5243         len = (si - ri) + 1;
5244         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
5245         if (!a->sf) {
5246             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5247         }
5248     } else {
5249         /* Wd<32+s-r,32-r> = Wn<s:0> */
5250         len = si + 1;
5251         pos = (bitsize - ri) & (bitsize - 1);
5252 
5253         if (len < ri) {
5254             /*
5255              * Sign extend the destination field from len to fill the
5256              * balance of the word.  Let the deposit below insert all
5257              * of those sign bits.
5258              */
5259             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
5260             len = ri;
5261         }
5262 
5263         /*
5264          * We start with zero, and we haven't modified any bits outside
5265          * bitsize, therefore no final zero-extension is needed for !sf.
5266          */
5267         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
5268     }
5269     return true;
5270 }
5271 
5272 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
5273 {
5274     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
5275     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
5276     unsigned int bitsize = a->sf ? 64 : 32;
5277     unsigned int ri = a->immr;
5278     unsigned int si = a->imms;
5279     unsigned int pos, len;
5280 
5284     if (si >= ri) {
5285         /* Wd<s-r:0> = Wn<s:r> */
5286         len = (si - ri) + 1;
5287         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
5288     } else {
5289         /* Wd<32+s-r,32-r> = Wn<s:0> */
5290         len = si + 1;
5291         pos = (bitsize - ri) & (bitsize - 1);
5292         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
5293     }
5294     return true;
5295 }
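
/*
 * The common aliases map onto the two branches of trans_UBFM above; for
 * 64-bit operands:
 *   UBFX Xd, Xn, #lsb, #width  ->  immr = lsb, imms = lsb + width - 1
 *                                  (si >= ri: plain extract)
 *   LSL  Xd, Xn, #sh           ->  immr = (64 - sh) % 64, imms = 63 - sh
 *                                  (si < ri: deposit at pos = sh)
 */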
5296 
5297 static bool trans_BFM(DisasContext *s, arg_BFM *a)
5298 {
5299     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
5300     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
5301     unsigned int bitsize = a->sf ? 64 : 32;
5302     unsigned int ri = a->immr;
5303     unsigned int si = a->imms;
5304     unsigned int pos, len;
5305 
5309     if (si >= ri) {
5310         /* Wd<s-r:0> = Wn<s:r> */
5311         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
5312         len = (si - ri) + 1;
5313         pos = 0;
5314     } else {
5315         /* Wd<32+s-r,32-r> = Wn<s:0> */
5316         len = si + 1;
5317         pos = (bitsize - ri) & (bitsize - 1);
5318     }
5319 
5320     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
5321     if (!a->sf) {
5322         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5323     }
5324     return true;
5325 }
5326 
5327 static bool trans_EXTR(DisasContext *s, arg_extract *a)
5328 {
5329     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
5330 
5331     tcg_rd = cpu_reg(s, a->rd);
5332 
5333     if (unlikely(a->imm == 0)) {
5334         /*
5335          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
5336          * so an extract from bit 0 is a special case.
5337          */
5338         if (a->sf) {
5339             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
5340         } else {
5341             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
5342         }
5343     } else {
5344         tcg_rm = cpu_reg(s, a->rm);
5345         tcg_rn = cpu_reg(s, a->rn);
5346 
5347         if (a->sf) {
5348             /* Specialization to ROR happens in EXTRACT2.  */
5349             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
5350         } else {
5351             TCGv_i32 t0 = tcg_temp_new_i32();
5352 
5353             tcg_gen_extrl_i64_i32(t0, tcg_rm);
5354             if (a->rm == a->rn) {
5355                 tcg_gen_rotri_i32(t0, t0, a->imm);
5356             } else {
5357                 TCGv_i32 t1 = tcg_temp_new_i32();
5358                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
5359                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
5360             }
5361             tcg_gen_extu_i32_i64(tcg_rd, t0);
5362         }
5363     }
5364     return true;
5365 }
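
/*
 * ROR (immediate) is the rn == rm special case handled above, e.g.
 * ROR W0, W1, #8 assembles as EXTR W0, W1, W1, #8 and takes the
 * single-register rotate path.
 */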
5366 
5367 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a)
5368 {
5369     if (fp_access_check(s)) {
5370         int len = (a->len + 1) * 16;
5371 
5372         tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
5373                            vec_full_reg_offset(s, a->rm), tcg_env,
5374                            a->q ? 16 : 8, vec_full_reg_size(s),
5375                            (len << 6) | (a->tbx << 5) | a->rn,
5376                            gen_helper_simd_tblx);
5377     }
5378     return true;
5379 }
5380 
5381 typedef int simd_permute_idx_fn(int i, int part, int elements);
5382 
5383 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
5384                             simd_permute_idx_fn *fn, int part)
5385 {
5386     MemOp esz = a->esz;
5387     int datasize = a->q ? 16 : 8;
5388     int elements = datasize >> esz;
5389     TCGv_i64 tcg_res[2], tcg_ele;
5390 
5391     if (esz == MO_64 && !a->q) {
5392         return false;
5393     }
5394     if (!fp_access_check(s)) {
5395         return true;
5396     }
5397 
5398     tcg_res[0] = tcg_temp_new_i64();
5399     tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
5400     tcg_ele = tcg_temp_new_i64();
5401 
5402     for (int i = 0; i < elements; i++) {
5403         int o, w, idx;
5404 
5405         idx = fn(i, part, elements);
5406         read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn),
5407                          idx & (elements - 1), esz);
5408 
5409         w = (i << (esz + 3)) / 64;
5410         o = (i << (esz + 3)) % 64;
5411         if (o == 0) {
5412             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
5413         } else {
5414             tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz);
5415         }
5416     }
5417 
5418     for (int i = a->q; i >= 0; --i) {
5419         write_vec_element(s, tcg_res[i], a->rd, i, MO_64);
5420     }
5421     clear_vec_high(s, a->q, a->rd);
5422     return true;
5423 }
5424 
5425 static int permute_load_uzp(int i, int part, int elements)
5426 {
5427     return 2 * i + part;
5428 }
5429 
5430 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0)
5431 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1)
5432 
5433 static int permute_load_trn(int i, int part, int elements)
5434 {
5435     return (i & 1) * elements + (i & ~1) + part;
5436 }
5437 
5438 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0)
5439 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1)
5440 
5441 static int permute_load_zip(int i, int part, int elements)
5442 {
5443     return (i & 1) * elements + ((part * elements + i) >> 1);
5444 }
5445 
5446 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0)
5447 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1)
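
/*
 * With eight elements per register and part = 0 the index functions
 * produce (values >= 8 select from Vm, others from Vn):
 *   permute_load_uzp: 0  2  4  6  8 10 12 14
 *   permute_load_trn: 0  8  2 10  4 12  6 14
 *   permute_load_zip: 0  8  1  9  2 10  3 11
 */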
5448 
5449 /*
5450  * Cryptographic AES, SHA, SHA512
5451  */
5452 
5453 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
5454 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
5455 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
5456 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
5457 
5458 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
5459 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
5460 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
5461 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)
5462 
5463 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
5464 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
5465 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
5466 
5467 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
5468 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
5469 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
5470 
5471 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
5472 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
5473 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
5474 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
5475 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
5476 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
5477 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
5478 
5479 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
5480 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
5481 
5482 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
5483 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
5484 
5485 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
5486 {
5487     if (!dc_isar_feature(aa64_sm3, s)) {
5488         return false;
5489     }
5490     if (fp_access_check(s)) {
5491         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
5492         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
5493         TCGv_i32 tcg_op3 = tcg_temp_new_i32();
5494         TCGv_i32 tcg_res = tcg_temp_new_i32();
5495 
5496         read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
5497         read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
5498         read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
5499 
5500         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
5501         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
5502         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
5503         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
5504 
5505         /* Clear the whole register first, then store bits [127:96]. */
5506         clear_vec(s, a->rd);
5507         write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
5508     }
5509     return true;
5510 }
5511 
5512 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
5513 {
5514     if (fp_access_check(s)) {
5515         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
5516     }
5517     return true;
5518 }
5519 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
5520 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
5521 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
5522 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
5523 
5524 static bool trans_XAR(DisasContext *s, arg_XAR *a)
5525 {
5526     if (!dc_isar_feature(aa64_sha3, s)) {
5527         return false;
5528     }
5529     if (fp_access_check(s)) {
5530         gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
5531                      vec_full_reg_offset(s, a->rn),
5532                      vec_full_reg_offset(s, a->rm), a->imm, 16,
5533                      vec_full_reg_size(s));
5534     }
5535     return true;
5536 }
5537 
5538 /*
5539  * Advanced SIMD copy
5540  */
5541 
5542 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
5543 {
5544     unsigned esz = ctz32(imm);
5545     if (esz <= MO_64) {
5546         *pesz = esz;
5547         *pidx = imm >> (esz + 1);
5548         return true;
5549     }
5550     return false;
5551 }
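
/*
 * Example: decode_esz_idx() applied to an imm field of 0b00110 finds
 * ctz32() == 1, so esz = MO_16 and the element index is 0b00110 >> 2 = 1;
 * an all-zero field has no set bit and is rejected as reserved.
 */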
5552 
5553 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
5554 {
5555     MemOp esz;
5556     unsigned idx;
5557 
5558     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5559         return false;
5560     }
5561     if (fp_access_check(s)) {
5562         /*
5563          * This instruction just extracts the specified element and
5564          * zero-extends it into the bottom of the destination register.
5565          */
5566         TCGv_i64 tmp = tcg_temp_new_i64();
5567         read_vec_element(s, tmp, a->rn, idx, esz);
5568         write_fp_dreg(s, a->rd, tmp);
5569     }
5570     return true;
5571 }
5572 
5573 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
5574 {
5575     MemOp esz;
5576     unsigned idx;
5577 
5578     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5579         return false;
5580     }
5581     if (esz == MO_64 && !a->q) {
5582         return false;
5583     }
5584     if (fp_access_check(s)) {
5585         tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
5586                              vec_reg_offset(s, a->rn, idx, esz),
5587                              a->q ? 16 : 8, vec_full_reg_size(s));
5588     }
5589     return true;
5590 }
5591 
5592 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
5593 {
5594     MemOp esz;
5595     unsigned idx;
5596 
5597     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5598         return false;
5599     }
5600     if (esz == MO_64 && !a->q) {
5601         return false;
5602     }
5603     if (fp_access_check(s)) {
5604         tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5605                              a->q ? 16 : 8, vec_full_reg_size(s),
5606                              cpu_reg(s, a->rn));
5607     }
5608     return true;
5609 }
5610 
5611 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
5612 {
5613     MemOp esz;
5614     unsigned idx;
5615 
5616     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5617         return false;
5618     }
5619     if (is_signed) {
5620         if (esz == MO_64 || (esz == MO_32 && !a->q)) {
5621             return false;
5622         }
5623     } else {
5624         if (esz == MO_64 ? !a->q : a->q) {
5625             return false;
5626         }
5627     }
5628     if (fp_access_check(s)) {
5629         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
5630         read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
5631         if (is_signed && !a->q) {
5632             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5633         }
5634     }
5635     return true;
5636 }
5637 
5638 TRANS(SMOV, do_smov_umov, a, MO_SIGN)
5639 TRANS(UMOV, do_smov_umov, a, 0)
5640 
5641 static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
5642 {
5643     MemOp esz;
5644     unsigned idx;
5645 
5646     if (!decode_esz_idx(a->imm, &esz, &idx)) {
5647         return false;
5648     }
5649     if (fp_access_check(s)) {
5650         write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
5651         clear_vec_high(s, true, a->rd);
5652     }
5653     return true;
5654 }
5655 
5656 static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
5657 {
5658     MemOp esz;
5659     unsigned didx, sidx;
5660 
5661     if (!decode_esz_idx(a->di, &esz, &didx)) {
5662         return false;
5663     }
5664     sidx = a->si >> esz;
5665     if (fp_access_check(s)) {
5666         TCGv_i64 tmp = tcg_temp_new_i64();
5667 
5668         read_vec_element(s, tmp, a->rn, sidx, esz);
5669         write_vec_element(s, tmp, a->rd, didx, esz);
5670 
5671         /* INS is considered a 128-bit write for SVE. */
5672         clear_vec_high(s, true, a->rd);
5673     }
5674     return true;
5675 }
5676 
5677 /*
5678  * Advanced SIMD three same
5679  */
5680 
5681 typedef struct FPScalar {
5682     void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5683     void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5684     void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
5685 } FPScalar;
5686 
5687 static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
5688                                         const FPScalar *f, int mergereg,
5689                                         ARMFPStatusFlavour fpsttype)
5690 {
5691     switch (a->esz) {
5692     case MO_64:
5693         if (fp_access_check(s)) {
5694             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5695             TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5696             f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype));
5697             write_fp_dreg_merging(s, a->rd, mergereg, t0);
5698         }
5699         break;
5700     case MO_32:
5701         if (fp_access_check(s)) {
5702             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5703             TCGv_i32 t1 = read_fp_sreg(s, a->rm);
5704             f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype));
5705             write_fp_sreg_merging(s, a->rd, mergereg, t0);
5706         }
5707         break;
5708     case MO_16:
5709         if (!dc_isar_feature(aa64_fp16, s)) {
5710             return false;
5711         }
5712         if (fp_access_check(s)) {
5713             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5714             TCGv_i32 t1 = read_fp_hreg(s, a->rm);
5715             f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype));
5716             write_fp_hreg_merging(s, a->rd, mergereg, t0);
5717         }
5718         break;
5719     default:
5720         return false;
5721     }
5722     return true;
5723 }
5724 
5725 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
5726                           int mergereg)
5727 {
5728     return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
5729                                        a->esz == MO_16 ?
5730                                        FPST_A64_F16 : FPST_A64);
5731 }
5732 
5733 static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a,
5734                                  const FPScalar *fnormal, const FPScalar *fah,
5735                                  int mergereg)
5736 {
5737     return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal,
5738                                        mergereg, select_ah_fpst(s, a->esz));
5739 }
5740 
5741 /* Some insns need to call different helpers when FPCR.AH == 1 */
5742 static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a,
5743                               const FPScalar *fnormal,
5744                               const FPScalar *fah,
5745                               int mergereg)
5746 {
5747     return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg);
5748 }
5749 
5750 static const FPScalar f_scalar_fadd = {
5751     gen_helper_vfp_addh,
5752     gen_helper_vfp_adds,
5753     gen_helper_vfp_addd,
5754 };
5755 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn)
5756 
5757 static const FPScalar f_scalar_fsub = {
5758     gen_helper_vfp_subh,
5759     gen_helper_vfp_subs,
5760     gen_helper_vfp_subd,
5761 };
5762 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn)
5763 
5764 static const FPScalar f_scalar_fdiv = {
5765     gen_helper_vfp_divh,
5766     gen_helper_vfp_divs,
5767     gen_helper_vfp_divd,
5768 };
5769 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn)
5770 
5771 static const FPScalar f_scalar_fmul = {
5772     gen_helper_vfp_mulh,
5773     gen_helper_vfp_muls,
5774     gen_helper_vfp_muld,
5775 };
5776 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn)
5777 
5778 static const FPScalar f_scalar_fmax = {
5779     gen_helper_vfp_maxh,
5780     gen_helper_vfp_maxs,
5781     gen_helper_vfp_maxd,
5782 };
5783 static const FPScalar f_scalar_fmax_ah = {
5784     gen_helper_vfp_ah_maxh,
5785     gen_helper_vfp_ah_maxs,
5786     gen_helper_vfp_ah_maxd,
5787 };
5788 TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn)
5789 
5790 static const FPScalar f_scalar_fmin = {
5791     gen_helper_vfp_minh,
5792     gen_helper_vfp_mins,
5793     gen_helper_vfp_mind,
5794 };
5795 static const FPScalar f_scalar_fmin_ah = {
5796     gen_helper_vfp_ah_minh,
5797     gen_helper_vfp_ah_mins,
5798     gen_helper_vfp_ah_mind,
5799 };
5800 TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn)
5801 
5802 static const FPScalar f_scalar_fmaxnm = {
5803     gen_helper_vfp_maxnumh,
5804     gen_helper_vfp_maxnums,
5805     gen_helper_vfp_maxnumd,
5806 };
5807 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn)
5808 
5809 static const FPScalar f_scalar_fminnm = {
5810     gen_helper_vfp_minnumh,
5811     gen_helper_vfp_minnums,
5812     gen_helper_vfp_minnumd,
5813 };
5814 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn)
5815 
5816 static const FPScalar f_scalar_fmulx = {
5817     gen_helper_advsimd_mulxh,
5818     gen_helper_vfp_mulxs,
5819     gen_helper_vfp_mulxd,
5820 };
5821 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn)
5822 
5823 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5824 {
5825     gen_helper_vfp_mulh(d, n, m, s);
5826     gen_vfp_negh(d, d);
5827 }
5828 
5829 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5830 {
5831     gen_helper_vfp_muls(d, n, m, s);
5832     gen_vfp_negs(d, d);
5833 }
5834 
5835 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5836 {
5837     gen_helper_vfp_muld(d, n, m, s);
5838     gen_vfp_negd(d, d);
5839 }
5840 
5841 static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5842 {
5843     gen_helper_vfp_mulh(d, n, m, s);
5844     gen_vfp_ah_negh(d, d);
5845 }
5846 
5847 static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5848 {
5849     gen_helper_vfp_muls(d, n, m, s);
5850     gen_vfp_ah_negs(d, d);
5851 }
5852 
5853 static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5854 {
5855     gen_helper_vfp_muld(d, n, m, s);
5856     gen_vfp_ah_negd(d, d);
5857 }
5858 
5859 static const FPScalar f_scalar_fnmul = {
5860     gen_fnmul_h,
5861     gen_fnmul_s,
5862     gen_fnmul_d,
5863 };
5864 static const FPScalar f_scalar_ah_fnmul = {
5865     gen_fnmul_ah_h,
5866     gen_fnmul_ah_s,
5867     gen_fnmul_ah_d,
5868 };
5869 TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn)
5870 
5871 static const FPScalar f_scalar_fcmeq = {
5872     gen_helper_advsimd_ceq_f16,
5873     gen_helper_neon_ceq_f32,
5874     gen_helper_neon_ceq_f64,
5875 };
5876 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm)
5877 
5878 static const FPScalar f_scalar_fcmge = {
5879     gen_helper_advsimd_cge_f16,
5880     gen_helper_neon_cge_f32,
5881     gen_helper_neon_cge_f64,
5882 };
5883 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm)
5884 
5885 static const FPScalar f_scalar_fcmgt = {
5886     gen_helper_advsimd_cgt_f16,
5887     gen_helper_neon_cgt_f32,
5888     gen_helper_neon_cgt_f64,
5889 };
5890 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm)
5891 
5892 static const FPScalar f_scalar_facge = {
5893     gen_helper_advsimd_acge_f16,
5894     gen_helper_neon_acge_f32,
5895     gen_helper_neon_acge_f64,
5896 };
5897 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm)
5898 
5899 static const FPScalar f_scalar_facgt = {
5900     gen_helper_advsimd_acgt_f16,
5901     gen_helper_neon_acgt_f32,
5902     gen_helper_neon_acgt_f64,
5903 };
5904 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm)
5905 
5906 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5907 {
5908     gen_helper_vfp_subh(d, n, m, s);
5909     gen_vfp_absh(d, d);
5910 }
5911 
5912 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5913 {
5914     gen_helper_vfp_subs(d, n, m, s);
5915     gen_vfp_abss(d, d);
5916 }
5917 
5918 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5919 {
5920     gen_helper_vfp_subd(d, n, m, s);
5921     gen_vfp_absd(d, d);
5922 }
5923 
5924 static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5925 {
5926     gen_helper_vfp_subh(d, n, m, s);
5927     gen_vfp_ah_absh(d, d);
5928 }
5929 
5930 static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5931 {
5932     gen_helper_vfp_subs(d, n, m, s);
5933     gen_vfp_ah_abss(d, d);
5934 }
5935 
5936 static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5937 {
5938     gen_helper_vfp_subd(d, n, m, s);
5939     gen_vfp_ah_absd(d, d);
5940 }
5941 
5942 static const FPScalar f_scalar_fabd = {
5943     gen_fabd_h,
5944     gen_fabd_s,
5945     gen_fabd_d,
5946 };
5947 static const FPScalar f_scalar_ah_fabd = {
5948     gen_fabd_ah_h,
5949     gen_fabd_ah_s,
5950     gen_fabd_ah_d,
5951 };
5952 TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn)
5953 
5954 static const FPScalar f_scalar_frecps = {
5955     gen_helper_recpsf_f16,
5956     gen_helper_recpsf_f32,
5957     gen_helper_recpsf_f64,
5958 };
5959 static const FPScalar f_scalar_ah_frecps = {
5960     gen_helper_recpsf_ah_f16,
5961     gen_helper_recpsf_ah_f32,
5962     gen_helper_recpsf_ah_f64,
5963 };
5964 TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a,
5965       &f_scalar_frecps, &f_scalar_ah_frecps, a->rn)
5966 
5967 static const FPScalar f_scalar_frsqrts = {
5968     gen_helper_rsqrtsf_f16,
5969     gen_helper_rsqrtsf_f32,
5970     gen_helper_rsqrtsf_f64,
5971 };
5972 static const FPScalar f_scalar_ah_frsqrts = {
5973     gen_helper_rsqrtsf_ah_f16,
5974     gen_helper_rsqrtsf_ah_f32,
5975     gen_helper_rsqrtsf_ah_f64,
5976 };
5977 TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a,
5978       &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn)
5979 
5980 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
5981                        const FPScalar *f, bool swap)
5982 {
5983     switch (a->esz) {
5984     case MO_64:
5985         if (fp_access_check(s)) {
5986             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5987             TCGv_i64 t1 = tcg_constant_i64(0);
5988             if (swap) {
5989                 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64));
5990             } else {
5991                 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
5992             }
5993             write_fp_dreg(s, a->rd, t0);
5994         }
5995         break;
5996     case MO_32:
5997         if (fp_access_check(s)) {
5998             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5999             TCGv_i32 t1 = tcg_constant_i32(0);
6000             if (swap) {
6001                 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64));
6002             } else {
6003                 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6004             }
6005             write_fp_sreg(s, a->rd, t0);
6006         }
6007         break;
6008     case MO_16:
6009         if (!dc_isar_feature(aa64_fp16, s)) {
6010             return false;
6011         }
6012         if (fp_access_check(s)) {
6013             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
6014             TCGv_i32 t1 = tcg_constant_i32(0);
6015             if (swap) {
6016                 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16));
6017             } else {
6018                 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6019             }
6020             write_fp_sreg(s, a->rd, t0);
6021         }
6022         break;
6023     default:
6024         return false;
6025     }
6026     return true;
6027 }
6028 
6029 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false)
6030 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false)
6031 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false)
6032 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true)
6033 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true)
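
/*
 * FCMLT0 and FCMLE0 have no dedicated helpers: swapping the operands of
 * the GT/GE comparisons (computing 0 > Vn and 0 >= Vn) yields Vn < 0 and
 * Vn <= 0, which is what the 'swap' flag above arranges.
 */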
6034 
6035 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a,
6036                 MemOp sgn_n, MemOp sgn_m,
6037                 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp),
6038                 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
6039 {
6040     TCGv_i64 t0, t1, t2, qc;
6041     MemOp esz = a->esz;
6042 
6043     if (!fp_access_check(s)) {
6044         return true;
6045     }
6046 
6047     t0 = tcg_temp_new_i64();
6048     t1 = tcg_temp_new_i64();
6049     t2 = tcg_temp_new_i64();
6050     qc = tcg_temp_new_i64();
6051     read_vec_element(s, t1, a->rn, 0, esz | sgn_n);
6052     read_vec_element(s, t2, a->rm, 0, esz | sgn_m);
6053     tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
6054 
6055     if (esz == MO_64) {
6056         gen_d(t0, qc, t1, t2);
6057     } else {
6058         gen_bhs(t0, qc, t1, t2, esz);
6059         tcg_gen_ext_i64(t0, t0, esz);
6060     }
6061 
6062     write_fp_dreg(s, a->rd, t0);
6063     tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
6064     return true;
6065 }
6066 
6067 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d)
6068 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d)
6069 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d)
6070 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d)
6071 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d)
6072 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d)
6073 
6074 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a,
6075                              void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
6076 {
6077     if (fp_access_check(s)) {
6078         TCGv_i64 t0 = tcg_temp_new_i64();
6079         TCGv_i64 t1 = tcg_temp_new_i64();
6080 
6081         read_vec_element(s, t0, a->rn, 0, MO_64);
6082         read_vec_element(s, t1, a->rm, 0, MO_64);
6083         fn(t0, t0, t1);
6084         write_fp_dreg(s, a->rd, t0);
6085     }
6086     return true;
6087 }
6088 
6089 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64)
6090 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64)
6091 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64)
6092 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64)
6093 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64)
6094 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64)
6095 
6096 typedef struct ENVScalar2 {
6097     NeonGenTwoOpEnvFn *gen_bhs[3];
6098     NeonGenTwo64OpEnvFn *gen_d;
6099 } ENVScalar2;
6100 
6101 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f)
6102 {
6103     if (!fp_access_check(s)) {
6104         return true;
6105     }
6106     if (a->esz == MO_64) {
6107         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
6108         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
6109         f->gen_d(t0, tcg_env, t0, t1);
6110         write_fp_dreg(s, a->rd, t0);
6111     } else {
6112         TCGv_i32 t0 = tcg_temp_new_i32();
6113         TCGv_i32 t1 = tcg_temp_new_i32();
6114 
6115         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6116         read_vec_element_i32(s, t1, a->rm, 0, a->esz);
6117         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
6118         write_fp_sreg(s, a->rd, t0);
6119     }
6120     return true;
6121 }
6122 
6123 static const ENVScalar2 f_scalar_sqshl = {
6124     { gen_helper_neon_qshl_s8,
6125       gen_helper_neon_qshl_s16,
6126       gen_helper_neon_qshl_s32 },
6127     gen_helper_neon_qshl_s64,
6128 };
6129 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl)
6130 
6131 static const ENVScalar2 f_scalar_uqshl = {
6132     { gen_helper_neon_qshl_u8,
6133       gen_helper_neon_qshl_u16,
6134       gen_helper_neon_qshl_u32 },
6135     gen_helper_neon_qshl_u64,
6136 };
6137 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl)
6138 
6139 static const ENVScalar2 f_scalar_sqrshl = {
6140     { gen_helper_neon_qrshl_s8,
6141       gen_helper_neon_qrshl_s16,
6142       gen_helper_neon_qrshl_s32 },
6143     gen_helper_neon_qrshl_s64,
6144 };
6145 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl)
6146 
6147 static const ENVScalar2 f_scalar_uqrshl = {
6148     { gen_helper_neon_qrshl_u8,
6149       gen_helper_neon_qrshl_u16,
6150       gen_helper_neon_qrshl_u32 },
6151     gen_helper_neon_qrshl_u64,
6152 };
6153 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl)
6154 
6155 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a,
6156                               const ENVScalar2 *f)
6157 {
6158     if (a->esz == MO_16 || a->esz == MO_32) {
6159         return do_env_scalar2(s, a, f);
6160     }
6161     return false;
6162 }
6163 
6164 static const ENVScalar2 f_scalar_sqdmulh = {
6165     { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 }
6166 };
6167 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh)
6168 
6169 static const ENVScalar2 f_scalar_sqrdmulh = {
6170     { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 }
6171 };
6172 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
6173 
6174 typedef struct ENVScalar3 {
6175     NeonGenThreeOpEnvFn *gen_hs[2];
6176 } ENVScalar3;
6177 
6178 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
6179                               const ENVScalar3 *f)
6180 {
6181     TCGv_i32 t0, t1, t2;
6182 
6183     if (a->esz != MO_16 && a->esz != MO_32) {
6184         return false;
6185     }
6186     if (!fp_access_check(s)) {
6187         return true;
6188     }
6189 
6190     t0 = tcg_temp_new_i32();
6191     t1 = tcg_temp_new_i32();
6192     t2 = tcg_temp_new_i32();
6193     read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6194     read_vec_element_i32(s, t1, a->rm, 0, a->esz);
6195     read_vec_element_i32(s, t2, a->rd, 0, a->esz);
6196     f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
6197     write_fp_sreg(s, a->rd, t0);
6198     return true;
6199 }
6200 
6201 static const ENVScalar3 f_scalar_sqrdmlah = {
6202     { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
6203 };
6204 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
6205 
6206 static const ENVScalar3 f_scalar_sqrdmlsh = {
6207     { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
6208 };
6209 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
6210 
6211 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
6212 {
6213     if (fp_access_check(s)) {
6214         TCGv_i64 t0 = read_fp_dreg(s, a->rn);
6215         TCGv_i64 t1 = read_fp_dreg(s, a->rm);
6216         tcg_gen_negsetcond_i64(cond, t0, t0, t1);
6217         write_fp_dreg(s, a->rd, t0);
6218     }
6219     return true;
6220 }
6221 
6222 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT)
6223 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU)
6224 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE)
6225 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
6226 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
6227 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
6228 
6229 static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a,
6230                                         int data,
6231                                         gen_helper_gvec_3_ptr * const fns[3],
6232                                         ARMFPStatusFlavour fpsttype)
6233 {
6234     MemOp esz = a->esz;
6235     int check = fp_access_check_vector_hsd(s, a->q, esz);
6236 
6237     if (check <= 0) {
6238         return check == 0;
6239     }
6240 
6241     gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype,
6242                       data, fns[esz - 1]);
6243     return true;
6244 }
6245 
6246 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
6247                           gen_helper_gvec_3_ptr * const fns[3])
6248 {
6249     return do_fp3_vector_with_fpsttype(s, a, data, fns,
6250                                        a->esz == MO_16 ?
6251                                        FPST_A64_F16 : FPST_A64);
6252 }
6253 
6254 static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data,
6255                               gen_helper_gvec_3_ptr * const fnormal[3],
6256                               gen_helper_gvec_3_ptr * const fah[3])
6257 {
6258     return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal);
6259 }
6260 
6261 static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data,
6262                                  gen_helper_gvec_3_ptr * const fnormal[3],
6263                                  gen_helper_gvec_3_ptr * const fah[3])
6264 {
6265     return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal,
6266                                        select_ah_fpst(s, a->esz));
6267 }
6268 
6269 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
6270     gen_helper_gvec_fadd_h,
6271     gen_helper_gvec_fadd_s,
6272     gen_helper_gvec_fadd_d,
6273 };
6274 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)
6275 
6276 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
6277     gen_helper_gvec_fsub_h,
6278     gen_helper_gvec_fsub_s,
6279     gen_helper_gvec_fsub_d,
6280 };
6281 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)
6282 
6283 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
6284     gen_helper_gvec_fdiv_h,
6285     gen_helper_gvec_fdiv_s,
6286     gen_helper_gvec_fdiv_d,
6287 };
6288 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)
6289 
6290 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
6291     gen_helper_gvec_fmul_h,
6292     gen_helper_gvec_fmul_s,
6293     gen_helper_gvec_fmul_d,
6294 };
6295 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)
6296 
6297 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
6298     gen_helper_gvec_fmax_h,
6299     gen_helper_gvec_fmax_s,
6300     gen_helper_gvec_fmax_d,
6301 };
6302 static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = {
6303     gen_helper_gvec_ah_fmax_h,
6304     gen_helper_gvec_ah_fmax_s,
6305     gen_helper_gvec_ah_fmax_d,
6306 };
6307 TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah)
6308 
6309 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
6310     gen_helper_gvec_fmin_h,
6311     gen_helper_gvec_fmin_s,
6312     gen_helper_gvec_fmin_d,
6313 };
6314 static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = {
6315     gen_helper_gvec_ah_fmin_h,
6316     gen_helper_gvec_ah_fmin_s,
6317     gen_helper_gvec_ah_fmin_d,
6318 };
6319 TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah)
6320 
6321 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
6322     gen_helper_gvec_fmaxnum_h,
6323     gen_helper_gvec_fmaxnum_s,
6324     gen_helper_gvec_fmaxnum_d,
6325 };
6326 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)
6327 
6328 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
6329     gen_helper_gvec_fminnum_h,
6330     gen_helper_gvec_fminnum_s,
6331     gen_helper_gvec_fminnum_d,
6332 };
6333 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)
6334 
6335 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
6336     gen_helper_gvec_fmulx_h,
6337     gen_helper_gvec_fmulx_s,
6338     gen_helper_gvec_fmulx_d,
6339 };
6340 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)
6341 
6342 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
6343     gen_helper_gvec_vfma_h,
6344     gen_helper_gvec_vfma_s,
6345     gen_helper_gvec_vfma_d,
6346 };
6347 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)
6348 
6349 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
6350     gen_helper_gvec_vfms_h,
6351     gen_helper_gvec_vfms_s,
6352     gen_helper_gvec_vfms_d,
6353 };
6354 static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = {
6355     gen_helper_gvec_ah_vfms_h,
6356     gen_helper_gvec_ah_vfms_s,
6357     gen_helper_gvec_ah_vfms_d,
6358 };
6359 TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah)
6360 
6361 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
6362     gen_helper_gvec_fceq_h,
6363     gen_helper_gvec_fceq_s,
6364     gen_helper_gvec_fceq_d,
6365 };
6366 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)
6367 
6368 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
6369     gen_helper_gvec_fcge_h,
6370     gen_helper_gvec_fcge_s,
6371     gen_helper_gvec_fcge_d,
6372 };
6373 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)
6374 
6375 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
6376     gen_helper_gvec_fcgt_h,
6377     gen_helper_gvec_fcgt_s,
6378     gen_helper_gvec_fcgt_d,
6379 };
6380 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)
6381 
6382 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
6383     gen_helper_gvec_facge_h,
6384     gen_helper_gvec_facge_s,
6385     gen_helper_gvec_facge_d,
6386 };
6387 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)
6388 
6389 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
6390     gen_helper_gvec_facgt_h,
6391     gen_helper_gvec_facgt_s,
6392     gen_helper_gvec_facgt_d,
6393 };
6394 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt)
6395 
6396 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
6397     gen_helper_gvec_fabd_h,
6398     gen_helper_gvec_fabd_s,
6399     gen_helper_gvec_fabd_d,
6400 };
6401 static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = {
6402     gen_helper_gvec_ah_fabd_h,
6403     gen_helper_gvec_ah_fabd_s,
6404     gen_helper_gvec_ah_fabd_d,
6405 };
6406 TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd)
6407 
6408 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
6409     gen_helper_gvec_recps_h,
6410     gen_helper_gvec_recps_s,
6411     gen_helper_gvec_recps_d,
6412 };
6413 static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = {
6414     gen_helper_gvec_ah_recps_h,
6415     gen_helper_gvec_ah_recps_s,
6416     gen_helper_gvec_ah_recps_d,
6417 };
6418 TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps)
6419 
6420 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
6421     gen_helper_gvec_rsqrts_h,
6422     gen_helper_gvec_rsqrts_s,
6423     gen_helper_gvec_rsqrts_d,
6424 };
6425 static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = {
6426     gen_helper_gvec_ah_rsqrts_h,
6427     gen_helper_gvec_ah_rsqrts_s,
6428     gen_helper_gvec_ah_rsqrts_d,
6429 };
6430 TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts)
6431 
6432 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
6433     gen_helper_gvec_faddp_h,
6434     gen_helper_gvec_faddp_s,
6435     gen_helper_gvec_faddp_d,
6436 };
6437 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp)
6438 
6439 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
6440     gen_helper_gvec_fmaxp_h,
6441     gen_helper_gvec_fmaxp_s,
6442     gen_helper_gvec_fmaxp_d,
6443 };
6444 static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = {
6445     gen_helper_gvec_ah_fmaxp_h,
6446     gen_helper_gvec_ah_fmaxp_s,
6447     gen_helper_gvec_ah_fmaxp_d,
6448 };
6449 TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp)
6450 
6451 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
6452     gen_helper_gvec_fminp_h,
6453     gen_helper_gvec_fminp_s,
6454     gen_helper_gvec_fminp_d,
6455 };
6456 static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = {
6457     gen_helper_gvec_ah_fminp_h,
6458     gen_helper_gvec_ah_fminp_s,
6459     gen_helper_gvec_ah_fminp_d,
6460 };
6461 TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp)
6462 
6463 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
6464     gen_helper_gvec_fmaxnump_h,
6465     gen_helper_gvec_fmaxnump_s,
6466     gen_helper_gvec_fmaxnump_d,
6467 };
6468 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp)
6469 
6470 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
6471     gen_helper_gvec_fminnump_h,
6472     gen_helper_gvec_fminnump_s,
6473     gen_helper_gvec_fminnump_d,
6474 };
6475 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
6476 
6477 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
6478 {
6479     if (fp_access_check(s)) {
6480         int data = (is_2 << 1) | is_s;
6481         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
6482                            vec_full_reg_offset(s, a->rn),
6483                            vec_full_reg_offset(s, a->rm), tcg_env,
6484                            a->q ? 16 : 8, vec_full_reg_size(s),
6485                            data, gen_helper_gvec_fmlal_a64);
6486     }
6487     return true;
6488 }
6489 
6490 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
6491 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
6492 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
6493 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
6494 
6495 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
6496 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
6497 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
6498 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
6499 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)
6500 
6501 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
6502 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
6503 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
6504 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
6505 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
6506 
6507 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
6508 {
6509     if (fp_access_check(s)) {
6510         gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
6511     }
6512     return true;
6513 }
6514 
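/*
 * tcg_gen_gvec_bitsel computes d = (b & a) | (c & ~a), with operand a as
 * the select mask.  The operand orders below therefore give:
 *   BSL: rd = (rn & rd) | (rm & ~rd)   (destination is the mask)
 *   BIT: rd = (rn & rm) | (rd & ~rm)   (insert rn where rm is set)
 *   BIF: rd = (rd & rm) | (rn & ~rm)   (insert rn where rm is clear)
 */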
6515 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
6516 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
6517 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
6518 
6519 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc)
6520 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc)
6521 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc)
6522 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc)
6523 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc)
6524 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc)
6525 
6526 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl)
6527 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl)
6528 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl)
6529 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl)
6530 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl)
6531 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl)
6532 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl)
6533 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl)
6534 
6535 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add)
6536 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub)
6537 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd)
6538 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd)
6539 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub)
6540 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub)
6541 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd)
6542 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd)
6543 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax)
6544 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax)
6545 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin)
6546 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin)
6547 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba)
6548 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba)
6549 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd)
6550 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd)
6551 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul)
6552 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b)
6553 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla)
6554 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls)
6555 
6556 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond)
6557 {
6558     if (a->esz == MO_64 && !a->q) {
6559         return false;
6560     }
6561     if (fp_access_check(s)) {
6562         tcg_gen_gvec_cmp(cond, a->esz,
6563                          vec_full_reg_offset(s, a->rd),
6564                          vec_full_reg_offset(s, a->rn),
6565                          vec_full_reg_offset(s, a->rm),
6566                          a->q ? 16 : 8, vec_full_reg_size(s));
6567     }
6568     return true;
6569 }
6570 
6571 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT)
6572 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU)
6573 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE)
6574 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
6575 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
6576 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
6577 
6578 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
6579 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
6580 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
6581 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
6582 
6583 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
6584                           gen_helper_gvec_4 *fn)
6585 {
6586     if (fp_access_check(s)) {
6587         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
6588     }
6589     return true;
6590 }
6591 
6592 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
6593                               gen_helper_gvec_4_ptr *fn)
6594 {
6595     if (fp_access_check(s)) {
6596         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
6597     }
6598     return true;
6599 }
6600 
6601 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_4b)
6602 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_4b)
6603 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_4b)
6604 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
6605 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
6606 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
6607 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
6608 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
6609 
6610 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
6611 {
6612     if (!dc_isar_feature(aa64_bf16, s)) {
6613         return false;
6614     }
6615     if (fp_access_check(s)) {
6616         /* Q bit selects BFMLALB vs BFMLALT. */
6617         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
6618                           s->fpcr_ah ? FPST_AH : FPST_A64, a->q,
6619                           gen_helper_gvec_bfmlal);
6620     }
6621     return true;
6622 }
6623 
6624 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
6625     gen_helper_gvec_fcaddh,
6626     gen_helper_gvec_fcadds,
6627     gen_helper_gvec_fcaddd,
6628 };
6629 /*
6630  * Encode FPCR.AH into the data so the helper knows whether the
6631  * negations it does should avoid flipping the sign bit on a NaN
6632  */
6633 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1),
6634            f_vector_fcadd)
6635 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1),
6636            f_vector_fcadd)
6637 
6638 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
6639 {
6640     static gen_helper_gvec_4_ptr * const fn[] = {
6641         [MO_16] = gen_helper_gvec_fcmlah,
6642         [MO_32] = gen_helper_gvec_fcmlas,
6643         [MO_64] = gen_helper_gvec_fcmlad,
6644     };
6645     int check;
6646 
6647     if (!dc_isar_feature(aa64_fcma, s)) {
6648         return false;
6649     }
6650 
6651     check = fp_access_check_vector_hsd(s, a->q, a->esz);
6652     if (check <= 0) {
6653         return check == 0;
6654     }
6655 
6656     gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6657                       a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6658                       a->rot | (s->fpcr_ah << 2), fn[a->esz]);
6659     return true;
6660 }
6661 
6662 /*
6663  * Widening vector x vector/indexed.
6664  *
6665  * These read from the top or bottom half of a 128-bit vector.
6666  * After widening, optionally accumulate with a 128-bit vector.
6667  * Implement these inline, as the number of elements is limited
6668  * and the related SVE and SME operations on larger vectors use
6669  * even/odd elements instead of top/bottom half.
6670  *
6671  * If idx >= 0, operand 2 is indexed, otherwise vector.
6672  * If acc, operand 0 is loaded with rd.
6673  */
6674 
6675 /* The element loop iterates upward; top_swap reverses the order as needed. */
6676 static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
6677                             int rd, int rn, int rm, int idx,
6678                             NeonGenTwo64OpFn *fn, bool acc)
6679 {
6680     TCGv_i64 tcg_op0 = tcg_temp_new_i64();
6681     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6682     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6683     MemOp esz = memop & MO_SIZE;
6684     int half = 8 >> esz;
6685     int top_swap, top_half;
6686 
6687     /* There are no 64x64->128 bit operations. */
6688     if (esz >= MO_64) {
6689         return false;
6690     }
6691     if (!fp_access_check(s)) {
6692         return true;
6693     }
6694 
6695     if (idx >= 0) {
6696         read_vec_element(s, tcg_op2, rm, idx, memop);
6697     }
6698 
6699     /*
6700      * For top half inputs, iterate forward; backward for bottom half.
6701      * This means the store to the destination will not occur until
6702      * the overlapping inputs have been consumed.
6703      * Use top_swap to conditionally invert the forward iteration index.
6704      */
6705     top_swap = top ? 0 : half - 1;
6706     top_half = top ? half : 0;
6707 
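    /*
     * Example: with esz == MO_16, half == 4 and the results are 32 bits.
     * The bottom-half form reads 16-bit elements 0..3 of rn/rm and writes
     * 32-bit elements 0..3 of rd; iterating elt = 3,2,1,0 ensures that
     * rd[1], which overlaps 16-bit source elements 2 and 3, is not stored
     * until after those elements have been read.
     */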
6708     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6709         int elt = elt_fwd ^ top_swap;
6710 
6711         read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
6712         if (idx < 0) {
6713             read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
6714         }
6715         if (acc) {
6716             read_vec_element(s, tcg_op0, rd, elt, memop + 1);
6717         }
6718         fn(tcg_op0, tcg_op1, tcg_op2);
6719         write_vec_element(s, tcg_op0, rd, elt, esz + 1);
6720     }
6721     clear_vec_high(s, 1, rd);
6722     return true;
6723 }
6724 
6725 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6726 {
6727     TCGv_i64 t = tcg_temp_new_i64();
6728     tcg_gen_mul_i64(t, n, m);
6729     tcg_gen_add_i64(d, d, t);
6730 }
6731 
6732 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6733 {
6734     TCGv_i64 t = tcg_temp_new_i64();
6735     tcg_gen_mul_i64(t, n, m);
6736     tcg_gen_sub_i64(d, d, t);
6737 }
6738 
6739 TRANS(SMULL_v, do_3op_widening,
6740       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6741       tcg_gen_mul_i64, false)
6742 TRANS(UMULL_v, do_3op_widening,
6743       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6744       tcg_gen_mul_i64, false)
6745 TRANS(SMLAL_v, do_3op_widening,
6746       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6747       gen_muladd_i64, true)
6748 TRANS(UMLAL_v, do_3op_widening,
6749       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6750       gen_muladd_i64, true)
6751 TRANS(SMLSL_v, do_3op_widening,
6752       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6753       gen_mulsub_i64, true)
6754 TRANS(UMLSL_v, do_3op_widening,
6755       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6756       gen_mulsub_i64, true)
6757 
6758 TRANS(SMULL_vi, do_3op_widening,
6759       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6760       tcg_gen_mul_i64, false)
6761 TRANS(UMULL_vi, do_3op_widening,
6762       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6763       tcg_gen_mul_i64, false)
6764 TRANS(SMLAL_vi, do_3op_widening,
6765       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6766       gen_muladd_i64, true)
6767 TRANS(UMLAL_vi, do_3op_widening,
6768       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6769       gen_muladd_i64, true)
6770 TRANS(SMLSL_vi, do_3op_widening,
6771       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6772       gen_mulsub_i64, true)
6773 TRANS(UMLSL_vi, do_3op_widening,
6774       a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6775       gen_mulsub_i64, true)
6776 
6777 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6778 {
6779     TCGv_i64 t1 = tcg_temp_new_i64();
6780     TCGv_i64 t2 = tcg_temp_new_i64();
6781 
6782     tcg_gen_sub_i64(t1, n, m);
6783     tcg_gen_sub_i64(t2, m, n);
6784     tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
6785 }
6786 
6787 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6788 {
6789     TCGv_i64 t1 = tcg_temp_new_i64();
6790     TCGv_i64 t2 = tcg_temp_new_i64();
6791 
6792     tcg_gen_sub_i64(t1, n, m);
6793     tcg_gen_sub_i64(t2, m, n);
6794     tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
6795 }
6796 
6797 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6798 {
6799     TCGv_i64 t = tcg_temp_new_i64();
6800     gen_sabd_i64(t, n, m);
6801     tcg_gen_add_i64(d, d, t);
6802 }
6803 
6804 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6805 {
6806     TCGv_i64 t = tcg_temp_new_i64();
6807     gen_uabd_i64(t, n, m);
6808     tcg_gen_add_i64(d, d, t);
6809 }
6810 
6811 TRANS(SADDL_v, do_3op_widening,
6812       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6813       tcg_gen_add_i64, false)
6814 TRANS(UADDL_v, do_3op_widening,
6815       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6816       tcg_gen_add_i64, false)
6817 TRANS(SSUBL_v, do_3op_widening,
6818       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6819       tcg_gen_sub_i64, false)
6820 TRANS(USUBL_v, do_3op_widening,
6821       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6822       tcg_gen_sub_i64, false)
6823 TRANS(SABDL_v, do_3op_widening,
6824       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6825       gen_sabd_i64, false)
6826 TRANS(UABDL_v, do_3op_widening,
6827       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6828       gen_uabd_i64, false)
6829 TRANS(SABAL_v, do_3op_widening,
6830       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6831       gen_saba_i64, true)
6832 TRANS(UABAL_v, do_3op_widening,
6833       a->esz, a->q, a->rd, a->rn, a->rm, -1,
6834       gen_uaba_i64, true)
6835 
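/*
 * SQDMULL and friends: the saturating doubling of the product is done by
 * saturating-adding the product to itself; SQDMLAL/SQDMLSL then apply a
 * second saturating add (negating first for the subtracting form).
 */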
6836 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6837 {
6838     tcg_gen_mul_i64(d, n, m);
6839     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
6840 }
6841 
6842 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6843 {
6844     tcg_gen_mul_i64(d, n, m);
6845     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
6846 }
6847 
6848 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6849 {
6850     TCGv_i64 t = tcg_temp_new_i64();
6851 
6852     tcg_gen_mul_i64(t, n, m);
6853     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6854     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6855 }
6856 
6857 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6858 {
6859     TCGv_i64 t = tcg_temp_new_i64();
6860 
6861     tcg_gen_mul_i64(t, n, m);
6862     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6863     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6864 }
6865 
6866 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6867 {
6868     TCGv_i64 t = tcg_temp_new_i64();
6869 
6870     tcg_gen_mul_i64(t, n, m);
6871     gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6872     tcg_gen_neg_i64(t, t);
6873     gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6874 }
6875 
6876 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6877 {
6878     TCGv_i64 t = tcg_temp_new_i64();
6879 
6880     tcg_gen_mul_i64(t, n, m);
6881     gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6882     tcg_gen_neg_i64(t, t);
6883     gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6884 }
6885 
6886 TRANS(SQDMULL_v, do_3op_widening,
6887       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6888       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6889 TRANS(SQDMLAL_v, do_3op_widening,
6890       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6891       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6892 TRANS(SQDMLSL_v, do_3op_widening,
6893       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6894       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6895 
6896 TRANS(SQDMULL_vi, do_3op_widening,
6897       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6898       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6899 TRANS(SQDMLAL_vi, do_3op_widening,
6900       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6901       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6902 TRANS(SQDMLSL_vi, do_3op_widening,
6903       a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6904       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6905 
6906 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
6907                            MemOp sign, bool sub)
6908 {
6909     TCGv_i64 tcg_op0, tcg_op1;
6910     MemOp esz = a->esz;
6911     int half = 8 >> esz;
6912     bool top = a->q;
6913     int top_swap = top ? 0 : half - 1;
6914     int top_half = top ? half : 0;
6915 
6916     /* There are no 64x64->128 bit operations. */
6917     if (esz >= MO_64) {
6918         return false;
6919     }
6920     if (!fp_access_check(s)) {
6921         return true;
6922     }
6923     tcg_op0 = tcg_temp_new_i64();
6924     tcg_op1 = tcg_temp_new_i64();
6925 
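    /*
     * rn is read at the same (wide) index that is written, so only the
     * narrow rm operand needs careful ordering: as in do_3op_widening,
     * iterate forward for the top-half form and backward for the bottom.
     */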
6926     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6927         int elt = elt_fwd ^ top_swap;
6928 
6929         read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
6930         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6931         if (sub) {
6932             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6933         } else {
6934             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6935         }
6936         write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
6937     }
6938     clear_vec_high(s, 1, a->rd);
6939     return true;
6940 }
6941 
6942 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
6943 TRANS(UADDW, do_addsub_wide, a, 0, false)
6944 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
6945 TRANS(USUBW, do_addsub_wide, a, 0, true)
6946 
6947 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
6948                                  bool sub, bool round)
6949 {
6950     TCGv_i64 tcg_op0, tcg_op1;
6951     MemOp esz = a->esz;
6952     int half = 8 >> esz;
6953     bool top = a->q;
6954     int ebits = 8 << esz;
6955     uint64_t rbit = 1ull << (ebits - 1);
6956     int top_swap, top_half;
6957 
6958     /* There are no 128x128->64 bit operations. */
6959     if (esz >= MO_64) {
6960         return false;
6961     }
6962     if (!fp_access_check(s)) {
6963         return true;
6964     }
6965     tcg_op0 = tcg_temp_new_i64();
6966     tcg_op1 = tcg_temp_new_i64();
6967 
6968     /*
6969      * For top-half outputs (the "2" forms), iterate backward; forward
6970      * for bottom-half outputs.  This means the store to the destination
6971      * will not occur until the overlapping inputs have been consumed.
6972      */
6973     top_swap = top ? half - 1 : 0;
6974     top_half = top ? half : 0;
6975 
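    /*
     * Each narrow result is the high half of the wide sum or difference:
     *   rd[elt + top_half] = (rn[elt] +/- rm[elt] [+ 2^(ebits-1)]) >> ebits
     * where the rounding constant is added only for RADDHN/RSUBHN.
     */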
6976     for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6977         int elt = elt_fwd ^ top_swap;
6978 
6979         read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
6980         read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6981         if (sub) {
6982             tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6983         } else {
6984             tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6985         }
6986         if (round) {
6987             tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
6988         }
6989         tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
6990         write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
6991     }
6992     clear_vec_high(s, top, a->rd);
6993     return true;
6994 }
6995 
6996 TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
6997 TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
6998 TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
6999 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
7000 
7001 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
7002 {
7003     if (fp_access_check(s)) {
7004         /* The Q field specifies lo/hi half input for these insns.  */
7005         gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
7006     }
7007     return true;
7008 }
7009 
7010 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
7011 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
7012 
7013 /*
7014  * Advanced SIMD scalar/vector x indexed element
7015  */
7016 
7017 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
7018 {
7019     switch (a->esz) {
7020     case MO_64:
7021         if (fp_access_check(s)) {
7022             TCGv_i64 t0 = read_fp_dreg(s, a->rn);
7023             TCGv_i64 t1 = tcg_temp_new_i64();
7024 
7025             read_vec_element(s, t1, a->rm, a->idx, MO_64);
7026             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
7027             write_fp_dreg_merging(s, a->rd, a->rn, t0);
7028         }
7029         break;
7030     case MO_32:
7031         if (fp_access_check(s)) {
7032             TCGv_i32 t0 = read_fp_sreg(s, a->rn);
7033             TCGv_i32 t1 = tcg_temp_new_i32();
7034 
7035             read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
7036             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
7037             write_fp_sreg_merging(s, a->rd, a->rn, t0);
7038         }
7039         break;
7040     case MO_16:
7041         if (!dc_isar_feature(aa64_fp16, s)) {
7042             return false;
7043         }
7044         if (fp_access_check(s)) {
7045             TCGv_i32 t0 = read_fp_hreg(s, a->rn);
7046             TCGv_i32 t1 = tcg_temp_new_i32();
7047 
7048             read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
7049             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
7050             write_fp_hreg_merging(s, a->rd, a->rn, t0);
7051         }
7052         break;
7053     default:
7054         g_assert_not_reached();
7055     }
7056     return true;
7057 }
7058 
7059 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
7060 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
7061 
7062 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
7063 {
7064     switch (a->esz) {
7065     case MO_64:
7066         if (fp_access_check(s)) {
7067             TCGv_i64 t0 = read_fp_dreg(s, a->rd);
7068             TCGv_i64 t1 = read_fp_dreg(s, a->rn);
7069             TCGv_i64 t2 = tcg_temp_new_i64();
7070 
7071             read_vec_element(s, t2, a->rm, a->idx, MO_64);
7072             if (neg) {
7073                 gen_vfp_maybe_ah_negd(s, t1, t1);
7074             }
7075             gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
7076             write_fp_dreg_merging(s, a->rd, a->rd, t0);
7077         }
7078         break;
7079     case MO_32:
7080         if (fp_access_check(s)) {
7081             TCGv_i32 t0 = read_fp_sreg(s, a->rd);
7082             TCGv_i32 t1 = read_fp_sreg(s, a->rn);
7083             TCGv_i32 t2 = tcg_temp_new_i32();
7084 
7085             read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
7086             if (neg) {
7087                 gen_vfp_maybe_ah_negs(s, t1, t1);
7088             }
7089             gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
7090             write_fp_sreg_merging(s, a->rd, a->rd, t0);
7091         }
7092         break;
7093     case MO_16:
7094         if (!dc_isar_feature(aa64_fp16, s)) {
7095             return false;
7096         }
7097         if (fp_access_check(s)) {
7098             TCGv_i32 t0 = read_fp_hreg(s, a->rd);
7099             TCGv_i32 t1 = read_fp_hreg(s, a->rn);
7100             TCGv_i32 t2 = tcg_temp_new_i32();
7101 
7102             read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
7103             if (neg) {
7104                 gen_vfp_maybe_ah_negh(s, t1, t1);
7105             }
7106             gen_helper_advsimd_muladdh(t0, t1, t2, t0,
7107                                        fpstatus_ptr(FPST_A64_F16));
7108             write_fp_hreg_merging(s, a->rd, a->rd, t0);
7109         }
7110         break;
7111     default:
7112         g_assert_not_reached();
7113     }
7114     return true;
7115 }
7116 
7117 TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
7118 TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
7119 
7120 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
7121                                   const ENVScalar2 *f)
7122 {
7123     if (a->esz < MO_16 || a->esz > MO_32) {
7124         return false;
7125     }
7126     if (fp_access_check(s)) {
7127         TCGv_i32 t0 = tcg_temp_new_i32();
7128         TCGv_i32 t1 = tcg_temp_new_i32();
7129 
7130         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
7131         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
7132         f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
7133         write_fp_sreg(s, a->rd, t0);
7134     }
7135     return true;
7136 }
7137 
7138 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
7139 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
7140 
7141 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
7142                                   const ENVScalar3 *f)
7143 {
7144     if (a->esz < MO_16 || a->esz > MO_32) {
7145         return false;
7146     }
7147     if (fp_access_check(s)) {
7148         TCGv_i32 t0 = tcg_temp_new_i32();
7149         TCGv_i32 t1 = tcg_temp_new_i32();
7150         TCGv_i32 t2 = tcg_temp_new_i32();
7151 
7152         read_vec_element_i32(s, t0, a->rn, 0, a->esz);
7153         read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
7154         read_vec_element_i32(s, t2, a->rd, 0, a->esz);
7155         f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
7156         write_fp_sreg(s, a->rd, t0);
7157     }
7158     return true;
7159 }
7160 
7161 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
7162 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
7163 
7164 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
7165                                           NeonGenTwo64OpFn *fn, bool acc)
7166 {
7167     if (fp_access_check(s)) {
7168         TCGv_i64 t0 = tcg_temp_new_i64();
7169         TCGv_i64 t1 = tcg_temp_new_i64();
7170         TCGv_i64 t2 = tcg_temp_new_i64();
7171 
7172         if (acc) {
7173             read_vec_element(s, t0, a->rd, 0, a->esz + 1);
7174         }
7175         read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
7176         read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
7177         fn(t0, t1, t2);
7178 
7179         /* Clear the whole register first, then store scalar. */
7180         clear_vec(s, a->rd);
7181         write_vec_element(s, t0, a->rd, 0, a->esz + 1);
7182     }
7183     return true;
7184 }
7185 
7186 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
7187       a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
7188 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
7189       a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
7190 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
7191       a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
7192 
7193 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
7194                               gen_helper_gvec_3_ptr * const fns[3])
7195 {
7196     MemOp esz = a->esz;
7197     int check = fp_access_check_vector_hsd(s, a->q, esz);
7198 
7199     if (check <= 0) {
7200         return check == 0;
7201     }
7202 
7203     gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
7204                       esz == MO_16 ? FPST_A64_F16 : FPST_A64,
7205                       a->idx, fns[esz - 1]);
7206     return true;
7207 }
7208 
7209 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
7210     gen_helper_gvec_fmul_idx_h,
7211     gen_helper_gvec_fmul_idx_s,
7212     gen_helper_gvec_fmul_idx_d,
7213 };
7214 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
7215 
7216 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
7217     gen_helper_gvec_fmulx_idx_h,
7218     gen_helper_gvec_fmulx_idx_s,
7219     gen_helper_gvec_fmulx_idx_d,
7220 };
7221 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
7222 
7223 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
7224 {
7225     static gen_helper_gvec_4_ptr * const fns[3][3] = {
7226         { gen_helper_gvec_fmla_idx_h,
7227           gen_helper_gvec_fmla_idx_s,
7228           gen_helper_gvec_fmla_idx_d },
7229         { gen_helper_gvec_fmls_idx_h,
7230           gen_helper_gvec_fmls_idx_s,
7231           gen_helper_gvec_fmls_idx_d },
7232         { gen_helper_gvec_ah_fmls_idx_h,
7233           gen_helper_gvec_ah_fmls_idx_s,
7234           gen_helper_gvec_ah_fmls_idx_d },
7235     };
7236     MemOp esz = a->esz;
7237     int check = fp_access_check_vector_hsd(s, a->q, esz);
7238 
7239     if (check <= 0) {
7240         return check == 0;
7241     }
7242 
7243     gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
7244                       esz == MO_16 ? FPST_A64_F16 : FPST_A64,
7245                       a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]);
7246     return true;
7247 }
7248 
7249 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
7250 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
7251 
7252 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
7253 {
7254     if (fp_access_check(s)) {
7255         int data = (a->idx << 2) | (is_2 << 1) | is_s;
7256         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
7257                            vec_full_reg_offset(s, a->rn),
7258                            vec_full_reg_offset(s, a->rm), tcg_env,
7259                            a->q ? 16 : 8, vec_full_reg_size(s),
7260                            data, gen_helper_gvec_fmlal_idx_a64);
7261     }
7262     return true;
7263 }
7264 
7265 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
7266 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
7267 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
7268 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
7269 
7270 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a,
7271                                gen_helper_gvec_3 * const fns[2])
7272 {
7273     assert(a->esz == MO_16 || a->esz == MO_32);
7274     if (fp_access_check(s)) {
7275         gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]);
7276     }
7277     return true;
7278 }
7279 
7280 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = {
7281     gen_helper_gvec_mul_idx_h,
7282     gen_helper_gvec_mul_idx_s,
7283 };
7284 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul)
7285 
7286 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub)
7287 {
7288     static gen_helper_gvec_4 * const fns[2][2] = {
7289         { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h },
7290         { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s },
7291     };
7292 
7293     assert(a->esz == MO_16 || a->esz == MO_32);
7294     if (fp_access_check(s)) {
7295         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd,
7296                          a->idx, fns[a->esz - 1][sub]);
7297     }
7298     return true;
7299 }
7300 
7301 TRANS(MLA_vi, do_mla_vector_idx, a, false)
7302 TRANS(MLS_vi, do_mla_vector_idx, a, true)
7303 
7304 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a,
7305                                   gen_helper_gvec_4 * const fns[2])
7306 {
7307     assert(a->esz == MO_16 || a->esz == MO_32);
7308     if (fp_access_check(s)) {
7309         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
7310                            vec_full_reg_offset(s, a->rn),
7311                            vec_full_reg_offset(s, a->rm),
7312                            offsetof(CPUARMState, vfp.qc),
7313                            a->q ? 16 : 8, vec_full_reg_size(s),
7314                            a->idx, fns[a->esz - 1]);
7315     }
7316     return true;
7317 }
7318 
7319 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = {
7320     gen_helper_neon_sqdmulh_idx_h,
7321     gen_helper_neon_sqdmulh_idx_s,
7322 };
7323 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh)
7324 
7325 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
7326     gen_helper_neon_sqrdmulh_idx_h,
7327     gen_helper_neon_sqrdmulh_idx_s,
7328 };
7329 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
7330 
7331 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = {
7332     gen_helper_neon_sqrdmlah_idx_h,
7333     gen_helper_neon_sqrdmlah_idx_s,
7334 };
7335 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
7336            f_vector_idx_sqrdmlah)
7337 
7338 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = {
7339     gen_helper_neon_sqrdmlsh_idx_h,
7340     gen_helper_neon_sqrdmlsh_idx_s,
7341 };
7342 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
7343            f_vector_idx_sqrdmlsh)
7344 
7345 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
7346                               gen_helper_gvec_4 *fn)
7347 {
7348     if (fp_access_check(s)) {
7349         gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
7350     }
7351     return true;
7352 }
7353 
7354 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
7355                                   gen_helper_gvec_4_ptr *fn)
7356 {
7357     if (fp_access_check(s)) {
7358         gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
7359     }
7360     return true;
7361 }
7362 
7363 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_4b)
7364 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_4b)
7365 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
7366            gen_helper_gvec_sudot_idx_4b)
7367 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
7368            gen_helper_gvec_usdot_idx_4b)
7369 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
7370            gen_helper_gvec_bfdot_idx)
7371 
7372 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
7373 {
7374     if (!dc_isar_feature(aa64_bf16, s)) {
7375         return false;
7376     }
7377     if (fp_access_check(s)) {
7378         /* Q bit selects BFMLALB vs BFMLALT. */
7379         gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
7380                           s->fpcr_ah ? FPST_AH : FPST_A64,
7381                           (a->idx << 1) | a->q,
7382                           gen_helper_gvec_bfmlal_idx);
7383     }
7384     return true;
7385 }
7386 
7387 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
7388 {
7389     gen_helper_gvec_4_ptr *fn;
7390 
7391     if (!dc_isar_feature(aa64_fcma, s)) {
7392         return false;
7393     }
7394     switch (a->esz) {
7395     case MO_16:
7396         if (!dc_isar_feature(aa64_fp16, s)) {
7397             return false;
7398         }
7399         fn = gen_helper_gvec_fcmlah_idx;
7400         break;
7401     case MO_32:
7402         fn = gen_helper_gvec_fcmlas_idx;
7403         break;
7404     default:
7405         g_assert_not_reached();
7406     }
7407     if (fp_access_check(s)) {
7408         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
7409                           a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
7410                           (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn);
7411     }
7412     return true;
7413 }
7414 
7415 /*
7416  * Advanced SIMD scalar pairwise
7417  */
7418 
7419 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
7420 {
7421     switch (a->esz) {
7422     case MO_64:
7423         if (fp_access_check(s)) {
7424             TCGv_i64 t0 = tcg_temp_new_i64();
7425             TCGv_i64 t1 = tcg_temp_new_i64();
7426 
7427             read_vec_element(s, t0, a->rn, 0, MO_64);
7428             read_vec_element(s, t1, a->rn, 1, MO_64);
7429             f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
7430             write_fp_dreg(s, a->rd, t0);
7431         }
7432         break;
7433     case MO_32:
7434         if (fp_access_check(s)) {
7435             TCGv_i32 t0 = tcg_temp_new_i32();
7436             TCGv_i32 t1 = tcg_temp_new_i32();
7437 
7438             read_vec_element_i32(s, t0, a->rn, 0, MO_32);
7439             read_vec_element_i32(s, t1, a->rn, 1, MO_32);
7440             f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
7441             write_fp_sreg(s, a->rd, t0);
7442         }
7443         break;
7444     case MO_16:
7445         if (!dc_isar_feature(aa64_fp16, s)) {
7446             return false;
7447         }
7448         if (fp_access_check(s)) {
7449             TCGv_i32 t0 = tcg_temp_new_i32();
7450             TCGv_i32 t1 = tcg_temp_new_i32();
7451 
7452             read_vec_element_i32(s, t0, a->rn, 0, MO_16);
7453             read_vec_element_i32(s, t1, a->rn, 1, MO_16);
7454             f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
7455             write_fp_sreg(s, a->rd, t0);
7456         }
7457         break;
7458     default:
7459         g_assert_not_reached();
7460     }
7461     return true;
7462 }
7463 
7464 static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a,
7465                                    const FPScalar *fnormal,
7466                                    const FPScalar *fah)
7467 {
7468     return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal);
7469 }
7470 
7471 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
7472 TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah)
7473 TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah)
7474 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
7475 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
7476 
7477 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
7478 {
7479     if (fp_access_check(s)) {
7480         TCGv_i64 t0 = tcg_temp_new_i64();
7481         TCGv_i64 t1 = tcg_temp_new_i64();
7482 
7483         read_vec_element(s, t0, a->rn, 0, MO_64);
7484         read_vec_element(s, t1, a->rn, 1, MO_64);
7485         tcg_gen_add_i64(t0, t0, t1);
7486         write_fp_dreg(s, a->rd, t0);
7487     }
7488     return true;
7489 }
7490 
7491 /*
7492  * Floating-point conditional select
7493  */
7494 
7495 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
7496 {
7497     TCGv_i64 t_true, t_false;
7498     DisasCompare64 c;
7499     int check = fp_access_check_scalar_hsd(s, a->esz);
7500 
7501     if (check <= 0) {
7502         return check == 0;
7503     }
7504 
7505     /* Zero extend sreg & hreg inputs to 64 bits now.  */
7506     t_true = tcg_temp_new_i64();
7507     t_false = tcg_temp_new_i64();
7508     read_vec_element(s, t_true, a->rn, 0, a->esz);
7509     read_vec_element(s, t_false, a->rm, 0, a->esz);
7510 
7511     a64_test_cc(&c, a->cond);
7512     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
7513                         t_true, t_false);
7514 
7515     /*
7516      * Note that sregs & hregs write back zeros to the high bits,
7517      * and we've already done the zero-extension.
7518      */
7519     write_fp_dreg(s, a->rd, t_true);
7520     return true;
7521 }
7522 
7523 /*
7524  * Advanced SIMD Extract
7525  */
7526 
7527 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a)
7528 {
7529     if (fp_access_check(s)) {
7530         TCGv_i64 lo = read_fp_dreg(s, a->rn);
7531         if (a->imm != 0) {
7532             TCGv_i64 hi = read_fp_dreg(s, a->rm);
7533             tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8);
7534         }
7535         write_fp_dreg(s, a->rd, lo);
7536     }
7537     return true;
7538 }
7539 
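/*
 * EXT (quad): the result is bytes imm .. imm+15 of the 32-byte
 * concatenation rm:rn (rn supplies the low 16 bytes), assembled from
 * two or three 64-bit element reads.
 */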
7540 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a)
7541 {
7542     TCGv_i64 lo, hi;
7543     int pos = (a->imm & 7) * 8;
7544     int elt = a->imm >> 3;
7545 
7546     if (!fp_access_check(s)) {
7547         return true;
7548     }
7549 
7550     lo = tcg_temp_new_i64();
7551     hi = tcg_temp_new_i64();
7552 
7553     read_vec_element(s, lo, a->rn, elt, MO_64);
7554     elt++;
7555     read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64);
7556     elt++;
7557 
7558     if (pos != 0) {
7559         TCGv_i64 hh = tcg_temp_new_i64();
7560         tcg_gen_extract2_i64(lo, lo, hi, pos);
7561         read_vec_element(s, hh, a->rm, elt & 1, MO_64);
7562         tcg_gen_extract2_i64(hi, hi, hh, pos);
7563     }
7564 
7565     write_vec_element(s, lo, a->rd, 0, MO_64);
7566     write_vec_element(s, hi, a->rd, 1, MO_64);
7567     clear_vec_high(s, true, a->rd);
7568     return true;
7569 }
7570 
7571 /*
7572  * Floating-point data-processing (3 source)
7573  */
7574 
7575 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
7576 {
7577     TCGv_ptr fpst;
7578 
7579     /*
7580      * These are fused multiply-add.  Note that doing the negations here
7581      * as separate steps is correct: an input NaN should come out with
7582      * its sign bit flipped if it is a negated input.
7583      */
7584     switch (a->esz) {
7585     case MO_64:
7586         if (fp_access_check(s)) {
7587             TCGv_i64 tn = read_fp_dreg(s, a->rn);
7588             TCGv_i64 tm = read_fp_dreg(s, a->rm);
7589             TCGv_i64 ta = read_fp_dreg(s, a->ra);
7590 
7591             if (neg_a) {
7592                 gen_vfp_maybe_ah_negd(s, ta, ta);
7593             }
7594             if (neg_n) {
7595                 gen_vfp_maybe_ah_negd(s, tn, tn);
7596             }
7597             fpst = fpstatus_ptr(FPST_A64);
7598             gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
7599             write_fp_dreg_merging(s, a->rd, a->ra, ta);
7600         }
7601         break;
7602 
7603     case MO_32:
7604         if (fp_access_check(s)) {
7605             TCGv_i32 tn = read_fp_sreg(s, a->rn);
7606             TCGv_i32 tm = read_fp_sreg(s, a->rm);
7607             TCGv_i32 ta = read_fp_sreg(s, a->ra);
7608 
7609             if (neg_a) {
7610                 gen_vfp_maybe_ah_negs(s, ta, ta);
7611             }
7612             if (neg_n) {
7613                 gen_vfp_maybe_ah_negs(s, tn, tn);
7614             }
7615             fpst = fpstatus_ptr(FPST_A64);
7616             gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
7617             write_fp_sreg_merging(s, a->rd, a->ra, ta);
7618         }
7619         break;
7620 
7621     case MO_16:
7622         if (!dc_isar_feature(aa64_fp16, s)) {
7623             return false;
7624         }
7625         if (fp_access_check(s)) {
7626             TCGv_i32 tn = read_fp_hreg(s, a->rn);
7627             TCGv_i32 tm = read_fp_hreg(s, a->rm);
7628             TCGv_i32 ta = read_fp_hreg(s, a->ra);
7629 
7630             if (neg_a) {
7631                 gen_vfp_maybe_ah_negh(s, ta, ta);
7632             }
7633             if (neg_n) {
7634                 gen_vfp_maybe_ah_negh(s, tn, tn);
7635             }
7636             fpst = fpstatus_ptr(FPST_A64_F16);
7637             gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
7638             write_fp_hreg_merging(s, a->rd, a->ra, ta);
7639         }
7640         break;
7641 
7642     default:
7643         return false;
7644     }
7645     return true;
7646 }
7647 
7648 TRANS(FMADD, do_fmadd, a, false, false)
7649 TRANS(FNMADD, do_fmadd, a, true, true)
7650 TRANS(FMSUB, do_fmadd, a, false, true)
7651 TRANS(FNMSUB, do_fmadd, a, true, false)
7652 
7653 /*
7654  * Advanced SIMD Across Lanes
7655  */
7656 
7657 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
7658                              MemOp src_sign, NeonGenTwo64OpFn *fn)
7659 {
7660     TCGv_i64 tcg_res, tcg_elt;
7661     MemOp src_mop = a->esz | src_sign;
7662     int elements = (a->q ? 16 : 8) >> a->esz;
7663 
7664     /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
7665     if (elements < 4) {
7666         return false;
7667     }
7668     if (!fp_access_check(s)) {
7669         return true;
7670     }
7671 
7672     tcg_res = tcg_temp_new_i64();
7673     tcg_elt = tcg_temp_new_i64();
7674 
7675     read_vec_element(s, tcg_res, a->rn, 0, src_mop);
7676     for (int i = 1; i < elements; i++) {
7677         read_vec_element(s, tcg_elt, a->rn, i, src_mop);
7678         fn(tcg_res, tcg_res, tcg_elt);
7679     }
7680 
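    /*
     * Truncate to the result size: a->esz, or one size wider for the
     * widening SADDLV/UADDLV forms.
     */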
7681     tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
7682     write_fp_dreg(s, a->rd, tcg_res);
7683     return true;
7684 }
7685 
7686 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
7687 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
7688 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
7689 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
7690 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
7691 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
7692 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
7693 
7694 /*
7695  * do_fp_reduction helper
7696  *
7697  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7698  * important for correct NaN propagation that we do these
7699  * operations in exactly the order specified by the pseudocode.
7700  *
7701  * This is a recursive function.
7702  */
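/*
 * For example, with ecount == 4 the result is
 *   fn(fn(elt[ebase], elt[ebase + 1]), fn(elt[ebase + 2], elt[ebase + 3]))
 * i.e. a pairwise tree, not a linear left-to-right fold.
 */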
7703 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
7704                                 int ebase, int ecount, TCGv_ptr fpst,
7705                                 NeonGenTwoSingleOpFn *fn)
7706 {
7707     if (ecount == 1) {
7708         TCGv_i32 tcg_elem = tcg_temp_new_i32();
7709         read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
7710         return tcg_elem;
7711     } else {
7712         int half = ecount >> 1;
7713         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7714 
7715         tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
7716         tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
7717         tcg_res = tcg_temp_new_i32();
7718 
7719         fn(tcg_res, tcg_lo, tcg_hi, fpst);
7720         return tcg_res;
7721     }
7722 }
7723 
7724 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
7725                             NeonGenTwoSingleOpFn *fnormal,
7726                             NeonGenTwoSingleOpFn *fah)
7727 {
7728     if (fp_access_check(s)) {
7729         MemOp esz = a->esz;
7730         int elts = (a->q ? 16 : 8) >> esz;
7731         TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
7732         TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst,
7733                                        s->fpcr_ah ? fah : fnormal);
7734         write_fp_sreg(s, a->rd, res);
7735     }
7736     return true;
7737 }
7738 
7739 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a,
7740            gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh)
7741 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a,
7742            gen_helper_vfp_minnumh, gen_helper_vfp_minnumh)
7743 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a,
7744            gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh)
7745 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a,
7746            gen_helper_vfp_minh, gen_helper_vfp_ah_minh)
7747 
7748 TRANS(FMAXNMV_s, do_fp_reduction, a,
7749       gen_helper_vfp_maxnums, gen_helper_vfp_maxnums)
7750 TRANS(FMINNMV_s, do_fp_reduction, a,
7751       gen_helper_vfp_minnums, gen_helper_vfp_minnums)
7752 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs)
7753 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins)
7754 
7755 /*
7756  * Floating-point Immediate
7757  */
7758 
7759 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a)
7760 {
7761     int check = fp_access_check_scalar_hsd(s, a->esz);
7762     uint64_t imm;
7763 
7764     if (check <= 0) {
7765         return check == 0;
7766     }
7767 
7768     imm = vfp_expand_imm(a->esz, a->imm);
7769     write_fp_dreg(s, a->rd, tcg_constant_i64(imm));
7770     return true;
7771 }
7772 
7773 /*
7774  * Floating point compare, conditional compare
7775  */
7776 
7777 static void handle_fp_compare(DisasContext *s, int size,
7778                               unsigned int rn, unsigned int rm,
7779                               bool cmp_with_zero, bool signal_all_nans)
7780 {
7781     TCGv_i64 tcg_flags = tcg_temp_new_i64();
7782     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64);
7783 
7784     if (size == MO_64) {
7785         TCGv_i64 tcg_vn, tcg_vm;
7786 
7787         tcg_vn = read_fp_dreg(s, rn);
7788         if (cmp_with_zero) {
7789             tcg_vm = tcg_constant_i64(0);
7790         } else {
7791             tcg_vm = read_fp_dreg(s, rm);
7792         }
7793         if (signal_all_nans) {
7794             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7795         } else {
7796             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7797         }
7798     } else {
7799         TCGv_i32 tcg_vn = tcg_temp_new_i32();
7800         TCGv_i32 tcg_vm = tcg_temp_new_i32();
7801 
7802         read_vec_element_i32(s, tcg_vn, rn, 0, size);
7803         if (cmp_with_zero) {
7804             tcg_gen_movi_i32(tcg_vm, 0);
7805         } else {
7806             read_vec_element_i32(s, tcg_vm, rm, 0, size);
7807         }
7808 
7809         switch (size) {
7810         case MO_32:
7811             if (signal_all_nans) {
7812                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7813             } else {
7814                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7815             }
7816             break;
7817         case MO_16:
7818             if (signal_all_nans) {
7819                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7820             } else {
7821                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7822             }
7823             break;
7824         default:
7825             g_assert_not_reached();
7826         }
7827     }
7828 
7829     gen_set_nzcv(tcg_flags);
7830 }
7831 
7832 /* FCMP, FCMPE */
7833 static bool trans_FCMP(DisasContext *s, arg_FCMP *a)
7834 {
7835     int check = fp_access_check_scalar_hsd(s, a->esz);
7836 
7837     if (check <= 0) {
7838         return check == 0;
7839     }
7840 
7841     handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e);
7842     return true;
7843 }
7844 
7845 /* FCCMP, FCCMPE */
7846 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a)
7847 {
7848     TCGLabel *label_continue = NULL;
7849     int check = fp_access_check_scalar_hsd(s, a->esz);
7850 
7851     if (check <= 0) {
7852         return check == 0;
7853     }
7854 
7855     if (a->cond < 0x0e) { /* not always */
7856         TCGLabel *label_match = gen_new_label();
7857         label_continue = gen_new_label();
7858         arm_gen_test_cc(a->cond, label_match);
7859         /* nomatch: */
7860         gen_set_nzcv(tcg_constant_i64(a->nzcv << 28));
7861         tcg_gen_br(label_continue);
7862         gen_set_label(label_match);
7863     }
7864 
7865     handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e);
7866 
7867     if (label_continue) {
7868         gen_set_label(label_continue);
7869     }
7870     return true;
7871 }
7872 
7873 /*
7874  * Advanced SIMD Modified Immediate
7875  */
7876 
7877 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a)
7878 {
7879     if (!dc_isar_feature(aa64_fp16, s)) {
7880         return false;
7881     }
7882     if (fp_access_check(s)) {
7883         tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd),
7884                              a->q ? 16 : 8, vec_full_reg_size(s),
7885                              vfp_expand_imm(MO_16, a->abcdefgh));
7886     }
7887     return true;
7888 }
7889 
7890 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs,
7891                      int64_t c, uint32_t oprsz, uint32_t maxsz)
7892 {
7893     tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
7894 }
7895 
7896 static bool trans_Vimm(DisasContext *s, arg_Vimm *a)
7897 {
7898     GVecGen2iFn *fn;
7899 
7900     /* Handle decode of cmode/op here between ORR/BIC/MOVI */
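    /* The odd cmode values below 12 are the ORR (op=0) / BIC (op=1) immediate forms. */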
7901     if ((a->cmode & 1) && a->cmode < 12) {
7902         /* For op=1, the imm will be inverted, so BIC becomes AND. */
7903         fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
7904     } else {
7905         /* There is one unallocated cmode/op combination in this space */
7906         if (a->cmode == 15 && a->op == 1 && a->q == 0) {
7907             return false;
7908         }
7909         fn = gen_movi;
7910     }
7911 
7912     if (fp_access_check(s)) {
7913         uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
7914         gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
7915     }
7916     return true;
7917 }
7918 
7919 /*
7920  * Advanced SIMD Shift by Immediate
7921  */
7922 
7923 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
7924 {
7925     if (fp_access_check(s)) {
7926         gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
7927     }
7928     return true;
7929 }
7930 
7931 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
7932 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
7933 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
7934 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
7935 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
7936 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
7937 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
7938 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
7939 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
7940 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
7941 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli)
7942 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
7943 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
7944 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)
7945 
7946 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
7947 {
7948     TCGv_i64 tcg_rn, tcg_rd;
7949     int esz = a->esz;
7950     int esize;
7951 
7952     if (!fp_access_check(s)) {
7953         return true;
7954     }
7955 
7956     /*
7957      * For the LL variants the store is larger than the load,
7958      * so if rd == rn we would overwrite parts of our input.
7959      * So load everything right now and use shifts in the main loop.
7960      */
7961     tcg_rd = tcg_temp_new_i64();
7962     tcg_rn = tcg_temp_new_i64();
7963     read_vec_element(s, tcg_rn, a->rn, a->q, MO_64);
7964 
7965     esize = 8 << esz;
7966     for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7967         if (is_u) {
7968             tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize);
7969         } else {
7970             tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize);
7971         }
7972         tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm);
7973         write_vec_element(s, tcg_rd, a->rd, i, esz + 1);
7974     }
7975     clear_vec_high(s, true, a->rd);
7976     return true;
7977 }
7978 
7979 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false)
7980 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true)
7981 
7982 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7983 {
7984     assert(shift >= 0 && shift <= 64);
7985     tcg_gen_sari_i64(dst, src, MIN(shift, 63));
7986 }
7987 
7988 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7989 {
7990     assert(shift >= 0 && shift <= 64);
7991     if (shift == 64) {
7992         tcg_gen_movi_i64(dst, 0);
7993     } else {
7994         tcg_gen_shri_i64(dst, src, shift);
7995     }
7996 }
7997 
7998 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7999 {
8000     gen_sshr_d(src, src, shift);
8001     tcg_gen_add_i64(dst, dst, src);
8002 }
8003 
8004 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
8005 {
8006     gen_ushr_d(src, src, shift);
8007     tcg_gen_add_i64(dst, dst, src);
8008 }
8009 
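/*
 * Rounding right shifts add the rounding constant 1 << (shift - 1) before
 * shifting.  The _bhs forms operate on 8/16/32-bit values widened to 64
 * bits, so that addition cannot overflow; the _d forms instead extract the
 * rounding bit and add it after the shift so the 64-bit addition cannot
 * wrap, with shift == 64 handled specially.
 */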
8010 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
8011 {
8012     assert(shift >= 0 && shift <= 32);
8013     if (shift) {
8014         TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
8015         tcg_gen_add_i64(dst, src, rnd);
8016         tcg_gen_sari_i64(dst, dst, shift);
8017     } else {
8018         tcg_gen_mov_i64(dst, src);
8019     }
8020 }
8021 
8022 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
8023 {
8024     assert(shift >= 0 && shift <= 32);
8025     if (shift) {
8026         TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
8027         tcg_gen_add_i64(dst, src, rnd);
8028         tcg_gen_shri_i64(dst, dst, shift);
8029     } else {
8030         tcg_gen_mov_i64(dst, src);
8031     }
8032 }
8033 
8034 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
8035 {
8036     assert(shift >= 0 && shift <= 64);
8037     if (shift == 0) {
8038         tcg_gen_mov_i64(dst, src);
8039     } else if (shift == 64) {
8040         /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */
8041         tcg_gen_movi_i64(dst, 0);
8042     } else {
8043         TCGv_i64 rnd = tcg_temp_new_i64();
8044         tcg_gen_extract_i64(rnd, src, shift - 1, 1);
8045         tcg_gen_sari_i64(dst, src, shift);
8046         tcg_gen_add_i64(dst, dst, rnd);
8047     }
8048 }
8049 
8050 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
8051 {
8052     assert(shift >= 0 && shift <= 64);
8053     if (shift == 0) {
8054         tcg_gen_mov_i64(dst, src);
8055     } else if (shift == 64) {
8056         /* Rounding will propagate bit 63 into bit 64. */
8057         tcg_gen_shri_i64(dst, src, 63);
8058     } else {
8059         TCGv_i64 rnd = tcg_temp_new_i64();
8060         tcg_gen_extract_i64(rnd, src, shift - 1, 1);
8061         tcg_gen_shri_i64(dst, src, shift);
8062         tcg_gen_add_i64(dst, dst, rnd);
8063     }
8064 }
8065 
8066 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
8067 {
8068     gen_srshr_d(src, src, shift);
8069     tcg_gen_add_i64(dst, dst, src);
8070 }
8071 
8072 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
8073 {
8074     gen_urshr_d(src, src, shift);
8075     tcg_gen_add_i64(dst, dst, src);
8076 }
8077 
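     /*
      * SRI and SLI insert into the destination rather than replacing it:
      * SRI deposits the right-shifted source into the low 64 - shift bits
      * of dst, keeping its top bits; SLI deposits the source at bit
      * position 'shift', keeping the low bits of dst.
      */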
8078 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
8079 {
8080     /* If shift is 64, dst is unchanged. */
8081     if (shift != 64) {
8082         tcg_gen_shri_i64(src, src, shift);
8083         tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift);
8084     }
8085 }
8086 
8087 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
8088 {
8089     tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift);
8090 }
8091 
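     /*
      * Narrowing shifts: shift (and possibly saturate) each double-width
      * element of Rn with fn, pack the narrow results into a single
      * 64-bit value, and write it to the half of Rd selected by q
      * (the "2" forms write the upper half).  Rd is assembled with
      * deposits, hence the initial zeroing of tcg_rd.
      */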
8092 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a,
8093                                     WideShiftImmFn * const fns[3], MemOp sign)
8094 {
8095     TCGv_i64 tcg_rn, tcg_rd;
8096     int esz = a->esz;
8097     int esize;
8098     WideShiftImmFn *fn;
8099 
8100     tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
8101 
8102     if (!fp_access_check(s)) {
8103         return true;
8104     }
8105 
8106     tcg_rn = tcg_temp_new_i64();
8107     tcg_rd = tcg_temp_new_i64();
8108     tcg_gen_movi_i64(tcg_rd, 0);
8109 
8110     fn = fns[esz];
8111     esize = 8 << esz;
8112     for (int i = 0, elements = 8 >> esz; i < elements; i++) {
8113         read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign);
8114         fn(tcg_rn, tcg_rn, a->imm);
8115         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize);
8116     }
8117 
8118     write_vec_element(s, tcg_rd, a->rd, a->q, MO_64);
8119     clear_vec_high(s, a->q, a->rd);
8120     return true;
8121 }
8122 
8123 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
8124 {
8125     tcg_gen_sari_i64(d, s, i);
8126     tcg_gen_ext16u_i64(d, d);
8127     gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
8128 }
8129 
8130 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
8131 {
8132     tcg_gen_sari_i64(d, s, i);
8133     tcg_gen_ext32u_i64(d, d);
8134     gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
8135 }
8136 
8137 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
8138 {
8139     gen_sshr_d(d, s, i);
8140     gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
8141 }
8142 
8143 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
8144 {
8145     tcg_gen_shri_i64(d, s, i);
8146     gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
8147 }
8148 
8149 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
8150 {
8151     tcg_gen_shri_i64(d, s, i);
8152     gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
8153 }
8154 
8155 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
8156 {
8157     gen_ushr_d(d, s, i);
8158     gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
8159 }
8160 
8161 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
8162 {
8163     tcg_gen_sari_i64(d, s, i);
8164     tcg_gen_ext16u_i64(d, d);
8165     gen_helper_neon_unarrow_sat8(d, tcg_env, d);
8166 }
8167 
8168 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
8169 {
8170     tcg_gen_sari_i64(d, s, i);
8171     tcg_gen_ext32u_i64(d, d);
8172     gen_helper_neon_unarrow_sat16(d, tcg_env, d);
8173 }
8174 
8175 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
8176 {
8177     gen_sshr_d(d, s, i);
8178     gen_helper_neon_unarrow_sat32(d, tcg_env, d);
8179 }
8180 
8181 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
8182 {
8183     gen_srshr_bhs(d, s, i);
8184     tcg_gen_ext16u_i64(d, d);
8185     gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
8186 }
8187 
8188 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
8189 {
8190     gen_srshr_bhs(d, s, i);
8191     tcg_gen_ext32u_i64(d, d);
8192     gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
8193 }
8194 
8195 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
8196 {
8197     gen_srshr_d(d, s, i);
8198     gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
8199 }
8200 
8201 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
8202 {
8203     gen_urshr_bhs(d, s, i);
8204     gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
8205 }
8206 
8207 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
8208 {
8209     gen_urshr_bhs(d, s, i);
8210     gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
8211 }
8212 
8213 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
8214 {
8215     gen_urshr_d(d, s, i);
8216     gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
8217 }
8218 
8219 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
8220 {
8221     gen_srshr_bhs(d, s, i);
8222     tcg_gen_ext16u_i64(d, d);
8223     gen_helper_neon_unarrow_sat8(d, tcg_env, d);
8224 }
8225 
8226 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
8227 {
8228     gen_srshr_bhs(d, s, i);
8229     tcg_gen_ext32u_i64(d, d);
8230     gen_helper_neon_unarrow_sat16(d, tcg_env, d);
8231 }
8232 
8233 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
8234 {
8235     gen_srshr_d(d, s, i);
8236     gen_helper_neon_unarrow_sat32(d, tcg_env, d);
8237 }
8238 
8239 static WideShiftImmFn * const shrn_fns[] = {
8240     tcg_gen_shri_i64,
8241     tcg_gen_shri_i64,
8242     gen_ushr_d,
8243 };
8244 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0)
8245 
8246 static WideShiftImmFn * const rshrn_fns[] = {
8247     gen_urshr_bhs,
8248     gen_urshr_bhs,
8249     gen_urshr_d,
8250 };
8251 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0)
8252 
8253 static WideShiftImmFn * const sqshrn_fns[] = {
8254     gen_sqshrn_b,
8255     gen_sqshrn_h,
8256     gen_sqshrn_s,
8257 };
8258 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN)
8259 
8260 static WideShiftImmFn * const uqshrn_fns[] = {
8261     gen_uqshrn_b,
8262     gen_uqshrn_h,
8263     gen_uqshrn_s,
8264 };
8265 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0)
8266 
8267 static WideShiftImmFn * const sqshrun_fns[] = {
8268     gen_sqshrun_b,
8269     gen_sqshrun_h,
8270     gen_sqshrun_s,
8271 };
8272 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN)
8273 
8274 static WideShiftImmFn * const sqrshrn_fns[] = {
8275     gen_sqrshrn_b,
8276     gen_sqrshrn_h,
8277     gen_sqrshrn_s,
8278 };
8279 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN)
8280 
8281 static WideShiftImmFn * const uqrshrn_fns[] = {
8282     gen_uqrshrn_b,
8283     gen_uqrshrn_h,
8284     gen_uqrshrn_s,
8285 };
8286 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0)
8287 
8288 static WideShiftImmFn * const sqrshrun_fns[] = {
8289     gen_sqrshrun_b,
8290     gen_sqrshrun_h,
8291     gen_sqrshrun_s,
8292 };
8293 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN)
8294 
8295 /*
8296  * Advanced SIMD Scalar Shift by Immediate
8297  */
8298 
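     /*
      * Common handler for the scalar shift-by-immediate ops: 'accumulate'
      * makes the old Rd value visible to fn (needed for SSRA/USRA, SRI and
      * SLI), and 'sign' selects how the input element is extended on read.
      */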
8299 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a,
8300                                 WideShiftImmFn *fn, bool accumulate,
8301                                 MemOp sign)
8302 {
8303     if (fp_access_check(s)) {
8304         TCGv_i64 rd = tcg_temp_new_i64();
8305         TCGv_i64 rn = tcg_temp_new_i64();
8306 
8307         read_vec_element(s, rn, a->rn, 0, a->esz | sign);
8308         if (accumulate) {
8309             read_vec_element(s, rd, a->rd, 0, a->esz | sign);
8310         }
8311         fn(rd, rn, a->imm);
8312         write_fp_dreg(s, a->rd, rd);
8313     }
8314     return true;
8315 }
8316 
8317 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0)
8318 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0)
8319 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0)
8320 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0)
8321 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0)
8322 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0)
8323 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0)
8324 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0)
8325 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0)
8326 
8327 TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0)
8328 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0)
8329 
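     /*
      * Apply a 32-bit saturating-shift env helper to a value held in an
      * i64: narrow to 32 bits, run the helper with the immediate shift
      * count as its second operand, and zero-extend the result back.
      */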
8330 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i,
8331                               NeonGenTwoOpEnvFn *fn)
8332 {
8333     TCGv_i32 t = tcg_temp_new_i32();
8334     tcg_gen_extrl_i64_i32(t, s);
8335     fn(t, tcg_env, t, tcg_constant_i32(i));
8336     tcg_gen_extu_i32_i64(d, t);
8337 }
8338 
8339 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
8340 {
8341     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8);
8342 }
8343 
8344 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
8345 {
8346     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16);
8347 }
8348 
8349 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
8350 {
8351     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32);
8352 }
8353 
8354 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
8355 {
8356     gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i));
8357 }
8358 
8359 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
8360 {
8361     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8);
8362 }
8363 
8364 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
8365 {
8366     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16);
8367 }
8368 
8369 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
8370 {
8371     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32);
8372 }
8373 
8374 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
8375 {
8376     gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i));
8377 }
8378 
8379 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
8380 {
8381     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8);
8382 }
8383 
8384 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
8385 {
8386     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16);
8387 }
8388 
8389 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
8390 {
8391     trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32);
8392 }
8393 
8394 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
8395 {
8396     gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i));
8397 }
8398 
8399 static WideShiftImmFn * const f_scalar_sqshli[] = {
8400     gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d
8401 };
8402 
8403 static WideShiftImmFn * const f_scalar_uqshli[] = {
8404     gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d
8405 };
8406 
8407 static WideShiftImmFn * const f_scalar_sqshlui[] = {
8408     gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d
8409 };
8410 
8411 /* Note that the helpers sign-extend their inputs, so don't do it here. */
8412 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0)
8413 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0)
8414 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0)
8415 
8416 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a,
8417                                        WideShiftImmFn * const fns[3],
8418                                        MemOp sign, bool zext)
8419 {
8420     MemOp esz = a->esz;
8421 
8422     tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
8423 
8424     if (fp_access_check(s)) {
8425         TCGv_i64 rd = tcg_temp_new_i64();
8426         TCGv_i64 rn = tcg_temp_new_i64();
8427 
8428         read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign);
8429         fns[esz](rd, rn, a->imm);
8430         if (zext) {
8431             tcg_gen_ext_i64(rd, rd, esz);
8432         }
8433         write_fp_dreg(s, a->rd, rd);
8434     }
8435     return true;
8436 }
8437 
8438 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true)
8439 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true)
8440 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false)
8441 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false)
8442 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false)
8443 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false)
8444 
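     /*
      * SDIV/UDIV.  For the 32-bit signed case the operands are
      * sign-extended so that the 64-bit helper produces the correct
      * 32-bit quotient; the result is zero-extended back into Rd.
      */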
8445 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed)
8446 {
8447     TCGv_i64 tcg_n, tcg_m, tcg_rd;
8448     tcg_rd = cpu_reg(s, a->rd);
8449 
8450     if (!a->sf && is_signed) {
8451         tcg_n = tcg_temp_new_i64();
8452         tcg_m = tcg_temp_new_i64();
8453         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn));
8454         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm));
8455     } else {
8456         tcg_n = read_cpu_reg(s, a->rn, a->sf);
8457         tcg_m = read_cpu_reg(s, a->rm, a->sf);
8458     }
8459 
8460     if (is_signed) {
8461         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
8462     } else {
8463         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
8464     }
8465 
8466     if (!a->sf) { /* zero extend final result */
8467         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8468     }
8469     return true;
8470 }
8471 
8472 TRANS(SDIV, do_div, a, true)
8473 TRANS(UDIV, do_div, a, false)
8474 
8475 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
8476  * Note that it is the caller's responsibility to ensure that the
8477  * shift amount is in range (i.e. 0..31 or 0..63) and to provide the
8478  * ARM-mandated semantics for out-of-range shifts.
8479  */
8480 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
8481                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
8482 {
8483     switch (shift_type) {
8484     case A64_SHIFT_TYPE_LSL:
8485         tcg_gen_shl_i64(dst, src, shift_amount);
8486         break;
8487     case A64_SHIFT_TYPE_LSR:
8488         tcg_gen_shr_i64(dst, src, shift_amount);
8489         break;
8490     case A64_SHIFT_TYPE_ASR:
8491         if (!sf) {
8492             tcg_gen_ext32s_i64(dst, src);
8493         }
8494         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
8495         break;
8496     case A64_SHIFT_TYPE_ROR:
8497         if (sf) {
8498             tcg_gen_rotr_i64(dst, src, shift_amount);
8499         } else {
8500             TCGv_i32 t0, t1;
8501             t0 = tcg_temp_new_i32();
8502             t1 = tcg_temp_new_i32();
8503             tcg_gen_extrl_i64_i32(t0, src);
8504             tcg_gen_extrl_i64_i32(t1, shift_amount);
8505             tcg_gen_rotr_i32(t0, t0, t1);
8506             tcg_gen_extu_i32_i64(dst, t0);
8507         }
8508         break;
8509     default:
8510         g_assert_not_reached(); /* all shift types are handled above */
8511         break;
8512     }
8513 
8514     if (!sf) { /* zero extend final result */
8515         tcg_gen_ext32u_i64(dst, dst);
8516     }
8517 }
8518 
8519 /* Shift a TCGv src by immediate, put result in dst.
8520  * The shift amount must be in range (this should always be true as the
8521  * relevant instructions will UNDEF on bad shift immediates).
8522  */
8523 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
8524                           enum a64_shift_type shift_type, unsigned int shift_i)
8525 {
8526     assert(shift_i < (sf ? 64 : 32));
8527 
8528     if (shift_i == 0) {
8529         tcg_gen_mov_i64(dst, src);
8530     } else {
8531         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
8532     }
8533 }
8534 
8535 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a,
8536                          enum a64_shift_type shift_type)
8537 {
8538     TCGv_i64 tcg_shift = tcg_temp_new_i64();
8539     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8540     TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8541 
8542     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31);
8543     shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift);
8544     return true;
8545 }
8546 
8547 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL)
8548 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR)
8549 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR)
8550 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR)
8551 
8552 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c)
8553 {
8554     TCGv_i64 tcg_acc, tcg_val, tcg_rd;
8555     TCGv_i32 tcg_bytes;
8556 
8557     switch (a->esz) {
8558     case MO_8:
8559     case MO_16:
8560     case MO_32:
8561         tcg_val = tcg_temp_new_i64();
8562         tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz);
8563         break;
8564     case MO_64:
8565         tcg_val = cpu_reg(s, a->rm);
8566         break;
8567     default:
8568         g_assert_not_reached();
8569     }
8570     tcg_acc = cpu_reg(s, a->rn);
8571     tcg_bytes = tcg_constant_i32(1 << a->esz);
8572     tcg_rd = cpu_reg(s, a->rd);
8573 
8574     if (crc32c) {
8575         gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
8576     } else {
8577         gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
8578     }
8579     return true;
8580 }
8581 
8582 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false)
8583 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true)
8584 
8585 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag)
8586 {
8587     TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true);
8588     TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true);
8589     TCGv_i64 tcg_d = cpu_reg(s, a->rd);
8590 
8591     tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
8592     tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
8593 
8594     if (setflag) {
8595         gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
8596     } else {
8597         tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
8598     }
8599     return true;
8600 }
8601 
8602 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false)
8603 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true)
8604 
8605 static bool trans_IRG(DisasContext *s, arg_rrr *a)
8606 {
8607     if (dc_isar_feature(aa64_mte_insn_reg, s)) {
8608         TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd);
8609         TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn);
8610 
8611         if (s->ata[0]) {
8612             gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm));
8613         } else {
8614             gen_address_with_allocation_tag0(tcg_rd, tcg_rn);
8615         }
8616         return true;
8617     }
8618     return false;
8619 }
8620 
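     /*
      * GMI: extract the allocation tag from bits [59:56] of Xn and set
      * the corresponding bit in the tag exclude mask, ORed with Xm.
      */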
8621 static bool trans_GMI(DisasContext *s, arg_rrr *a)
8622 {
8623     if (dc_isar_feature(aa64_mte_insn_reg, s)) {
8624         TCGv_i64 t = tcg_temp_new_i64();
8625 
8626         tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4);
8627         tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
8628         tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t);
8629         return true;
8630     }
8631     return false;
8632 }
8633 
8634 static bool trans_PACGA(DisasContext *s, arg_rrr *a)
8635 {
8636     if (dc_isar_feature(aa64_pauth, s)) {
8637         gen_helper_pacga(cpu_reg(s, a->rd), tcg_env,
8638                          cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm));
8639         return true;
8640     }
8641     return false;
8642 }
8643 
8644 static bool gen_rrr(DisasContext *s, arg_rrr_sf *a, ArithTwoOp fn)
8645 {
8646     TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8647     TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8648     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8649 
8650     fn(tcg_rd, tcg_rn, tcg_rm);
8651     if (!a->sf) {
8652         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8653     }
8654     return true;
8655 }
8656 
8657 TRANS_FEAT(SMAX, aa64_cssc, gen_rrr, a,
8658            a->sf ? tcg_gen_smax_i64 : gen_smax32_i64)
8659 TRANS_FEAT(SMIN, aa64_cssc, gen_rrr, a,
8660            a->sf ? tcg_gen_smin_i64 : gen_smin32_i64)
8661 TRANS_FEAT(UMAX, aa64_cssc, gen_rrr, a,
8662            a->sf ? tcg_gen_umax_i64 : gen_umax32_i64)
8663 TRANS_FEAT(UMIN, aa64_cssc, gen_rrr, a,
8664            a->sf ? tcg_gen_umin_i64 : gen_umin32_i64)
8665 
8666 typedef void ArithOneOp(TCGv_i64, TCGv_i64);
8667 
8668 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn)
8669 {
8670     fn(cpu_reg(s, rd), cpu_reg(s, rn));
8671     return true;
8672 }
8673 
8674 /*
8675  * Perform 32-bit operation fn on the low half of n;
8676  * the high half of the output is zeroed.
8677  */
8678 static void gen_wrap2_i32(TCGv_i64 d, TCGv_i64 n, NeonGenOneOpFn fn)
8679 {
8680     TCGv_i32 t = tcg_temp_new_i32();
8681 
8682     tcg_gen_extrl_i64_i32(t, n);
8683     fn(t, t);
8684     tcg_gen_extu_i32_i64(d, t);
8685 }
8686 
8687 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8688 {
8689     gen_wrap2_i32(tcg_rd, tcg_rn, gen_helper_rbit);
8690 }
8691 
8692 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask)
8693 {
8694     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8695 
8696     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
8697     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
8698     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
8699     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
8700     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
8701 }
8702 
8703 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8704 {
8705     gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff));
8706 }
8707 
8708 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8709 {
8710     gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull));
8711 }
8712 
8713 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8714 {
8715     tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
8716 }
8717 
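     /*
      * REV32 (64-bit): byte-reverse each 32-bit word.  A full 64-bit
      * bswap followed by a rotate of 32 puts both words back in their
      * original positions with their bytes reversed.
      */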
8718 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8719 {
8720     tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
8721     tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
8722 }
8723 
8724 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32)
8725 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32)
8726 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32)
8727 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64)
8728 
8729 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8730 {
8731     TCGv_i32 t32 = tcg_temp_new_i32();
8732 
8733     tcg_gen_extrl_i64_i32(t32, tcg_rn);
8734     tcg_gen_clzi_i32(t32, t32, 32);
8735     tcg_gen_extu_i32_i64(tcg_rd, t32);
8736 }
8737 
8738 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8739 {
8740     tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
8741 }
8742 
8743 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8744 {
8745     gen_wrap2_i32(tcg_rd, tcg_rn, tcg_gen_clrsb_i32);
8746 }
8747 
8748 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32)
8749 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32)
8750 
8751 static void gen_ctz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8752 {
8753     TCGv_i32 t32 = tcg_temp_new_i32();
8754 
8755     tcg_gen_extrl_i64_i32(t32, tcg_rn);
8756     tcg_gen_ctzi_i32(t32, t32, 32);
8757     tcg_gen_extu_i32_i64(tcg_rd, t32);
8758 }
8759 
8760 static void gen_ctz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8761 {
8762     tcg_gen_ctzi_i64(tcg_rd, tcg_rn, 64);
8763 }
8764 
8765 static void gen_cnt32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8766 {
8767     gen_wrap2_i32(tcg_rd, tcg_rn, tcg_gen_ctpop_i32);
8768 }
8769 
8770 static void gen_abs32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8771 {
8772     gen_wrap2_i32(tcg_rd, tcg_rn, tcg_gen_abs_i32);
8773 }
8774 
8775 TRANS_FEAT(CTZ, aa64_cssc, gen_rr, a->rd, a->rn,
8776            a->sf ? gen_ctz64 : gen_ctz32)
8777 TRANS_FEAT(CNT, aa64_cssc, gen_rr, a->rd, a->rn,
8778            a->sf ? tcg_gen_ctpop_i64 : gen_cnt32)
8779 TRANS_FEAT(ABS, aa64_cssc, gen_rr, a->rd, a->rn,
8780            a->sf ? tcg_gen_abs_i64 : gen_abs32)
8781 
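     /*
      * PAC and AUT with a register or zero modifier: the Z forms encode
      * rn == 31 and use a zero modifier.  When pauth is not active the
      * destination register is left unchanged.
      */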
8782 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn)
8783 {
8784     TCGv_i64 tcg_rd, tcg_rn;
8785 
8786     if (a->z) {
8787         if (a->rn != 31) {
8788             return false;
8789         }
8790         tcg_rn = tcg_constant_i64(0);
8791     } else {
8792         tcg_rn = cpu_reg_sp(s, a->rn);
8793     }
8794     if (s->pauth_active) {
8795         tcg_rd = cpu_reg(s, a->rd);
8796         fn(tcg_rd, tcg_env, tcg_rd, tcg_rn);
8797     }
8798     return true;
8799 }
8800 
8801 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia)
8802 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib)
8803 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda)
8804 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb)
8805 
8806 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia)
8807 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib)
8808 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda)
8809 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb)
8810 
8811 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn)
8812 {
8813     if (s->pauth_active) {
8814         TCGv_i64 tcg_rd = cpu_reg(s, rd);
8815         fn(tcg_rd, tcg_env, tcg_rd);
8816     }
8817     return true;
8818 }
8819 
8820 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci)
8821 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd)
8822 
8823 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a,
8824                          ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags)
8825 {
8826     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
8827 
8828     if (!a->sf && (a->sa & (1 << 5))) {
8829         return false;
8830     }
8831 
8832     tcg_rd = cpu_reg(s, a->rd);
8833     tcg_rn = cpu_reg(s, a->rn);
8834 
8835     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8836     if (a->sa) {
8837         shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8838     }
8839 
8840     (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm);
8841     if (!a->sf) {
8842         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8843     }
8844     if (setflags) {
8845         gen_logic_CC(a->sf, tcg_rd);
8846     }
8847     return true;
8848 }
8849 
8850 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a)
8851 {
8852     /*
8853      * Unshifted ORR and ORN with WZR/XZR is the standard encoding for
8854      * register-register MOV and MVN, so it is worth special casing.
8855      */
8856     if (a->sa == 0 && a->st == 0 && a->rn == 31) {
8857         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8858         TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8859 
8860         if (a->n) {
8861             tcg_gen_not_i64(tcg_rd, tcg_rm);
8862             if (!a->sf) {
8863                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8864             }
8865         } else {
8866             if (a->sf) {
8867                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
8868             } else {
8869                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
8870             }
8871         }
8872         return true;
8873     }
8874 
8875     return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false);
8876 }
8877 
8878 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false)
8879 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true)
8880 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false)
8881 
8882 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a,
8883                           bool sub_op, bool setflags)
8884 {
8885     TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result;
8886 
8887     if (a->sa > 4) {
8888         return false;
8889     }
8890 
8891     /* non-flag setting ops may use SP */
8892     if (!setflags) {
8893         tcg_rd = cpu_reg_sp(s, a->rd);
8894     } else {
8895         tcg_rd = cpu_reg(s, a->rd);
8896     }
8897     tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf);
8898 
8899     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8900     ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa);
8901 
8902     tcg_result = tcg_temp_new_i64();
8903     if (!setflags) {
8904         if (sub_op) {
8905             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8906         } else {
8907             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8908         }
8909     } else {
8910         if (sub_op) {
8911             gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8912         } else {
8913             gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8914         }
8915     }
8916 
8917     if (a->sf) {
8918         tcg_gen_mov_i64(tcg_rd, tcg_result);
8919     } else {
8920         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8921     }
8922     return true;
8923 }
8924 
8925 TRANS(ADD_ext, do_addsub_ext, a, false, false)
8926 TRANS(SUB_ext, do_addsub_ext, a, true, false)
8927 TRANS(ADDS_ext, do_addsub_ext, a, false, true)
8928 TRANS(SUBS_ext, do_addsub_ext, a, true, true)
8929 
8930 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a,
8931                           bool sub_op, bool setflags)
8932 {
8933     TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result;
8934 
8935     if (a->st == 3 || (!a->sf && (a->sa & 32))) {
8936         return false;
8937     }
8938 
8939     tcg_rd = cpu_reg(s, a->rd);
8940     tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8941     tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8942 
8943     shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8944 
8945     tcg_result = tcg_temp_new_i64();
8946     if (!setflags) {
8947         if (sub_op) {
8948             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8949         } else {
8950             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8951         }
8952     } else {
8953         if (sub_op) {
8954             gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8955         } else {
8956             gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8957         }
8958     }
8959 
8960     if (a->sf) {
8961         tcg_gen_mov_i64(tcg_rd, tcg_result);
8962     } else {
8963         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8964     }
8965     return true;
8966 }
8967 
8968 TRANS(ADD_r, do_addsub_reg, a, false, false)
8969 TRANS(SUB_r, do_addsub_reg, a, true, false)
8970 TRANS(ADDS_r, do_addsub_reg, a, false, true)
8971 TRANS(SUBS_r, do_addsub_reg, a, true, true)
8972 
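     /*
      * SMULH/UMULH: tcg_gen_muls2/mulu2 produce the 128-bit product as a
      * (low, high) pair; only the high 64 bits are written to Rd.
      */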
8973 static bool do_mulh(DisasContext *s, arg_rrr *a,
8974                     void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
8975 {
8976     TCGv_i64 discard = tcg_temp_new_i64();
8977     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8978     TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8979     TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8980 
8981     fn(discard, tcg_rd, tcg_rn, tcg_rm);
8982     return true;
8983 }
8984 
8985 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64)
8986 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64)
8987 
8988 static bool do_muladd(DisasContext *s, arg_rrrr *a,
8989                       bool sf, bool is_sub, MemOp mop)
8990 {
8991     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8992     TCGv_i64 tcg_op1, tcg_op2;
8993 
8994     if (mop == MO_64) {
8995         tcg_op1 = cpu_reg(s, a->rn);
8996         tcg_op2 = cpu_reg(s, a->rm);
8997     } else {
8998         tcg_op1 = tcg_temp_new_i64();
8999         tcg_op2 = tcg_temp_new_i64();
9000         tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop);
9001         tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop);
9002     }
9003 
9004     if (a->ra == 31 && !is_sub) {
9005         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
9006         tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2);
9007     } else {
9008         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9009         TCGv_i64 tcg_ra = cpu_reg(s, a->ra);
9010 
9011         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
9012         if (is_sub) {
9013             tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp);
9014         } else {
9015             tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp);
9016         }
9017     }
9018 
9019     if (!sf) {
9020         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
9021     }
9022     return true;
9023 }
9024 
9025 TRANS(MADD_w, do_muladd, a, false, false, MO_64)
9026 TRANS(MSUB_w, do_muladd, a, false, true, MO_64)
9027 TRANS(MADD_x, do_muladd, a, true, false, MO_64)
9028 TRANS(MSUB_x, do_muladd, a, true, true, MO_64)
9029 
9030 TRANS(SMADDL, do_muladd, a, true, false, MO_SL)
9031 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL)
9032 TRANS(UMADDL, do_muladd, a, true, false, MO_UL)
9033 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL)
9034 
9035 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a,
9036                        bool is_sub, bool setflags)
9037 {
9038     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
9039 
9040     tcg_rd = cpu_reg(s, a->rd);
9041     tcg_rn = cpu_reg(s, a->rn);
9042 
9043     if (is_sub) {
9044         tcg_y = tcg_temp_new_i64();
9045         tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm));
9046     } else {
9047         tcg_y = cpu_reg(s, a->rm);
9048     }
9049 
9050     if (setflags) {
9051         gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y);
9052     } else {
9053         gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y);
9054     }
9055     return true;
9056 }
9057 
9058 TRANS(ADC, do_adc_sbc, a, false, false)
9059 TRANS(SBC, do_adc_sbc, a, true, false)
9060 TRANS(ADCS, do_adc_sbc, a, false, true)
9061 TRANS(SBCS, do_adc_sbc, a, true, true)
9062 
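     /*
      * RMIF: rotate Xn right by imm; bits [3:0] of the rotated value
      * supply N, Z, C and V for each flag selected in mask.
      */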
9063 static bool trans_RMIF(DisasContext *s, arg_RMIF *a)
9064 {
9065     int mask = a->mask;
9066     TCGv_i64 tcg_rn;
9067     TCGv_i32 nzcv;
9068 
9069     if (!dc_isar_feature(aa64_condm_4, s)) {
9070         return false;
9071     }
9072 
9073     tcg_rn = read_cpu_reg(s, a->rn, 1);
9074     tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm);
9075 
9076     nzcv = tcg_temp_new_i32();
9077     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
9078 
9079     if (mask & 8) { /* N */
9080         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
9081     }
9082     if (mask & 4) { /* Z */
9083         tcg_gen_not_i32(cpu_ZF, nzcv);
9084         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
9085     }
9086     if (mask & 2) { /* C */
9087         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
9088     }
9089     if (mask & 1) { /* V */
9090         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
9091     }
9092     return true;
9093 }
9094 
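     /*
      * SETF8/SETF16: set N, Z and V from the low 8 or 16 bits of Xn
      * (C is unchanged).  The left shift puts the operand's sign bit
      * into NF bit 31 and makes ZF zero iff the operand is zero; V is
      * the XOR of the sign bit with the next bit up.
      */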
9095 static bool do_setf(DisasContext *s, int rn, int shift)
9096 {
9097     TCGv_i32 tmp = tcg_temp_new_i32();
9098 
9099     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
9100     tcg_gen_shli_i32(cpu_NF, tmp, shift);
9101     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
9102     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
9103     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
9104     return true;
9105 }
9106 
9107 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24)
9108 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16)
9109 
9110 /* CCMP, CCMN */
9111 static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
9112 {
9113     TCGv_i32 tcg_t0 = tcg_temp_new_i32();
9114     TCGv_i32 tcg_t1 = tcg_temp_new_i32();
9115     TCGv_i32 tcg_t2 = tcg_temp_new_i32();
9116     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9117     TCGv_i64 tcg_rn, tcg_y;
9118     DisasCompare c;
9119     unsigned nzcv;
9120     bool has_andc;
9121 
9122     /* Set T0 = !COND.  */
9123     arm_test_cc(&c, a->cond);
9124     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
9125 
9126     /* Load the arguments for the new comparison.  */
9127     if (a->imm) {
9128         tcg_y = tcg_constant_i64(a->y);
9129     } else {
9130         tcg_y = cpu_reg(s, a->y);
9131     }
9132     tcg_rn = cpu_reg(s, a->rn);
9133 
9134     /* Set the flags for the new comparison.  */
9135     if (a->op) {
9136         gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
9137     } else {
9138         gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
9139     }
9140 
9141     /*
9142      * If COND was false, force the flags to #nzcv.  Compute two masks
9143      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
9144      * For tcg hosts that support ANDC, we can make do with just T1.
9145      * In either case, allow the tcg optimizer to delete any unused mask.
9146      */
9147     tcg_gen_neg_i32(tcg_t1, tcg_t0);
9148     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
9149 
9150     nzcv = a->nzcv;
9151     has_andc = tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0);
9152     if (nzcv & 8) { /* N */
9153         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
9154     } else {
9155         if (has_andc) {
9156             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
9157         } else {
9158             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
9159         }
9160     }
9161     if (nzcv & 4) { /* Z */
9162         if (has_andc) {
9163             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
9164         } else {
9165             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
9166         }
9167     } else {
9168         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
9169     }
9170     if (nzcv & 2) { /* C */
9171         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
9172     } else {
9173         if (has_andc) {
9174             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
9175         } else {
9176             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
9177         }
9178     }
9179     if (nzcv & 1) { /* V */
9180         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
9181     } else {
9182         if (has_andc) {
9183             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
9184         } else {
9185             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
9186         }
9187     }
9188     return true;
9189 }
9190 
9191 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
9192 {
9193     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9194     TCGv_i64 zero = tcg_constant_i64(0);
9195     DisasCompare64 c;
9196 
9197     a64_test_cc(&c, a->cond);
9198 
9199     if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) {
9200         /* CSET & CSETM.  */
9201         if (a->else_inv) {
9202             tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
9203                                    tcg_rd, c.value, zero);
9204         } else {
9205             tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
9206                                 tcg_rd, c.value, zero);
9207         }
9208     } else {
9209         TCGv_i64 t_true = cpu_reg(s, a->rn);
9210         TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1);
9211 
9212         if (a->else_inv && a->else_inc) {
9213             tcg_gen_neg_i64(t_false, t_false);
9214         } else if (a->else_inv) {
9215             tcg_gen_not_i64(t_false, t_false);
9216         } else if (a->else_inc) {
9217             tcg_gen_addi_i64(t_false, t_false, 1);
9218         }
9219         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
9220     }
9221 
9222     if (!a->sf) {
9223         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
9224     }
9225     return true;
9226 }
9227 
9228 typedef struct FPScalar1Int {
9229     void (*gen_h)(TCGv_i32, TCGv_i32);
9230     void (*gen_s)(TCGv_i32, TCGv_i32);
9231     void (*gen_d)(TCGv_i64, TCGv_i64);
9232 } FPScalar1Int;
9233 
9234 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
9235                               const FPScalar1Int *f,
9236                               bool merging)
9237 {
9238     switch (a->esz) {
9239     case MO_64:
9240         if (fp_access_check(s)) {
9241             TCGv_i64 t = read_fp_dreg(s, a->rn);
9242             f->gen_d(t, t);
9243             if (merging) {
9244                 write_fp_dreg_merging(s, a->rd, a->rd, t);
9245             } else {
9246                 write_fp_dreg(s, a->rd, t);
9247             }
9248         }
9249         break;
9250     case MO_32:
9251         if (fp_access_check(s)) {
9252             TCGv_i32 t = read_fp_sreg(s, a->rn);
9253             f->gen_s(t, t);
9254             if (merging) {
9255                 write_fp_sreg_merging(s, a->rd, a->rd, t);
9256             } else {
9257                 write_fp_sreg(s, a->rd, t);
9258             }
9259         }
9260         break;
9261     case MO_16:
9262         if (!dc_isar_feature(aa64_fp16, s)) {
9263             return false;
9264         }
9265         if (fp_access_check(s)) {
9266             TCGv_i32 t = read_fp_hreg(s, a->rn);
9267             f->gen_h(t, t);
9268             if (merging) {
9269                 write_fp_hreg_merging(s, a->rd, a->rd, t);
9270             } else {
9271                 write_fp_sreg(s, a->rd, t);
9272             }
9273         }
9274         break;
9275     default:
9276         return false;
9277     }
9278     return true;
9279 }
9280 
9281 static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a,
9282                                   const FPScalar1Int *fnormal,
9283                                   const FPScalar1Int *fah)
9284 {
9285     return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true);
9286 }
9287 
9288 static const FPScalar1Int f_scalar_fmov = {
9289     tcg_gen_mov_i32,
9290     tcg_gen_mov_i32,
9291     tcg_gen_mov_i64,
9292 };
9293 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false)
9294 
9295 static const FPScalar1Int f_scalar_fabs = {
9296     gen_vfp_absh,
9297     gen_vfp_abss,
9298     gen_vfp_absd,
9299 };
9300 static const FPScalar1Int f_scalar_ah_fabs = {
9301     gen_vfp_ah_absh,
9302     gen_vfp_ah_abss,
9303     gen_vfp_ah_absd,
9304 };
9305 TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs)
9306 
9307 static const FPScalar1Int f_scalar_fneg = {
9308     gen_vfp_negh,
9309     gen_vfp_negs,
9310     gen_vfp_negd,
9311 };
9312 static const FPScalar1Int f_scalar_ah_fneg = {
9313     gen_vfp_ah_negh,
9314     gen_vfp_ah_negs,
9315     gen_vfp_ah_negd,
9316 };
9317 TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg)
9318 
9319 typedef struct FPScalar1 {
9320     void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);
9321     void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr);
9322     void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr);
9323 } FPScalar1;
9324 
9325 static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a,
9326                                         const FPScalar1 *f, int rmode,
9327                                         ARMFPStatusFlavour fpsttype)
9328 {
9329     TCGv_i32 tcg_rmode = NULL;
9330     TCGv_ptr fpst;
9331     TCGv_i64 t64;
9332     TCGv_i32 t32;
9333     int check = fp_access_check_scalar_hsd(s, a->esz);
9334 
9335     if (check <= 0) {
9336         return check == 0;
9337     }
9338 
9339     fpst = fpstatus_ptr(fpsttype);
9340     if (rmode >= 0) {
9341         tcg_rmode = gen_set_rmode(rmode, fpst);
9342     }
9343 
9344     switch (a->esz) {
9345     case MO_64:
9346         t64 = read_fp_dreg(s, a->rn);
9347         f->gen_d(t64, t64, fpst);
9348         write_fp_dreg_merging(s, a->rd, a->rd, t64);
9349         break;
9350     case MO_32:
9351         t32 = read_fp_sreg(s, a->rn);
9352         f->gen_s(t32, t32, fpst);
9353         write_fp_sreg_merging(s, a->rd, a->rd, t32);
9354         break;
9355     case MO_16:
9356         t32 = read_fp_hreg(s, a->rn);
9357         f->gen_h(t32, t32, fpst);
9358         write_fp_hreg_merging(s, a->rd, a->rd, t32);
9359         break;
9360     default:
9361         g_assert_not_reached();
9362     }
9363 
9364     if (rmode >= 0) {
9365         gen_restore_rmode(tcg_rmode, fpst);
9366     }
9367     return true;
9368 }
9369 
9370 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
9371                           const FPScalar1 *f, int rmode)
9372 {
9373     return do_fp1_scalar_with_fpsttype(s, a, f, rmode,
9374                                        a->esz == MO_16 ?
9375                                        FPST_A64_F16 : FPST_A64);
9376 }
9377 
9378 static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a,
9379                              const FPScalar1 *f, int rmode)
9380 {
9381     return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz));
9382 }
9383 
9384 static const FPScalar1 f_scalar_fsqrt = {
9385     gen_helper_vfp_sqrth,
9386     gen_helper_vfp_sqrts,
9387     gen_helper_vfp_sqrtd,
9388 };
9389 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1)
9390 
9391 static const FPScalar1 f_scalar_frint = {
9392     gen_helper_advsimd_rinth,
9393     gen_helper_rints,
9394     gen_helper_rintd,
9395 };
9396 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
9397 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF)
9398 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF)
9399 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO)
9400 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
9401 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1)
9402 
9403 static const FPScalar1 f_scalar_frintx = {
9404     gen_helper_advsimd_rinth_exact,
9405     gen_helper_rints_exact,
9406     gen_helper_rintd_exact,
9407 };
9408 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
9409 
9410 static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a)
9411 {
9412     ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64;
9413     TCGv_i32 t32;
9414     int check;
9415 
9416     if (!dc_isar_feature(aa64_bf16, s)) {
9417         return false;
9418     }
9419 
9420     check = fp_access_check_scalar_hsd(s, a->esz);
9421 
9422     if (check <= 0) {
9423         return check == 0;
9424     }
9425 
9426     t32 = read_fp_sreg(s, a->rn);
9427     gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype));
9428     write_fp_hreg_merging(s, a->rd, a->rd, t32);
9429     return true;
9430 }
9431 
9432 static const FPScalar1 f_scalar_frint32 = {
9433     NULL,
9434     gen_helper_frint32_s,
9435     gen_helper_frint32_d,
9436 };
9437 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a,
9438            &f_scalar_frint32, FPROUNDING_ZERO)
9439 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1)
9440 
9441 static const FPScalar1 f_scalar_frint64 = {
9442     NULL,
9443     gen_helper_frint64_s,
9444     gen_helper_frint64_d,
9445 };
9446 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a,
9447            &f_scalar_frint64, FPROUNDING_ZERO)
9448 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1)
9449 
9450 static const FPScalar1 f_scalar_frecpe = {
9451     gen_helper_recpe_f16,
9452     gen_helper_recpe_f32,
9453     gen_helper_recpe_f64,
9454 };
9455 static const FPScalar1 f_scalar_frecpe_rpres = {
9456     gen_helper_recpe_f16,
9457     gen_helper_recpe_rpres_f32,
9458     gen_helper_recpe_f64,
9459 };
9460 TRANS(FRECPE_s, do_fp1_scalar_ah, a,
9461       s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
9462       &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1)
9463 
9464 static const FPScalar1 f_scalar_frecpx = {
9465     gen_helper_frecpx_f16,
9466     gen_helper_frecpx_f32,
9467     gen_helper_frecpx_f64,
9468 };
9469 TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1)
9470 
9471 static const FPScalar1 f_scalar_frsqrte = {
9472     gen_helper_rsqrte_f16,
9473     gen_helper_rsqrte_f32,
9474     gen_helper_rsqrte_f64,
9475 };
9476 static const FPScalar1 f_scalar_frsqrte_rpres = {
9477     gen_helper_rsqrte_f16,
9478     gen_helper_rsqrte_rpres_f32,
9479     gen_helper_rsqrte_f64,
9480 };
9481 TRANS(FRSQRTE_s, do_fp1_scalar_ah, a,
9482       s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
9483       &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1)
9484 
9485 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
9486 {
9487     if (fp_access_check(s)) {
9488         TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn);
9489         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9490         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9491 
9492         gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst);
9493         write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
9494     }
9495     return true;
9496 }
9497 
9498 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a)
9499 {
9500     if (fp_access_check(s)) {
9501         TCGv_i32 tmp = read_fp_sreg(s, a->rn);
9502         TCGv_i32 ahp = get_ahp_flag();
9503         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9504 
9505         gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
9506         /* write_fp_hreg_merging is OK here because top half of result is zero */
9507         write_fp_hreg_merging(s, a->rd, a->rd, tmp);
9508     }
9509     return true;
9510 }
9511 
9512 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
9513 {
9514     if (fp_access_check(s)) {
9515         TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
9516         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9517         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9518 
9519         gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst);
9520         write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
9521     }
9522     return true;
9523 }
9524 
9525 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
9526 {
9527     if (fp_access_check(s)) {
9528         TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
9529         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9530         TCGv_i32 ahp = get_ahp_flag();
9531         TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9532 
9533         gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
9534         /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */
9535         write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd);
9536     }
9537     return true;
9538 }
9539 
9540 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
9541 {
9542     if (fp_access_check(s)) {
9543         TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
9544         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9545         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
9546         TCGv_i32 tcg_ahp = get_ahp_flag();
9547 
9548         gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9549         write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
9550     }
9551     return true;
9552 }
9553 
9554 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
9555 {
9556     if (fp_access_check(s)) {
9557         TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
9558         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9559         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
9560         TCGv_i32 tcg_ahp = get_ahp_flag();
9561 
9562         gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9563         write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
9564     }
9565     return true;
9566 }
9567 
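     /*
      * Convert the 64-bit integer tcg_int to a float of size esz with
      * 'shift' fractional bits, writing the result to Rd via the merging
      * fp register write helpers.
      */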
9568 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
9569                            TCGv_i64 tcg_int, bool is_signed)
9570 {
9571     TCGv_ptr tcg_fpstatus;
9572     TCGv_i32 tcg_shift, tcg_single;
9573     TCGv_i64 tcg_double;
9574 
9575     tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9576     tcg_shift = tcg_constant_i32(shift);
9577 
9578     switch (esz) {
9579     case MO_64:
9580         tcg_double = tcg_temp_new_i64();
9581         if (is_signed) {
9582             gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
9583         } else {
9584             gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
9585         }
9586         write_fp_dreg_merging(s, rd, rd, tcg_double);
9587         break;
9588 
9589     case MO_32:
9590         tcg_single = tcg_temp_new_i32();
9591         if (is_signed) {
9592             gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9593         } else {
9594             gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9595         }
9596         write_fp_sreg_merging(s, rd, rd, tcg_single);
9597         break;
9598 
9599     case MO_16:
9600         tcg_single = tcg_temp_new_i32();
9601         if (is_signed) {
9602             gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9603         } else {
9604             gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9605         }
9606         write_fp_hreg_merging(s, rd, rd, tcg_single);
9607         break;
9608 
9609     default:
9610         g_assert_not_reached();
9611     }
9612     return true;
9613 }
9614 
9615 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed)
9616 {
9617     TCGv_i64 tcg_int;
9618     int check = fp_access_check_scalar_hsd(s, a->esz);
9619 
9620     if (check <= 0) {
9621         return check == 0;
9622     }
9623 
9624     if (a->sf) {
9625         tcg_int = cpu_reg(s, a->rn);
9626     } else {
9627         tcg_int = read_cpu_reg(s, a->rn, true);
9628         if (is_signed) {
9629             tcg_gen_ext32s_i64(tcg_int, tcg_int);
9630         } else {
9631             tcg_gen_ext32u_i64(tcg_int, tcg_int);
9632         }
9633     }
9634     return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
9635 }
9636 
9637 TRANS(SCVTF_g, do_cvtf_g, a, true)
9638 TRANS(UCVTF_g, do_cvtf_g, a, false)
9639 
9640 /*
9641  * [US]CVTF (vector), scalar version.
9642  * Which sounds weird, but really just means input from fp register
9643  * instead of input from general register.  Input and output element
9644  * size are always equal.
9645  */
9646 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed)
9647 {
9648     TCGv_i64 tcg_int;
9649     int check = fp_access_check_scalar_hsd(s, a->esz);
9650 
9651     if (check <= 0) {
9652         return check == 0;
9653     }
9654 
9655     tcg_int = tcg_temp_new_i64();
9656     read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0));
9657     return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
9658 }
9659 
9660 TRANS(SCVTF_f, do_cvtf_f, a, true)
9661 TRANS(UCVTF_f, do_cvtf_f, a, false)
9662 
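     /*
      * Convert the fp value in element 0 of Rn (size esz) to an integer
      * of the size and signedness given by 'out', with 'shift' fractional
      * bits and rounding mode 'rmode'; the result is returned
      * zero-extended in tcg_out.
      */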
9663 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz,
9664                            TCGv_i64 tcg_out, int shift, int rn,
9665                            ARMFPRounding rmode)
9666 {
9667     TCGv_ptr tcg_fpstatus;
9668     TCGv_i32 tcg_shift, tcg_rmode, tcg_single;
9669 
9670     tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9671     tcg_shift = tcg_constant_i32(shift);
9672     tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
9673 
9674     switch (esz) {
9675     case MO_64:
9676         read_vec_element(s, tcg_out, rn, 0, MO_64);
9677         switch (out) {
9678         case MO_64 | MO_SIGN:
9679             gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9680             break;
9681         case MO_64:
9682             gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9683             break;
9684         case MO_32 | MO_SIGN:
9685             gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9686             break;
9687         case MO_32:
9688             gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9689             break;
9690         default:
9691             g_assert_not_reached();
9692         }
9693         break;
9694 
9695     case MO_32:
9696         tcg_single = read_fp_sreg(s, rn);
9697         switch (out) {
9698         case MO_64 | MO_SIGN:
9699             gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9700             break;
9701         case MO_64:
9702             gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9703             break;
9704         case MO_32 | MO_SIGN:
9705             gen_helper_vfp_tosls(tcg_single, tcg_single,
9706                                  tcg_shift, tcg_fpstatus);
9707             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9708             break;
9709         case MO_32:
9710             gen_helper_vfp_touls(tcg_single, tcg_single,
9711                                  tcg_shift, tcg_fpstatus);
9712             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9713             break;
9714         default:
9715             g_assert_not_reached();
9716         }
9717         break;
9718 
9719     case MO_16:
9720         tcg_single = read_fp_hreg(s, rn);
9721         switch (out) {
9722         case MO_64 | MO_SIGN:
9723             gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9724             break;
9725         case MO_64:
9726             gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9727             break;
9728         case MO_32 | MO_SIGN:
9729             gen_helper_vfp_toslh(tcg_single, tcg_single,
9730                                  tcg_shift, tcg_fpstatus);
9731             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9732             break;
9733         case MO_32:
9734             gen_helper_vfp_toulh(tcg_single, tcg_single,
9735                                  tcg_shift, tcg_fpstatus);
9736             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9737             break;
9738         case MO_16 | MO_SIGN:
9739             gen_helper_vfp_toshh(tcg_single, tcg_single,
9740                                  tcg_shift, tcg_fpstatus);
9741             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9742             break;
9743         case MO_16:
9744             gen_helper_vfp_touhh(tcg_single, tcg_single,
9745                                  tcg_shift, tcg_fpstatus);
9746             tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9747             break;
9748         default:
9749             g_assert_not_reached();
9750         }
9751         break;
9752 
9753     default:
9754         g_assert_not_reached();
9755     }
9756 
9757     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
9758 }
9759 
9760 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a,
9761                       ARMFPRounding rmode, bool is_signed)
9762 {
9763     TCGv_i64 tcg_int;
9764     int check = fp_access_check_scalar_hsd(s, a->esz);
9765 
9766     if (check <= 0) {
9767         return check == 0;
9768     }
9769 
9770     tcg_int = cpu_reg(s, a->rd);
9771     do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? MO_SIGN : 0),
9772                    a->esz, tcg_int, a->shift, a->rn, rmode);
9773 
9774     if (!a->sf) {
9775         tcg_gen_ext32u_i64(tcg_int, tcg_int);
9776     }
9777     return true;
9778 }
9779 
9780 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true)
9781 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false)
9782 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true)
9783 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false)
9784 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true)
9785 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false)
9786 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true)
9787 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false)
9788 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true)
9789 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false)
9790 
9791 /*
9792  * FCVT* (vector), scalar version.
9793  * Which sounds weird, but really just means output to fp register
9794  * instead of output to general register.  Input and output element
9795  * sizes are always equal.
9796  */
9797 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a,
9798                       ARMFPRounding rmode, bool is_signed)
9799 {
9800     TCGv_i64 tcg_int;
9801     int check = fp_access_check_scalar_hsd(s, a->esz);
9802 
9803     if (check <= 0) {
9804         return check == 0;
9805     }
9806 
9807     tcg_int = tcg_temp_new_i64();
9808     do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0),
9809                    a->esz, tcg_int, a->shift, a->rn, rmode);
9810 
9811     if (!s->fpcr_nep) {
9812         clear_vec(s, a->rd);
9813     }
9814     write_vec_element(s, tcg_int, a->rd, 0, a->esz);
9815     return true;
9816 }
9817 
9818 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true)
9819 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false)
9820 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true)
9821 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false)
9822 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true)
9823 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false)
9824 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true)
9825 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false)
9826 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true)
9827 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false)
9828 
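/*
 * FJCVTZS: the helper packs the 32-bit converted value in the low
 * half of its result and the new ZF value in the high half.  The
 * insn sets Z according to whether the conversion was exact and
 * clears N, C and V.
 */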
9829 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a)
9830 {
9831     if (!dc_isar_feature(aa64_jscvt, s)) {
9832         return false;
9833     }
9834     if (fp_access_check(s)) {
9835         TCGv_i64 t = read_fp_dreg(s, a->rn);
9836         TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64);
9837 
9838         gen_helper_fjcvtzs(t, t, fpstatus);
9839 
9840         tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t);
9841         tcg_gen_extrh_i64_i32(cpu_ZF, t);
9842         tcg_gen_movi_i32(cpu_CF, 0);
9843         tcg_gen_movi_i32(cpu_NF, 0);
9844         tcg_gen_movi_i32(cpu_VF, 0);
9845     }
9846     return true;
9847 }
9848 
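/*
 * FMOV between general-purpose and fp registers, in both directions.
 * The _ux and _xu forms access the upper 64 bits of the 128-bit
 * vector register, i.e. FMOV Vd.D[1], Xn and FMOV Xd, Vn.D[1].
 */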
9849 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a)
9850 {
9851     if (!dc_isar_feature(aa64_fp16, s)) {
9852         return false;
9853     }
9854     if (fp_access_check(s)) {
9855         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9856         TCGv_i64 tmp = tcg_temp_new_i64();
9857         tcg_gen_ext16u_i64(tmp, tcg_rn);
9858         write_fp_dreg(s, a->rd, tmp);
9859     }
9860     return true;
9861 }
9862 
9863 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a)
9864 {
9865     if (fp_access_check(s)) {
9866         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9867         TCGv_i64 tmp = tcg_temp_new_i64();
9868         tcg_gen_ext32u_i64(tmp, tcg_rn);
9869         write_fp_dreg(s, a->rd, tmp);
9870     }
9871     return true;
9872 }
9873 
9874 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a)
9875 {
9876     if (fp_access_check(s)) {
9877         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9878         write_fp_dreg(s, a->rd, tcg_rn);
9879     }
9880     return true;
9881 }
9882 
9883 static bool trans_FMOV_ux(DisasContext *s, arg_rr *a)
9884 {
9885     if (fp_access_check(s)) {
9886         TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9887         tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd));
9888         clear_vec_high(s, true, a->rd);
9889     }
9890     return true;
9891 }
9892 
9893 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a)
9894 {
9895     if (!dc_isar_feature(aa64_fp16, s)) {
9896         return false;
9897     }
9898     if (fp_access_check(s)) {
9899         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9900         tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16));
9901     }
9902     return true;
9903 }
9904 
9905 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a)
9906 {
9907     if (fp_access_check(s)) {
9908         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9909         tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32));
9910     }
9911     return true;
9912 }
9913 
9914 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a)
9915 {
9916     if (fp_access_check(s)) {
9917         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9918         tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64));
9919     }
9920     return true;
9921 }
9922 
9923 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a)
9924 {
9925     if (fp_access_check(s)) {
9926         TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9927         tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn));
9928     }
9929     return true;
9930 }
9931 
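/*
 * One-operand saturating ops (SQABS/SQNEG): a table of helpers for
 * the B/H/S element sizes plus a separate 64-bit helper, all taking
 * tcg_env so they can set QC on saturation.
 */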
9932 typedef struct ENVScalar1 {
9933     NeonGenOneOpEnvFn *gen_bhs[3];
9934     NeonGenOne64OpEnvFn *gen_d;
9935 } ENVScalar1;
9936 
9937 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f)
9938 {
9939     if (!fp_access_check(s)) {
9940         return true;
9941     }
9942     if (a->esz == MO_64) {
9943         TCGv_i64 t = read_fp_dreg(s, a->rn);
9944         f->gen_d(t, tcg_env, t);
9945         write_fp_dreg(s, a->rd, t);
9946     } else {
9947         TCGv_i32 t = tcg_temp_new_i32();
9948 
9949         read_vec_element_i32(s, t, a->rn, 0, a->esz);
9950         f->gen_bhs[a->esz](t, tcg_env, t);
9951         write_fp_sreg(s, a->rd, t);
9952     }
9953     return true;
9954 }
9955 
9956 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f)
9957 {
9958     if (a->esz == MO_64 && !a->q) {
9959         return false;
9960     }
9961     if (!fp_access_check(s)) {
9962         return true;
9963     }
9964     if (a->esz == MO_64) {
9965         TCGv_i64 t = tcg_temp_new_i64();
9966 
9967         for (int i = 0; i < 2; ++i) {
9968             read_vec_element(s, t, a->rn, i, MO_64);
9969             f->gen_d(t, tcg_env, t);
9970             write_vec_element(s, t, a->rd, i, MO_64);
9971         }
9972     } else {
9973         TCGv_i32 t = tcg_temp_new_i32();
9974         int n = (a->q ? 16 : 8) >> a->esz;
9975 
9976         for (int i = 0; i < n; ++i) {
9977             read_vec_element_i32(s, t, a->rn, i, a->esz);
9978             f->gen_bhs[a->esz](t, tcg_env, t);
9979             write_vec_element_i32(s, t, a->rd, i, a->esz);
9980         }
9981     }
9982     clear_vec_high(s, a->q, a->rd);
9983     return true;
9984 }
9985 
9986 static const ENVScalar1 f_scalar_sqabs = {
9987     { gen_helper_neon_qabs_s8,
9988       gen_helper_neon_qabs_s16,
9989       gen_helper_neon_qabs_s32 },
9990     gen_helper_neon_qabs_s64,
9991 };
9992 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs)
9993 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs)
9994 
9995 static const ENVScalar1 f_scalar_sqneg = {
9996     { gen_helper_neon_qneg_s8,
9997       gen_helper_neon_qneg_s16,
9998       gen_helper_neon_qneg_s32 },
9999     gen_helper_neon_qneg_s64,
10000 };
10001 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg)
10002 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg)
10003 
10004 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f)
10005 {
10006     if (fp_access_check(s)) {
10007         TCGv_i64 t = read_fp_dreg(s, a->rn);
10008         f(t, t);
10009         write_fp_dreg(s, a->rd, t);
10010     }
10011     return true;
10012 }
10013 
10014 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64)
10015 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64)
10016 
10017 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond)
10018 {
10019     if (fp_access_check(s)) {
10020         TCGv_i64 t = read_fp_dreg(s, a->rn);
10021         tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0));
10022         write_fp_dreg(s, a->rd, t);
10023     }
10024     return true;
10025 }
10026 
10027 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT)
10028 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE)
10029 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE)
10030 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT)
10031 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ)
10032 
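/*
 * Narrowing 2-reg-misc, scalar form: the source element is one size
 * step wider than the destination, so read at a->esz + 1 and write
 * the narrowed result at a->esz (e.g. SQXTN Hd, Sn has esz == MO_16).
 */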
10033 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a,
10034                                    ArithOneOp * const fn[3])
10035 {
10036     if (a->esz == MO_64) {
10037         return false;
10038     }
10039     if (fp_access_check(s)) {
10040         TCGv_i64 t = tcg_temp_new_i64();
10041 
10042         read_vec_element(s, t, a->rn, 0, a->esz + 1);
10043         fn[a->esz](t, t);
10044         clear_vec(s, a->rd);
10045         write_vec_element(s, t, a->rd, 0, a->esz);
10046     }
10047     return true;
10048 }
10049 
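/*
 * The saturating narrowing helpers take tcg_env (they may set QC);
 * wrap them to match the two-operand ArithOneOp signature used by
 * the narrowing trans functions.
 */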
10050 #define WRAP_ENV(NAME) \
10051     static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \
10052     { gen_helper_##NAME(d, tcg_env, n); }
10053 
10054 WRAP_ENV(neon_unarrow_sat8)
10055 WRAP_ENV(neon_unarrow_sat16)
10056 WRAP_ENV(neon_unarrow_sat32)
10057 
10058 static ArithOneOp * const f_scalar_sqxtun[] = {
10059     gen_neon_unarrow_sat8,
10060     gen_neon_unarrow_sat16,
10061     gen_neon_unarrow_sat32,
10062 };
10063 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun)
10064 
10065 WRAP_ENV(neon_narrow_sat_s8)
10066 WRAP_ENV(neon_narrow_sat_s16)
10067 WRAP_ENV(neon_narrow_sat_s32)
10068 
10069 static ArithOneOp * const f_scalar_sqxtn[] = {
10070     gen_neon_narrow_sat_s8,
10071     gen_neon_narrow_sat_s16,
10072     gen_neon_narrow_sat_s32,
10073 };
10074 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn)
10075 
10076 WRAP_ENV(neon_narrow_sat_u8)
10077 WRAP_ENV(neon_narrow_sat_u16)
10078 WRAP_ENV(neon_narrow_sat_u32)
10079 
10080 static ArithOneOp * const f_scalar_uqxtn[] = {
10081     gen_neon_narrow_sat_u8,
10082     gen_neon_narrow_sat_u16,
10083     gen_neon_narrow_sat_u32,
10084 };
10085 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn)
10086 
10087 static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a)
10088 {
10089     if (fp_access_check(s)) {
10090         /*
10091          * 64 bit to 32 bit float conversion
10092          * with von Neumann rounding (round to odd)
10093          */
10094         TCGv_i64 src = read_fp_dreg(s, a->rn);
10095         TCGv_i32 dst = tcg_temp_new_i32();
10096         gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64));
10097         write_fp_sreg_merging(s, a->rd, a->rd, dst);
10098     }
10099     return true;
10100 }
10101 
10102 #undef WRAP_ENV
10103 
10104 static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
10105 {
10106     if (!a->q && a->esz == MO_64) {
10107         return false;
10108     }
10109     if (fp_access_check(s)) {
10110         gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
10111     }
10112     return true;
10113 }
10114 
10115 TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs)
10116 TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg)
10117 TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not)
10118 TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt)
10119 TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit)
10120 TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0)
10121 TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0)
10122 TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0)
10123 TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0)
10124 TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0)
10125 TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16)
10126 TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32)
10127 TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe)
10128 TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte)
10129 
10130 static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
10131 {
10132     if (a->esz == MO_64) {
10133         return false;
10134     }
10135     if (fp_access_check(s)) {
10136         gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
10137     }
10138     return true;
10139 }
10140 
10141 TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
10142 TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
10143 TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
10144 TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
10145 TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
10146 TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
10147 TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)
10148 
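/*
 * Narrowing 2-reg-misc, vector form: each 64-bit half of the source
 * narrows to 32 bits of result.  The base insn writes destination
 * elements 0/1; the "2" form (Q set) writes elements 2/3 and leaves
 * the existing low half of the destination intact.
 */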
10149 static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a,
10150                                    ArithOneOp * const fn[3])
10151 {
10152     if (a->esz == MO_64) {
10153         return false;
10154     }
10155     if (fp_access_check(s)) {
10156         TCGv_i64 t0 = tcg_temp_new_i64();
10157         TCGv_i64 t1 = tcg_temp_new_i64();
10158 
10159         read_vec_element(s, t0, a->rn, 0, MO_64);
10160         read_vec_element(s, t1, a->rn, 1, MO_64);
10161         fn[a->esz](t0, t0);
10162         fn[a->esz](t1, t1);
10163         write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32);
10164         write_vec_element(s, t1, a->rd, a->q ? 3 : 1, MO_32);
10165         clear_vec_high(s, a->q, a->rd);
10166     }
10167     return true;
10168 }
10169 
10170 static ArithOneOp * const f_scalar_xtn[] = {
10171     gen_helper_neon_narrow_u8,
10172     gen_helper_neon_narrow_u16,
10173     tcg_gen_ext32u_i64,
10174 };
10175 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn)
10176 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun)
10177 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn)
10178 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn)
10179 
10180 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n)
10181 {
10182     TCGv_i32 tcg_lo = tcg_temp_new_i32();
10183     TCGv_i32 tcg_hi = tcg_temp_new_i32();
10184     TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
10185     TCGv_i32 ahp = get_ahp_flag();
10186 
10187     tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n);
10188     gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
10189     gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
10190     tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16);
10191     tcg_gen_extu_i32_i64(d, tcg_lo);
10192 }
10193 
10194 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n)
10195 {
10196     TCGv_i32 tmp = tcg_temp_new_i32();
10197     TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
10198 
10199     gen_helper_vfp_fcvtsd(tmp, n, fpst);
10200     tcg_gen_extu_i32_i64(d, tmp);
10201 }
10202 
10203 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
10204 {
10205     /*
10206      * 64 bit to 32 bit float conversion
10207      * with von Neumann rounding (round to odd)
10208      */
10209     TCGv_i32 tmp = tcg_temp_new_i32();
10210     gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
10211     tcg_gen_extu_i32_i64(d, tmp);
10212 }
10213 
10214 static ArithOneOp * const f_vector_fcvtn[] = {
10215     NULL,
10216     gen_fcvtn_hs,
10217     gen_fcvtn_sd,
10218 };
10219 static ArithOneOp * const f_scalar_fcvtxn[] = {
10220     NULL,
10221     NULL,
10222     gen_fcvtxn_sd,
10223 };
10224 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn)
10225 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn)
10226 
10227 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
10228 {
10229     TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
10230     TCGv_i32 tmp = tcg_temp_new_i32();
10231     gen_helper_bfcvt_pair(tmp, n, fpst);
10232     tcg_gen_extu_i32_i64(d, tmp);
10233 }
10234 
10235 static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n)
10236 {
10237     TCGv_ptr fpst = fpstatus_ptr(FPST_AH);
10238     TCGv_i32 tmp = tcg_temp_new_i32();
10239     gen_helper_bfcvt_pair(tmp, n, fpst);
10240     tcg_gen_extu_i32_i64(d, tmp);
10241 }
10242 
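/*
 * The two rows are indexed by s->fpcr_ah: the FPCR.AH == 1 form uses
 * the FPST_AH float status instead of FPST_A64.
 */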
10243 static ArithOneOp * const f_vector_bfcvtn[2][3] = {
10244     {
10245         NULL,
10246         gen_bfcvtn_hs,
10247         NULL,
10248     }, {
10249         NULL,
10250         gen_bfcvtn_ah_hs,
10251         NULL,
10252     }
10253 };
10254 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a,
10255            f_vector_bfcvtn[s->fpcr_ah])
10256 
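/*
 * SHLL/SHLL2: widen each source element to double width and then
 * shift left by the original element size in bits (8 << esz), so
 * e.g. SHLL Vd.8H, Vn.8B, #8 widens bytes and shifts by 8.
 */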
10257 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
10258 {
10259     static NeonGenWidenFn * const widenfns[3] = {
10260         gen_helper_neon_widen_u8,
10261         gen_helper_neon_widen_u16,
10262         tcg_gen_extu_i32_i64,
10263     };
10264     NeonGenWidenFn *widenfn;
10265     TCGv_i64 tcg_res[2];
10266     TCGv_i32 tcg_op;
10267     int part, pass;
10268 
10269     if (a->esz == MO_64) {
10270         return false;
10271     }
10272     if (!fp_access_check(s)) {
10273         return true;
10274     }
10275 
10276     tcg_op = tcg_temp_new_i32();
10277     widenfn = widenfns[a->esz];
10278     part = a->q ? 2 : 0;
10279 
10280     for (pass = 0; pass < 2; pass++) {
10281         read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32);
10282         tcg_res[pass] = tcg_temp_new_i64();
10283         widenfn(tcg_res[pass], tcg_op);
10284         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz);
10285     }
10286 
10287     for (pass = 0; pass < 2; pass++) {
10288         write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
10289     }
10290     return true;
10291 }
10292 
10293 static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
10294 {
10295     int check = fp_access_check_vector_hsd(s, a->q, a->esz);
10296 
10297     if (check <= 0) {
10298         return check == 0;
10299     }
10300 
10301     gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
10302     return true;
10303 }
10304 
10305 TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs)
10306 TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg)
10307 
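/*
 * One-operand fp vector op, optionally with an explicit rounding
 * mode; rmode < 0 means the current FPCR rounding mode is used
 * (FSQRT, FRINTI, FRINTX, FRINT32X and FRINT64X pass -1).
 */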
10308 static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
10309                           const FPScalar1 *f, int rmode)
10310 {
10311     TCGv_i32 tcg_rmode = NULL;
10312     TCGv_ptr fpst;
10313     int check = fp_access_check_vector_hsd(s, a->q, a->esz);
10314 
10315     if (check <= 0) {
10316         return check == 0;
10317     }
10318 
10319     fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
10320     if (rmode >= 0) {
10321         tcg_rmode = gen_set_rmode(rmode, fpst);
10322     }
10323 
10324     if (a->esz == MO_64) {
10325         TCGv_i64 t64 = tcg_temp_new_i64();
10326 
10327         for (int pass = 0; pass < 2; ++pass) {
10328             read_vec_element(s, t64, a->rn, pass, MO_64);
10329             f->gen_d(t64, t64, fpst);
10330             write_vec_element(s, t64, a->rd, pass, MO_64);
10331         }
10332     } else {
10333         TCGv_i32 t32 = tcg_temp_new_i32();
10334         void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr)
10335             = (a->esz == MO_16 ? f->gen_h : f->gen_s);
10336 
10337         for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) {
10338             read_vec_element_i32(s, t32, a->rn, pass, a->esz);
10339             gen(t32, t32, fpst);
10340             write_vec_element_i32(s, t32, a->rd, pass, a->esz);
10341         }
10342     }
10343     clear_vec_high(s, a->q, a->rd);
10344 
10345     if (rmode >= 0) {
10346         gen_restore_rmode(tcg_rmode, fpst);
10347     }
10348     return true;
10349 }
10350 
10351 TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1)
10352 
10353 TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
10354 TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF)
10355 TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF)
10356 TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO)
10357 TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
10358 TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1)
10359 TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1)
10360 
10361 TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a,
10362            &f_scalar_frint32, FPROUNDING_ZERO)
10363 TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1)
10364 TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
10365            &f_scalar_frint64, FPROUNDING_ZERO)
10366 TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)
10367 
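/*
 * Two-operand gvec expansion using a helper table indexed by element
 * size: fns[0] = half, fns[1] = single, fns[2] = double, hence the
 * fns[esz - 1] lookup (MO_16 == 1, MO_32 == 2, MO_64 == 3).
 */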
10368 static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz,
10369                                            bool is_q, int rd, int rn, int data,
10370                                            gen_helper_gvec_2_ptr * const fns[3],
10371                                            ARMFPStatusFlavour fpsttype)
10372 {
10373     int check = fp_access_check_vector_hsd(s, is_q, esz);
10374     TCGv_ptr fpst;
10375 
10376     if (check <= 0) {
10377         return check == 0;
10378     }
10379 
10380     fpst = fpstatus_ptr(fpsttype);
10381     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
10382                        vec_full_reg_offset(s, rn), fpst,
10383                        is_q ? 16 : 8, vec_full_reg_size(s),
10384                        data, fns[esz - 1]);
10385     return true;
10386 }
10387 
10388 static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
10389                              int rd, int rn, int data,
10390                              gen_helper_gvec_2_ptr * const fns[3])
10391 {
10392     return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
10393                                           esz == MO_16 ? FPST_A64_F16 :
10394                                           FPST_A64);
10395 }
10396 
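/* As do_gvec_op2_fpst, but selecting the FPCR.AH-dependent fp status.  */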
10397 static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q,
10398                                 int rd, int rn, int data,
10399                                 gen_helper_gvec_2_ptr * const fns[3])
10400 {
10401     return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data,
10402                                           fns, select_ah_fpst(s, esz));
10403 }
10404 
10405 static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
10406     gen_helper_gvec_vcvt_sh,
10407     gen_helper_gvec_vcvt_sf,
10408     gen_helper_gvec_vcvt_sd,
10409 };
10410 TRANS(SCVTF_vi, do_gvec_op2_fpst,
10411       a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v)
10412 TRANS(SCVTF_vf, do_gvec_op2_fpst,
10413       a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v)
10414 
10415 static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = {
10416     gen_helper_gvec_vcvt_uh,
10417     gen_helper_gvec_vcvt_uf,
10418     gen_helper_gvec_vcvt_ud,
10419 };
10420 TRANS(UCVTF_vi, do_gvec_op2_fpst,
10421       a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v)
10422 TRANS(UCVTF_vf, do_gvec_op2_fpst,
10423       a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v)
10424 
10425 static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = {
10426     gen_helper_gvec_vcvt_rz_hs,
10427     gen_helper_gvec_vcvt_rz_fs,
10428     gen_helper_gvec_vcvt_rz_ds,
10429 };
10430 TRANS(FCVTZS_vf, do_gvec_op2_fpst,
10431       a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf)
10432 
10433 static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = {
10434     gen_helper_gvec_vcvt_rz_hu,
10435     gen_helper_gvec_vcvt_rz_fu,
10436     gen_helper_gvec_vcvt_rz_du,
10437 };
10438 TRANS(FCVTZU_vf, do_gvec_op2_fpst,
10439       a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf)
10440 
10441 static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = {
10442     gen_helper_gvec_vcvt_rm_sh,
10443     gen_helper_gvec_vcvt_rm_ss,
10444     gen_helper_gvec_vcvt_rm_sd,
10445 };
10446 
10447 static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = {
10448     gen_helper_gvec_vcvt_rm_uh,
10449     gen_helper_gvec_vcvt_rm_us,
10450     gen_helper_gvec_vcvt_rm_ud,
10451 };
10452 
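/*
 * For the FCVT<rounding> vector forms the softfloat float_round_*
 * value is passed as the gvec 'data' argument; the vcvt_rm_* helpers
 * switch to that rounding mode around the conversion.
 */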
10453 TRANS(FCVTNS_vi, do_gvec_op2_fpst,
10454       a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi)
10455 TRANS(FCVTNU_vi, do_gvec_op2_fpst,
10456       a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi)
10457 TRANS(FCVTPS_vi, do_gvec_op2_fpst,
10458       a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi)
10459 TRANS(FCVTPU_vi, do_gvec_op2_fpst,
10460       a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi)
10461 TRANS(FCVTMS_vi, do_gvec_op2_fpst,
10462       a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi)
10463 TRANS(FCVTMU_vi, do_gvec_op2_fpst,
10464       a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi)
10465 TRANS(FCVTZS_vi, do_gvec_op2_fpst,
10466       a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi)
10467 TRANS(FCVTZU_vi, do_gvec_op2_fpst,
10468       a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi)
10469 TRANS(FCVTAS_vi, do_gvec_op2_fpst,
10470       a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi)
10471 TRANS(FCVTAU_vi, do_gvec_op2_fpst,
10472       a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi)
10473 
10474 static gen_helper_gvec_2_ptr * const f_fceq0[] = {
10475     gen_helper_gvec_fceq0_h,
10476     gen_helper_gvec_fceq0_s,
10477     gen_helper_gvec_fceq0_d,
10478 };
10479 TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0)
10480 
10481 static gen_helper_gvec_2_ptr * const f_fcgt0[] = {
10482     gen_helper_gvec_fcgt0_h,
10483     gen_helper_gvec_fcgt0_s,
10484     gen_helper_gvec_fcgt0_d,
10485 };
10486 TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0)
10487 
10488 static gen_helper_gvec_2_ptr * const f_fcge0[] = {
10489     gen_helper_gvec_fcge0_h,
10490     gen_helper_gvec_fcge0_s,
10491     gen_helper_gvec_fcge0_d,
10492 };
10493 TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0)
10494 
10495 static gen_helper_gvec_2_ptr * const f_fclt0[] = {
10496     gen_helper_gvec_fclt0_h,
10497     gen_helper_gvec_fclt0_s,
10498     gen_helper_gvec_fclt0_d,
10499 };
10500 TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0)
10501 
10502 static gen_helper_gvec_2_ptr * const f_fcle0[] = {
10503     gen_helper_gvec_fcle0_h,
10504     gen_helper_gvec_fcle0_s,
10505     gen_helper_gvec_fcle0_d,
10506 };
10507 TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0)
10508 
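/*
 * FEAT_RPRES provides increased-precision single-precision estimates
 * when FPCR.AH == 1, hence the separate _rpres_s helpers selected
 * below when both fpcr_ah and the feature are present.
 */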
10509 static gen_helper_gvec_2_ptr * const f_frecpe[] = {
10510     gen_helper_gvec_frecpe_h,
10511     gen_helper_gvec_frecpe_s,
10512     gen_helper_gvec_frecpe_d,
10513 };
10514 static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = {
10515     gen_helper_gvec_frecpe_h,
10516     gen_helper_gvec_frecpe_rpres_s,
10517     gen_helper_gvec_frecpe_d,
10518 };
10519 TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
10520       s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe)
10521 
10522 static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
10523     gen_helper_gvec_frsqrte_h,
10524     gen_helper_gvec_frsqrte_s,
10525     gen_helper_gvec_frsqrte_d,
10526 };
10527 static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = {
10528     gen_helper_gvec_frsqrte_h,
10529     gen_helper_gvec_frsqrte_rpres_s,
10530     gen_helper_gvec_frsqrte_d,
10531 };
10532 TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
10533       s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte)
10534 
10535 static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
10536 {
10537     /* Handle 2-reg-misc ops which are widening (so each size element
10538      * in the source becomes a 2*size element in the destination).
10539      * The only instruction like this is FCVTL.
10540      */
10541     int pass;
10542     TCGv_ptr fpst;
10543 
10544     if (!fp_access_check(s)) {
10545         return true;
10546     }
10547 
10548     if (a->esz == MO_64) {
10549         /* 32 -> 64 bit fp conversion */
10550         TCGv_i64 tcg_res[2];
10551         TCGv_i32 tcg_op = tcg_temp_new_i32();
10552         int srcelt = a->q ? 2 : 0;
10553 
10554         fpst = fpstatus_ptr(FPST_A64);
10555 
10556         for (pass = 0; pass < 2; pass++) {
10557             tcg_res[pass] = tcg_temp_new_i64();
10558             read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
10559             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst);
10560         }
10561         for (pass = 0; pass < 2; pass++) {
10562             write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
10563         }
10564     } else {
10565         /* 16 -> 32 bit fp conversion */
10566         int srcelt = a->q ? 4 : 0;
10567         TCGv_i32 tcg_res[4];
10568         TCGv_i32 ahp = get_ahp_flag();
10569 
10570         fpst = fpstatus_ptr(FPST_A64_F16);
10571 
10572         for (pass = 0; pass < 4; pass++) {
10573             tcg_res[pass] = tcg_temp_new_i32();
10574             read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
10575             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
10576                                            fpst, ahp);
10577         }
10578         for (pass = 0; pass < 4; pass++) {
10579             write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32);
10580         }
10581     }
10582     clear_vec_high(s, true, a->rd);
10583     return true;
10584 }
10585 
10586 static bool trans_OK(DisasContext *s, arg_OK *a)
10587 {
10588     return true;
10589 }
10590 
10591 static bool trans_FAIL(DisasContext *s, arg_OK *a)
10592 {
10593     s->is_nonstreaming = true;
10594     return true;
10595 }
10596 
10597 /**
10598  * btype_destination_ok:
10599  * @insn: The instruction at the branch destination
10600  * @bt: SCTLR_ELx.BT
10601  * @btype: PSTATE.BTYPE, and is non-zero
10602  *
10603  * On a guarded page, there are a limited number of insns
10604  * that may be present at the branch target:
10605  *   - branch target identifiers,
10606  *   - paciasp, pacibsp,
10607  *   - BRK insn
10608  *   - HLT insn
10609  * Anything else causes a Branch Target Exception.
10610  *
10611  * Return true if the branch is compatible, false to raise BTITRAP.
10612  */
10613 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
10614 {
10615     if ((insn & 0xfffff01fu) == 0xd503201fu) {
10616         /* HINT space */
10617         switch (extract32(insn, 5, 7)) {
10618         case 0b011001: /* PACIASP */
10619         case 0b011011: /* PACIBSP */
10620             /*
10621              * If SCTLR_ELx.BT, then PACI*SP are not compatible
10622              * with btype == 3.  Otherwise all btype are ok.
10623              */
10624             return !bt || btype != 3;
10625         case 0b100000: /* BTI */
10626             /* Not compatible with any btype.  */
10627             return false;
10628         case 0b100010: /* BTI c */
10629             /* Not compatible with btype == 3 */
10630             return btype != 3;
10631         case 0b100100: /* BTI j */
10632             /* Not compatible with btype == 2 */
10633             return btype != 2;
10634         case 0b100110: /* BTI jc */
10635             /* Compatible with any btype.  */
10636             return true;
10637         }
10638     } else {
10639         switch (insn & 0xffe0001fu) {
10640         case 0xd4200000u: /* BRK */
10641         case 0xd4400000u: /* HLT */
10642             /* Give priority to the breakpoint exception.  */
10643             return true;
10644         }
10645     }
10646     return false;
10647 }
10648 
10649 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
10650                                           CPUState *cpu)
10651 {
10652     DisasContext *dc = container_of(dcbase, DisasContext, base);
10653     CPUARMState *env = cpu_env(cpu);
10654     ARMCPU *arm_cpu = env_archcpu(env);
10655     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
10656     int bound, core_mmu_idx;
10657 
10658     dc->isar = &arm_cpu->isar;
10659     dc->condjmp = 0;
10660     dc->pc_save = dc->base.pc_first;
10661     dc->aarch64 = true;
10662     dc->thumb = false;
10663     dc->sctlr_b = 0;
10664     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
10665     dc->condexec_mask = 0;
10666     dc->condexec_cond = 0;
10667     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
10668     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
10669     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
10670     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
10671     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
10672     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10673 #if !defined(CONFIG_USER_ONLY)
10674     dc->user = (dc->current_el == 0);
10675 #endif
10676     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
10677     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
10678     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
10679     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
10680     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
10681     dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
10682     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
10683     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
10684     dc->zt0_excp_el = EX_TBFLAG_A64(tb_flags, ZT0EXC_EL);
10685     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
10686     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
10687     dc->max_svl = arm_cpu->sme_max_vq * 16;
10688     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
10689     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
10690     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
10691     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
10692     dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
10693     dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
10694     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
10695     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
10696     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
10697     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
10698     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
10699     dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
10700     dc->e2h = EX_TBFLAG_A64(tb_flags, E2H);
10701     dc->nv = EX_TBFLAG_A64(tb_flags, NV);
10702     dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
10703     dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
10704     dc->nv2_mem_e20 = dc->nv2 && dc->e2h;
10705     dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
10706     dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
10707     dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
10708     dc->gcs_en = EX_TBFLAG_A64(tb_flags, GCS_EN);
10709     dc->gcs_rvcen = EX_TBFLAG_A64(tb_flags, GCS_RVCEN);
10710     dc->gcsstr_el = EX_TBFLAG_A64(tb_flags, GCSSTR_EL);
10711     dc->vec_len = 0;
10712     dc->vec_stride = 0;
10713     dc->cp_regs = arm_cpu->cp_regs;
10714     dc->features = env->features;
10715     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
10716     dc->gm_blocksize = arm_cpu->gm_blocksize;
10717 
10718 #ifdef CONFIG_USER_ONLY
10719     /* In sve_probe_page, we assume TBI is enabled. */
10720     tcg_debug_assert(dc->tbid & 1);
10721 #endif
10722 
10723     dc->lse2 = dc_isar_feature(aa64_lse2, dc);
10724 
10725     /* Single step state. The code-generation logic here is:
10726      *  SS_ACTIVE == 0:
10727      *   generate code with no special handling for single-stepping (except
10728      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10729      *   this happens anyway because those changes are all system register or
10730      *   PSTATE writes).
10731      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10732      *   emit code for one insn
10733      *   emit code to clear PSTATE.SS
10734      *   emit code to generate software step exception for completed step
10735      *   end TB (as usual for having generated an exception)
10736      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10737      *   emit code to generate a software step exception
10738      *   end the TB
10739      */
10740     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
10741     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
10742     dc->is_ldex = false;
10743 
10744     /* Bound the number of insns to execute to those left on the page.  */
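    /*
     * pc_first | TARGET_PAGE_MASK sets every bit above the in-page
     * offset, so negating it gives the number of bytes remaining on
     * the page; e.g. with 4K pages and pc_first ending in 0xff8 this
     * is (0x1000 - 0xff8) / 4 = 2 insns.
     */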
10745     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
10746 
10747     /* If architectural single step active, limit to 1.  */
10748     if (dc->ss_active) {
10749         bound = 1;
10750     }
10751     dc->base.max_insns = MIN(dc->base.max_insns, bound);
10752 }
10753 
10754 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
10755 {
10756 }
10757 
10758 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
10759 {
10760     DisasContext *dc = container_of(dcbase, DisasContext, base);
10761     target_ulong pc_arg = dc->base.pc_next;
10762 
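    /*
     * With CF_PCREL the TB may run at more than one virtual address,
     * so record only the within-page offset of the PC here; the page
     * base is recovered from the current PC when state is restored.
     */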
10763     if (tb_cflags(dcbase->tb) & CF_PCREL) {
10764         pc_arg &= ~TARGET_PAGE_MASK;
10765     }
10766     tcg_gen_insn_start(pc_arg, 0, 0);
10767     dc->insn_start_updated = false;
10768 }
10769 
10770 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10771 {
10772     DisasContext *s = container_of(dcbase, DisasContext, base);
10773     CPUARMState *env = cpu_env(cpu);
10774     uint64_t pc = s->base.pc_next;
10775     uint32_t insn;
10776 
10777     /* Singlestep exceptions have the highest priority. */
10778     if (s->ss_active && !s->pstate_ss) {
10779         /* Singlestep state is Active-pending.
10780          * If we're in this state at the start of a TB then either
10781          *  a) we just took an exception to an EL which is being debugged
10782          *     and this is the first insn in the exception handler
10783          *  b) debug exceptions were masked and we just unmasked them
10784          *     without changing EL (eg by clearing PSTATE.D)
10785          * In either case we're going to take a swstep exception in the
10786          * "did not step an insn" case, and so the syndrome ISV and EX
10787          * bits should be zero.
10788          */
10789         assert(s->base.num_insns == 1);
10790         gen_swstep_exception(s, 0, 0);
10791         s->base.is_jmp = DISAS_NORETURN;
10792         s->base.pc_next = pc + 4;
10793         return;
10794     }
10795 
10796     if (pc & 3) {
10797         /*
10798          * PC alignment fault.  This has priority over the instruction abort
10799          * that we would receive from a translation fault via arm_ldl_code.
10800          * This should only be possible after an indirect branch, at the
10801          * start of the TB.
10802          */
10803         assert(s->base.num_insns == 1);
10804         gen_helper_exception_pc_alignment(tcg_env, tcg_constant_vaddr(pc));
10805         s->base.is_jmp = DISAS_NORETURN;
10806         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
10807         return;
10808     }
10809 
10810     s->pc_curr = pc;
10811     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
10812     s->insn = insn;
10813     s->base.pc_next = pc + 4;
10814 
10815     s->fp_access_checked = 0;
10816     s->sve_access_checked = 0;
10817 
10818     if (s->pstate_il) {
10819         /*
10820          * Illegal execution state. This has priority over BTI
10821          * exceptions, but comes after instruction abort exceptions.
10822          */
10823         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
10824         return;
10825     }
10826 
10827     if (dc_isar_feature(aa64_bti, s)) {
10828         if (s->base.num_insns == 1) {
10829             /* First insn can have btype set to non-zero.  */
10830             tcg_debug_assert(s->btype >= 0);
10831 
10832             /*
10833              * Note that the Branch Target Exception has fairly high
10834              * priority -- below debugging exceptions but above most
10835              * everything else.  This allows us to handle this now
10836              * instead of waiting until the insn is otherwise decoded.
10837              *
10838              * We can check all but the guarded page check here;
10839              * defer the latter to a helper.
10840              */
10841             if (s->btype != 0
10842                 && !btype_destination_ok(insn, s->bt, s->btype)) {
10843                 gen_helper_guarded_page_check(tcg_env);
10844             }
10845         } else {
10846             /* Not the first insn: btype must be 0.  */
10847             tcg_debug_assert(s->btype == 0);
10848         }
10849     }
10850 
10851     s->is_nonstreaming = false;
10852     if (s->sme_trap_nonstreaming) {
10853         disas_sme_fa64(s, insn);
10854     }
10855 
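    /*
     * Try each decodetree-generated decoder in turn; if none of them
     * claims the insn, it is an unallocated encoding.
     */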
10856     if (!disas_a64(s, insn) &&
10857         !disas_sme(s, insn) &&
10858         !disas_sve(s, insn)) {
10859         unallocated_encoding(s);
10860     }
10861 
10862     /*
10863      * After execution of most insns, btype is reset to 0.
10864      * Note that we set btype == -1 when the insn sets btype.
10865      */
10866     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
10867         reset_btype(s);
10868     }
10869 }
10870 
10871 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
10872 {
10873     DisasContext *dc = container_of(dcbase, DisasContext, base);
10874 
10875     if (unlikely(dc->ss_active)) {
10876         /* Note that this means single stepping WFI doesn't halt the CPU.
10877          * For conditional branch insns this is harmless unreachable code as
10878          * gen_goto_tb() has already handled emitting the debug exception
10879          * (and thus a tb-jump is not possible when singlestepping).
10880          */
10881         switch (dc->base.is_jmp) {
10882         default:
10883             gen_a64_update_pc(dc, 4);
10884             /* fall through */
10885         case DISAS_EXIT:
10886         case DISAS_JUMP:
10887             gen_step_complete_exception(dc);
10888             break;
10889         case DISAS_NORETURN:
10890             break;
10891         }
10892     } else {
10893         switch (dc->base.is_jmp) {
10894         case DISAS_NEXT:
10895         case DISAS_TOO_MANY:
10896             gen_goto_tb(dc, 1, 4);
10897             break;
10898         default:
10899         case DISAS_UPDATE_EXIT:
10900             gen_a64_update_pc(dc, 4);
10901             /* fall through */
10902         case DISAS_EXIT:
10903             tcg_gen_exit_tb(NULL, 0);
10904             break;
10905         case DISAS_UPDATE_NOCHAIN:
10906             gen_a64_update_pc(dc, 4);
10907             /* fall through */
10908         case DISAS_JUMP:
10909             tcg_gen_lookup_and_goto_ptr();
10910             break;
10911         case DISAS_NORETURN:
10912         case DISAS_SWI:
10913             break;
10914         case DISAS_WFE:
10915             gen_a64_update_pc(dc, 4);
10916             gen_helper_wfe(tcg_env);
10917             break;
10918         case DISAS_YIELD:
10919             gen_a64_update_pc(dc, 4);
10920             gen_helper_yield(tcg_env);
10921             break;
10922         case DISAS_WFI:
10923             /*
10924              * This is a special case because we don't want to just halt
10925              * the CPU if trying to debug across a WFI.
10926              */
10927             gen_a64_update_pc(dc, 4);
10928             gen_helper_wfi(tcg_env, tcg_constant_i32(4));
10929             /*
10930              * The helper doesn't necessarily throw an exception, but we
10931              * must go back to the main loop to check for interrupts anyway.
10932              */
10933             tcg_gen_exit_tb(NULL, 0);
10934             break;
10935         }
10936     }
10937 
10938     emit_delayed_exceptions(dc);
10939 }
10940 
10941 const TranslatorOps aarch64_translator_ops = {
10942     .init_disas_context = aarch64_tr_init_disas_context,
10943     .tb_start           = aarch64_tr_tb_start,
10944     .insn_start         = aarch64_tr_insn_start,
10945     .translate_insn     = aarch64_tr_translate_insn,
10946     .tb_stop            = aarch64_tr_tb_stop,
10947 };
10948