/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
#include "disas/disas.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"
#include "cpregs.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* Table-based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (e.g. SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
 */
static int get_a64_user_mem_index(DisasContext *s)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), cpu_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

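/*
 * Compute dest = PC-of-current-insn + diff.  With CF_PCREL the generated
 * code must not bake in an absolute PC, so the value is derived from the
 * cpu_pc global plus the compile-time-known distance between pc_curr and
 * the last point at which cpu_pc was written back (pc_save).
 */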
static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

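        /*
         * A worked example (illustrative): after the sextract above,
         * bits [63:56] of dst are copies of bit 55 of src.
         *  - src bit 55 == 0: dst[63:56] == 0x00, so AND-ing dst with
         *    src clears the tag byte while OR-ing leaves it unchanged.
         *  - src bit 55 == 1: dst[63:56] == 0xff, so AND-ing leaves the
         *    tag byte unchanged while OR-ing forces it to 0xff.
         * Hence AND applies the extension only to "positive" addresses
         * (the TBI0 range) and OR only to "negative" ones (TBI1).
         */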
        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register.  But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(cpu_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access.  This probes a single
 * address, the exact one specified.  The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      MemOp memop, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

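        /*
         * Pack the parameters that are known at translate time into an
         * MTEDESC descriptor word; the mte_check helper decodes these
         * again at run time.
         */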
        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, MemOp memop)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int total_size, MemOp single_mop)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(cpu_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments.  For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path.  A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 */

static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, cpu_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper.  */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + qc + operation using an out-of-line helper.  */
static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
                            int rm, gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), qc_ptr,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
}

/* Expand a 4-operand operation using an out-of-line helper.  */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
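    /*
     * QEMU's flag convention: cpu_ZF is zero iff Z is set, and bit 31 of
     * cpu_NF is N.  Splitting the 64-bit result gives NF the high half
     * (so NF bit 31 == result bit 63 == N), and OR-ing both halves into
     * ZF makes ZF nonzero iff any bit of the result is set.
     */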
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 result, flag, tmp;
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

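    /* With tmp zeroed, add2 forms the 128-bit sum t0 + t1: the low half
     * is the result and the high half (flag) is the carry-out. */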
    tcg_gen_movi_i64(tmp, 0);
    tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    gen_set_NZ64(result);

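    /* V: set iff t0 and t1 have the same sign while the result's sign
     * differs, i.e. bit 63 of (result ^ t0) & ~(t0 ^ t1). */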
    tcg_gen_xor_i64(flag, result, t0);
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_andc_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);

    tcg_gen_mov_i64(dest, result);
}

static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp = tcg_temp_new_i32();

    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_add64_CC(dest, t0, t1);
    } else {
        gen_add32_CC(dest, t0, t1);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 64 bit arithmetic */
    TCGv_i64 result, flag, tmp;

    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tcg_gen_sub_i64(result, t0, t1);

    gen_set_NZ64(result);

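    /* For subtraction, C is "no borrow": set iff t0 >= t1 unsigned. */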
    tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    tcg_gen_extrl_i64_i32(cpu_CF, flag);

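    /* V: set iff t0 and t1 have different signs and the result's sign
     * differs from t0's, i.e. bit 63 of (result ^ t0) & (t0 ^ t1). */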
    tcg_gen_xor_i64(flag, result, t0);
    tmp = tcg_temp_new_i64();
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_and_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);
    tcg_gen_mov_i64(dest, result);
}

static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 32 bit arithmetic */
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp;

    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_sub64_CC(dest, t0, t1);
    } else {
        gen_sub32_CC(dest, t0, t1);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
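        /* Two chained widening adds: first t0 + CF, then + t1; cf_64
         * accumulates the carry-out of the full t0 + t1 + CF sum. */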
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

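    /* Record the instruction-specific syndrome (ISS) so that a data
     * abort taken on this access can report the register, access size
     * and sign-extension details to the fault handler. */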
    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();

    tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i64 tmphi = tcg_temp_new_i64();
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);

        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);

        tmphi = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = true;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        assert(dc_isar_feature(aa64_sme, s));
        if (!sme_sm_enabled_check(s)) {
            goto fail_exit;
        }
    } else if (s->sve_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        goto fail_exit;
    }
    s->sve_access_checked = true;
    return fp_access_check(s);

 fail_exit:
    /* Assert that we only raise one exception per instruction. */
    assert(!s->sve_access_checked);
    s->sve_access_checked = true;
    return false;
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority.  This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        s->fp_access_checked = true;
        return sme_access_check(s);
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
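/* For example (illustrative): option == 0b010 with shift == 2 decodes as
 * UXTW #2, i.e. tcg_out = (uint64_t)(uint32_t)tcg_in << 2. */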
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is
 * intended to be used when the relevant bits for decode are too
 * awkwardly placed and switch/if based logic would be confusing and
 * deeply nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

static bool trans_B(DisasContext *s, arg_i *a)
{
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_BL(DisasContext *s, arg_i *a)
{
    gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_CBZ(DisasContext *s, arg_cbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
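    /* Branch not taken: fall through to the next insn (pc_curr + 4). */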
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_TBZ(DisasContext *s, arg_tbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
{
    reset_btype(s);
    if (a->cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(a->cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, a->imm);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, a->imm);
    }
    return true;
}

static void set_btype_for_br(DisasContext *s, int rn)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BR to {x16,x17} or !guard -> 1, else 3.  */
        set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
    }
}

static void set_btype_for_blr(DisasContext *s)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BLR sets BTYPE to 2, regardless of source guarded page.  */
        set_btype(s, 2);
    }
}

static bool trans_BR(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    set_btype_for_br(s, a->rn);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLR(DisasContext *s, arg_r *a)
{
    TCGv_i64 dst = cpu_reg(s, a->rn);
    TCGv_i64 lr = cpu_reg(s, 30);
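    /* If Rn is X30, dst aliases the link register we are about to
     * overwrite with the return address, so branch via a snapshot of
     * the old value. */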
1394     if (dst == lr) {
1395         TCGv_i64 tmp = tcg_temp_new_i64();
1396         tcg_gen_mov_i64(tmp, dst);
1397         dst = tmp;
1398     }
1399     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1400     gen_a64_set_pc(s, dst);
1401     set_btype_for_blr(s);
1402     s->base.is_jmp = DISAS_JUMP;
1403     return true;
1404 }
1405 
1406 static bool trans_RET(DisasContext *s, arg_r *a)
1407 {
1408     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1409     s->base.is_jmp = DISAS_JUMP;
1410     return true;
1411 }
1412 
1413 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1414                                    TCGv_i64 modifier, bool use_key_a)
1415 {
1416     TCGv_i64 truedst;
1417     /*
1418      * Return the branch target for a BRAA/RETA/etc, which is either
1419      * just the destination dst, or that value with the pauth check
1420      * done and the code removed from the high bits.
1421      */
1422     if (!s->pauth_active) {
1423         return dst;
1424     }
1425 
1426     truedst = tcg_temp_new_i64();
1427     if (use_key_a) {
1428         gen_helper_autia(truedst, cpu_env, dst, modifier);
1429     } else {
1430         gen_helper_autib(truedst, cpu_env, dst, modifier);
1431     }
1432     return truedst;
1433 }
1434 
1435 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1436 {
1437     TCGv_i64 dst;
1438 
1439     if (!dc_isar_feature(aa64_pauth, s)) {
1440         return false;
1441     }
1442 
1443     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1444     gen_a64_set_pc(s, dst);
1445     set_btype_for_br(s, a->rn);
1446     s->base.is_jmp = DISAS_JUMP;
1447     return true;
1448 }
1449 
1450 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1451 {
1452     TCGv_i64 dst, lr;
1453 
1454     if (!dc_isar_feature(aa64_pauth, s)) {
1455         return false;
1456     }
1457 
1458     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1459     lr = cpu_reg(s, 30);
1460     if (dst == lr) {
1461         TCGv_i64 tmp = tcg_temp_new_i64();
1462         tcg_gen_mov_i64(tmp, dst);
1463         dst = tmp;
1464     }
1465     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1466     gen_a64_set_pc(s, dst);
1467     set_btype_for_blr(s);
1468     s->base.is_jmp = DISAS_JUMP;
1469     return true;
1470 }
1471 
1472 static bool trans_RETA(DisasContext *s, arg_reta *a)
1473 {
1474     TCGv_i64 dst;
1475 
1476     dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1477     gen_a64_set_pc(s, dst);
1478     s->base.is_jmp = DISAS_JUMP;
1479     return true;
1480 }
1481 
1482 static bool trans_BRA(DisasContext *s, arg_bra *a)
1483 {
1484     TCGv_i64 dst;
1485 
1486     if (!dc_isar_feature(aa64_pauth, s)) {
1487         return false;
1488     }
1489     dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m);
1490     gen_a64_set_pc(s, dst);
1491     set_btype_for_br(s, a->rn);
1492     s->base.is_jmp = DISAS_JUMP;
1493     return true;
1494 }
1495 
1496 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1497 {
1498     TCGv_i64 dst, lr;
1499 
1500     if (!dc_isar_feature(aa64_pauth, s)) {
1501         return false;
1502     }
1503     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1504     lr = cpu_reg(s, 30);
1505     if (dst == lr) {
1506         TCGv_i64 tmp = tcg_temp_new_i64();
1507         tcg_gen_mov_i64(tmp, dst);
1508         dst = tmp;
1509     }
1510     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1511     gen_a64_set_pc(s, dst);
1512     set_btype_for_blr(s);
1513     s->base.is_jmp = DISAS_JUMP;
1514     return true;
1515 }
1516 
1517 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1518 {
1519     TCGv_i64 dst;
1520 
1521     if (s->current_el == 0) {
1522         return false;
1523     }
1524     if (s->fgt_eret) {
1525         gen_exception_insn_el(s, 0, EXCP_UDEF, 0, 2);
1526         return true;
1527     }
1528     dst = tcg_temp_new_i64();
1529     tcg_gen_ld_i64(dst, cpu_env,
1530                    offsetof(CPUARMState, elr_el[s->current_el]));
1531 
1532     translator_io_start(&s->base);
1533 
1534     gen_helper_exception_return(cpu_env, dst);
1535     /* Must exit loop to check un-masked IRQs */
1536     s->base.is_jmp = DISAS_EXIT;
1537     return true;
1538 }
1539 
1540 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1541 {
1542     TCGv_i64 dst;
1543 
1544     if (!dc_isar_feature(aa64_pauth, s)) {
1545         return false;
1546     }
1547     if (s->current_el == 0) {
1548         return false;
1549     }
1550     /* The FGT trap takes precedence over an auth trap. */
1551     if (s->fgt_eret) {
1552         gen_exception_insn_el(s, 0, EXCP_UDEF, a->m ? 3 : 2, 2);
1553         return true;
1554     }
1555     dst = tcg_temp_new_i64();
1556     tcg_gen_ld_i64(dst, cpu_env,
1557                    offsetof(CPUARMState, elr_el[s->current_el]));
1558 
1559     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1560 
1561     translator_io_start(&s->base);
1562 
1563     gen_helper_exception_return(cpu_env, dst);
1564     /* Must exit loop to check un-masked IRQs */
1565     s->base.is_jmp = DISAS_EXIT;
1566     return true;
1567 }
1568 
1569 /* HINT instruction group, including various allocated HINTs */
1570 static void handle_hint(DisasContext *s, uint32_t insn,
1571                         unsigned int op1, unsigned int op2, unsigned int crm)
1572 {
1573     unsigned int selector = crm << 3 | op2;
1574 
1575     if (op1 != 3) {
1576         unallocated_encoding(s);
1577         return;
1578     }
1579 
1580     switch (selector) {
1581     case 0b00000: /* NOP */
1582         break;
1583     case 0b00011: /* WFI */
1584         s->base.is_jmp = DISAS_WFI;
1585         break;
1586     case 0b00001: /* YIELD */
1587         /* When running in MTTCG we don't generate jumps to the yield and
1588          * WFE helpers as it won't affect the scheduling of other vCPUs.
1589          * If we wanted to more completely model WFE/SEV so we don't busy
1590          * spin unnecessarily we would need to do something more involved.
1591          */
1592         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1593             s->base.is_jmp = DISAS_YIELD;
1594         }
1595         break;
1596     case 0b00010: /* WFE */
1597         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1598             s->base.is_jmp = DISAS_WFE;
1599         }
1600         break;
1601     case 0b00100: /* SEV */
1602     case 0b00101: /* SEVL */
1603     case 0b00110: /* DGH */
1604         /* we treat all as NOP at least for now */
1605         break;
1606     case 0b00111: /* XPACLRI */
1607         if (s->pauth_active) {
1608             gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
1609         }
1610         break;
1611     case 0b01000: /* PACIA1716 */
1612         if (s->pauth_active) {
1613             gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1614         }
1615         break;
1616     case 0b01010: /* PACIB1716 */
1617         if (s->pauth_active) {
1618             gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1619         }
1620         break;
1621     case 0b01100: /* AUTIA1716 */
1622         if (s->pauth_active) {
1623             gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1624         }
1625         break;
1626     case 0b01110: /* AUTIB1716 */
1627         if (s->pauth_active) {
1628             gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1629         }
1630         break;
1631     case 0b10000: /* ESB */
1632         /* Without RAS, we must implement this as NOP. */
1633         if (dc_isar_feature(aa64_ras, s)) {
1634             /*
1635              * QEMU does not have a source of physical SErrors,
1636              * so we are only concerned with virtual SErrors.
1637              * The pseudocode in the Arm ARM for this case is
1638              *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
1639              *      AArch64.vESBOperation();
1640              * Most of the condition can be evaluated at translation time.
1641              * Test for EL2 present, and defer test for SEL2 to runtime.
1642              */
1643             if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
1644                 gen_helper_vesb(cpu_env);
1645             }
1646         }
1647         break;
1648     case 0b11000: /* PACIAZ */
1649         if (s->pauth_active) {
1650             gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
1651                              tcg_constant_i64(0));
1652         }
1653         break;
1654     case 0b11001: /* PACIASP */
1655         if (s->pauth_active) {
1656             gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1657         }
1658         break;
1659     case 0b11010: /* PACIBZ */
1660         if (s->pauth_active) {
1661             gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
1662                              tcg_constant_i64(0));
1663         }
1664         break;
1665     case 0b11011: /* PACIBSP */
1666         if (s->pauth_active) {
1667             gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1668         }
1669         break;
1670     case 0b11100: /* AUTIAZ */
1671         if (s->pauth_active) {
1672             gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
1673                              tcg_constant_i64(0));
1674         }
1675         break;
1676     case 0b11101: /* AUTIASP */
1677         if (s->pauth_active) {
1678             gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1679         }
1680         break;
1681     case 0b11110: /* AUTIBZ */
1682         if (s->pauth_active) {
1683             gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
1684                              tcg_constant_i64(0));
1685         }
1686         break;
1687     case 0b11111: /* AUTIBSP */
1688         if (s->pauth_active) {
1689             gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1690         }
1691         break;
1692     default:
1693         /* The default behaviour is specified as NOP-equivalent. */
1694         break;
1695     }
1696 }
1697 
1698 static void gen_clrex(DisasContext *s, uint32_t insn)
1699 {
1700     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1701 }
1702 
1703 /* CLREX, DSB, DMB, ISB */
1704 static void handle_sync(DisasContext *s, uint32_t insn,
1705                         unsigned int op1, unsigned int op2, unsigned int crm)
1706 {
1707     TCGBar bar;
1708 
1709     if (op1 != 3) {
1710         unallocated_encoding(s);
1711         return;
1712     }
1713 
1714     switch (op2) {
1715     case 2: /* CLREX */
1716         gen_clrex(s, insn);
1717         return;
1718     case 4: /* DSB */
1719     case 5: /* DMB */
1720         switch (crm & 3) {
1721         case 1: /* MBReqTypes_Reads */
1722             bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1723             break;
1724         case 2: /* MBReqTypes_Writes */
1725             bar = TCG_BAR_SC | TCG_MO_ST_ST;
1726             break;
1727         default: /* MBReqTypes_All */
1728             bar = TCG_BAR_SC | TCG_MO_ALL;
1729             break;
1730         }
1731         tcg_gen_mb(bar);
1732         return;
1733     case 6: /* ISB */
1734         /* We need to break the TB after this insn so that
1735          * self-modifying code is executed correctly and any
1736          * pending interrupts are taken immediately.
1737          */
1738         reset_btype(s);
1739         gen_goto_tb(s, 0, 4);
1740         return;
1741 
1742     case 7: /* SB */
1743         if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
1744             goto do_unallocated;
1745         }
1746         /*
1747          * TODO: There is no speculation barrier opcode for TCG;
1748          * MB and end the TB instead.
1749          */
1750         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1751         gen_goto_tb(s, 0, 4);
1752         return;
1753 
1754     default:
1755     do_unallocated:
1756         unallocated_encoding(s);
1757         return;
1758     }
1759 }
1760 
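/*
 * FEAT_FlagM2: XAFlag converts the FP condition flags from the
 * "external" format (as produced by e.g. x86 compares) to the Arm
 * format; AXFlag below performs the opposite conversion.
 */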
1761 static void gen_xaflag(void)
1762 {
1763     TCGv_i32 z = tcg_temp_new_i32();
1764 
1765     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1766 
1767     /*
1768      * (!C & !Z) << 31
1769      * (!(C | Z)) << 31
1770      * ~((C | Z) << 31)
1771      * ~-(C | Z)
1772      * (C | Z) - 1
1773      */
1774     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1775     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1776 
1777     /* !(Z & C) */
1778     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1779     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1780 
1781     /* (!C & Z) << 31 -> -(Z & ~C) */
1782     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1783     tcg_gen_neg_i32(cpu_VF, cpu_VF);
1784 
1785     /* C | Z */
1786     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1787 }
1788 
1789 static void gen_axflag(void)
1790 {
1791     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
1792     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
1793 
1794     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1795     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1796 
1797     tcg_gen_movi_i32(cpu_NF, 0);
1798     tcg_gen_movi_i32(cpu_VF, 0);
1799 }
1800 
1801 /* MSR (immediate) - move immediate to processor state field */
1802 static void handle_msr_i(DisasContext *s, uint32_t insn,
1803                          unsigned int op1, unsigned int op2, unsigned int crm)
1804 {
1805     int op = op1 << 3 | op2;
1806 
1807     /* End the TB by default, chaining is ok.  */
1808     s->base.is_jmp = DISAS_TOO_MANY;
1809 
1810     switch (op) {
1811     case 0x00: /* CFINV */
1812         if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
1813             goto do_unallocated;
1814         }
1815         tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1816         s->base.is_jmp = DISAS_NEXT;
1817         break;
1818 
1819     case 0x01: /* XAFlag */
1820         if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1821             goto do_unallocated;
1822         }
1823         gen_xaflag();
1824         s->base.is_jmp = DISAS_NEXT;
1825         break;
1826 
1827     case 0x02: /* AXFlag */
1828         if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1829             goto do_unallocated;
1830         }
1831         gen_axflag();
1832         s->base.is_jmp = DISAS_NEXT;
1833         break;
1834 
1835     case 0x03: /* UAO */
1836         if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
1837             goto do_unallocated;
1838         }
1839         if (crm & 1) {
1840             set_pstate_bits(PSTATE_UAO);
1841         } else {
1842             clear_pstate_bits(PSTATE_UAO);
1843         }
1844         gen_rebuild_hflags(s);
1845         break;
1846 
1847     case 0x04: /* PAN */
1848         if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
1849             goto do_unallocated;
1850         }
1851         if (crm & 1) {
1852             set_pstate_bits(PSTATE_PAN);
1853         } else {
1854             clear_pstate_bits(PSTATE_PAN);
1855         }
1856         gen_rebuild_hflags(s);
1857         break;
1858 
1859     case 0x05: /* SPSel */
1860         if (s->current_el == 0) {
1861             goto do_unallocated;
1862         }
1863         gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(crm & PSTATE_SP));
1864         break;
1865 
1866     case 0x19: /* SSBS */
1867         if (!dc_isar_feature(aa64_ssbs, s)) {
1868             goto do_unallocated;
1869         }
1870         if (crm & 1) {
1871             set_pstate_bits(PSTATE_SSBS);
1872         } else {
1873             clear_pstate_bits(PSTATE_SSBS);
1874         }
1875         /* Don't need to rebuild hflags since SSBS is a nop */
1876         break;
1877 
1878     case 0x1a: /* DIT */
1879         if (!dc_isar_feature(aa64_dit, s)) {
1880             goto do_unallocated;
1881         }
1882         if (crm & 1) {
1883             set_pstate_bits(PSTATE_DIT);
1884         } else {
1885             clear_pstate_bits(PSTATE_DIT);
1886         }
1887         /* There's no need to rebuild hflags because DIT is a nop */
1888         break;
1889 
1890     case 0x1e: /* DAIFSet */
1891         gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(crm));
1892         break;
1893 
1894     case 0x1f: /* DAIFClear */
1895         gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(crm));
1896         /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
1897         s->base.is_jmp = DISAS_UPDATE_EXIT;
1898         break;
1899 
1900     case 0x1c: /* TCO */
1901         if (dc_isar_feature(aa64_mte, s)) {
1902             /* Full MTE is enabled -- set the TCO bit as directed. */
1903             if (crm & 1) {
1904                 set_pstate_bits(PSTATE_TCO);
1905             } else {
1906                 clear_pstate_bits(PSTATE_TCO);
1907             }
1908             gen_rebuild_hflags(s);
1909             /* Many factors, including TCO, go into MTE_ACTIVE. */
1910             s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
1911         } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
1912             /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
1913             s->base.is_jmp = DISAS_NEXT;
1914         } else {
1915             goto do_unallocated;
1916         }
1917         break;
1918 
1919     case 0x1b: /* SVCR* */
1920         if (!dc_isar_feature(aa64_sme, s) || crm < 2 || crm > 7) {
1921             goto do_unallocated;
1922         }
1923         if (sme_access_check(s)) {
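            /*
             * MSR SVCR*: CRm<0> is the value to write; CRm<2:1>
             * selects which of PSTATE.SM (bit 0) and PSTATE.ZA
             * (bit 1) are written.
             */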
1924             int old = s->pstate_sm | (s->pstate_za << 1);
1925             int new = (crm & 1) * 3;
1926             int msk = (crm >> 1) & 3;
1927 
1928             if ((old ^ new) & msk) {
1929                 /* At least one bit changes. */
1930                 gen_helper_set_svcr(cpu_env, tcg_constant_i32(new),
1931                                     tcg_constant_i32(msk));
1932             } else {
1933                 s->base.is_jmp = DISAS_NEXT;
1934             }
1935         }
1936         break;
1937 
1938     default:
1939     do_unallocated:
1940         unallocated_encoding(s);
1941         return;
1942     }
1943 }
1944 
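/*
 * QEMU stores the flags in expanded form: NF and VF hold the flag
 * value in bit 31, ZF is zero iff the Z flag is set, and CF holds
 * the carry flag as 0 or 1.  These helpers convert between that
 * representation and the architectural NZCV[31:28] layout.
 */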
1945 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1946 {
1947     TCGv_i32 tmp = tcg_temp_new_i32();
1948     TCGv_i32 nzcv = tcg_temp_new_i32();
1949 
1950     /* build bit 31, N */
1951     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1952     /* build bit 30, Z */
1953     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1954     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1955     /* build bit 29, C */
1956     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1957     /* build bit 28, V */
1958     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1959     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1960     /* generate result */
1961     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1962 }
1963 
1964 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1965 {
1966     TCGv_i32 nzcv = tcg_temp_new_i32();
1967 
1968     /* take NZCV from R[t] */
1969     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1970 
1971     /* bit 31, N */
1972     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1973     /* bit 30, Z */
1974     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1975     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1976     /* bit 29, C */
1977     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1978     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1979     /* bit 28, V */
1980     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1981     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1982 }
1983 
1984 static void gen_sysreg_undef(DisasContext *s, bool isread,
1985                              uint8_t op0, uint8_t op1, uint8_t op2,
1986                              uint8_t crn, uint8_t crm, uint8_t rt)
1987 {
1988     /*
1989      * Generate code to emit an UNDEF with correct syndrome
1990      * information for a failed system register access.
1991      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
1992      * but if FEAT_IDST is implemented then read accesses to registers
1993      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
1994      * syndrome.
1995      */
1996     uint32_t syndrome;
1997 
1998     if (isread && dc_isar_feature(aa64_ids, s) &&
1999         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2000         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2001     } else {
2002         syndrome = syn_uncategorized();
2003     }
2004     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2005 }
2006 
2007 /* MRS - move from system register
2008  * MSR (register) - move to system register
2009  * SYS
2010  * SYSL
2011  * These are all essentially the same insn in 'read' and 'write'
2012  * versions, with varying op0 fields.
2013  */
2014 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
2015                        unsigned int op0, unsigned int op1, unsigned int op2,
2016                        unsigned int crn, unsigned int crm, unsigned int rt)
2017 {
2018     uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2019                                       crn, crm, op0, op1, op2);
2020     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2021     bool need_exit_tb = false;
2022     TCGv_ptr tcg_ri = NULL;
2023     TCGv_i64 tcg_rt;
2024 
2025     if (!ri) {
2026         /* Unknown register; this might be a guest error or an
2027          * unimplemented QEMU feature.
2028          */
2029         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2030                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2031                       isread ? "read" : "write", op0, op1, crn, crm, op2);
2032         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2033         return;
2034     }
2035 
2036     /* Check access permissions */
2037     if (!cp_access_ok(s->current_el, ri, isread)) {
2038         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2039         return;
2040     }
2041 
2042     if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2043         /* Emit code to perform further access permission checks at
2044          * runtime; this may result in an exception.
2045          */
2046         uint32_t syndrome;
2047 
2048         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2049         gen_a64_update_pc(s, 0);
2050         tcg_ri = tcg_temp_new_ptr();
2051         gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
2052                                        tcg_constant_i32(key),
2053                                        tcg_constant_i32(syndrome),
2054                                        tcg_constant_i32(isread));
2055     } else if (ri->type & ARM_CP_RAISES_EXC) {
2056         /*
2057          * The readfn or writefn might raise an exception;
2058          * synchronize the CPU state in case it does.
2059          */
2060         gen_a64_update_pc(s, 0);
2061     }
2062 
2063     /* Handle special cases first */
2064     switch (ri->type & ARM_CP_SPECIAL_MASK) {
2065     case 0:
2066         break;
2067     case ARM_CP_NOP:
2068         return;
2069     case ARM_CP_NZCV:
2070         tcg_rt = cpu_reg(s, rt);
2071         if (isread) {
2072             gen_get_nzcv(tcg_rt);
2073         } else {
2074             gen_set_nzcv(tcg_rt);
2075         }
2076         return;
2077     case ARM_CP_CURRENTEL:
2078         /* Reads as the current EL value from pstate, which is
2079          * guaranteed to be constant by the TB flags.
2080          */
2081         tcg_rt = cpu_reg(s, rt);
2082         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
2083         return;
2084     case ARM_CP_DC_ZVA:
2085         /* Writes clear the aligned block of memory which rt points into. */
2086         if (s->mte_active[0]) {
2087             int desc = 0;
2088 
2089             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2090             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2091             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2092 
2093             tcg_rt = tcg_temp_new_i64();
2094             gen_helper_mte_check_zva(tcg_rt, cpu_env,
2095                                      tcg_constant_i32(desc), cpu_reg(s, rt));
2096         } else {
2097             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2098         }
2099         gen_helper_dc_zva(cpu_env, tcg_rt);
2100         return;
2101     case ARM_CP_DC_GVA:
2102         {
2103             TCGv_i64 clean_addr, tag;
2104 
2105             /*
2106              * DC_GVA, like DC_ZVA, requires that we supply the original
2107              * pointer for an invalid page.  Probe that address first.
2108              */
2109             tcg_rt = cpu_reg(s, rt);
2110             clean_addr = clean_data_tbi(s, tcg_rt);
2111             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2112 
2113             if (s->ata) {
2114                 /* Extract the tag from the register to match STZGM.  */
2115                 tag = tcg_temp_new_i64();
2116                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2117                 gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
2118             }
2119         }
2120         return;
2121     case ARM_CP_DC_GZVA:
2122         {
2123             TCGv_i64 clean_addr, tag;
2124 
2125             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2126             tcg_rt = cpu_reg(s, rt);
2127             clean_addr = clean_data_tbi(s, tcg_rt);
2128             gen_helper_dc_zva(cpu_env, clean_addr);
2129 
2130             if (s->ata) {
2131                 /* Extract the tag from the register to match STZGM.  */
2132                 tag = tcg_temp_new_i64();
2133                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2134                 gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
2135             }
2136         }
2137         return;
2138     default:
2139         g_assert_not_reached();
2140     }
2141     if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2142         return;
2143     } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2144         return;
2145     } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2146         return;
2147     }
2148 
2149     if (ri->type & ARM_CP_IO) {
2150         /* I/O operations must end the TB here (whether read or write) */
2151         need_exit_tb = translator_io_start(&s->base);
2152     }
2153 
2154     tcg_rt = cpu_reg(s, rt);
2155 
2156     if (isread) {
2157         if (ri->type & ARM_CP_CONST) {
2158             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2159         } else if (ri->readfn) {
2160             if (!tcg_ri) {
2161                 tcg_ri = gen_lookup_cp_reg(key);
2162             }
2163             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tcg_ri);
2164         } else {
2165             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
2166         }
2167     } else {
2168         if (ri->type & ARM_CP_CONST) {
2169             /* If not forbidden by access permissions, treat as WI */
2170             return;
2171         } else if (ri->writefn) {
2172             if (!tcg_ri) {
2173                 tcg_ri = gen_lookup_cp_reg(key);
2174             }
2175             gen_helper_set_cp_reg64(cpu_env, tcg_ri, tcg_rt);
2176         } else {
2177             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
2178         }
2179     }
2180 
2181     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2182         /*
2183          * A write to any coprocessor register that ends a TB
2184          * must rebuild the hflags for the next TB.
2185          */
2186         gen_rebuild_hflags(s);
2187         /*
2188          * We default to ending the TB on a coprocessor register write,
2189          * but allow this to be suppressed by the register definition
2190          * (usually only necessary to work around guest bugs).
2191          */
2192         need_exit_tb = true;
2193     }
2194     if (need_exit_tb) {
2195         s->base.is_jmp = DISAS_UPDATE_EXIT;
2196     }
2197 }
2198 
2199 /* System
2200  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
2201  * +---------------------+---+-----+-----+-------+-------+-----+------+
2202  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
2203  * +---------------------+---+-----+-----+-------+-------+-----+------+
2204  */
2205 static void disas_system(DisasContext *s, uint32_t insn)
2206 {
2207     unsigned int l, op0, op1, crn, crm, op2, rt;
2208     l = extract32(insn, 21, 1);
2209     op0 = extract32(insn, 19, 2);
2210     op1 = extract32(insn, 16, 3);
2211     crn = extract32(insn, 12, 4);
2212     crm = extract32(insn, 8, 4);
2213     op2 = extract32(insn, 5, 3);
2214     rt = extract32(insn, 0, 5);
2215 
2216     if (op0 == 0) {
2217         if (l || rt != 31) {
2218             unallocated_encoding(s);
2219             return;
2220         }
2221         switch (crn) {
2222         case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
2223             handle_hint(s, insn, op1, op2, crm);
2224             break;
2225         case 3: /* CLREX, DSB, DMB, ISB */
2226             handle_sync(s, insn, op1, op2, crm);
2227             break;
2228         case 4: /* MSR (immediate) */
2229             handle_msr_i(s, insn, op1, op2, crm);
2230             break;
2231         default:
2232             unallocated_encoding(s);
2233             break;
2234         }
2235         return;
2236     }
2237     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
2238 }
2239 
2240 /* Exception generation
2241  *
2242  *  31             24 23 21 20                     5 4   2 1  0
2243  * +-----------------+-----+------------------------+-----+----+
2244  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
2245  * +-----------------+-----+------------------------+-----+----+
2246  */
2247 static void disas_exc(DisasContext *s, uint32_t insn)
2248 {
2249     int opc = extract32(insn, 21, 3);
2250     int op2_ll = extract32(insn, 0, 5);
2251     int imm16 = extract32(insn, 5, 16);
2252     uint32_t syndrome;
2253 
2254     switch (opc) {
2255     case 0:
2256         /* For SVC, HVC and SMC we advance the single-step state
2257          * machine before taking the exception. This is architecturally
2258          * mandated, to ensure that single-stepping a system call
2259          * instruction works properly.
2260          */
2261         switch (op2_ll) {
2262         case 1:                                                     /* SVC */
2263             syndrome = syn_aa64_svc(imm16);
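            /* Fine-grained trap of SVC to EL2 (HFGITR_EL2.SVC_EL0/SVC_EL1). */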
2264             if (s->fgt_svc) {
2265                 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2266                 break;
2267             }
2268             gen_ss_advance(s);
2269             gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2270             break;
2271         case 2:                                                     /* HVC */
2272             if (s->current_el == 0) {
2273                 unallocated_encoding(s);
2274                 break;
2275             }
2276             /* The pre-HVC helper handles cases where HVC gets trapped
2277              * as an undefined insn by runtime configuration.
2278              */
2279             gen_a64_update_pc(s, 0);
2280             gen_helper_pre_hvc(cpu_env);
2281             gen_ss_advance(s);
2282             gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(imm16), 2);
2283             break;
2284         case 3:                                                     /* SMC */
2285             if (s->current_el == 0) {
2286                 unallocated_encoding(s);
2287                 break;
2288             }
2289             gen_a64_update_pc(s, 0);
2290             gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(imm16)));
2291             gen_ss_advance(s);
2292             gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(imm16), 3);
2293             break;
2294         default:
2295             unallocated_encoding(s);
2296             break;
2297         }
2298         break;
2299     case 1:
2300         if (op2_ll != 0) {
2301             unallocated_encoding(s);
2302             break;
2303         }
2304         /* BRK */
2305         gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
2306         break;
2307     case 2:
2308         if (op2_ll != 0) {
2309             unallocated_encoding(s);
2310             break;
2311         }
2312         /* HLT. This has two purposes.
2313          * Architecturally, it is an external halting debug instruction.
2314          * Since QEMU doesn't implement external debug, we treat this as
2315          * required when halting debug is disabled: it will UNDEF.
2316          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2317          */
2318         if (semihosting_enabled(s->current_el == 0) && imm16 == 0xf000) {
2319             gen_exception_internal_insn(s, EXCP_SEMIHOST);
2320         } else {
2321             unallocated_encoding(s);
2322         }
2323         break;
2324     case 5:
2325         if (op2_ll < 1 || op2_ll > 3) {
2326             unallocated_encoding(s);
2327             break;
2328         }
2329         /* DCPS1, DCPS2, DCPS3 */
2330         unallocated_encoding(s);
2331         break;
2332     default:
2333         unallocated_encoding(s);
2334         break;
2335     }
2336 }
2337 
2338 /* Branches, exception generating and system instructions */
2339 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2340 {
2341     switch (extract32(insn, 25, 7)) {
2342     case 0x6a: /* Exception generation / System */
2343         if (insn & (1 << 24)) {
2344             if (extract32(insn, 22, 2) == 0) {
2345                 disas_system(s, insn);
2346             } else {
2347                 unallocated_encoding(s);
2348             }
2349         } else {
2350             disas_exc(s, insn);
2351         }
2352         break;
2353     default:
2354         unallocated_encoding(s);
2355         break;
2356     }
2357 }
2358 
2359 /*
2360  * Load/Store exclusive instructions are implemented by remembering
2361  * the value/address loaded, and seeing if these are the same
2362  * when the store is performed. This is not actually the architecturally
2363  * mandated semantics, but it works for typical guest code sequences
2364  * and avoids having to monitor regular stores.
2365  *
2366  * The store exclusive uses the atomic cmpxchg primitives to avoid
2367  * races in multi-threaded linux-user and when MTTCG softmmu is
2368  * enabled.
2369  */
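/*
 * For example, a typical guest atomic-increment loop that this
 * scheme handles correctly:
 *
 *   retry:
 *     ldxr  x0, [x1]
 *     add   x0, x0, #1
 *     stxr  w2, x0, [x1]
 *     cbnz  w2, retry
 */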
2370 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2371                                int size, bool is_pair)
2372 {
2373     int idx = get_mem_index(s);
2374     TCGv_i64 dirty_addr, clean_addr;
2375     MemOp memop;
2376 
2377     /*
2378      * For pairs:
2379      * if size == 2, the operation is single-copy atomic for the doubleword.
2380      * if size == 3, the operation is single-copy atomic for *each* doubleword,
2381      * not the entire quadword; however, it must be quadword aligned.
2382      */
2383     memop = size + is_pair;
2384     if (memop == MO_128) {
2385         memop = finalize_memop_atom(s, MO_128 | MO_ALIGN,
2386                                     MO_ATOM_IFALIGN_PAIR);
2387     } else {
2388         memop = finalize_memop(s, memop | MO_ALIGN);
2389     }
2390 
2391     s->is_ldex = true;
2392     dirty_addr = cpu_reg_sp(s, rn);
2393     clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2394 
2395     g_assert(size <= 3);
2396     if (is_pair) {
2397         g_assert(size >= 2);
2398         if (size == 2) {
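            /* Load the pair as a single 64-bit access and split it. */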
2399             tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2400             if (s->be_data == MO_LE) {
2401                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2402                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2403             } else {
2404                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2405                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2406             }
2407         } else {
2408             TCGv_i128 t16 = tcg_temp_new_i128();
2409 
2410             tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2411 
2412             if (s->be_data == MO_LE) {
2413                 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2414                                       cpu_exclusive_high, t16);
2415             } else {
2416                 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2417                                       cpu_exclusive_val, t16);
2418             }
2419             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2420             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2421         }
2422     } else {
2423         tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2424         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2425     }
2426     tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2427 }
2428 
2429 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2430                                 int rn, int size, int is_pair)
2431 {
2432     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2433      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2434      *     [addr] = {Rt};
2435      *     if (is_pair) {
2436      *         [addr + datasize] = {Rt2};
2437      *     }
2438      *     {Rd} = 0;
2439      * } else {
2440      *     {Rd} = 1;
2441      * }
2442      * env->exclusive_addr = -1;
2443      */
2444     TCGLabel *fail_label = gen_new_label();
2445     TCGLabel *done_label = gen_new_label();
2446     TCGv_i64 tmp, dirty_addr, clean_addr;
2447     MemOp memop;
2448 
2449     memop = (size + is_pair) | MO_ALIGN;
2450     memop = finalize_memop(s, memop);
2451 
2452     dirty_addr = cpu_reg_sp(s, rn);
2453     clean_addr = gen_mte_check1(s, dirty_addr, true, rn != 31, memop);
2454 
2455     tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2456 
2457     tmp = tcg_temp_new_i64();
2458     if (is_pair) {
2459         if (size == 2) {
2460             if (s->be_data == MO_LE) {
2461                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2462             } else {
2463                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2464             }
2465             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2466                                        cpu_exclusive_val, tmp,
2467                                        get_mem_index(s), memop);
2468             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2469         } else {
2470             TCGv_i128 t16 = tcg_temp_new_i128();
2471             TCGv_i128 c16 = tcg_temp_new_i128();
2472             TCGv_i64 a, b;
2473 
2474             if (s->be_data == MO_LE) {
2475                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2476                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2477                                         cpu_exclusive_high);
2478             } else {
2479                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2480                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2481                                         cpu_exclusive_val);
2482             }
2483 
2484             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
2485                                         get_mem_index(s), memop);
2486 
2487             a = tcg_temp_new_i64();
2488             b = tcg_temp_new_i64();
2489             if (s->be_data == MO_LE) {
2490                 tcg_gen_extr_i128_i64(a, b, t16);
2491             } else {
2492                 tcg_gen_extr_i128_i64(b, a, t16);
2493             }
2494 
2495             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
2496             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
2497             tcg_gen_or_i64(tmp, a, b);
2498 
2499             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
2500         }
2501     } else {
2502         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2503                                    cpu_reg(s, rt), get_mem_index(s), memop);
2504         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2505     }
2506     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2507     tcg_gen_br(done_label);
2508 
2509     gen_set_label(fail_label);
2510     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2511     gen_set_label(done_label);
2512     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2513 }
2514 
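/*
 * Compare and swap: compare Rs with [Rn]; if equal, store Rt.
 * Either way Rs receives the value that was read from memory.
 */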
2515 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2516                                  int rn, int size)
2517 {
2518     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2519     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2520     int memidx = get_mem_index(s);
2521     TCGv_i64 clean_addr;
2522     MemOp memop;
2523 
2524     if (rn == 31) {
2525         gen_check_sp_alignment(s);
2526     }
2527     memop = finalize_memop(s, size | MO_ALIGN);
2528     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2529     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
2530                                memidx, memop);
2531 }
2532 
2533 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2534                                       int rn, int size)
2535 {
2536     TCGv_i64 s1 = cpu_reg(s, rs);
2537     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2538     TCGv_i64 t1 = cpu_reg(s, rt);
2539     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2540     TCGv_i64 clean_addr;
2541     int memidx = get_mem_index(s);
2542     MemOp memop;
2543 
2544     if (rn == 31) {
2545         gen_check_sp_alignment(s);
2546     }
2547 
2548     /* This is a single atomic access, despite the "pair". */
2549     memop = finalize_memop(s, (size + 1) | MO_ALIGN);
2550     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2551 
2552     if (size == 2) {
2553         TCGv_i64 cmp = tcg_temp_new_i64();
2554         TCGv_i64 val = tcg_temp_new_i64();
2555 
2556         if (s->be_data == MO_LE) {
2557             tcg_gen_concat32_i64(val, t1, t2);
2558             tcg_gen_concat32_i64(cmp, s1, s2);
2559         } else {
2560             tcg_gen_concat32_i64(val, t2, t1);
2561             tcg_gen_concat32_i64(cmp, s2, s1);
2562         }
2563 
2564         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
2565 
2566         if (s->be_data == MO_LE) {
2567             tcg_gen_extr32_i64(s1, s2, cmp);
2568         } else {
2569             tcg_gen_extr32_i64(s2, s1, cmp);
2570         }
2571     } else {
2572         TCGv_i128 cmp = tcg_temp_new_i128();
2573         TCGv_i128 val = tcg_temp_new_i128();
2574 
2575         if (s->be_data == MO_LE) {
2576             tcg_gen_concat_i64_i128(val, t1, t2);
2577             tcg_gen_concat_i64_i128(cmp, s1, s2);
2578         } else {
2579             tcg_gen_concat_i64_i128(val, t2, t1);
2580             tcg_gen_concat_i64_i128(cmp, s2, s1);
2581         }
2582 
2583         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
2584 
2585         if (s->be_data == MO_LE) {
2586             tcg_gen_extr_i128_i64(s1, s2, cmp);
2587         } else {
2588             tcg_gen_extr_i128_i64(s2, s1, cmp);
2589         }
2590     }
2591 }
2592 
2593 /* Compute the Sixty-Four bit (SF) register size. This logic is derived
2594  * from the ARMv8 specs for LDR (Shared decode for all encodings).
2595  */
2596 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
2597 {
2598     int opc0 = extract32(opc, 0, 1);
2599     int regsize;
2600 
2601     if (is_signed) {
2602         regsize = opc0 ? 32 : 64;
2603     } else {
2604         regsize = size == 3 ? 64 : 32;
2605     }
2606     return regsize == 64;
2607 }
2608 
2609 /* Load/store exclusive
2610  *
2611  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2612  * +-----+-------------+----+---+----+------+----+-------+------+------+
2613  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2614  * +-----+-------------+----+---+----+------+----+-------+------+------+
2615  *
2616  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2617  *   L: 0 -> store, 1 -> load
2618  *  o2: 0 -> exclusive, 1 -> not
2619  *  o1: 0 -> single register, 1 -> register pair
2620  *  o0: 1 -> load-acquire/store-release, 0 -> not
2621  */
2622 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2623 {
2624     int rt = extract32(insn, 0, 5);
2625     int rn = extract32(insn, 5, 5);
2626     int rt2 = extract32(insn, 10, 5);
2627     int rs = extract32(insn, 16, 5);
2628     int is_lasr = extract32(insn, 15, 1);
2629     int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2630     int size = extract32(insn, 30, 2);
2631     TCGv_i64 clean_addr;
2632     MemOp memop;
2633 
2634     switch (o2_L_o1_o0) {
2635     case 0x0: /* STXR */
2636     case 0x1: /* STLXR */
2637         if (rn == 31) {
2638             gen_check_sp_alignment(s);
2639         }
2640         if (is_lasr) {
2641             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2642         }
2643         gen_store_exclusive(s, rs, rt, rt2, rn, size, false);
2644         return;
2645 
2646     case 0x4: /* LDXR */
2647     case 0x5: /* LDAXR */
2648         if (rn == 31) {
2649             gen_check_sp_alignment(s);
2650         }
2651         gen_load_exclusive(s, rt, rt2, rn, size, false);
2652         if (is_lasr) {
2653             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2654         }
2655         return;
2656 
2657     case 0x8: /* STLLR */
2658         if (!dc_isar_feature(aa64_lor, s)) {
2659             break;
2660         }
2661         /* StoreLORelease is the same as Store-Release for QEMU.  */
2662         /* fall through */
2663     case 0x9: /* STLR */
2664         /* Generate ISS for non-exclusive accesses including LASR.  */
2665         if (rn == 31) {
2666             gen_check_sp_alignment(s);
2667         }
2668         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2669         /* TODO: ARMv8.4-LSE SCTLR.nAA */
2670         memop = finalize_memop(s, size | MO_ALIGN);
2671         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2672                                     true, rn != 31, memop);
2673         do_gpr_st(s, cpu_reg(s, rt), clean_addr, memop, true, rt,
2674                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2675         return;
2676 
2677     case 0xc: /* LDLAR */
2678         if (!dc_isar_feature(aa64_lor, s)) {
2679             break;
2680         }
2681         /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2682         /* fall through */
2683     case 0xd: /* LDAR */
2684         /* Generate ISS for non-exclusive accesses including LASR.  */
2685         if (rn == 31) {
2686             gen_check_sp_alignment(s);
2687         }
2688         /* TODO: ARMv8.4-LSE SCTLR.nAA */
2689         memop = finalize_memop(s, size | MO_ALIGN);
2690         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2691                                     false, rn != 31, memop);
2692         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, memop, false, true,
2693                   rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2694         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2695         return;
2696 
2697     case 0x2: case 0x3: /* CASP / STXP */
2698         if (size & 2) { /* STXP / STLXP */
2699             if (rn == 31) {
2700                 gen_check_sp_alignment(s);
2701             }
2702             if (is_lasr) {
2703                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2704             }
2705             gen_store_exclusive(s, rs, rt, rt2, rn, size, true);
2706             return;
2707         }
2708         if (rt2 == 31
2709             && ((rt | rs) & 1) == 0
2710             && dc_isar_feature(aa64_atomics, s)) {
2711             /* CASP / CASPL */
2712             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2713             return;
2714         }
2715         break;
2716 
2717     case 0x6: case 0x7: /* CASPA / LDXP */
2718         if (size & 2) { /* LDXP / LDAXP */
2719             if (rn == 31) {
2720                 gen_check_sp_alignment(s);
2721             }
2722             gen_load_exclusive(s, rt, rt2, rn, size, true);
2723             if (is_lasr) {
2724                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2725             }
2726             return;
2727         }
2728         if (rt2 == 31
2729             && ((rt | rs) & 1) == 0
2730             && dc_isar_feature(aa64_atomics, s)) {
2731             /* CASPA / CASPAL */
2732             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2733             return;
2734         }
2735         break;
2736 
2737     case 0xa: /* CAS */
2738     case 0xb: /* CASL */
2739     case 0xe: /* CASA */
2740     case 0xf: /* CASAL */
2741         if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
2742             gen_compare_and_swap(s, rs, rt, rn, size);
2743             return;
2744         }
2745         break;
2746     }
2747     unallocated_encoding(s);
2748 }
2749 
2750 /*
2751  * Load register (literal)
2752  *
2753  *  31 30 29   27  26 25 24 23                5 4     0
2754  * +-----+-------+---+-----+-------------------+-------+
2755  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2756  * +-----+-------+---+-----+-------------------+-------+
2757  *
2758  * V: 1 -> vector (simd/fp)
2759  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2760  *                   10-> 32 bit signed, 11 -> prefetch
2761  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2762  */
2763 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2764 {
2765     int rt = extract32(insn, 0, 5);
2766     int64_t imm = sextract32(insn, 5, 19) << 2;
2767     bool is_vector = extract32(insn, 26, 1);
2768     int opc = extract32(insn, 30, 2);
2769     bool is_signed = false;
2770     int size = 2;
2771     TCGv_i64 tcg_rt, clean_addr;
2772     MemOp memop;
2773 
2774     if (is_vector) {
2775         if (opc == 3) {
2776             unallocated_encoding(s);
2777             return;
2778         }
2779         size = 2 + opc;
2780         if (!fp_access_check(s)) {
2781             return;
2782         }
2783         memop = finalize_memop_asimd(s, size);
2784     } else {
2785         if (opc == 3) {
2786             /* PRFM (literal): prefetch */
2787             return;
2788         }
2789         size = 2 + extract32(opc, 0, 1);
2790         is_signed = extract32(opc, 1, 1);
2791         memop = finalize_memop(s, size + is_signed * MO_SIGN);
2792     }
2793 
2794     tcg_rt = cpu_reg(s, rt);
2795 
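    /* The literal is addressed PC-relative: PC + sign-extended imm19 * 4. */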
2796     clean_addr = tcg_temp_new_i64();
2797     gen_pc_plus_diff(s, clean_addr, imm);
2798 
2799     if (is_vector) {
2800         do_fp_ld(s, rt, clean_addr, memop);
2801     } else {
2802         /* Only unsigned 32-bit loads target 32-bit registers.  */
2803         bool iss_sf = opc != 0;
2804         do_gpr_ld(s, tcg_rt, clean_addr, memop, false, true, rt, iss_sf, false);
2805     }
2806 }
2807 
2808 /*
2809  * LDNP (Load Pair - non-temporal hint)
2810  * LDP (Load Pair - non vector)
2811  * LDPSW (Load Pair Signed Word - non vector)
2812  * STNP (Store Pair - non-temporal hint)
2813  * STP (Store Pair - non vector)
2814  * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2815  * LDP (Load Pair of SIMD&FP)
2816  * STNP (Store Pair of SIMD&FP - non-temporal hint)
2817  * STP (Store Pair of SIMD&FP)
2818  *
2819  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2820  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2821  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2822  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2823  *
2824  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2825  *      LDPSW/STGP               01
2826  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2827  *   V: 0 -> GPR, 1 -> Vector
2828  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2829  *      10 -> signed offset, 11 -> pre-index
2830  *   L: 0 -> Store 1 -> Load
2831  *
2832  * Rt, Rt2 = GPR or SIMD registers to be stored
2833  * Rn = general purpose register containing address
2834  * imm7 = signed offset (multiple of 4 or 8 depending on size)
2835  */
2836 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2837 {
2838     int rt = extract32(insn, 0, 5);
2839     int rn = extract32(insn, 5, 5);
2840     int rt2 = extract32(insn, 10, 5);
2841     uint64_t offset = sextract64(insn, 15, 7);
2842     int index = extract32(insn, 23, 2);
2843     bool is_vector = extract32(insn, 26, 1);
2844     bool is_load = extract32(insn, 22, 1);
2845     int opc = extract32(insn, 30, 2);
2846     bool is_signed = false;
2847     bool postindex = false;
2848     bool wback = false;
2849     bool set_tag = false;
2850     TCGv_i64 clean_addr, dirty_addr;
2851     MemOp mop;
2852     int size;
2853 
2854     if (opc == 3) {
2855         unallocated_encoding(s);
2856         return;
2857     }
2858 
2859     if (is_vector) {
2860         size = 2 + opc;
2861     } else if (opc == 1 && !is_load) {
2862         /* STGP */
2863         if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) {
2864             unallocated_encoding(s);
2865             return;
2866         }
2867         size = 3;
2868         set_tag = true;
2869     } else {
2870         size = 2 + extract32(opc, 1, 1);
2871         is_signed = extract32(opc, 0, 1);
2872         if (!is_load && is_signed) {
2873             unallocated_encoding(s);
2874             return;
2875         }
2876     }
2877 
2878     switch (index) {
2879     case 1: /* post-index */
2880         postindex = true;
2881         wback = true;
2882         break;
2883     case 0:
2884         /* Signed offset with "non-temporal" hint. Since we don't emulate
2885          * caches, we don't care about hints to the cache system about
2886          * data access patterns, and handle this identically to a plain
2887          * signed offset.
2888          */
2889         if (is_signed) {
2890             /* There is no non-temporal-hint version of LDPSW */
2891             unallocated_encoding(s);
2892             return;
2893         }
2894         postindex = false;
2895         break;
2896     case 2: /* signed offset, rn not updated */
2897         postindex = false;
2898         break;
2899     case 3: /* pre-index */
2900         postindex = false;
2901         wback = true;
2902         break;
2903     }
2904 
2905     if (is_vector && !fp_access_check(s)) {
2906         return;
2907     }
2908 
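    /* Scale the imm7 offset by the element size (or the tag granule for STGP). */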
2909     offset <<= (set_tag ? LOG2_TAG_GRANULE : size);
2910 
2911     if (rn == 31) {
2912         gen_check_sp_alignment(s);
2913     }
2914 
2915     dirty_addr = read_cpu_reg_sp(s, rn, 1);
2916     if (!postindex) {
2917         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2918     }
2919 
2920     if (set_tag) {
2921         if (!s->ata) {
2922             /*
2923              * TODO: We could rely on the stores below, at least for
2924              * system mode, if we arrange to add MO_ALIGN_16.
2925              */
2926             gen_helper_stg_stub(cpu_env, dirty_addr);
2927         } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2928             gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
2929         } else {
2930             gen_helper_stg(cpu_env, dirty_addr, dirty_addr);
2931         }
2932     }
2933 
2934     if (is_vector) {
2935         mop = finalize_memop_asimd(s, size);
2936     } else {
2937         mop = finalize_memop(s, size);
2938     }
2939     clean_addr = gen_mte_checkN(s, dirty_addr, !is_load,
2940                                 (wback || rn != 31) && !set_tag,
2941                                 2 << size, mop);
2942 
2943     if (is_vector) {
2944         /* LSE2 does not merge FP pairs; leave these as separate operations. */
2945         if (is_load) {
2946             do_fp_ld(s, rt, clean_addr, mop);
2947         } else {
2948             do_fp_st(s, rt, clean_addr, mop);
2949         }
2950         tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2951         if (is_load) {
2952             do_fp_ld(s, rt2, clean_addr, mop);
2953         } else {
2954             do_fp_st(s, rt2, clean_addr, mop);
2955         }
2956     } else {
2957         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2958         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2959 
2960         /*
2961          * We built mop above for the single logical access -- rebuild it
2962          * now for the paired operation.
2963          *
2964          * With LSE2, non-sign-extending pairs are treated atomically if
2965          * aligned, and if unaligned one of the pair will be completely
2966          * within a 16-byte block and that element will be atomic.
2967          * Otherwise each element is separately atomic.
2968          * In all cases, issue one operation with the correct atomicity.
2969          *
2970          * This treats sign-extending loads like zero-extending loads,
2971          * since that reuses the most code below.
2972          */
2973         mop = size + 1;
2974         if (s->align_mem) {
2975             mop |= (size == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
2976         }
2977         mop = finalize_memop_pair(s, mop);
2978 
2979         if (is_load) {
2980             if (size == 2) {
2981                 int o2 = s->be_data == MO_LE ? 32 : 0;
2982                 int o1 = o2 ^ 32;
2983 
2984                 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
2985                 if (is_signed) {
2986                     tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
2987                     tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
2988                 } else {
2989                     tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
2990                     tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
2991                 }
2992             } else {
2993                 TCGv_i128 tmp = tcg_temp_new_i128();
2994 
2995                 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
2996                 if (s->be_data == MO_LE) {
2997                     tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
2998                 } else {
2999                     tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3000                 }
3001             }
3002         } else {
3003             if (size == 2) {
3004                 TCGv_i64 tmp = tcg_temp_new_i64();
3005 
3006                 if (s->be_data == MO_LE) {
3007                     tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3008                 } else {
3009                     tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3010                 }
3011                 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3012             } else {
3013                 TCGv_i128 tmp = tcg_temp_new_i128();
3014 
3015                 if (s->be_data == MO_LE) {
3016                     tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3017                 } else {
3018                     tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3019                 }
3020                 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3021             }
3022         }
3023     }
3024 
3025     if (wback) {
3026         if (postindex) {
3027             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3028         }
3029         tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3030     }
3031 }
3032 
3033 /*
3034  * Load/store (immediate post-indexed)
3035  * Load/store (immediate pre-indexed)
3036  * Load/store (unscaled immediate)
3037  *
3038  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
3039  * +----+-------+---+-----+-----+---+--------+-----+------+------+
3040  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
3041  * +----+-------+---+-----+-----+---+--------+-----+------+------+
3042  *
3043  * idx = 01 -> post-indexed, 11 -> pre-indexed, 00 -> unscaled imm. (no writeback)
3044  *       10 -> unprivileged
3045  * V = 0 -> non-vector
3046  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
3047  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3048  */
3049 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
3050                                 int opc,
3051                                 int size,
3052                                 int rt,
3053                                 bool is_vector)
3054 {
3055     int rn = extract32(insn, 5, 5);
3056     int imm9 = sextract32(insn, 12, 9);
3057     int idx = extract32(insn, 10, 2);
3058     bool is_signed = false;
3059     bool is_store = false;
3060     bool is_extended = false;
3061     bool is_unpriv = (idx == 2);
3062     bool iss_valid;
3063     bool post_index;
3064     bool writeback;
3065     int memidx;
3066     MemOp memop;
3067     TCGv_i64 clean_addr, dirty_addr;
3068 
3069     if (is_vector) {
3070         size |= (opc & 2) << 1;
3071         if (size > 4 || is_unpriv) {
3072             unallocated_encoding(s);
3073             return;
3074         }
3075         is_store = ((opc & 1) == 0);
3076         if (!fp_access_check(s)) {
3077             return;
3078         }
3079         memop = finalize_memop_asimd(s, size);
3080     } else {
3081         if (size == 3 && opc == 2) {
3082             /* PRFM - prefetch */
3083             if (idx != 0) {
3084                 unallocated_encoding(s);
3085                 return;
3086             }
3087             return;
3088         }
3089         if (opc == 3 && size > 1) {
3090             unallocated_encoding(s);
3091             return;
3092         }
3093         is_store = (opc == 0);
3094         is_signed = !is_store && extract32(opc, 1, 1);
3095         is_extended = (size < 3) && extract32(opc, 0, 1);
3096         memop = finalize_memop(s, size + is_signed * MO_SIGN);
3097     }
3098 
3099     switch (idx) {
3100     case 0:
3101     case 2:
3102         post_index = false;
3103         writeback = false;
3104         break;
3105     case 1:
3106         post_index = true;
3107         writeback = true;
3108         break;
3109     case 3:
3110         post_index = false;
3111         writeback = true;
3112         break;
3113     default:
3114         g_assert_not_reached();
3115     }
3116 
3117     iss_valid = !is_vector && !writeback;
3118 
3119     if (rn == 31) {
3120         gen_check_sp_alignment(s);
3121     }
3122 
3123     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3124     if (!post_index) {
3125         tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3126     }
3127 
3128     memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
3129 
3130     clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store,
3131                                        writeback || rn != 31,
3132                                        size, is_unpriv, memidx);
3133 
3134     if (is_vector) {
3135         if (is_store) {
3136             do_fp_st(s, rt, clean_addr, memop);
3137         } else {
3138             do_fp_ld(s, rt, clean_addr, memop);
3139         }
3140     } else {
3141         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3142         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3143 
3144         if (is_store) {
3145             do_gpr_st_memidx(s, tcg_rt, clean_addr, memop, memidx,
3146                              iss_valid, rt, iss_sf, false);
3147         } else {
3148             do_gpr_ld_memidx(s, tcg_rt, clean_addr, memop,
3149                              is_extended, memidx,
3150                              iss_valid, rt, iss_sf, false);
3151         }
3152     }
3153 
3154     if (writeback) {
3155         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3156         if (post_index) {
3157             tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3158         }
3159         tcg_gen_mov_i64(tcg_rn, dirty_addr);
3160     }
3161 }
3162 
3163 /*
3164  * Load/store (register offset)
3165  *
3166  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
3167  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3168  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
3169  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3170  *
3171  * For non-vector:
3172  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3173  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3174  * For vector:
3175  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3176  *   opc<0>: 0 -> store, 1 -> load
3177  * V: 1 -> vector/simd
3178  * opt: extend encoding (see DecodeRegExtend)
3179  * S: if S=1 then scale (essentially index by sizeof(size))
3180  * Rt: register to transfer into/out of
3181  * Rn: address register or SP for base
3182  * Rm: offset register or ZR for offset
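      *
      * For example, LDR X0, [X1, X2, LSL #3] encodes size=11, opc=01,
      * option=011, S=1: the 64-bit offset in X2 is scaled by 8 before
      * the add.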
3183  */
3184 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
3185                                    int opc,
3186                                    int size,
3187                                    int rt,
3188                                    bool is_vector)
3189 {
3190     int rn = extract32(insn, 5, 5);
3191     int shift = extract32(insn, 12, 1);
3192     int rm = extract32(insn, 16, 5);
3193     int opt = extract32(insn, 13, 3);
3194     bool is_signed = false;
3195     bool is_store = false;
3196     bool is_extended = false;
3197     TCGv_i64 tcg_rm, clean_addr, dirty_addr;
3198     MemOp memop;
3199 
3200     if (extract32(opt, 1, 1) == 0) {
3201         unallocated_encoding(s);
3202         return;
3203     }
3204 
3205     if (is_vector) {
3206         size |= (opc & 2) << 1;
3207         if (size > 4) {
3208             unallocated_encoding(s);
3209             return;
3210         }
3211         is_store = !extract32(opc, 0, 1);
3212         if (!fp_access_check(s)) {
3213             return;
3214         }
3215     } else {
3216         if (size == 3 && opc == 2) {
3217             /* PRFM - prefetch */
3218             return;
3219         }
3220         if (opc == 3 && size > 1) {
3221             unallocated_encoding(s);
3222             return;
3223         }
3224         is_store = (opc == 0);
3225         is_signed = !is_store && extract32(opc, 1, 1);
3226         is_extended = (size < 3) && extract32(opc, 0, 1);
3227     }
3228 
3229     if (rn == 31) {
3230         gen_check_sp_alignment(s);
3231     }
3232     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3233 
3234     tcg_rm = read_cpu_reg(s, rm, 1);
3235     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
3236 
3237     tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
3238 
3239     memop = finalize_memop(s, size + is_signed * MO_SIGN);
3240     clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, memop);
3241 
3242     if (is_vector) {
3243         if (is_store) {
3244             do_fp_st(s, rt, clean_addr, memop);
3245         } else {
3246             do_fp_ld(s, rt, clean_addr, memop);
3247         }
3248     } else {
3249         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3250         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3251 
3252         if (is_store) {
3253             do_gpr_st(s, tcg_rt, clean_addr, memop,
3254                       true, rt, iss_sf, false);
3255         } else {
3256             do_gpr_ld(s, tcg_rt, clean_addr, memop,
3257                       is_extended, true, rt, iss_sf, false);
3258         }
3259     }
3260 }
3261 
3262 /*
3263  * Load/store (unsigned immediate)
3264  *
3265  * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
3266  * +----+-------+---+-----+-----+------------+-------+------+
3267  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
3268  * +----+-------+---+-----+-----+------------+-------+------+
3269  *
3270  * For non-vector:
3271  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3272  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3273  * For vector:
3274  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3275  *   opc<0>: 0 -> store, 1 -> load
3276  * Rn: base address register (inc SP)
3277  * Rt: target register
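      *
      * The immediate is scaled by the access size: e.g. LDR W0, [X1, #8]
      * encodes imm12 = 2, since the byte offset is imm12 << size.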
3278  */
3279 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
3280                                         int opc,
3281                                         int size,
3282                                         int rt,
3283                                         bool is_vector)
3284 {
3285     int rn = extract32(insn, 5, 5);
3286     unsigned int imm12 = extract32(insn, 10, 12);
3287     unsigned int offset;
3288     TCGv_i64 clean_addr, dirty_addr;
3289     bool is_store;
3290     bool is_signed = false;
3291     bool is_extended = false;
3292     MemOp memop;
3293 
3294     if (is_vector) {
3295         size |= (opc & 2) << 1;
3296         if (size > 4) {
3297             unallocated_encoding(s);
3298             return;
3299         }
3300         is_store = !extract32(opc, 0, 1);
3301         if (!fp_access_check(s)) {
3302             return;
3303         }
3304     } else {
3305         if (size == 3 && opc == 2) {
3306             /* PRFM - prefetch */
3307             return;
3308         }
3309         if (opc == 3 && size > 1) {
3310             unallocated_encoding(s);
3311             return;
3312         }
3313         is_store = (opc == 0);
3314         is_signed = !is_store && extract32(opc, 1, 1);
3315         is_extended = (size < 3) && extract32(opc, 0, 1);
3316     }
3317 
3318     if (rn == 31) {
3319         gen_check_sp_alignment(s);
3320     }
3321     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3322     offset = imm12 << size;
3323     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3324 
3325     memop = finalize_memop(s, size + is_signed * MO_SIGN);
3326     clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, memop);
3327 
3328     if (is_vector) {
3329         if (is_store) {
3330             do_fp_st(s, rt, clean_addr, memop);
3331         } else {
3332             do_fp_ld(s, rt, clean_addr, memop);
3333         }
3334     } else {
3335         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3336         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3337         if (is_store) {
3338             do_gpr_st(s, tcg_rt, clean_addr, memop, true, rt, iss_sf, false);
3339         } else {
3340             do_gpr_ld(s, tcg_rt, clean_addr, memop,
3341                       is_extended, true, rt, iss_sf, false);
3342         }
3343     }
3344 }
3345 
3346 /* Atomic memory operations
3347  *
3348  *  31  30      27  26    24    22  21   16   15    12    10    5     0
3349  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3350  * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn |  Rt |
3351  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3352  *
3353  * Rt: the result register
3354  * Rn: base address or SP
3355  * Rs: the source register for the operation
3356  * V: vector flag (always 0 as of v8.3)
3357  * A: acquire flag
3358  * R: release flag
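      *
      * Apart from SWP and the o3:opc=1100 LDAPR* forms, these atomically do
      *   Rt = Mem[Rn]; Mem[Rn] = op(Mem[Rn], Rs)
      * e.g. LDADD W1, W0, [X2] adds W1 into [X2] and returns the old
      * value in W0.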
3359  */
3360 static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
3361                               int size, int rt, bool is_vector)
3362 {
3363     int rs = extract32(insn, 16, 5);
3364     int rn = extract32(insn, 5, 5);
3365     int o3_opc = extract32(insn, 12, 4);
3366     bool r = extract32(insn, 22, 1);
3367     bool a = extract32(insn, 23, 1);
3368     TCGv_i64 tcg_rs, tcg_rt, clean_addr;
3369     AtomicThreeOpFn *fn = NULL;
3370     MemOp mop = finalize_memop(s, size | MO_ALIGN);
3371 
3372     if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
3373         unallocated_encoding(s);
3374         return;
3375     }
3376     switch (o3_opc) {
3377     case 000: /* LDADD */
3378         fn = tcg_gen_atomic_fetch_add_i64;
3379         break;
3380     case 001: /* LDCLR */
3381         fn = tcg_gen_atomic_fetch_and_i64;
3382         break;
3383     case 002: /* LDEOR */
3384         fn = tcg_gen_atomic_fetch_xor_i64;
3385         break;
3386     case 003: /* LDSET */
3387         fn = tcg_gen_atomic_fetch_or_i64;
3388         break;
3389     case 004: /* LDSMAX */
3390         fn = tcg_gen_atomic_fetch_smax_i64;
3391         mop |= MO_SIGN;
3392         break;
3393     case 005: /* LDSMIN */
3394         fn = tcg_gen_atomic_fetch_smin_i64;
3395         mop |= MO_SIGN;
3396         break;
3397     case 006: /* LDUMAX */
3398         fn = tcg_gen_atomic_fetch_umax_i64;
3399         break;
3400     case 007: /* LDUMIN */
3401         fn = tcg_gen_atomic_fetch_umin_i64;
3402         break;
3403     case 010: /* SWP */
3404         fn = tcg_gen_atomic_xchg_i64;
3405         break;
3406     case 014: /* LDAPR, LDAPRH, LDAPRB */
3407         if (!dc_isar_feature(aa64_rcpc_8_3, s) ||
3408             rs != 31 || a != 1 || r != 0) {
3409             unallocated_encoding(s);
3410             return;
3411         }
3412         break;
3413     default:
3414         unallocated_encoding(s);
3415         return;
3416     }
3417 
3418     if (rn == 31) {
3419         gen_check_sp_alignment(s);
3420     }
3421     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, mop);
3422 
3423     if (o3_opc == 014) {
3424         /*
3425          * LDAPR* are a special case because they are a simple load, not a
3426          * fetch-and-do-something op.
3427          * The architectural consistency requirements here are weaker than
3428          * full load-acquire (we only need "load-acquire processor consistent"),
3429          * but we choose to implement them as full LDAQ.
3430          */
3431         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop, false,
3432                   true, rt, disas_ldst_compute_iss_sf(size, false, 0), true);
3433         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3434         return;
3435     }
3436 
3437     tcg_rs = read_cpu_reg(s, rs, true);
3438     tcg_rt = cpu_reg(s, rt);
3439 
3440     if (o3_opc == 1) { /* LDCLR */
3441         tcg_gen_not_i64(tcg_rs, tcg_rs);
3442     }
3443 
3444     /* The tcg atomic primitives are all full barriers.  Therefore we
3445      * can ignore the Acquire and Release bits of this instruction.
3446      */
3447     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3448 
3449     if ((mop & MO_SIGN) && size != MO_64) {
3450         tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3451     }
3452 }
3453 
3454 /*
3455  * PAC memory operations
3456  *
3457  *  31  30      27  26    24    22  21       12  11  10    5     0
3458  * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3459  * | size | 1 1 1 | V | 0 0 | M S | 1 |  imm9  | W | 1 | Rn |  Rt |
3460  * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3461  *
3462  * Rt: the result register
3463  * Rn: base address or SP
3464  * V: vector flag (always 0 as of v8.3)
3465  * M: clear for key DA, set for key DB
3466  * W: pre-indexing flag
3467  * S: sign for imm9.
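      *
      * These are the FEAT_PAuth LDRAA/LDRAB instructions: the base in Rn
      * is authenticated with key DA or DB before the 64-bit load.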
3468  */
3469 static void disas_ldst_pac(DisasContext *s, uint32_t insn,
3470                            int size, int rt, bool is_vector)
3471 {
3472     int rn = extract32(insn, 5, 5);
3473     bool is_wback = extract32(insn, 11, 1);
3474     bool use_key_a = !extract32(insn, 23, 1);
3475     int offset;
3476     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3477     MemOp memop;
3478 
3479     if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
3480         unallocated_encoding(s);
3481         return;
3482     }
3483 
3484     if (rn == 31) {
3485         gen_check_sp_alignment(s);
3486     }
3487     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3488 
3489     if (s->pauth_active) {
3490         if (use_key_a) {
3491             gen_helper_autda(dirty_addr, cpu_env, dirty_addr,
3492                              tcg_constant_i64(0));
3493         } else {
3494             gen_helper_autdb(dirty_addr, cpu_env, dirty_addr,
3495                              tcg_constant_i64(0));
3496         }
3497     }
3498 
3499     /* Form the 10-bit signed, scaled offset.  */
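         /* For size == 3 (the only valid size here) this is a byte
          * offset in [-4096, +4088], in steps of 8.
          */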
3500     offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
3501     offset = sextract32(offset << size, 0, 10 + size);
3502     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3503 
3504     memop = finalize_memop(s, size);
3505 
3506     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3507     clean_addr = gen_mte_check1(s, dirty_addr, false,
3508                                 is_wback || rn != 31, memop);
3509 
3510     tcg_rt = cpu_reg(s, rt);
3511     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3512               /* extend */ false, /* iss_valid */ !is_wback,
3513               /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
3514 
3515     if (is_wback) {
3516         tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3517     }
3518 }
3519 
3520 /*
3521  * LDAPR/STLR (unscaled immediate)
3522  *
3523  *  31  30            24    22  21       12    10    5     0
3524  * +------+-------------+-----+---+--------+-----+----+-----+
3525  * | size | 0 1 1 0 0 1 | opc | 0 |  imm9  | 0 0 | Rn |  Rt |
3526  * +------+-------------+-----+---+--------+-----+----+-----+
3527  *
3528  * Rt: source or destination register
3529  * Rn: base register
3530  * imm9: unscaled immediate offset
3531  * opc: 00: STLUR*, 01/10/11: various LDAPUR*
3532  * size: size of load/store
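      *
      * These are the FEAT_LRCPC2 unscaled-offset acquire/release forms,
      * e.g. LDAPUR W0, [X1, #-4].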
3533  */
3534 static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn)
3535 {
3536     int rt = extract32(insn, 0, 5);
3537     int rn = extract32(insn, 5, 5);
3538     int offset = sextract32(insn, 12, 9);
3539     int opc = extract32(insn, 22, 2);
3540     int size = extract32(insn, 30, 2);
3541     TCGv_i64 clean_addr, dirty_addr;
3542     bool is_store = false;
3543     bool extend = false;
3544     bool iss_sf;
3545     MemOp mop;
3546 
3547     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3548         unallocated_encoding(s);
3549         return;
3550     }
3551 
3552     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3553     mop = finalize_memop(s, size | MO_ALIGN);
3554 
3555     switch (opc) {
3556     case 0: /* STLURB */
3557         is_store = true;
3558         break;
3559     case 1: /* LDAPUR* */
3560         break;
3561     case 2: /* LDAPURS* 64-bit variant */
3562         if (size == 3) {
3563             unallocated_encoding(s);
3564             return;
3565         }
3566         mop |= MO_SIGN;
3567         break;
3568     case 3: /* LDAPURS* 32-bit variant */
3569         if (size > 1) {
3570             unallocated_encoding(s);
3571             return;
3572         }
3573         mop |= MO_SIGN;
3574         extend = true; /* zero-extend 32->64 after signed load */
3575         break;
3576     default:
3577         g_assert_not_reached();
3578     }
3579 
3580     iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc);
3581 
3582     if (rn == 31) {
3583         gen_check_sp_alignment(s);
3584     }
3585 
3586     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3587     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3588     clean_addr = clean_data_tbi(s, dirty_addr);
3589 
3590     if (is_store) {
3591         /* Store-Release semantics */
3592         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3593         do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true);
3594     } else {
3595         /*
3596          * Load-AcquirePC semantics; we implement as the slightly more
3597          * restrictive Load-Acquire.
3598          */
3599         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop,
3600                   extend, true, rt, iss_sf, true);
3601         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3602     }
3603 }
3604 
3605 /* Load/store register (all forms) */
3606 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
3607 {
3608     int rt = extract32(insn, 0, 5);
3609     int opc = extract32(insn, 22, 2);
3610     bool is_vector = extract32(insn, 26, 1);
3611     int size = extract32(insn, 30, 2);
3612 
3613     switch (extract32(insn, 24, 2)) {
3614     case 0:
3615         if (extract32(insn, 21, 1) == 0) {
3616             /* Load/store register (unscaled immediate)
3617              * Load/store immediate pre/post-indexed
3618              * Load/store register unprivileged
3619              */
3620             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
3621             return;
3622         }
3623         switch (extract32(insn, 10, 2)) {
3624         case 0:
3625             disas_ldst_atomic(s, insn, size, rt, is_vector);
3626             return;
3627         case 2:
3628             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
3629             return;
3630         default:
3631             disas_ldst_pac(s, insn, size, rt, is_vector);
3632             return;
3633         }
3634         break;
3635     case 1:
3636         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
3637         return;
3638     }
3639     unallocated_encoding(s);
3640 }
3641 
3642 /* AdvSIMD load/store multiple structures
3643  *
3644  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
3645  * +---+---+---------------+---+-------------+--------+------+------+------+
3646  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
3647  * +---+---+---------------+---+-------------+--------+------+------+------+
3648  *
3649  * AdvSIMD load/store multiple structures (post-indexed)
3650  *
3651  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
3652  * +---+---+---------------+---+---+---------+--------+------+------+------+
3653  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
3654  * +---+---+---------------+---+---+---------+--------+------+------+------+
3655  *
3656  * Rt: first (or only) SIMD&FP register to be transferred
3657  * Rn: base address or SP
3658  * Rm (post-index only): post-index register (when !31) or size dependent #imm
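      *
      * e.g. LD4 {v0.4s-v3.4s}, [x0] is opcode 0000 (rpt = 1, selem = 4):
      * structure elements are interleaved across the four registers.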
3659  */
3660 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
3661 {
3662     int rt = extract32(insn, 0, 5);
3663     int rn = extract32(insn, 5, 5);
3664     int rm = extract32(insn, 16, 5);
3665     int size = extract32(insn, 10, 2);
3666     int opcode = extract32(insn, 12, 4);
3667     bool is_store = !extract32(insn, 22, 1);
3668     bool is_postidx = extract32(insn, 23, 1);
3669     bool is_q = extract32(insn, 30, 1);
3670     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3671     MemOp endian, align, mop;
3672 
3673     int total;    /* total bytes */
3674     int elements; /* elements per vector */
3675     int rpt;    /* num iterations */
3676     int selem;  /* structure elements */
3677     int r;
3678 
3679     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
3680         unallocated_encoding(s);
3681         return;
3682     }
3683 
3684     if (!is_postidx && rm != 0) {
3685         unallocated_encoding(s);
3686         return;
3687     }
3688 
3689     /* From the shared decode logic */
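         /*
          * opcode 0000 -> LD4/ST4; 0100 -> LD3/ST3; 1000 -> LD2/ST2;
          * 0111 -> LD1/ST1 of one register; 0010/0110/1010 -> LD1/ST1
          * of four/three/two consecutive registers.
          */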
3690     switch (opcode) {
3691     case 0x0:
3692         rpt = 1;
3693         selem = 4;
3694         break;
3695     case 0x2:
3696         rpt = 4;
3697         selem = 1;
3698         break;
3699     case 0x4:
3700         rpt = 1;
3701         selem = 3;
3702         break;
3703     case 0x6:
3704         rpt = 3;
3705         selem = 1;
3706         break;
3707     case 0x7:
3708         rpt = 1;
3709         selem = 1;
3710         break;
3711     case 0x8:
3712         rpt = 1;
3713         selem = 2;
3714         break;
3715     case 0xa:
3716         rpt = 2;
3717         selem = 1;
3718         break;
3719     default:
3720         unallocated_encoding(s);
3721         return;
3722     }
3723 
3724     if (size == 3 && !is_q && selem != 1) {
3725         /* reserved */
3726         unallocated_encoding(s);
3727         return;
3728     }
3729 
3730     if (!fp_access_check(s)) {
3731         return;
3732     }
3733 
3734     if (rn == 31) {
3735         gen_check_sp_alignment(s);
3736     }
3737 
3738     /* For our purposes, bytes are always little-endian.  */
3739     endian = s->be_data;
3740     if (size == 0) {
3741         endian = MO_LE;
3742     }
3743 
3744     total = rpt * selem * (is_q ? 16 : 8);
3745     tcg_rn = cpu_reg_sp(s, rn);
3746 
3747     /*
3748      * Issue the MTE check vs the logical repeat count, before we
3749      * promote consecutive little-endian elements below.
3750      */
3751     clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31,
3752                                 total, finalize_memop(s, size));
3753 
3754     /*
3755      * Consecutive little-endian elements from a single register
3756      * can be promoted to a larger little-endian operation.
3757      */
3758     align = MO_ALIGN;
3759     if (selem == 1 && endian == MO_LE) {
3760         align = pow2_align(size);
3761         size = 3;
3762     }
3763     if (!s->align_mem) {
3764         align = 0;
3765     }
3766     mop = endian | size | align;
3767 
3768     elements = (is_q ? 16 : 8) >> size;
3769     tcg_ebytes = tcg_constant_i64(1 << size);
3770     for (r = 0; r < rpt; r++) {
3771         int e;
3772         for (e = 0; e < elements; e++) {
3773             int xs;
3774             for (xs = 0; xs < selem; xs++) {
3775                 int tt = (rt + r + xs) % 32;
3776                 if (is_store) {
3777                     do_vec_st(s, tt, e, clean_addr, mop);
3778                 } else {
3779                     do_vec_ld(s, tt, e, clean_addr, mop);
3780                 }
3781                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3782             }
3783         }
3784     }
3785 
3786     if (!is_store) {
3787         /* For non-quad operations, setting a slice of the low
3788          * 64 bits of the register clears the high 64 bits (in
3789          * the ARM ARM pseudocode this is implicit in the fact
3790          * that 'rval' is a 64 bit wide variable).
3791          * For quad operations, we might still need to zero the
3792          * high bits of SVE.
3793          */
3794         for (r = 0; r < rpt * selem; r++) {
3795             int tt = (rt + r) % 32;
3796             clear_vec_high(s, is_q, tt);
3797         }
3798     }
3799 
3800     if (is_postidx) {
3801         if (rm == 31) {
3802             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3803         } else {
3804             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3805         }
3806     }
3807 }
3808 
3809 /* AdvSIMD load/store single structure
3810  *
3811  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3812  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3813  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
3814  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3815  *
3816  * AdvSIMD load/store single structure (post-indexed)
3817  *
3818  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3819  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3820  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
3821  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3822  *
3823  * Rt: first (or only) SIMD&FP register to be transferred
3824  * Rn: base address or SP
3825  * Rm (post-index only): post-index register (when !31) or size dependent #imm
3826  * index = encoded in Q:S:size dependent on size
3827  *
3828  * lane_size = encoded in R, opc
3829  * transfer width = encoded in opc, S, size
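      *
      * e.g. LD1 {v0.s}[3], [x0] loads one 32-bit element into lane 3,
      * while the LD*R forms (opc<2:1> = 11) replicate the loaded element
      * to every lane.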
3830  */
3831 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
3832 {
3833     int rt = extract32(insn, 0, 5);
3834     int rn = extract32(insn, 5, 5);
3835     int rm = extract32(insn, 16, 5);
3836     int size = extract32(insn, 10, 2);
3837     int S = extract32(insn, 12, 1);
3838     int opc = extract32(insn, 13, 3);
3839     int R = extract32(insn, 21, 1);
3840     int is_load = extract32(insn, 22, 1);
3841     int is_postidx = extract32(insn, 23, 1);
3842     int is_q = extract32(insn, 30, 1);
3843 
3844     int scale = extract32(opc, 1, 2);
3845     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
3846     bool replicate = false;
3847     int index = is_q << 3 | S << 2 | size;
3848     int xs, total;
3849     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3850     MemOp mop;
3851 
3852     if (extract32(insn, 31, 1)) {
3853         unallocated_encoding(s);
3854         return;
3855     }
3856     if (!is_postidx && rm != 0) {
3857         unallocated_encoding(s);
3858         return;
3859     }
3860 
3861     switch (scale) {
3862     case 3:
3863         if (!is_load || S) {
3864             unallocated_encoding(s);
3865             return;
3866         }
3867         scale = size;
3868         replicate = true;
3869         break;
3870     case 0:
3871         break;
3872     case 1:
3873         if (extract32(size, 0, 1)) {
3874             unallocated_encoding(s);
3875             return;
3876         }
3877         index >>= 1;
3878         break;
3879     case 2:
3880         if (extract32(size, 1, 1)) {
3881             unallocated_encoding(s);
3882             return;
3883         }
3884         if (!extract32(size, 0, 1)) {
3885             index >>= 2;
3886         } else {
3887             if (S) {
3888                 unallocated_encoding(s);
3889                 return;
3890             }
3891             index >>= 3;
3892             scale = 3;
3893         }
3894         break;
3895     default:
3896         g_assert_not_reached();
3897     }
3898 
3899     if (!fp_access_check(s)) {
3900         return;
3901     }
3902 
3903     if (rn == 31) {
3904         gen_check_sp_alignment(s);
3905     }
3906 
3907     total = selem << scale;
3908     tcg_rn = cpu_reg_sp(s, rn);
3909 
3910     mop = finalize_memop(s, scale);
3911 
3912     clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31,
3913                                 total, mop);
3914 
3915     tcg_ebytes = tcg_constant_i64(1 << scale);
3916     for (xs = 0; xs < selem; xs++) {
3917         if (replicate) {
3918             /* Load and replicate to all elements */
3919             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3920 
3921             tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
3922             tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
3923                                  (is_q + 1) * 8, vec_full_reg_size(s),
3924                                  tcg_tmp);
3925         } else {
3926             /* Load/store one element per register */
3927             if (is_load) {
3928                 do_vec_ld(s, rt, index, clean_addr, mop);
3929             } else {
3930                 do_vec_st(s, rt, index, clean_addr, mop);
3931             }
3932         }
3933         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3934         rt = (rt + 1) % 32;
3935     }
3936 
3937     if (is_postidx) {
3938         if (rm == 31) {
3939             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3940         } else {
3941             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3942         }
3943     }
3944 }
3945 
3946 /*
3947  * Load/Store memory tags
3948  *
3949  *  31 30 29         24     22  21     12    10      5      0
3950  * +-----+-------------+-----+---+------+-----+------+------+
3951  * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 |  Rn  |  Rt  |
3952  * +-----+-------------+-----+---+------+-----+------+------+
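      *
      * imm9 is scaled by the 16-byte tag granule; op1:op2 select among
      * STG/STZG/ST2G/STZ2G, LDG, and the bulk STGM/STZGM/LDGM forms.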
3953  */
3954 static void disas_ldst_tag(DisasContext *s, uint32_t insn)
3955 {
3956     int rt = extract32(insn, 0, 5);
3957     int rn = extract32(insn, 5, 5);
3958     uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
3959     int op2 = extract32(insn, 10, 2);
3960     int op1 = extract32(insn, 22, 2);
3961     bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
3962     int index = 0;
3963     TCGv_i64 addr, clean_addr, tcg_rt;
3964 
3965     /* We checked insn bits [29:24,21] in the caller.  */
3966     if (extract32(insn, 30, 2) != 3) {
3967         goto do_unallocated;
3968     }
3969 
3970     /*
3971      * @index is a tri-state variable:
3972      * < 0 : post-index, writeback
3973      * = 0 : signed offset
3974      * > 0 : pre-index, writeback
3975      */
3976     switch (op1) {
3977     case 0:
3978         if (op2 != 0) {
3979             /* STG */
3980             index = op2 - 2;
3981         } else {
3982             /* STZGM */
3983             if (s->current_el == 0 || offset != 0) {
3984                 goto do_unallocated;
3985             }
3986             is_mult = is_zero = true;
3987         }
3988         break;
3989     case 1:
3990         if (op2 != 0) {
3991             /* STZG */
3992             is_zero = true;
3993             index = op2 - 2;
3994         } else {
3995             /* LDG */
3996             is_load = true;
3997         }
3998         break;
3999     case 2:
4000         if (op2 != 0) {
4001             /* ST2G */
4002             is_pair = true;
4003             index = op2 - 2;
4004         } else {
4005             /* STGM */
4006             if (s->current_el == 0 || offset != 0) {
4007                 goto do_unallocated;
4008             }
4009             is_mult = true;
4010         }
4011         break;
4012     case 3:
4013         if (op2 != 0) {
4014             /* STZ2G */
4015             is_pair = is_zero = true;
4016             index = op2 - 2;
4017         } else {
4018             /* LDGM */
4019             if (s->current_el == 0 || offset != 0) {
4020                 goto do_unallocated;
4021             }
4022             is_mult = is_load = true;
4023         }
4024         break;
4025 
4026     default:
4027     do_unallocated:
4028         unallocated_encoding(s);
4029         return;
4030     }
4031 
4032     if (is_mult
4033         ? !dc_isar_feature(aa64_mte, s)
4034         : !dc_isar_feature(aa64_mte_insn_reg, s)) {
4035         goto do_unallocated;
4036     }
4037 
4038     if (rn == 31) {
4039         gen_check_sp_alignment(s);
4040     }
4041 
4042     addr = read_cpu_reg_sp(s, rn, true);
4043     if (index >= 0) {
4044         /* pre-index or signed offset */
4045         tcg_gen_addi_i64(addr, addr, offset);
4046     }
4047 
4048     if (is_mult) {
4049         tcg_rt = cpu_reg(s, rt);
4050 
4051         if (is_zero) {
4052             int size = 4 << s->dcz_blocksize;
4053 
4054             if (s->ata) {
4055                 gen_helper_stzgm_tags(cpu_env, addr, tcg_rt);
4056             }
4057             /*
4058              * The non-tags portion of STZGM is mostly like DC_ZVA,
4059              * except the alignment happens before the access.
4060              */
4061             clean_addr = clean_data_tbi(s, addr);
4062             tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4063             gen_helper_dc_zva(cpu_env, clean_addr);
4064         } else if (s->ata) {
4065             if (is_load) {
4066                 gen_helper_ldgm(tcg_rt, cpu_env, addr);
4067             } else {
4068                 gen_helper_stgm(cpu_env, addr, tcg_rt);
4069             }
4070         } else {
4071             MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
4072             int size = 4 << GMID_EL1_BS;
4073 
4074             clean_addr = clean_data_tbi(s, addr);
4075             tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4076             gen_probe_access(s, clean_addr, acc, size);
4077 
4078             if (is_load) {
4079                 /* The result tags are zeros.  */
4080                 tcg_gen_movi_i64(tcg_rt, 0);
4081             }
4082         }
4083         return;
4084     }
4085 
4086     if (is_load) {
4087         tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4088         tcg_rt = cpu_reg(s, rt);
4089         if (s->ata) {
4090             gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt);
4091         } else {
4092             clean_addr = clean_data_tbi(s, addr);
4093             gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4094             gen_address_with_allocation_tag0(tcg_rt, addr);
4095         }
4096     } else {
4097         tcg_rt = cpu_reg_sp(s, rt);
4098         if (!s->ata) {
4099             /*
4100              * For STG and ST2G, we need to check alignment and probe memory.
4101              * TODO: For STZG and STZ2G, we could rely on the stores below,
4102              * at least for system mode; user-only won't enforce alignment.
4103              */
4104             if (is_pair) {
4105                 gen_helper_st2g_stub(cpu_env, addr);
4106             } else {
4107                 gen_helper_stg_stub(cpu_env, addr);
4108             }
4109         } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4110             if (is_pair) {
4111                 gen_helper_st2g_parallel(cpu_env, addr, tcg_rt);
4112             } else {
4113                 gen_helper_stg_parallel(cpu_env, addr, tcg_rt);
4114             }
4115         } else {
4116             if (is_pair) {
4117                 gen_helper_st2g(cpu_env, addr, tcg_rt);
4118             } else {
4119                 gen_helper_stg(cpu_env, addr, tcg_rt);
4120             }
4121         }
4122     }
4123 
4124     if (is_zero) {
4125         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4126         TCGv_i64 zero64 = tcg_constant_i64(0);
4127         TCGv_i128 zero128 = tcg_temp_new_i128();
4128         int mem_index = get_mem_index(s);
4129         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4130 
4131         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4132 
4133         /* This is 1 or 2 atomic 16-byte operations. */
4134         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4135         if (is_pair) {
4136             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4137             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4138         }
4139     }
4140 
4141     if (index != 0) {
4142         /* pre-index or post-index */
4143         if (index < 0) {
4144             /* post-index */
4145             tcg_gen_addi_i64(addr, addr, offset);
4146         }
4147         tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr);
4148     }
4149 }
4150 
4151 /* Loads and stores */
4152 static void disas_ldst(DisasContext *s, uint32_t insn)
4153 {
4154     switch (extract32(insn, 24, 6)) {
4155     case 0x08: /* Load/store exclusive */
4156         disas_ldst_excl(s, insn);
4157         break;
4158     case 0x18: case 0x1c: /* Load register (literal) */
4159         disas_ld_lit(s, insn);
4160         break;
4161     case 0x28: case 0x29:
4162     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
4163         disas_ldst_pair(s, insn);
4164         break;
4165     case 0x38: case 0x39:
4166     case 0x3c: case 0x3d: /* Load/store register (all forms) */
4167         disas_ldst_reg(s, insn);
4168         break;
4169     case 0x0c: /* AdvSIMD load/store multiple structures */
4170         disas_ldst_multiple_struct(s, insn);
4171         break;
4172     case 0x0d: /* AdvSIMD load/store single structure */
4173         disas_ldst_single_struct(s, insn);
4174         break;
4175     case 0x19:
4176         if (extract32(insn, 21, 1) != 0) {
4177             disas_ldst_tag(s, insn);
4178         } else if (extract32(insn, 10, 2) == 0) {
4179             disas_ldst_ldapr_stlr(s, insn);
4180         } else {
4181             unallocated_encoding(s);
4182         }
4183         break;
4184     default:
4185         unallocated_encoding(s);
4186         break;
4187     }
4188 }
4189 
4190 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4191 
4192 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4193                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4194 {
4195     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4196     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4197     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4198 
4199     fn(tcg_rd, tcg_rn, tcg_imm);
4200     if (!a->sf) {
4201         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4202     }
4203     return true;
4204 }
4205 
4206 /*
4207  * PC-rel. addressing
4208  */
4209 
4210 static bool trans_ADR(DisasContext *s, arg_ri *a)
4211 {
4212     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4213     return true;
4214 }
4215 
4216 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4217 {
4218     int64_t offset = (int64_t)a->imm << 12;
4219 
4220     /* The page offset is ok for CF_PCREL. */
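         /*
          * ADRP computes (PC & ~0xfff) + (imm << 12); folding the low
          * bits of pc_curr into the offset lets gen_pc_plus_diff emit
          * a plain pc-relative add.
          */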
4221     offset -= s->pc_curr & 0xfff;
4222     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4223     return true;
4224 }
4225 
4226 /*
4227  * Add/subtract (immediate)
4228  */
4229 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4230 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4231 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4232 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4233 
4234 /*
4235  * Add/subtract (immediate, with tags)
4236  */
4237 
4238 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4239                                       bool sub_op)
4240 {
4241     TCGv_i64 tcg_rn, tcg_rd;
4242     int imm;
4243 
4244     imm = a->uimm6 << LOG2_TAG_GRANULE;
4245     if (sub_op) {
4246         imm = -imm;
4247     }
4248 
4249     tcg_rn = cpu_reg_sp(s, a->rn);
4250     tcg_rd = cpu_reg_sp(s, a->rd);
4251 
4252     if (s->ata) {
4253         gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn,
4254                            tcg_constant_i32(imm),
4255                            tcg_constant_i32(a->uimm4));
4256     } else {
4257         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4258         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4259     }
4260     return true;
4261 }
4262 
4263 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4264 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4265 
4266 /* The input should be a value in the bottom e bits (with higher
4267  * bits zero); returns that value replicated into every element
4268  * of size e in a 64 bit integer.
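      * For example, bitfield_replicate(0x5, 4) returns
      * 0x5555555555555555.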
4269  */
4270 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4271 {
4272     assert(e != 0);
4273     while (e < 64) {
4274         mask |= mask << e;
4275         e *= 2;
4276     }
4277     return mask;
4278 }
4279 
4280 /*
4281  * Logical (immediate)
4282  */
4283 
4284 /*
4285  * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4286  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4287  * value (ie should cause a guest UNDEF exception), and true if they are
4288  * valid, in which case the decoded bit pattern is written to result.
4289  */
4290 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4291                             unsigned int imms, unsigned int immr)
4292 {
4293     uint64_t mask;
4294     unsigned e, levels, s, r;
4295     int len;
4296 
4297     assert(immn < 2 && imms < 64 && immr < 64);
4298 
4299     /* The bit patterns we create here are 64 bit patterns which
4300      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4301      * 64 bits each. Each element contains the same value: a run
4302      * of between 1 and e-1 non-zero bits, rotated within the
4303      * element by between 0 and e-1 bits.
4304      *
4305      * The element size and run length are encoded into immn (1 bit)
4306      * and imms (6 bits) as follows:
4307      * 64 bit elements: immn = 1, imms = <length of run - 1>
4308      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4309      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4310      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4311      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4312      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4313      * Notice that immn = 0, imms = 11111x is the only combination
4314      * not covered by one of the above options; this is reserved.
4315      * Further, <length of run - 1> all-ones is a reserved pattern.
4316      *
4317      * In all cases the rotation is by immr % e (and immr is 6 bits).
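          *
          * Worked example: immn = 0, imms = 111100, immr = 0 gives
          * e = 2 and a run of one set bit with no rotation, so each
          * element is 01 and the decoded mask is 0x5555555555555555.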
4318      */
4319 
4320     /* First determine the element size */
4321     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4322     if (len < 1) {
4323         /* This is the immn == 0, imms == 11111x case */
4324         return false;
4325     }
4326     e = 1 << len;
4327 
4328     levels = e - 1;
4329     s = imms & levels;
4330     r = immr & levels;
4331 
4332     if (s == levels) {
4333         /* <length of run - 1> mustn't be all-ones. */
4334         return false;
4335     }
4336 
4337     /* Create the value of one element: s+1 set bits rotated
4338      * by r within the element (which is e bits wide)...
4339      */
4340     mask = MAKE_64BIT_MASK(0, s + 1);
4341     if (r) {
4342         mask = (mask >> r) | (mask << (e - r));
4343         mask &= MAKE_64BIT_MASK(0, e);
4344     }
4345     /* ...then replicate the element over the whole 64 bit value */
4346     mask = bitfield_replicate(mask, e);
4347     *result = mask;
4348     return true;
4349 }
4350 
4351 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4352                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4353 {
4354     TCGv_i64 tcg_rd, tcg_rn;
4355     uint64_t imm;
4356 
4357     /* Some immediate field values are reserved. */
4358     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4359                                 extract32(a->dbm, 0, 6),
4360                                 extract32(a->dbm, 6, 6))) {
4361         return false;
4362     }
4363     if (!a->sf) {
4364         imm &= 0xffffffffull;
4365     }
4366 
4367     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4368     tcg_rn = cpu_reg(s, a->rn);
4369 
4370     fn(tcg_rd, tcg_rn, imm);
4371     if (set_cc) {
4372         gen_logic_CC(a->sf, tcg_rd);
4373     }
4374     if (!a->sf) {
4375         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4376     }
4377     return true;
4378 }
4379 
4380 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4381 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4382 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4383 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4384 
4385 /*
4386  * Move wide (immediate)
4387  */
4388 
4389 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4390 {
4391     int pos = a->hw << 4;
4392     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4393     return true;
4394 }
4395 
4396 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4397 {
4398     int pos = a->hw << 4;
4399     uint64_t imm = a->imm;
4400 
4401     imm = ~(imm << pos);
4402     if (!a->sf) {
4403         imm = (uint32_t)imm;
4404     }
4405     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4406     return true;
4407 }
4408 
4409 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4410 {
4411     int pos = a->hw << 4;
4412     TCGv_i64 tcg_rd, tcg_im;
4413 
4414     tcg_rd = cpu_reg(s, a->rd);
4415     tcg_im = tcg_constant_i64(a->imm);
4416     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4417     if (!a->sf) {
4418         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4419     }
4420     return true;
4421 }
4422 
4423 /*
4424  * Bitfield
4425  */
4426 
4427 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4428 {
4429     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4430     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4431     unsigned int bitsize = a->sf ? 64 : 32;
4432     unsigned int ri = a->immr;
4433     unsigned int si = a->imms;
4434     unsigned int pos, len;
4435 
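         /*
          * SBFM covers the ASR-immediate, SBFIZ/SBFX and SXTB/SXTH/SXTW
          * aliases: si >= ri is the extract form, si < ri the insert form.
          */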
4436     if (si >= ri) {
4437         /* Wd<s-r:0> = Wn<s:r> */
4438         len = (si - ri) + 1;
4439         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4440         if (!a->sf) {
4441             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4442         }
4443     } else {
4444         /* Wd<32+s-r:32-r> = Wn<s:0> */
4445         len = si + 1;
4446         pos = (bitsize - ri) & (bitsize - 1);
4447 
4448         if (len < ri) {
4449             /*
4450              * Sign extend the destination field from len to fill the
4451              * balance of the word.  Let the deposit below insert all
4452              * of those sign bits.
4453              */
4454             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4455             len = ri;
4456         }
4457 
4458         /*
4459          * We start with zero, and we haven't modified any bits outside
4460          * bitsize, therefore no final zero-extension is needed for !sf.
4461          */
4462         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4463     }
4464     return true;
4465 }
4466 
4467 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4468 {
4469     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4470     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4471     unsigned int bitsize = a->sf ? 64 : 32;
4472     unsigned int ri = a->immr;
4473     unsigned int si = a->imms;
4474     unsigned int pos, len;
4475 
4479     if (si >= ri) {
4480         /* Wd<s-r:0> = Wn<s:r> */
4481         len = (si - ri) + 1;
4482         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4483     } else {
4484         /* Wd<32+s-r:32-r> = Wn<s:0> */
4485         len = si + 1;
4486         pos = (bitsize - ri) & (bitsize - 1);
4487         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4488     }
4489     return true;
4490 }
4491 
4492 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4493 {
4494     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4495     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4496     unsigned int bitsize = a->sf ? 64 : 32;
4497     unsigned int ri = a->immr;
4498     unsigned int si = a->imms;
4499     unsigned int pos, len;
4500 
4504     if (si >= ri) {
4505         /* Wd<s-r:0> = Wn<s:r> */
4506         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4507         len = (si - ri) + 1;
4508         pos = 0;
4509     } else {
4510         /* Wd<32+s-r:32-r> = Wn<s:0> */
4511         len = si + 1;
4512         pos = (bitsize - ri) & (bitsize - 1);
4513     }
4514 
4515     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4516     if (!a->sf) {
4517         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4518     }
4519     return true;
4520 }
4521 
4522 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4523 {
4524     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4525 
4526     tcg_rd = cpu_reg(s, a->rd);
4527 
4528     if (unlikely(a->imm == 0)) {
4529         /*
4530          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4531          * so an extract from bit 0 is a special case.
4532          */
4533         if (a->sf) {
4534             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4535         } else {
4536             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4537         }
4538     } else {
4539         tcg_rm = cpu_reg(s, a->rm);
4540         tcg_rn = cpu_reg(s, a->rn);
4541 
4542         if (a->sf) {
4543             /* Specialization to ROR happens in EXTRACT2.  */
4544             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4545         } else {
4546             TCGv_i32 t0 = tcg_temp_new_i32();
4547 
4548             tcg_gen_extrl_i64_i32(t0, tcg_rm);
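                 /* With Rm == Rn, EXTR is the ROR-immediate alias. */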
4549             if (a->rm == a->rn) {
4550                 tcg_gen_rotri_i32(t0, t0, a->imm);
4551             } else {
4552                 TCGv_i32 t1 = tcg_temp_new_i32();
4553                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4554                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4555             }
4556             tcg_gen_extu_i32_i64(tcg_rd, t0);
4557         }
4558     }
4559     return true;
4560 }
4561 
4562 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
4563  * Note that it is the caller's responsibility to ensure that the
4564  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4565  * mandated semantics for out of range shifts.
4566  */
4567 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4568                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4569 {
4570     switch (shift_type) {
4571     case A64_SHIFT_TYPE_LSL:
4572         tcg_gen_shl_i64(dst, src, shift_amount);
4573         break;
4574     case A64_SHIFT_TYPE_LSR:
4575         tcg_gen_shr_i64(dst, src, shift_amount);
4576         break;
4577     case A64_SHIFT_TYPE_ASR:
4578         if (!sf) {
4579             tcg_gen_ext32s_i64(dst, src);
4580         }
4581         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4582         break;
4583     case A64_SHIFT_TYPE_ROR:
4584         if (sf) {
4585             tcg_gen_rotr_i64(dst, src, shift_amount);
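                 /*
                  * A 32-bit ROR must rotate within the low word, so do
                  * the rotate at i32 width and zero-extend back to i64.
                  */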
4586         } else {
4587             TCGv_i32 t0, t1;
4588             t0 = tcg_temp_new_i32();
4589             t1 = tcg_temp_new_i32();
4590             tcg_gen_extrl_i64_i32(t0, src);
4591             tcg_gen_extrl_i64_i32(t1, shift_amount);
4592             tcg_gen_rotr_i32(t0, t0, t1);
4593             tcg_gen_extu_i32_i64(dst, t0);
4594         }
4595         break;
4596     default:
4597         g_assert_not_reached(); /* all shift types should be handled */
4598         break;
4599     }
4600 
4601     if (!sf) { /* zero extend final result */
4602         tcg_gen_ext32u_i64(dst, dst);
4603     }
4604 }
4605 
4606 /* Shift a TCGv src by immediate, put result in dst.
4607  * The shift amount must be in range (this should always be true as the
4608  * relevant instructions will UNDEF on bad shift immediates).
4609  */
4610 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4611                           enum a64_shift_type shift_type, unsigned int shift_i)
4612 {
4613     assert(shift_i < (sf ? 64 : 32));
4614 
4615     if (shift_i == 0) {
4616         tcg_gen_mov_i64(dst, src);
4617     } else {
4618         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
4619     }
4620 }
4621 
4622 /* Logical (shifted register)
4623  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4624  * +----+-----+-----------+-------+---+------+--------+------+------+
4625  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4626  * +----+-----+-----------+-------+---+------+--------+------+------+
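      *
      * opc:N selects AND/BIC, ORR/ORN, EOR/EON, ANDS/BICS; imm6 is the
      * shift applied to Rm.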
4627  */
4628 static void disas_logic_reg(DisasContext *s, uint32_t insn)
4629 {
4630     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4631     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4632 
4633     sf = extract32(insn, 31, 1);
4634     opc = extract32(insn, 29, 2);
4635     shift_type = extract32(insn, 22, 2);
4636     invert = extract32(insn, 21, 1);
4637     rm = extract32(insn, 16, 5);
4638     shift_amount = extract32(insn, 10, 6);
4639     rn = extract32(insn, 5, 5);
4640     rd = extract32(insn, 0, 5);
4641 
4642     if (!sf && (shift_amount & (1 << 5))) {
4643         unallocated_encoding(s);
4644         return;
4645     }
4646 
4647     tcg_rd = cpu_reg(s, rd);
4648 
4649     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4650         /* Unshifted ORR and ORN with WZR/XZR are the standard encodings for
4651          * register-register MOV and MVN, so they are worth special casing.
4652          */
4653         tcg_rm = cpu_reg(s, rm);
4654         if (invert) {
4655             tcg_gen_not_i64(tcg_rd, tcg_rm);
4656             if (!sf) {
4657                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4658             }
4659         } else {
4660             if (sf) {
4661                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
4662             } else {
4663                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4664             }
4665         }
4666         return;
4667     }
4668 
4669     tcg_rm = read_cpu_reg(s, rm, sf);
4670 
4671     if (shift_amount) {
4672         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4673     }
4674 
4675     tcg_rn = cpu_reg(s, rn);
4676 
4677     switch (opc | (invert << 2)) {
4678     case 0: /* AND */
4679     case 3: /* ANDS */
4680         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4681         break;
4682     case 1: /* ORR */
4683         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4684         break;
4685     case 2: /* EOR */
4686         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4687         break;
4688     case 4: /* BIC */
4689     case 7: /* BICS */
4690         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4691         break;
4692     case 5: /* ORN */
4693         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4694         break;
4695     case 6: /* EON */
4696         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4697         break;
4698     default:
4699         g_assert_not_reached();
4700         break;
4701     }
4702 
4703     if (!sf) {
4704         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4705     }
4706 
4707     if (opc == 3) {
4708         gen_logic_CC(sf, tcg_rd);
4709     }
4710 }
4711 
4712 /*
4713  * Add/subtract (extended register)
4714  *
4715  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4716  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4717  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4718  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4719  *
4720  *  sf: 0 -> 32bit, 1 -> 64bit
4721  *  op: 0 -> add  , 1 -> sub
4722  *   S: 1 -> set flags
4723  * opt: 00
4724  * option: extension type (see DecodeRegExtend)
4725  * imm3: optional shift to Rm
4726  *
4727  * Rd = Rn + LSL(extend(Rm), amount)
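      *
      * e.g. ADD X0, SP, W1, UXTW #2 sets X0 = SP + (zext32(W1) << 2).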
4728  */
4729 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4730 {
4731     int rd = extract32(insn, 0, 5);
4732     int rn = extract32(insn, 5, 5);
4733     int imm3 = extract32(insn, 10, 3);
4734     int option = extract32(insn, 13, 3);
4735     int rm = extract32(insn, 16, 5);
4736     int opt = extract32(insn, 22, 2);
4737     bool setflags = extract32(insn, 29, 1);
4738     bool sub_op = extract32(insn, 30, 1);
4739     bool sf = extract32(insn, 31, 1);
4740 
4741     TCGv_i64 tcg_rm, tcg_rn; /* temps */
4742     TCGv_i64 tcg_rd;
4743     TCGv_i64 tcg_result;
4744 
4745     if (imm3 > 4 || opt != 0) {
4746         unallocated_encoding(s);
4747         return;
4748     }
4749 
4750     /* non-flag setting ops may use SP */
4751     if (!setflags) {
4752         tcg_rd = cpu_reg_sp(s, rd);
4753     } else {
4754         tcg_rd = cpu_reg(s, rd);
4755     }
4756     tcg_rn = read_cpu_reg_sp(s, rn, sf);
4757 
4758     tcg_rm = read_cpu_reg(s, rm, sf);
4759     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4760 
4761     tcg_result = tcg_temp_new_i64();
4762 
4763     if (!setflags) {
4764         if (sub_op) {
4765             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4766         } else {
4767             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4768         }
4769     } else {
4770         if (sub_op) {
4771             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4772         } else {
4773             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4774         }
4775     }
4776 
4777     if (sf) {
4778         tcg_gen_mov_i64(tcg_rd, tcg_result);
4779     } else {
4780         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4781     }
4782 }
4783 
4784 /*
4785  * Add/subtract (shifted register)
4786  *
4787  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4788  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4789  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4790  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4791  *
4792  *    sf: 0 -> 32bit, 1 -> 64bit
4793  *    op: 0 -> add  , 1 -> sub
4794  *     S: 1 -> set flags
4795  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4796  *  imm6: Shift amount to apply to Rm before the add/sub
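      *
      * e.g. SUBS XZR, X0, X1, LSR #4 is the CMP X0, X1, LSR #4 alias.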
4797  */
4798 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4799 {
4800     int rd = extract32(insn, 0, 5);
4801     int rn = extract32(insn, 5, 5);
4802     int imm6 = extract32(insn, 10, 6);
4803     int rm = extract32(insn, 16, 5);
4804     int shift_type = extract32(insn, 22, 2);
4805     bool setflags = extract32(insn, 29, 1);
4806     bool sub_op = extract32(insn, 30, 1);
4807     bool sf = extract32(insn, 31, 1);
4808 
4809     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4810     TCGv_i64 tcg_rn, tcg_rm;
4811     TCGv_i64 tcg_result;
4812 
4813     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4814         unallocated_encoding(s);
4815         return;
4816     }
4817 
4818     tcg_rn = read_cpu_reg(s, rn, sf);
4819     tcg_rm = read_cpu_reg(s, rm, sf);
4820 
4821     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4822 
4823     tcg_result = tcg_temp_new_i64();
4824 
4825     if (!setflags) {
4826         if (sub_op) {
4827             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4828         } else {
4829             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4830         }
4831     } else {
4832         if (sub_op) {
4833             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4834         } else {
4835             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4836         }
4837     }
4838 
4839     if (sf) {
4840         tcg_gen_mov_i64(tcg_rd, tcg_result);
4841     } else {
4842         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4843     }
4844 }
4845 
4846 /* Data-processing (3 source)
4847  *
4848  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
4849  *  +--+------+-----------+------+------+----+------+------+------+
4850  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4851  *  +--+------+-----------+------+------+----+------+------+------+
4852  */
4853 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4854 {
4855     int rd = extract32(insn, 0, 5);
4856     int rn = extract32(insn, 5, 5);
4857     int ra = extract32(insn, 10, 5);
4858     int rm = extract32(insn, 16, 5);
4859     int op_id = (extract32(insn, 29, 3) << 4) |
4860         (extract32(insn, 21, 3) << 1) |
4861         extract32(insn, 15, 1);
4862     bool sf = extract32(insn, 31, 1);
4863     bool is_sub = extract32(op_id, 0, 1);
4864     bool is_high = extract32(op_id, 2, 1);
4865     bool is_signed = false;
4866     TCGv_i64 tcg_op1;
4867     TCGv_i64 tcg_op2;
4868     TCGv_i64 tcg_tmp;
4869 
4870     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
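    /*
     * e.g. (illustrative) SMADDL has sf == 1, op54 == 00, op31 == 001 and
     * o0 == 0, giving op_id == (0b100 << 4) | (0b001 << 1) | 0 == 0x42.
     */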
4871     switch (op_id) {
4872     case 0x42: /* SMADDL */
4873     case 0x43: /* SMSUBL */
4874     case 0x44: /* SMULH */
4875         is_signed = true;
4876         break;
4877     case 0x0: /* MADD (32bit) */
4878     case 0x1: /* MSUB (32bit) */
4879     case 0x40: /* MADD (64bit) */
4880     case 0x41: /* MSUB (64bit) */
4881     case 0x4a: /* UMADDL */
4882     case 0x4b: /* UMSUBL */
4883     case 0x4c: /* UMULH */
4884         break;
4885     default:
4886         unallocated_encoding(s);
4887         return;
4888     }
4889 
4890     if (is_high) {
4891         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4892         TCGv_i64 tcg_rd = cpu_reg(s, rd);
4893         TCGv_i64 tcg_rn = cpu_reg(s, rn);
4894         TCGv_i64 tcg_rm = cpu_reg(s, rm);
4895 
4896         if (is_signed) {
4897             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4898         } else {
4899             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4900         }
4901         return;
4902     }
4903 
4904     tcg_op1 = tcg_temp_new_i64();
4905     tcg_op2 = tcg_temp_new_i64();
4906     tcg_tmp = tcg_temp_new_i64();
4907 
4908     if (op_id < 0x42) {
4909         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4910         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4911     } else {
4912         if (is_signed) {
4913             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4914             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4915         } else {
4916             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4917             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4918         }
4919     }
4920 
4921     if (ra == 31 && !is_sub) {
4922         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4923         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4924     } else {
4925         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4926         if (is_sub) {
4927             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4928         } else {
4929             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4930         }
4931     }
4932 
4933     if (!sf) {
4934         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4935     }
4936 }
4937 
4938 /* Add/subtract (with carry)
4939  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
4940  * +--+--+--+------------------------+------+-------------+------+-----+
4941  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
4942  * +--+--+--+------------------------+------+-------------+------+-----+
4943  */
4944 
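/*
 * SBC is implemented via the identity Rn - Rm - (1 - C) == Rn + NOT(Rm) + C,
 * so the subtracting form simply inverts Rm and shares the ADC codegen below.
 */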
4945 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4946 {
4947     unsigned int sf, op, setflags, rm, rn, rd;
4948     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4949 
4950     sf = extract32(insn, 31, 1);
4951     op = extract32(insn, 30, 1);
4952     setflags = extract32(insn, 29, 1);
4953     rm = extract32(insn, 16, 5);
4954     rn = extract32(insn, 5, 5);
4955     rd = extract32(insn, 0, 5);
4956 
4957     tcg_rd = cpu_reg(s, rd);
4958     tcg_rn = cpu_reg(s, rn);
4959 
4960     if (op) {
4961         tcg_y = tcg_temp_new_i64();
4962         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4963     } else {
4964         tcg_y = cpu_reg(s, rm);
4965     }
4966 
4967     if (setflags) {
4968         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4969     } else {
4970         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4971     }
4972 }
4973 
4974 /*
4975  * Rotate right into flags
4976  *  31 30 29                21       15          10      5  4      0
4977  * +--+--+--+-----------------+--------+-----------+------+--+------+
4978  * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
4979  * +--+--+--+-----------------+--------+-----------+------+--+------+
4980  */
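/*
 * Note on QEMU's flag representation (see target/arm/cpu.h): N is bit 31
 * of cpu_NF, Z is "cpu_ZF == 0", C is bit 0 of cpu_CF and V is bit 31 of
 * cpu_VF.  RMIF moves bits 3..0 of the rotated Xn into N/Z/C/V, so each
 * masked case below shifts or extracts the relevant bit into that layout;
 * e.g. the Z case computes (~nzcv & 4), which is zero (so Z reads as set)
 * exactly when bit 2 of the rotated value is 1.
 */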
4981 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
4982 {
4983     int mask = extract32(insn, 0, 4);
4984     int o2 = extract32(insn, 4, 1);
4985     int rn = extract32(insn, 5, 5);
4986     int imm6 = extract32(insn, 15, 6);
4987     int sf_op_s = extract32(insn, 29, 3);
4988     TCGv_i64 tcg_rn;
4989     TCGv_i32 nzcv;
4990 
4991     if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
4992         unallocated_encoding(s);
4993         return;
4994     }
4995 
4996     tcg_rn = read_cpu_reg(s, rn, 1);
4997     tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
4998 
4999     nzcv = tcg_temp_new_i32();
5000     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
5001 
5002     if (mask & 8) { /* N */
5003         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
5004     }
5005     if (mask & 4) { /* Z */
5006         tcg_gen_not_i32(cpu_ZF, nzcv);
5007         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
5008     }
5009     if (mask & 2) { /* C */
5010         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
5011     }
5012     if (mask & 1) { /* V */
5013         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
5014     }
5015 }
5016 
5017 /*
5018  * Evaluate into flags
5019  *  31 30 29                21        15   14        10      5  4      0
5020  * +--+--+--+-----------------+---------+----+---------+------+--+------+
5021  * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
5022  * +--+--+--+-----------------+---------+----+---------+------+--+------+
5023  */
5024 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
5025 {
5026     int o3_mask = extract32(insn, 0, 5);
5027     int rn = extract32(insn, 5, 5);
5028     int o2 = extract32(insn, 15, 6);
5029     int sz = extract32(insn, 14, 1);
5030     int sf_op_s = extract32(insn, 29, 3);
5031     TCGv_i32 tmp;
5032     int shift;
5033 
5034     if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
5035         !dc_isar_feature(aa64_condm_4, s)) {
5036         unallocated_encoding(s);
5037         return;
5038     }
5039     shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
5040 
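    /*
     * Sketch of the computation below, using QEMU's flag layout
     * (N = cpu_NF bit 31, Z set iff cpu_ZF == 0, V = cpu_VF bit 31).
     * For SETF8 (shift == 24), with wn the low 32 bits of Xn:
     *   cpu_NF = wn << 24;                 bit 31 is wn<7>         (N)
     *   cpu_ZF = wn << 24;                 zero iff wn<7:0> == 0   (Z)
     *   cpu_VF = (wn << 23) ^ (wn << 24);  bit 31 is wn<8> ^ wn<7> (V)
     * C is left unchanged, as the architecture requires.
     */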
5041     tmp = tcg_temp_new_i32();
5042     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
5043     tcg_gen_shli_i32(cpu_NF, tmp, shift);
5044     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
5045     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
5046     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
5047 }
5048 
5049 /* Conditional compare (immediate / register)
5050  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
5051  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
5052  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
5053  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
5054  *   Fixed bits: S == 1, o2 == 0, o3 == 0; y is imm5 (imm form) or Rm (reg form)
5055  */
5056 static void disas_cc(DisasContext *s, uint32_t insn)
5057 {
5058     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
5059     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
5060     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
5061     DisasCompare c;
5062 
5063     if (!extract32(insn, 29, 1)) {
5064         unallocated_encoding(s);
5065         return;
5066     }
5067     if (insn & (1 << 10 | 1 << 4)) {
5068         unallocated_encoding(s);
5069         return;
5070     }
5071     sf = extract32(insn, 31, 1);
5072     op = extract32(insn, 30, 1);
5073     is_imm = extract32(insn, 11, 1);
5074     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
5075     cond = extract32(insn, 12, 4);
5076     rn = extract32(insn, 5, 5);
5077     nzcv = extract32(insn, 0, 4);
5078 
5079     /* Set T0 = !COND.  */
5080     tcg_t0 = tcg_temp_new_i32();
5081     arm_test_cc(&c, cond);
5082     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
5083 
5084     /* Load the arguments for the new comparison.  */
5085     if (is_imm) {
5086         tcg_y = tcg_temp_new_i64();
5087         tcg_gen_movi_i64(tcg_y, y);
5088     } else {
5089         tcg_y = cpu_reg(s, y);
5090     }
5091     tcg_rn = cpu_reg(s, rn);
5092 
5093     /* Set the flags for the new comparison.  */
5094     tcg_tmp = tcg_temp_new_i64();
5095     if (op) {
5096         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5097     } else {
5098         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5099     }
5100 
5101     /* If COND was false, force the flags to #nzcv.  Compute two masks
5102      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
5103      * For tcg hosts that support ANDC, we can make do with just T1.
5104      * In either case, allow the tcg optimizer to delete any unused mask.
5105      */
5106     tcg_t1 = tcg_temp_new_i32();
5107     tcg_t2 = tcg_temp_new_i32();
5108     tcg_gen_neg_i32(tcg_t1, tcg_t0);
5109     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
5110 
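    /*
     * Illustrative values: if COND is false then t0 == 1, so t1 == -1
     * (all ones) and t2 == 0; OR with t1 then forces a flag word to all
     * ones while AND with t2 (or ANDC with t1) clears it.  If COND is
     * true, t1 == 0 and t2 == -1, and both operations are no-ops.
     */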
5111     if (nzcv & 8) { /* N */
5112         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
5113     } else {
5114         if (TCG_TARGET_HAS_andc_i32) {
5115             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
5116         } else {
5117             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
5118         }
5119     }
5120     if (nzcv & 4) { /* Z */
5121         if (TCG_TARGET_HAS_andc_i32) {
5122             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
5123         } else {
5124             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
5125         }
5126     } else {
5127         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
5128     }
5129     if (nzcv & 2) { /* C */
5130         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
5131     } else {
5132         if (TCG_TARGET_HAS_andc_i32) {
5133             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
5134         } else {
5135             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
5136         }
5137     }
5138     if (nzcv & 1) { /* V */
5139         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
5140     } else {
5141         if (TCG_TARGET_HAS_andc_i32) {
5142             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
5143         } else {
5144             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
5145         }
5146     }
5147 }
5148 
5149 /* Conditional select
5150  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
5151  * +----+----+---+-----------------+------+------+-----+------+------+
5152  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
5153  * +----+----+---+-----------------+------+------+-----+------+------+
5154  */
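/*
 * op selects inversion of the false-condition value (CSINV/CSNEG) and
 * op2<0> selects increment (CSINC).  e.g. CSET Wd, <cond> is the alias
 * CSINC Wd, WZR, WZR, invert(<cond>), which is the rn == rm == 31
 * special case handled below.
 */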
5155 static void disas_cond_select(DisasContext *s, uint32_t insn)
5156 {
5157     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
5158     TCGv_i64 tcg_rd, zero;
5159     DisasCompare64 c;
5160 
5161     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
5162         /* S == 1 or op2<1> == 1 */
5163         unallocated_encoding(s);
5164         return;
5165     }
5166     sf = extract32(insn, 31, 1);
5167     else_inv = extract32(insn, 30, 1);
5168     rm = extract32(insn, 16, 5);
5169     cond = extract32(insn, 12, 4);
5170     else_inc = extract32(insn, 10, 1);
5171     rn = extract32(insn, 5, 5);
5172     rd = extract32(insn, 0, 5);
5173 
5174     tcg_rd = cpu_reg(s, rd);
5175 
5176     a64_test_cc(&c, cond);
5177     zero = tcg_constant_i64(0);
5178 
5179     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
5180         /* CSET & CSETM.  */
5181         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
5182         if (else_inv) {
5183             tcg_gen_neg_i64(tcg_rd, tcg_rd);
5184         }
5185     } else {
5186         TCGv_i64 t_true = cpu_reg(s, rn);
5187         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
5188         if (else_inv && else_inc) {
5189             tcg_gen_neg_i64(t_false, t_false);
5190         } else if (else_inv) {
5191             tcg_gen_not_i64(t_false, t_false);
5192         } else if (else_inc) {
5193             tcg_gen_addi_i64(t_false, t_false, 1);
5194         }
5195         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
5196     }
5197 
5198     if (!sf) {
5199         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5200     }
5201 }
5202 
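/* CLZ: count leading zero bits; the 32-bit form narrows, counts, then zero-extends */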
5203 static void handle_clz(DisasContext *s, unsigned int sf,
5204                        unsigned int rn, unsigned int rd)
5205 {
5206     TCGv_i64 tcg_rd, tcg_rn;
5207     tcg_rd = cpu_reg(s, rd);
5208     tcg_rn = cpu_reg(s, rn);
5209 
5210     if (sf) {
5211         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
5212     } else {
5213         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5214         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5215         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
5216         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5217     }
5218 }
5219 
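/* CLS: count leading sign bits; clrsb counts copies of the sign bit, excluding the sign bit itself */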
5220 static void handle_cls(DisasContext *s, unsigned int sf,
5221                        unsigned int rn, unsigned int rd)
5222 {
5223     TCGv_i64 tcg_rd, tcg_rn;
5224     tcg_rd = cpu_reg(s, rd);
5225     tcg_rn = cpu_reg(s, rn);
5226 
5227     if (sf) {
5228         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
5229     } else {
5230         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5231         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5232         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
5233         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5234     }
5235 }
5236 
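/* RBIT: reverse the bit order of Wn/Xn, via helpers */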
5237 static void handle_rbit(DisasContext *s, unsigned int sf,
5238                         unsigned int rn, unsigned int rd)
5239 {
5240     TCGv_i64 tcg_rd, tcg_rn;
5241     tcg_rd = cpu_reg(s, rd);
5242     tcg_rn = cpu_reg(s, rn);
5243 
5244     if (sf) {
5245         gen_helper_rbit64(tcg_rd, tcg_rn);
5246     } else {
5247         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5248         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5249         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
5250         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5251     }
5252 }
5253 
5254 /* REV with sf==1, opcode==3 ("REV64") */
5255 static void handle_rev64(DisasContext *s, unsigned int sf,
5256                          unsigned int rn, unsigned int rd)
5257 {
5258     if (!sf) {
5259         unallocated_encoding(s);
5260         return;
5261     }
5262     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
5263 }
5264 
5265 /* REV (sf==0, opcode==2): byte-reverse Wn
5266  * REV32 (sf==1, opcode==2): byte-reverse each 32-bit half of Xn
5267  */
5268 static void handle_rev32(DisasContext *s, unsigned int sf,
5269                          unsigned int rn, unsigned int rd)
5270 {
5271     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5272     TCGv_i64 tcg_rn = cpu_reg(s, rn);
5273 
5274     if (sf) {
5275         tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
5276         tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
5277     } else {
5278         tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
5279     }
5280 }
5281 
5282 /* REV16 (opcode==1) */
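/*
 * e.g. (illustrative) with Wn = 0xAABBCCDD, REV16 yields 0xBBAADDCC:
 * the two bytes of each 16-bit lane are swapped by the mask/shift/OR
 * sequence below.
 */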
5283 static void handle_rev16(DisasContext *s, unsigned int sf,
5284                          unsigned int rn, unsigned int rd)
5285 {
5286     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5287     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5288     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5289     TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
5290 
5291     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
5292     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
5293     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
5294     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
5295     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
5296 }
5297 
5298 /* Data-processing (1 source)
5299  *   31  30  29  28             21 20     16 15    10 9    5 4    0
5300  * +----+---+---+-----------------+---------+--------+------+------+
5301  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
5302  * +----+---+---+-----------------+---------+--------+------+------+
5303  */
5304 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
5305 {
5306     unsigned int sf, opcode, opcode2, rn, rd;
5307     TCGv_i64 tcg_rd;
5308 
5309     if (extract32(insn, 29, 1)) {
5310         unallocated_encoding(s);
5311         return;
5312     }
5313 
5314     sf = extract32(insn, 31, 1);
5315     opcode = extract32(insn, 10, 6);
5316     opcode2 = extract32(insn, 16, 5);
5317     rn = extract32(insn, 5, 5);
5318     rd = extract32(insn, 0, 5);
5319 
5320 #define MAP(SF, O2, O1) ((SF) | ((O1) << 1) | ((O2) << 7))
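    /* e.g. MAP(1, 0x01, 0x08) == 0x91 (PACIZA); the packing only needs
     * to be collision-free. */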
5321 
5322     switch (MAP(sf, opcode2, opcode)) {
5323     case MAP(0, 0x00, 0x00): /* RBIT */
5324     case MAP(1, 0x00, 0x00):
5325         handle_rbit(s, sf, rn, rd);
5326         break;
5327     case MAP(0, 0x00, 0x01): /* REV16 */
5328     case MAP(1, 0x00, 0x01):
5329         handle_rev16(s, sf, rn, rd);
5330         break;
5331     case MAP(0, 0x00, 0x02): /* REV/REV32 */
5332     case MAP(1, 0x00, 0x02):
5333         handle_rev32(s, sf, rn, rd);
5334         break;
5335     case MAP(1, 0x00, 0x03): /* REV64 */
5336         handle_rev64(s, sf, rn, rd);
5337         break;
5338     case MAP(0, 0x00, 0x04): /* CLZ */
5339     case MAP(1, 0x00, 0x04):
5340         handle_clz(s, sf, rn, rd);
5341         break;
5342     case MAP(0, 0x00, 0x05): /* CLS */
5343     case MAP(1, 0x00, 0x05):
5344         handle_cls(s, sf, rn, rd);
5345         break;
5346     case MAP(1, 0x01, 0x00): /* PACIA */
5347         if (s->pauth_active) {
5348             tcg_rd = cpu_reg(s, rd);
5349             gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5350         } else if (!dc_isar_feature(aa64_pauth, s)) {
5351             goto do_unallocated;
5352         }
5353         break;
5354     case MAP(1, 0x01, 0x01): /* PACIB */
5355         if (s->pauth_active) {
5356             tcg_rd = cpu_reg(s, rd);
5357             gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5358         } else if (!dc_isar_feature(aa64_pauth, s)) {
5359             goto do_unallocated;
5360         }
5361         break;
5362     case MAP(1, 0x01, 0x02): /* PACDA */
5363         if (s->pauth_active) {
5364             tcg_rd = cpu_reg(s, rd);
5365             gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5366         } else if (!dc_isar_feature(aa64_pauth, s)) {
5367             goto do_unallocated;
5368         }
5369         break;
5370     case MAP(1, 0x01, 0x03): /* PACDB */
5371         if (s->pauth_active) {
5372             tcg_rd = cpu_reg(s, rd);
5373             gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5374         } else if (!dc_isar_feature(aa64_pauth, s)) {
5375             goto do_unallocated;
5376         }
5377         break;
5378     case MAP(1, 0x01, 0x04): /* AUTIA */
5379         if (s->pauth_active) {
5380             tcg_rd = cpu_reg(s, rd);
5381             gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5382         } else if (!dc_isar_feature(aa64_pauth, s)) {
5383             goto do_unallocated;
5384         }
5385         break;
5386     case MAP(1, 0x01, 0x05): /* AUTIB */
5387         if (s->pauth_active) {
5388             tcg_rd = cpu_reg(s, rd);
5389             gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5390         } else if (!dc_isar_feature(aa64_pauth, s)) {
5391             goto do_unallocated;
5392         }
5393         break;
5394     case MAP(1, 0x01, 0x06): /* AUTDA */
5395         if (s->pauth_active) {
5396             tcg_rd = cpu_reg(s, rd);
5397             gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5398         } else if (!dc_isar_feature(aa64_pauth, s)) {
5399             goto do_unallocated;
5400         }
5401         break;
5402     case MAP(1, 0x01, 0x07): /* AUTDB */
5403         if (s->pauth_active) {
5404             tcg_rd = cpu_reg(s, rd);
5405             gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5406         } else if (!dc_isar_feature(aa64_pauth, s)) {
5407             goto do_unallocated;
5408         }
5409         break;
5410     case MAP(1, 0x01, 0x08): /* PACIZA */
5411         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5412             goto do_unallocated;
5413         } else if (s->pauth_active) {
5414             tcg_rd = cpu_reg(s, rd);
5415             gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5416         }
5417         break;
5418     case MAP(1, 0x01, 0x09): /* PACIZB */
5419         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5420             goto do_unallocated;
5421         } else if (s->pauth_active) {
5422             tcg_rd = cpu_reg(s, rd);
5423             gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5424         }
5425         break;
5426     case MAP(1, 0x01, 0x0a): /* PACDZA */
5427         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5428             goto do_unallocated;
5429         } else if (s->pauth_active) {
5430             tcg_rd = cpu_reg(s, rd);
5431             gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5432         }
5433         break;
5434     case MAP(1, 0x01, 0x0b): /* PACDZB */
5435         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5436             goto do_unallocated;
5437         } else if (s->pauth_active) {
5438             tcg_rd = cpu_reg(s, rd);
5439             gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5440         }
5441         break;
5442     case MAP(1, 0x01, 0x0c): /* AUTIZA */
5443         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5444             goto do_unallocated;
5445         } else if (s->pauth_active) {
5446             tcg_rd = cpu_reg(s, rd);
5447             gen_helper_autia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5448         }
5449         break;
5450     case MAP(1, 0x01, 0x0d): /* AUTIZB */
5451         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5452             goto do_unallocated;
5453         } else if (s->pauth_active) {
5454             tcg_rd = cpu_reg(s, rd);
5455             gen_helper_autib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5456         }
5457         break;
5458     case MAP(1, 0x01, 0x0e): /* AUTDZA */
5459         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5460             goto do_unallocated;
5461         } else if (s->pauth_active) {
5462             tcg_rd = cpu_reg(s, rd);
5463             gen_helper_autda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5464         }
5465         break;
5466     case MAP(1, 0x01, 0x0f): /* AUTDZB */
5467         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5468             goto do_unallocated;
5469         } else if (s->pauth_active) {
5470             tcg_rd = cpu_reg(s, rd);
5471             gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5472         }
5473         break;
5474     case MAP(1, 0x01, 0x10): /* XPACI */
5475         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5476             goto do_unallocated;
5477         } else if (s->pauth_active) {
5478             tcg_rd = cpu_reg(s, rd);
5479             gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
5480         }
5481         break;
5482     case MAP(1, 0x01, 0x11): /* XPACD */
5483         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5484             goto do_unallocated;
5485         } else if (s->pauth_active) {
5486             tcg_rd = cpu_reg(s, rd);
5487             gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
5488         }
5489         break;
5490     default:
5491     do_unallocated:
5492         unallocated_encoding(s);
5493         break;
5494     }
5495 
5496 #undef MAP
5497 }
5498 
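/*
 * UDIV/SDIV.  For the 32-bit signed form the operands are sign-extended
 * so that the 64-bit helper produces the architecturally correct 32-bit
 * quotient (e.g. -8 / 2).  The architected corner cases (division by
 * zero yields 0, INT_MIN / -1 wraps) are handled in the helpers.
 */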
5499 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5500                        unsigned int rm, unsigned int rn, unsigned int rd)
5501 {
5502     TCGv_i64 tcg_n, tcg_m, tcg_rd;
5503     tcg_rd = cpu_reg(s, rd);
5504 
5505     if (!sf && is_signed) {
5506         tcg_n = tcg_temp_new_i64();
5507         tcg_m = tcg_temp_new_i64();
5508         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5509         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5510     } else {
5511         tcg_n = read_cpu_reg(s, rn, sf);
5512         tcg_m = read_cpu_reg(s, rm, sf);
5513     }
5514 
5515     if (is_signed) {
5516         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5517     } else {
5518         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5519     }
5520 
5521     if (!sf) { /* zero extend final result */
5522         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5523     }
5524 }
5525 
5526 /* LSLV, LSRV, ASRV, RORV */
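/*
 * The architecture defines the shift amount as Rm modulo the register
 * width, hence the AND with 63 (or 31) below rather than a range check.
 */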
5527 static void handle_shift_reg(DisasContext *s,
5528                              enum a64_shift_type shift_type, unsigned int sf,
5529                              unsigned int rm, unsigned int rn, unsigned int rd)
5530 {
5531     TCGv_i64 tcg_shift = tcg_temp_new_i64();
5532     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5533     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5534 
5535     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5536     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5537 }
5538 
5539 /* CRC32[BHWX], CRC32C[BHWX] */
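/*
 * sz is log2 of the value size in bytes (B/H/W/X); only the X form may
 * (and must) have sf == 1.  e.g. CRC32CW is crc32c == true, sz == 2.
 */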
5540 static void handle_crc32(DisasContext *s,
5541                          unsigned int sf, unsigned int sz, bool crc32c,
5542                          unsigned int rm, unsigned int rn, unsigned int rd)
5543 {
5544     TCGv_i64 tcg_acc, tcg_val;
5545     TCGv_i32 tcg_bytes;
5546 
5547     if (!dc_isar_feature(aa64_crc32, s)
5548         || (sf == 1 && sz != 3)
5549         || (sf == 0 && sz == 3)) {
5550         unallocated_encoding(s);
5551         return;
5552     }
5553 
5554     if (sz == 3) {
5555         tcg_val = cpu_reg(s, rm);
5556     } else {
5557         uint64_t mask;
5558         switch (sz) {
5559         case 0:
5560             mask = 0xFF;
5561             break;
5562         case 1:
5563             mask = 0xFFFF;
5564             break;
5565         case 2:
5566             mask = 0xFFFFFFFF;
5567             break;
5568         default:
5569             g_assert_not_reached();
5570         }
5571         tcg_val = tcg_temp_new_i64();
5572         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5573     }
5574 
5575     tcg_acc = cpu_reg(s, rn);
5576     tcg_bytes = tcg_constant_i32(1 << sz);
5577 
5578     if (crc32c) {
5579         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5580     } else {
5581         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5582     }
5583 }
5584 
5585 /* Data-processing (2 source)
5586  *   31   30  29 28             21 20  16 15    10 9    5 4    0
5587  * +----+---+---+-----------------+------+--------+------+------+
5588  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
5589  * +----+---+---+-----------------+------+--------+------+------+
5590  */
5591 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5592 {
5593     unsigned int sf, rm, opcode, rn, rd, setflag;
5594     sf = extract32(insn, 31, 1);
5595     setflag = extract32(insn, 29, 1);
5596     rm = extract32(insn, 16, 5);
5597     opcode = extract32(insn, 10, 6);
5598     rn = extract32(insn, 5, 5);
5599     rd = extract32(insn, 0, 5);
5600 
5601     if (setflag && opcode != 0) {
5602         unallocated_encoding(s);
5603         return;
5604     }
5605 
5606     switch (opcode) {
5607     case 0: /* SUBP(S) */
5608         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5609             goto do_unallocated;
5610         } else {
5611             TCGv_i64 tcg_n, tcg_m, tcg_d;
5612 
5613             tcg_n = read_cpu_reg_sp(s, rn, true);
5614             tcg_m = read_cpu_reg_sp(s, rm, true);
5615             tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
5616             tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
5617             tcg_d = cpu_reg(s, rd);
5618 
5619             if (setflag) {
5620                 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
5621             } else {
5622                 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
5623             }
5624         }
5625         break;
5626     case 2: /* UDIV */
5627         handle_div(s, false, sf, rm, rn, rd);
5628         break;
5629     case 3: /* SDIV */
5630         handle_div(s, true, sf, rm, rn, rd);
5631         break;
5632     case 4: /* IRG */
5633         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5634             goto do_unallocated;
5635         }
5636         if (s->ata) {
5637             gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
5638                            cpu_reg_sp(s, rn), cpu_reg(s, rm));
5639         } else {
5640             gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
5641                                              cpu_reg_sp(s, rn));
5642         }
5643         break;
5644     case 5: /* GMI */
5645         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5646             goto do_unallocated;
5647         } else {
5648             TCGv_i64 t = tcg_temp_new_i64();
5649 
5650             tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
5651             tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
5652             tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
5653         }
5654         break;
5655     case 8: /* LSLV */
5656         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5657         break;
5658     case 9: /* LSRV */
5659         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5660         break;
5661     case 10: /* ASRV */
5662         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5663         break;
5664     case 11: /* RORV */
5665         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5666         break;
5667     case 12: /* PACGA */
5668         if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5669             goto do_unallocated;
5670         }
5671         gen_helper_pacga(cpu_reg(s, rd), cpu_env,
5672                          cpu_reg(s, rn), cpu_reg_sp(s, rm));
5673         break;
5674     case 16:
5675     case 17:
5676     case 18:
5677     case 19:
5678     case 20:
5679     case 21:
5680     case 22:
5681     case 23: /* CRC32 */
5682     {
5683         int sz = extract32(opcode, 0, 2);
5684         bool crc32c = extract32(opcode, 2, 1);
5685         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5686         break;
5687     }
5688     default:
5689     do_unallocated:
5690         unallocated_encoding(s);
5691         break;
5692     }
5693 }
5694 
5695 /*
5696  * Data processing - register
5697  *  31  30 29  28      25    21  20  16      10         0
5698  * +--+---+--+---+-------+-----+-------+-------+---------+
5699  * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
5700  * +--+---+--+---+-------+-----+-------+-------+---------+
5701  */
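/*
 * Decode example (illustrative): ADD X0, X1, X2 is 0x8b020020, giving
 * op1 == 0 and op2 == 0b1000, which routes to disas_add_sub_reg().
 */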
5702 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5703 {
5704     int op0 = extract32(insn, 30, 1);
5705     int op1 = extract32(insn, 28, 1);
5706     int op2 = extract32(insn, 21, 4);
5707     int op3 = extract32(insn, 10, 6);
5708 
5709     if (!op1) {
5710         if (op2 & 8) {
5711             if (op2 & 1) {
5712                 /* Add/sub (extended register) */
5713                 disas_add_sub_ext_reg(s, insn);
5714             } else {
5715                 /* Add/sub (shifted register) */
5716                 disas_add_sub_reg(s, insn);
5717             }
5718         } else {
5719             /* Logical (shifted register) */
5720             disas_logic_reg(s, insn);
5721         }
5722         return;
5723     }
5724 
5725     switch (op2) {
5726     case 0x0:
5727         switch (op3) {
5728         case 0x00: /* Add/subtract (with carry) */
5729             disas_adc_sbc(s, insn);
5730             break;
5731 
5732         case 0x01: /* Rotate right into flags */
5733         case 0x21:
5734             disas_rotate_right_into_flags(s, insn);
5735             break;
5736 
5737         case 0x02: /* Evaluate into flags */
5738         case 0x12:
5739         case 0x22:
5740         case 0x32:
5741             disas_evaluate_into_flags(s, insn);
5742             break;
5743 
5744         default:
5745             goto do_unallocated;
5746         }
5747         break;
5748 
5749     case 0x2: /* Conditional compare */
5750         disas_cc(s, insn); /* both imm and reg forms */
5751         break;
5752 
5753     case 0x4: /* Conditional select */
5754         disas_cond_select(s, insn);
5755         break;
5756 
5757     case 0x6: /* Data-processing */
5758         if (op0) {    /* (1 source) */
5759             disas_data_proc_1src(s, insn);
5760         } else {      /* (2 source) */
5761             disas_data_proc_2src(s, insn);
5762         }
5763         break;
5764     case 0x8 ... 0xf: /* (3 source) */
5765         disas_data_proc_3src(s, insn);
5766         break;
5767 
5768     default:
5769     do_unallocated:
5770         unallocated_encoding(s);
5771         break;
5772     }
5773 }
5774 
5775 static void handle_fp_compare(DisasContext *s, int size,
5776                               unsigned int rn, unsigned int rm,
5777                               bool cmp_with_zero, bool signal_all_nans)
5778 {
5779     TCGv_i64 tcg_flags = tcg_temp_new_i64();
5780     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
5781 
5782     if (size == MO_64) {
5783         TCGv_i64 tcg_vn, tcg_vm;
5784 
5785         tcg_vn = read_fp_dreg(s, rn);
5786         if (cmp_with_zero) {
5787             tcg_vm = tcg_constant_i64(0);
5788         } else {
5789             tcg_vm = read_fp_dreg(s, rm);
5790         }
5791         if (signal_all_nans) {
5792             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5793         } else {
5794             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5795         }
5796     } else {
5797         TCGv_i32 tcg_vn = tcg_temp_new_i32();
5798         TCGv_i32 tcg_vm = tcg_temp_new_i32();
5799 
5800         read_vec_element_i32(s, tcg_vn, rn, 0, size);
5801         if (cmp_with_zero) {
5802             tcg_gen_movi_i32(tcg_vm, 0);
5803         } else {
5804             read_vec_element_i32(s, tcg_vm, rm, 0, size);
5805         }
5806 
5807         switch (size) {
5808         case MO_32:
5809             if (signal_all_nans) {
5810                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5811             } else {
5812                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5813             }
5814             break;
5815         case MO_16:
5816             if (signal_all_nans) {
5817                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5818             } else {
5819                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5820             }
5821             break;
5822         default:
5823             g_assert_not_reached();
5824         }
5825     }
5826 
5827     gen_set_nzcv(tcg_flags);
5828 }
5829 
5830 /* Floating point compare
5831  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
5832  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5833  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
5834  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5835  */
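/*
 * opc<0> selects comparison against +0.0 and opc<1> the signalling form:
 * e.g. FCMP S0, #0.0 has opc == 0b01 and FCMPE S0, S1 has opc == 0b10.
 */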
5836 static void disas_fp_compare(DisasContext *s, uint32_t insn)
5837 {
5838     unsigned int mos, type, rm, op, rn, opc, op2r;
5839     int size;
5840 
5841     mos = extract32(insn, 29, 3);
5842     type = extract32(insn, 22, 2);
5843     rm = extract32(insn, 16, 5);
5844     op = extract32(insn, 14, 2);
5845     rn = extract32(insn, 5, 5);
5846     opc = extract32(insn, 3, 2);
5847     op2r = extract32(insn, 0, 3);
5848 
5849     if (mos || op || op2r) {
5850         unallocated_encoding(s);
5851         return;
5852     }
5853 
5854     switch (type) {
5855     case 0:
5856         size = MO_32;
5857         break;
5858     case 1:
5859         size = MO_64;
5860         break;
5861     case 3:
5862         size = MO_16;
5863         if (dc_isar_feature(aa64_fp16, s)) {
5864             break;
5865         }
5866         /* fallthru */
5867     default:
5868         unallocated_encoding(s);
5869         return;
5870     }
5871 
5872     if (!fp_access_check(s)) {
5873         return;
5874     }
5875 
5876     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
5877 }
5878 
5879 /* Floating point conditional compare
5880  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
5881  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5882  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
5883  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5884  */
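/*
 * Semantics: if <cond> holds, compare Rn with Rm (signalling when
 * op == 1, i.e. FCCMPE) and set NZCV from the result; otherwise set
 * NZCV to the immediate #nzcv.  The codegen below branches around
 * handle_fp_compare() for the "condition failed" path.
 */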
5885 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
5886 {
5887     unsigned int mos, type, rm, cond, rn, op, nzcv;
5888     TCGLabel *label_continue = NULL;
5889     int size;
5890 
5891     mos = extract32(insn, 29, 3);
5892     type = extract32(insn, 22, 2);
5893     rm = extract32(insn, 16, 5);
5894     cond = extract32(insn, 12, 4);
5895     rn = extract32(insn, 5, 5);
5896     op = extract32(insn, 4, 1);
5897     nzcv = extract32(insn, 0, 4);
5898 
5899     if (mos) {
5900         unallocated_encoding(s);
5901         return;
5902     }
5903 
5904     switch (type) {
5905     case 0:
5906         size = MO_32;
5907         break;
5908     case 1:
5909         size = MO_64;
5910         break;
5911     case 3:
5912         size = MO_16;
5913         if (dc_isar_feature(aa64_fp16, s)) {
5914             break;
5915         }
5916         /* fallthru */
5917     default:
5918         unallocated_encoding(s);
5919         return;
5920     }
5921 
5922     if (!fp_access_check(s)) {
5923         return;
5924     }
5925 
5926     if (cond < 0x0e) { /* not always */
5927         TCGLabel *label_match = gen_new_label();
5928         label_continue = gen_new_label();
5929         arm_gen_test_cc(cond, label_match);
5930         /* nomatch: */
5931         gen_set_nzcv(tcg_constant_i64(nzcv << 28));
5932         tcg_gen_br(label_continue);
5933         gen_set_label(label_match);
5934     }
5935 
5936     handle_fp_compare(s, size, rn, rm, false, op);
5937 
5938     if (cond < 0x0e) {
5939         gen_set_label(label_continue);
5940     }
5941 }
5942 
5943 /* Floating point conditional select
5944  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
5945  * +---+---+---+-----------+------+---+------+------+-----+------+------+
5946  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
5947  * +---+---+---+-----------+------+---+------+------+-----+------+------+
5948  */
5949 static void disas_fp_csel(DisasContext *s, uint32_t insn)
5950 {
5951     unsigned int mos, type, rm, cond, rn, rd;
5952     TCGv_i64 t_true, t_false;
5953     DisasCompare64 c;
5954     MemOp sz;
5955 
5956     mos = extract32(insn, 29, 3);
5957     type = extract32(insn, 22, 2);
5958     rm = extract32(insn, 16, 5);
5959     cond = extract32(insn, 12, 4);
5960     rn = extract32(insn, 5, 5);
5961     rd = extract32(insn, 0, 5);
5962 
5963     if (mos) {
5964         unallocated_encoding(s);
5965         return;
5966     }
5967 
5968     switch (type) {
5969     case 0:
5970         sz = MO_32;
5971         break;
5972     case 1:
5973         sz = MO_64;
5974         break;
5975     case 3:
5976         sz = MO_16;
5977         if (dc_isar_feature(aa64_fp16, s)) {
5978             break;
5979         }
5980         /* fallthru */
5981     default:
5982         unallocated_encoding(s);
5983         return;
5984     }
5985 
5986     if (!fp_access_check(s)) {
5987         return;
5988     }
5989 
5990     /* Zero extend sreg & hreg inputs to 64 bits now.  */
5991     t_true = tcg_temp_new_i64();
5992     t_false = tcg_temp_new_i64();
5993     read_vec_element(s, t_true, rn, 0, sz);
5994     read_vec_element(s, t_false, rm, 0, sz);
5995 
5996     a64_test_cc(&c, cond);
5997     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
5998                         t_true, t_false);
5999 
6000     /* Note that sregs & hregs write back zeros to the high bits,
6001        and we've already done the zero-extension.  */
6002     write_fp_dreg(s, rd, t_true);
6003 }
6004 
6005 /* Floating-point data-processing (1 source) - half precision */
6006 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
6007 {
6008     TCGv_ptr fpst = NULL;
6009     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
6010     TCGv_i32 tcg_res = tcg_temp_new_i32();
6011 
6012     switch (opcode) {
6013     case 0x0: /* FMOV */
6014         tcg_gen_mov_i32(tcg_res, tcg_op);
6015         break;
6016     case 0x1: /* FABS */
6017         tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
6018         break;
6019     case 0x2: /* FNEG */
6020         tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
6021         break;
6022     case 0x3: /* FSQRT */
6023         fpst = fpstatus_ptr(FPST_FPCR_F16);
6024         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
6025         break;
6026     case 0x8: /* FRINTN */
6027     case 0x9: /* FRINTP */
6028     case 0xa: /* FRINTM */
6029     case 0xb: /* FRINTZ */
6030     case 0xc: /* FRINTA */
6031     {
6032         TCGv_i32 tcg_rmode;
6033 
6034         fpst = fpstatus_ptr(FPST_FPCR_F16);
6035         tcg_rmode = gen_set_rmode(opcode & 7, fpst);
6036         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
6037         gen_restore_rmode(tcg_rmode, fpst);
6038         break;
6039     }
6040     case 0xe: /* FRINTX */
6041         fpst = fpstatus_ptr(FPST_FPCR_F16);
6042         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
6043         break;
6044     case 0xf: /* FRINTI */
6045         fpst = fpstatus_ptr(FPST_FPCR_F16);
6046         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
6047         break;
6048     default:
6049         g_assert_not_reached();
6050     }
6051 
6052     write_fp_sreg(s, rd, tcg_res);
6053 }
6054 
6055 /* Floating-point data-processing (1 source) - single precision */
6056 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
6057 {
6058     void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
6059     TCGv_i32 tcg_op, tcg_res;
6060     TCGv_ptr fpst;
6061     int rmode = -1; /* < 0 means "use the ambient FPCR rounding mode" */
6062 
6063     tcg_op = read_fp_sreg(s, rn);
6064     tcg_res = tcg_temp_new_i32();
6065 
6066     switch (opcode) {
6067     case 0x0: /* FMOV */
6068         tcg_gen_mov_i32(tcg_res, tcg_op);
6069         goto done;
6070     case 0x1: /* FABS */
6071         gen_helper_vfp_abss(tcg_res, tcg_op);
6072         goto done;
6073     case 0x2: /* FNEG */
6074         gen_helper_vfp_negs(tcg_res, tcg_op);
6075         goto done;
6076     case 0x3: /* FSQRT */
6077         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
6078         goto done;
6079     case 0x6: /* BFCVT */
6080         gen_fpst = gen_helper_bfcvt;
6081         break;
6082     case 0x8: /* FRINTN */
6083     case 0x9: /* FRINTP */
6084     case 0xa: /* FRINTM */
6085     case 0xb: /* FRINTZ */
6086     case 0xc: /* FRINTA */
6087         rmode = opcode & 7;
6088         gen_fpst = gen_helper_rints;
6089         break;
6090     case 0xe: /* FRINTX */
6091         gen_fpst = gen_helper_rints_exact;
6092         break;
6093     case 0xf: /* FRINTI */
6094         gen_fpst = gen_helper_rints;
6095         break;
6096     case 0x10: /* FRINT32Z */
6097         rmode = FPROUNDING_ZERO;
6098         gen_fpst = gen_helper_frint32_s;
6099         break;
6100     case 0x11: /* FRINT32X */
6101         gen_fpst = gen_helper_frint32_s;
6102         break;
6103     case 0x12: /* FRINT64Z */
6104         rmode = FPROUNDING_ZERO;
6105         gen_fpst = gen_helper_frint64_s;
6106         break;
6107     case 0x13: /* FRINT64X */
6108         gen_fpst = gen_helper_frint64_s;
6109         break;
6110     default:
6111         g_assert_not_reached();
6112     }
6113 
6114     fpst = fpstatus_ptr(FPST_FPCR);
6115     if (rmode >= 0) {
6116         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6117         gen_fpst(tcg_res, tcg_op, fpst);
6118         gen_restore_rmode(tcg_rmode, fpst);
6119     } else {
6120         gen_fpst(tcg_res, tcg_op, fpst);
6121     }
6122 
6123  done:
6124     write_fp_sreg(s, rd, tcg_res);
6125 }
6126 
6127 /* Floating-point data-processing (1 source) - double precision */
6128 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
6129 {
6130     void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
6131     TCGv_i64 tcg_op, tcg_res;
6132     TCGv_ptr fpst;
6133     int rmode = -1;
6134 
6135     switch (opcode) {
6136     case 0x0: /* FMOV */
6137         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
6138         return;
6139     }
6140 
6141     tcg_op = read_fp_dreg(s, rn);
6142     tcg_res = tcg_temp_new_i64();
6143 
6144     switch (opcode) {
6145     case 0x1: /* FABS */
6146         gen_helper_vfp_absd(tcg_res, tcg_op);
6147         goto done;
6148     case 0x2: /* FNEG */
6149         gen_helper_vfp_negd(tcg_res, tcg_op);
6150         goto done;
6151     case 0x3: /* FSQRT */
6152         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
6153         goto done;
6154     case 0x8: /* FRINTN */
6155     case 0x9: /* FRINTP */
6156     case 0xa: /* FRINTM */
6157     case 0xb: /* FRINTZ */
6158     case 0xc: /* FRINTA */
6159         rmode = opcode & 7;
6160         gen_fpst = gen_helper_rintd;
6161         break;
6162     case 0xe: /* FRINTX */
6163         gen_fpst = gen_helper_rintd_exact;
6164         break;
6165     case 0xf: /* FRINTI */
6166         gen_fpst = gen_helper_rintd;
6167         break;
6168     case 0x10: /* FRINT32Z */
6169         rmode = FPROUNDING_ZERO;
6170         gen_fpst = gen_helper_frint32_d;
6171         break;
6172     case 0x11: /* FRINT32X */
6173         gen_fpst = gen_helper_frint32_d;
6174         break;
6175     case 0x12: /* FRINT64Z */
6176         rmode = FPROUNDING_ZERO;
6177         gen_fpst = gen_helper_frint64_d;
6178         break;
6179     case 0x13: /* FRINT64X */
6180         gen_fpst = gen_helper_frint64_d;
6181         break;
6182     default:
6183         g_assert_not_reached();
6184     }
6185 
6186     fpst = fpstatus_ptr(FPST_FPCR);
6187     if (rmode >= 0) {
6188         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6189         gen_fpst(tcg_res, tcg_op, fpst);
6190         gen_restore_rmode(tcg_rmode, fpst);
6191     } else {
6192         gen_fpst(tcg_res, tcg_op, fpst);
6193     }
6194 
6195  done:
6196     write_fp_dreg(s, rd, tcg_res);
6197 }
6198 
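/*
 * FCVT between half, single and double precision.  ntype is the source
 * type and dtype the destination, using the FP "type" encoding
 * (0 = single, 1 = double, 3 = half); ntype == dtype is rejected by the
 * caller.
 */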
6199 static void handle_fp_fcvt(DisasContext *s, int opcode,
6200                            int rd, int rn, int dtype, int ntype)
6201 {
6202     switch (ntype) {
6203     case 0x0:
6204     {
6205         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6206         if (dtype == 1) {
6207             /* Single to double */
6208             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6209             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
6210             write_fp_dreg(s, rd, tcg_rd);
6211         } else {
6212             /* Single to half */
6213             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6214             TCGv_i32 ahp = get_ahp_flag();
6215             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6216 
6217             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6218             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6219             write_fp_sreg(s, rd, tcg_rd);
6220         }
6221         break;
6222     }
6223     case 0x1:
6224     {
6225         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
6226         TCGv_i32 tcg_rd = tcg_temp_new_i32();
6227         if (dtype == 0) {
6228             /* Double to single */
6229             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
6230         } else {
6231             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6232             TCGv_i32 ahp = get_ahp_flag();
6233             /* Double to half */
6234             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6235             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6236         }
6237         write_fp_sreg(s, rd, tcg_rd);
6238         break;
6239     }
6240     case 0x3:
6241     {
6242         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6243         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
6244         TCGv_i32 tcg_ahp = get_ahp_flag();
6245         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
6246         if (dtype == 0) {
6247             /* Half to single */
6248             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6249             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6250             write_fp_sreg(s, rd, tcg_rd);
6251         } else {
6252             /* Half to double */
6253             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6254             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6255             write_fp_dreg(s, rd, tcg_rd);
6256         }
6257         break;
6258     }
6259     default:
6260         g_assert_not_reached();
6261     }
6262 }
6263 
6264 /* Floating point data-processing (1 source)
6265  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
6266  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6267  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
6268  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6269  */
6270 static void disas_fp_1src(DisasContext *s, uint32_t insn)
6271 {
6272     int mos = extract32(insn, 29, 3);
6273     int type = extract32(insn, 22, 2);
6274     int opcode = extract32(insn, 15, 6);
6275     int rn = extract32(insn, 5, 5);
6276     int rd = extract32(insn, 0, 5);
6277 
6278     if (mos) {
6279         goto do_unallocated;
6280     }
6281 
6282     switch (opcode) {
6283     case 0x4: case 0x5: case 0x7:
6284     {
6285         /* FCVT between half, single and double precision */
6286         int dtype = extract32(opcode, 0, 2);
6287         if (type == 2 || dtype == type) {
6288             goto do_unallocated;
6289         }
6290         if (!fp_access_check(s)) {
6291             return;
6292         }
6293 
6294         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
6295         break;
6296     }
6297 
6298     case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
6299         if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
6300             goto do_unallocated;
6301         }
6302         /* fall through */
6303     case 0x0 ... 0x3:
6304     case 0x8 ... 0xc:
6305     case 0xe ... 0xf:
6306         /* 32-to-32 and 64-to-64 ops */
6307         switch (type) {
6308         case 0:
6309             if (!fp_access_check(s)) {
6310                 return;
6311             }
6312             handle_fp_1src_single(s, opcode, rd, rn);
6313             break;
6314         case 1:
6315             if (!fp_access_check(s)) {
6316                 return;
6317             }
6318             handle_fp_1src_double(s, opcode, rd, rn);
6319             break;
6320         case 3:
6321             if (!dc_isar_feature(aa64_fp16, s)) {
6322                 goto do_unallocated;
6323             }
6324 
6325             if (!fp_access_check(s)) {
6326                 return;
6327             }
6328             handle_fp_1src_half(s, opcode, rd, rn);
6329             break;
6330         default:
6331             goto do_unallocated;
6332         }
6333         break;
6334 
6335     case 0x6:
6336         switch (type) {
6337         case 1: /* BFCVT */
6338             if (!dc_isar_feature(aa64_bf16, s)) {
6339                 goto do_unallocated;
6340             }
6341             if (!fp_access_check(s)) {
6342                 return;
6343             }
6344             handle_fp_1src_single(s, opcode, rd, rn);
6345             break;
6346         default:
6347             goto do_unallocated;
6348         }
6349         break;
6350 
6351     default:
6352     do_unallocated:
6353         unallocated_encoding(s);
6354         break;
6355     }
6356 }
6357 
6358 /* Floating-point data-processing (2 source) - single precision */
6359 static void handle_fp_2src_single(DisasContext *s, int opcode,
6360                                   int rd, int rn, int rm)
6361 {
6362     TCGv_i32 tcg_op1;
6363     TCGv_i32 tcg_op2;
6364     TCGv_i32 tcg_res;
6365     TCGv_ptr fpst;
6366 
6367     tcg_res = tcg_temp_new_i32();
6368     fpst = fpstatus_ptr(FPST_FPCR);
6369     tcg_op1 = read_fp_sreg(s, rn);
6370     tcg_op2 = read_fp_sreg(s, rm);
6371 
6372     switch (opcode) {
6373     case 0x0: /* FMUL */
6374         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6375         break;
6376     case 0x1: /* FDIV */
6377         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
6378         break;
6379     case 0x2: /* FADD */
6380         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6381         break;
6382     case 0x3: /* FSUB */
6383         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
6384         break;
6385     case 0x4: /* FMAX */
6386         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6387         break;
6388     case 0x5: /* FMIN */
6389         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6390         break;
6391     case 0x6: /* FMAXNM */
6392         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6393         break;
6394     case 0x7: /* FMINNM */
6395         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6396         break;
6397     case 0x8: /* FNMUL */
6398         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6399         gen_helper_vfp_negs(tcg_res, tcg_res);
6400         break;
6401     }
6402 
6403     write_fp_sreg(s, rd, tcg_res);
6404 }
6405 
6406 /* Floating-point data-processing (2 source) - double precision */
6407 static void handle_fp_2src_double(DisasContext *s, int opcode,
6408                                   int rd, int rn, int rm)
6409 {
6410     TCGv_i64 tcg_op1;
6411     TCGv_i64 tcg_op2;
6412     TCGv_i64 tcg_res;
6413     TCGv_ptr fpst;
6414 
6415     tcg_res = tcg_temp_new_i64();
6416     fpst = fpstatus_ptr(FPST_FPCR);
6417     tcg_op1 = read_fp_dreg(s, rn);
6418     tcg_op2 = read_fp_dreg(s, rm);
6419 
6420     switch (opcode) {
6421     case 0x0: /* FMUL */
6422         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6423         break;
6424     case 0x1: /* FDIV */
6425         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
6426         break;
6427     case 0x2: /* FADD */
6428         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6429         break;
6430     case 0x3: /* FSUB */
6431         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
6432         break;
6433     case 0x4: /* FMAX */
6434         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6435         break;
6436     case 0x5: /* FMIN */
6437         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6438         break;
6439     case 0x6: /* FMAXNM */
6440         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6441         break;
6442     case 0x7: /* FMINNM */
6443         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6444         break;
6445     case 0x8: /* FNMUL */
6446         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6447         gen_helper_vfp_negd(tcg_res, tcg_res);
6448         break;
6449     }
6450 
6451     write_fp_dreg(s, rd, tcg_res);
6452 }
6453 
6454 /* Floating-point data-processing (2 source) - half precision */
6455 static void handle_fp_2src_half(DisasContext *s, int opcode,
6456                                 int rd, int rn, int rm)
6457 {
6458     TCGv_i32 tcg_op1;
6459     TCGv_i32 tcg_op2;
6460     TCGv_i32 tcg_res;
6461     TCGv_ptr fpst;
6462 
6463     tcg_res = tcg_temp_new_i32();
6464     fpst = fpstatus_ptr(FPST_FPCR_F16);
6465     tcg_op1 = read_fp_hreg(s, rn);
6466     tcg_op2 = read_fp_hreg(s, rm);
6467 
6468     switch (opcode) {
6469     case 0x0: /* FMUL */
6470         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6471         break;
6472     case 0x1: /* FDIV */
6473         gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
6474         break;
6475     case 0x2: /* FADD */
6476         gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6477         break;
6478     case 0x3: /* FSUB */
6479         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6480         break;
6481     case 0x4: /* FMAX */
6482         gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6483         break;
6484     case 0x5: /* FMIN */
6485         gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6486         break;
6487     case 0x6: /* FMAXNM */
6488         gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6489         break;
6490     case 0x7: /* FMINNM */
6491         gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6492         break;
6493     case 0x8: /* FNMUL */
6494         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6495         tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6496         break;
6497     default:
6498         g_assert_not_reached();
6499     }
6500 
6501     write_fp_sreg(s, rd, tcg_res);
6502 }
6503 
6504 /* Floating point data-processing (2 source)
6505  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6506  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6507  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
6508  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6509  */
6510 static void disas_fp_2src(DisasContext *s, uint32_t insn)
6511 {
6512     int mos = extract32(insn, 29, 3);
6513     int type = extract32(insn, 22, 2);
6514     int rd = extract32(insn, 0, 5);
6515     int rn = extract32(insn, 5, 5);
6516     int rm = extract32(insn, 16, 5);
6517     int opcode = extract32(insn, 12, 4);
6518 
6519     if (opcode > 8 || mos) {
6520         unallocated_encoding(s);
6521         return;
6522     }
6523 
6524     switch (type) {
6525     case 0:
6526         if (!fp_access_check(s)) {
6527             return;
6528         }
6529         handle_fp_2src_single(s, opcode, rd, rn, rm);
6530         break;
6531     case 1:
6532         if (!fp_access_check(s)) {
6533             return;
6534         }
6535         handle_fp_2src_double(s, opcode, rd, rn, rm);
6536         break;
6537     case 3:
6538         if (!dc_isar_feature(aa64_fp16, s)) {
6539             unallocated_encoding(s);
6540             return;
6541         }
6542         if (!fp_access_check(s)) {
6543             return;
6544         }
6545         handle_fp_2src_half(s, opcode, rd, rn, rm);
6546         break;
6547     default:
6548         unallocated_encoding(s);
6549     }
6550 }
6551 
6552 /* Floating-point data-processing (3 source) - single precision */
6553 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6554                                   int rd, int rn, int rm, int ra)
6555 {
6556     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6557     TCGv_i32 tcg_res = tcg_temp_new_i32();
6558     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6559 
6560     tcg_op1 = read_fp_sreg(s, rn);
6561     tcg_op2 = read_fp_sreg(s, rm);
6562     tcg_op3 = read_fp_sreg(s, ra);
6563 
6564     /* These are fused multiply-add, and must be done as one
6565      * floating point operation with no rounding between the
6566      * multiplication and addition steps.
6567      * NB that doing the negations here as separate steps is
6568      * correct: an input NaN should come out with its sign bit
6569      * flipped if it is a negated input.
6570      */
6571     if (o1) {
6572         gen_helper_vfp_negs(tcg_op3, tcg_op3);
6573     }
6574 
6575     if (o0 != o1) {
6576         gen_helper_vfp_negs(tcg_op1, tcg_op1);
6577     }
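    /*
     * Net effect of the o1/o0 negations above, as o1:o0 (an
     * illustrative summary, matching the insn names):
     *   00 FMADD:   rn * rm + ra     01 FMSUB:  -(rn * rm) + ra
     *   10 FNMADD: -(rn * rm) - ra   11 FNMSUB:   rn * rm - ra
     */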
6578 
6579     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6580 
6581     write_fp_sreg(s, rd, tcg_res);
6582 }
6583 
6584 /* Floating-point data-processing (3 source) - double precision */
6585 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6586                                   int rd, int rn, int rm, int ra)
6587 {
6588     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6589     TCGv_i64 tcg_res = tcg_temp_new_i64();
6590     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6591 
6592     tcg_op1 = read_fp_dreg(s, rn);
6593     tcg_op2 = read_fp_dreg(s, rm);
6594     tcg_op3 = read_fp_dreg(s, ra);
6595 
6596     /* These are fused multiply-add, and must be done as one
6597      * floating point operation with no rounding between the
6598      * multiplication and addition steps.
6599      * NB that doing the negations here as separate steps is
6600      * correct: an input NaN should come out with its sign bit
6601      * flipped if it is a negated input.
6602      */
6603     if (o1) {
6604         gen_helper_vfp_negd(tcg_op3, tcg_op3);
6605     }
6606 
6607     if (o0 != o1) {
6608         gen_helper_vfp_negd(tcg_op1, tcg_op1);
6609     }
6610 
6611     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6612 
6613     write_fp_dreg(s, rd, tcg_res);
6614 }
6615 
6616 /* Floating-point data-processing (3 source) - half precision */
6617 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6618                                 int rd, int rn, int rm, int ra)
6619 {
6620     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6621     TCGv_i32 tcg_res = tcg_temp_new_i32();
6622     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
6623 
6624     tcg_op1 = read_fp_hreg(s, rn);
6625     tcg_op2 = read_fp_hreg(s, rm);
6626     tcg_op3 = read_fp_hreg(s, ra);
6627 
6628     /* These are fused multiply-add, and must be done as one
6629      * floating point operation with no rounding between the
6630      * multiplication and addition steps.
6631      * NB that doing the negations here as separate steps is
6632      * correct: an input NaN should come out with its sign bit
6633      * flipped if it is a negated input.
6634      */
6635     if (o1) {
6636         tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6637     }
6638 
6639     if (o0 != o1) {
6640         tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6641     }
6642 
6643     gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6644 
6645     write_fp_sreg(s, rd, tcg_res);
6646 }
6647 
6648 /* Floating point data-processing (3 source)
6649  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
6650  * +---+---+---+-----------+------+----+------+----+------+------+------+
6651  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6652  * +---+---+---+-----------+------+----+------+----+------+------+------+
6653  */
6654 static void disas_fp_3src(DisasContext *s, uint32_t insn)
6655 {
6656     int mos = extract32(insn, 29, 3);
6657     int type = extract32(insn, 22, 2);
6658     int rd = extract32(insn, 0, 5);
6659     int rn = extract32(insn, 5, 5);
6660     int ra = extract32(insn, 10, 5);
6661     int rm = extract32(insn, 16, 5);
6662     bool o0 = extract32(insn, 15, 1);
6663     bool o1 = extract32(insn, 21, 1);
6664 
6665     if (mos) {
6666         unallocated_encoding(s);
6667         return;
6668     }
6669 
6670     switch (type) {
6671     case 0:
6672         if (!fp_access_check(s)) {
6673             return;
6674         }
6675         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6676         break;
6677     case 1:
6678         if (!fp_access_check(s)) {
6679             return;
6680         }
6681         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6682         break;
6683     case 3:
6684         if (!dc_isar_feature(aa64_fp16, s)) {
6685             unallocated_encoding(s);
6686             return;
6687         }
6688         if (!fp_access_check(s)) {
6689             return;
6690         }
6691         handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6692         break;
6693     default:
6694         unallocated_encoding(s);
6695     }
6696 }
6697 
6698 /* Floating point immediate
6699  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
6700  * +---+---+---+-----------+------+---+------------+-------+------+------+
6701  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
6702  * +---+---+---+-----------+------+---+------------+-------+------+------+
6703  */
6704 static void disas_fp_imm(DisasContext *s, uint32_t insn)
6705 {
6706     int rd = extract32(insn, 0, 5);
6707     int imm5 = extract32(insn, 5, 5);
6708     int imm8 = extract32(insn, 13, 8);
6709     int type = extract32(insn, 22, 2);
6710     int mos = extract32(insn, 29, 3);
6711     uint64_t imm;
6712     MemOp sz;
6713 
6714     if (mos || imm5) {
6715         unallocated_encoding(s);
6716         return;
6717     }
6718 
6719     switch (type) {
6720     case 0:
6721         sz = MO_32;
6722         break;
6723     case 1:
6724         sz = MO_64;
6725         break;
6726     case 3:
6727         sz = MO_16;
6728         if (dc_isar_feature(aa64_fp16, s)) {
6729             break;
6730         }
6731         /* fallthru */
6732     default:
6733         unallocated_encoding(s);
6734         return;
6735     }
6736 
6737     if (!fp_access_check(s)) {
6738         return;
6739     }
6740 
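    /*
     * Worked example (illustrative): for MO_32, imm8 == 0x70 expands
     * to 0x3f800000, i.e. the single-precision constant 1.0.
     */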
6741     imm = vfp_expand_imm(sz, imm8);
6742     write_fp_dreg(s, rd, tcg_constant_i64(imm));
6743 }
6744 
6745 /* Handle floating point <=> fixed point conversions. Note that we can
6746  * also deal with fp <=> integer conversions as a special case (scale == 64).
6747  * OPTME: consider handling that special case specially, or at least skipping
6748  * the call to scalbn in the helpers for zero shifts.
6749  */
6750 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
6751                            bool itof, int rmode, int scale, int sf, int type)
6752 {
6753     bool is_signed = !(opcode & 1);
6754     TCGv_ptr tcg_fpstatus;
6755     TCGv_i32 tcg_shift, tcg_single;
6756     TCGv_i64 tcg_double;
6757 
6758     tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
6759 
6760     tcg_shift = tcg_constant_i32(64 - scale);
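    /*
     * The helpers take the fraction-bit count, 64 - scale; scale == 64
     * gives a shift of 0, which is how the plain fp <=> integer
     * conversions reuse this path (see the OPTME note above).
     */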
6761 
6762     if (itof) {
6763         TCGv_i64 tcg_int = cpu_reg(s, rn);
6764         if (!sf) {
6765             TCGv_i64 tcg_extend = tcg_temp_new_i64();
6766 
6767             if (is_signed) {
6768                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
6769             } else {
6770                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
6771             }
6772 
6773             tcg_int = tcg_extend;
6774         }
6775 
6776         switch (type) {
6777         case 1: /* float64 */
6778             tcg_double = tcg_temp_new_i64();
6779             if (is_signed) {
6780                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6781                                      tcg_shift, tcg_fpstatus);
6782             } else {
6783                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6784                                      tcg_shift, tcg_fpstatus);
6785             }
6786             write_fp_dreg(s, rd, tcg_double);
6787             break;
6788 
6789         case 0: /* float32 */
6790             tcg_single = tcg_temp_new_i32();
6791             if (is_signed) {
6792                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6793                                      tcg_shift, tcg_fpstatus);
6794             } else {
6795                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6796                                      tcg_shift, tcg_fpstatus);
6797             }
6798             write_fp_sreg(s, rd, tcg_single);
6799             break;
6800 
6801         case 3: /* float16 */
6802             tcg_single = tcg_temp_new_i32();
6803             if (is_signed) {
6804                 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
6805                                      tcg_shift, tcg_fpstatus);
6806             } else {
6807                 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
6808                                      tcg_shift, tcg_fpstatus);
6809             }
6810             write_fp_sreg(s, rd, tcg_single);
6811             break;
6812 
6813         default:
6814             g_assert_not_reached();
6815         }
6816     } else {
6817         TCGv_i64 tcg_int = cpu_reg(s, rd);
6818         TCGv_i32 tcg_rmode;
6819 
6820         if (extract32(opcode, 2, 1)) {
6821             /* There are too many rounding modes to all fit into rmode,
6822              * so FCVTA[US] is a special case.
6823              */
6824             rmode = FPROUNDING_TIEAWAY;
6825         }
6826 
6827         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
6828 
6829         switch (type) {
6830         case 1: /* float64 */
6831             tcg_double = read_fp_dreg(s, rn);
6832             if (is_signed) {
6833                 if (!sf) {
6834                     gen_helper_vfp_tosld(tcg_int, tcg_double,
6835                                          tcg_shift, tcg_fpstatus);
6836                 } else {
6837                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
6838                                          tcg_shift, tcg_fpstatus);
6839                 }
6840             } else {
6841                 if (!sf) {
6842                     gen_helper_vfp_tould(tcg_int, tcg_double,
6843                                          tcg_shift, tcg_fpstatus);
6844                 } else {
6845                     gen_helper_vfp_touqd(tcg_int, tcg_double,
6846                                          tcg_shift, tcg_fpstatus);
6847                 }
6848             }
6849             if (!sf) {
6850                 tcg_gen_ext32u_i64(tcg_int, tcg_int);
6851             }
6852             break;
6853 
6854         case 0: /* float32 */
6855             tcg_single = read_fp_sreg(s, rn);
6856             if (sf) {
6857                 if (is_signed) {
6858                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
6859                                          tcg_shift, tcg_fpstatus);
6860                 } else {
6861                     gen_helper_vfp_touqs(tcg_int, tcg_single,
6862                                          tcg_shift, tcg_fpstatus);
6863                 }
6864             } else {
6865                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6866                 if (is_signed) {
6867                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
6868                                          tcg_shift, tcg_fpstatus);
6869                 } else {
6870                     gen_helper_vfp_touls(tcg_dest, tcg_single,
6871                                          tcg_shift, tcg_fpstatus);
6872                 }
6873                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6874             }
6875             break;
6876 
6877         case 3: /* float16 */
6878             tcg_single = read_fp_sreg(s, rn);
6879             if (sf) {
6880                 if (is_signed) {
6881                     gen_helper_vfp_tosqh(tcg_int, tcg_single,
6882                                          tcg_shift, tcg_fpstatus);
6883                 } else {
6884                     gen_helper_vfp_touqh(tcg_int, tcg_single,
6885                                          tcg_shift, tcg_fpstatus);
6886                 }
6887             } else {
6888                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6889                 if (is_signed) {
6890                     gen_helper_vfp_toslh(tcg_dest, tcg_single,
6891                                          tcg_shift, tcg_fpstatus);
6892                 } else {
6893                     gen_helper_vfp_toulh(tcg_dest, tcg_single,
6894                                          tcg_shift, tcg_fpstatus);
6895                 }
6896                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6897             }
6898             break;
6899 
6900         default:
6901             g_assert_not_reached();
6902         }
6903 
6904         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
6905     }
6906 }
6907 
6908 /* Floating point <-> fixed point conversions
6909  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
6910  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6911  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
6912  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6913  */
6914 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
6915 {
6916     int rd = extract32(insn, 0, 5);
6917     int rn = extract32(insn, 5, 5);
6918     int scale = extract32(insn, 10, 6);
6919     int opcode = extract32(insn, 16, 3);
6920     int rmode = extract32(insn, 19, 2);
6921     int type = extract32(insn, 22, 2);
6922     bool sbit = extract32(insn, 29, 1);
6923     bool sf = extract32(insn, 31, 1);
6924     bool itof;
6925 
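    /*
     * The scale field encodes 64 minus the number of fraction bits;
     * with 32-bit gprs (sf == 0) at most 32 fraction bits are valid,
     * hence scale must be at least 32.
     */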
6926     if (sbit || (!sf && scale < 32)) {
6927         unallocated_encoding(s);
6928         return;
6929     }
6930 
6931     switch (type) {
6932     case 0: /* float32 */
6933     case 1: /* float64 */
6934         break;
6935     case 3: /* float16 */
6936         if (dc_isar_feature(aa64_fp16, s)) {
6937             break;
6938         }
6939         /* fallthru */
6940     default:
6941         unallocated_encoding(s);
6942         return;
6943     }
6944 
6945     switch ((rmode << 3) | opcode) {
6946     case 0x2: /* SCVTF */
6947     case 0x3: /* UCVTF */
6948         itof = true;
6949         break;
6950     case 0x18: /* FCVTZS */
6951     case 0x19: /* FCVTZU */
6952         itof = false;
6953         break;
6954     default:
6955         unallocated_encoding(s);
6956         return;
6957     }
6958 
6959     if (!fp_access_check(s)) {
6960         return;
6961     }
6962 
6963     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
6964 }
6965 
6966 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
6967 {
6968     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
6969      * without conversion.
6970      */
6971 
6972     if (itof) {
6973         TCGv_i64 tcg_rn = cpu_reg(s, rn);
6974         TCGv_i64 tmp;
6975 
6976         switch (type) {
6977         case 0:
6978             /* 32 bit */
6979             tmp = tcg_temp_new_i64();
6980             tcg_gen_ext32u_i64(tmp, tcg_rn);
6981             write_fp_dreg(s, rd, tmp);
6982             break;
6983         case 1:
6984             /* 64 bit */
6985             write_fp_dreg(s, rd, tcg_rn);
6986             break;
6987         case 2:
6988             /* 64 bit to top half. */
6989             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
6990             clear_vec_high(s, true, rd);
6991             break;
6992         case 3:
6993             /* 16 bit */
6994             tmp = tcg_temp_new_i64();
6995             tcg_gen_ext16u_i64(tmp, tcg_rn);
6996             write_fp_dreg(s, rd, tmp);
6997             break;
6998         default:
6999             g_assert_not_reached();
7000         }
7001     } else {
7002         TCGv_i64 tcg_rd = cpu_reg(s, rd);
7003 
7004         switch (type) {
7005         case 0:
7006             /* 32 bit */
7007             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
7008             break;
7009         case 1:
7010             /* 64 bit */
7011             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
7012             break;
7013         case 2:
7014             /* 64 bits from top half */
7015             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
7016             break;
7017         case 3:
7018             /* 16 bit */
7019             tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
7020             break;
7021         default:
7022             g_assert_not_reached();
7023         }
7024     }
7025 }
7026 
7027 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
7028 {
7029     TCGv_i64 t = read_fp_dreg(s, rn);
7030     TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
7031 
7032     gen_helper_fjcvtzs(t, t, fpstatus);
7033 
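    /*
     * Our reading of the helper's contract: the 32-bit result is in
     * the low half of t and a value for cpu_ZF is in the high half
     * (QEMU's Z flag is set when cpu_ZF == 0). FJCVTZS leaves N, C
     * and V clear, hence the movi's below.
     */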
7034     tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
7035     tcg_gen_extrh_i64_i32(cpu_ZF, t);
7036     tcg_gen_movi_i32(cpu_CF, 0);
7037     tcg_gen_movi_i32(cpu_NF, 0);
7038     tcg_gen_movi_i32(cpu_VF, 0);
7039 }
7040 
7041 /* Floating point <-> integer conversions
7042  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
7043  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
7044  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
7045  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
7046  */
7047 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
7048 {
7049     int rd = extract32(insn, 0, 5);
7050     int rn = extract32(insn, 5, 5);
7051     int opcode = extract32(insn, 16, 3);
7052     int rmode = extract32(insn, 19, 2);
7053     int type = extract32(insn, 22, 2);
7054     bool sbit = extract32(insn, 29, 1);
7055     bool sf = extract32(insn, 31, 1);
7056     bool itof = false;
7057 
7058     if (sbit) {
7059         goto do_unallocated;
7060     }
7061 
7062     switch (opcode) {
7063     case 2: /* SCVTF */
7064     case 3: /* UCVTF */
7065         itof = true;
7066         /* fallthru */
7067     case 4: /* FCVTAS */
7068     case 5: /* FCVTAU */
7069         if (rmode != 0) {
7070             goto do_unallocated;
7071         }
7072         /* fallthru */
7073     case 0: /* FCVT[NPMZ]S */
7074     case 1: /* FCVT[NPMZ]U */
7075         switch (type) {
7076         case 0: /* float32 */
7077         case 1: /* float64 */
7078             break;
7079         case 3: /* float16 */
7080             if (!dc_isar_feature(aa64_fp16, s)) {
7081                 goto do_unallocated;
7082             }
7083             break;
7084         default:
7085             goto do_unallocated;
7086         }
7087         if (!fp_access_check(s)) {
7088             return;
7089         }
7090         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
7091         break;
7092 
7093     default:
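        /*
         * The remaining encodings are matched on the packed value
         * sf:type:rmode:opcode. E.g. (illustrative) FMOV Xd, Vn.D[1]
         * has sf == 1, type == 2, rmode == 1, opcode == 6, giving
         * 0b11001110 below.
         */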
7094         switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
7095         case 0b01100110: /* FMOV half <-> 32-bit int */
7096         case 0b01100111:
7097         case 0b11100110: /* FMOV half <-> 64-bit int */
7098         case 0b11100111:
7099             if (!dc_isar_feature(aa64_fp16, s)) {
7100                 goto do_unallocated;
7101             }
7102             /* fallthru */
7103         case 0b00000110: /* FMOV 32-bit */
7104         case 0b00000111:
7105         case 0b10100110: /* FMOV 64-bit */
7106         case 0b10100111:
7107         case 0b11001110: /* FMOV top half of 128-bit */
7108         case 0b11001111:
7109             if (!fp_access_check(s)) {
7110                 return;
7111             }
7112             itof = opcode & 1;
7113             handle_fmov(s, rd, rn, type, itof);
7114             break;
7115 
7116         case 0b00111110: /* FJCVTZS */
7117             if (!dc_isar_feature(aa64_jscvt, s)) {
7118                 goto do_unallocated;
7119             } else if (fp_access_check(s)) {
7120                 handle_fjcvtzs(s, rd, rn);
7121             }
7122             break;
7123 
7124         default:
7125         do_unallocated:
7126             unallocated_encoding(s);
7127             return;
7128         }
7129         break;
7130     }
7131 }
7132 
7133 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
7134  *   31  30  29 28     25 24                          0
7135  * +---+---+---+---------+-----------------------------+
7136  * |   | 0 |   | 1 1 1 1 |                             |
7137  * +---+---+---+---------+-----------------------------+
7138  */
7139 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
7140 {
7141     if (extract32(insn, 24, 1)) {
7142         /* Floating point data-processing (3 source) */
7143         disas_fp_3src(s, insn);
7144     } else if (extract32(insn, 21, 1) == 0) {
7145         /* Floating point to fixed point conversions */
7146         disas_fp_fixed_conv(s, insn);
7147     } else {
7148         switch (extract32(insn, 10, 2)) {
7149         case 1:
7150             /* Floating point conditional compare */
7151             disas_fp_ccomp(s, insn);
7152             break;
7153         case 2:
7154             /* Floating point data-processing (2 source) */
7155             disas_fp_2src(s, insn);
7156             break;
7157         case 3:
7158             /* Floating point conditional select */
7159             disas_fp_csel(s, insn);
7160             break;
7161         case 0:
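            /*
             * ctz32() of bits [15:12] distinguishes the xxx1/xx10/
             * x100/1000 patterns annotated on the cases below; for
             * the all-zeroes pattern ctz32(0) == 32, which lands in
             * the default case.
             */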
7162             switch (ctz32(extract32(insn, 12, 4))) {
7163             case 0: /* [15:12] == xxx1 */
7164                 /* Floating point immediate */
7165                 disas_fp_imm(s, insn);
7166                 break;
7167             case 1: /* [15:12] == xx10 */
7168                 /* Floating point compare */
7169                 disas_fp_compare(s, insn);
7170                 break;
7171             case 2: /* [15:12] == x100 */
7172                 /* Floating point data-processing (1 source) */
7173                 disas_fp_1src(s, insn);
7174                 break;
7175             case 3: /* [15:12] == 1000 */
7176                 unallocated_encoding(s);
7177                 break;
7178             default: /* [15:12] == 0000 */
7179                 /* Floating point <-> integer conversions */
7180                 disas_fp_int_conv(s, insn);
7181                 break;
7182             }
7183             break;
7184         }
7185     }
7186 }
7187 
7188 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
7189                      int pos)
7190 {
7191     /* Extract 64 bits from the middle of two concatenated 64 bit
7192      * vector register slices left:right. The extracted bits start
7193      * at 'pos' bits into the right (least significant) side.
7194      * We return the result in tcg_right, and guarantee not to
7195      * trash tcg_left.
7196      */
7197     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7198     assert(pos > 0 && pos < 64);
7199 
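    /*
     * Worked example (illustrative): pos == 24 yields
     * tcg_right = (right >> 24) | (left << 40).
     */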
7200     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
7201     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
7202     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
7203 }
7204 
7205 /* EXT
7206  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
7207  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7208  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
7209  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7210  */
7211 static void disas_simd_ext(DisasContext *s, uint32_t insn)
7212 {
7213     int is_q = extract32(insn, 30, 1);
7214     int op2 = extract32(insn, 22, 2);
7215     int imm4 = extract32(insn, 11, 4);
7216     int rm = extract32(insn, 16, 5);
7217     int rn = extract32(insn, 5, 5);
7218     int rd = extract32(insn, 0, 5);
7219     int pos = imm4 << 3;
7220     TCGv_i64 tcg_resl, tcg_resh;
7221 
7222     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
7223         unallocated_encoding(s);
7224         return;
7225     }
7226 
7227     if (!fp_access_check(s)) {
7228         return;
7229     }
7230 
7231     tcg_resh = tcg_temp_new_i64();
7232     tcg_resl = tcg_temp_new_i64();
7233 
7234     /* Vd gets bits starting at pos bits into Vm:Vn. This is
7235      * either extracting 128 bits from a 128:128 concatenation, or
7236      * extracting 64 bits from a 64:64 concatenation.
7237      */
7238     if (!is_q) {
7239         read_vec_element(s, tcg_resl, rn, 0, MO_64);
7240         if (pos != 0) {
7241             read_vec_element(s, tcg_resh, rm, 0, MO_64);
7242             do_ext64(s, tcg_resh, tcg_resl, pos);
7243         }
7244     } else {
7245         TCGv_i64 tcg_hh;
7246         typedef struct {
7247             int reg;
7248             int elt;
7249         } EltPosns;
7250         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
7251         EltPosns *elt = eltposns;
7252 
7253         if (pos >= 64) {
7254             elt++;
7255             pos -= 64;
7256         }
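        /*
         * E.g. (illustrative) imm4 == 11 gives pos == 88: skip the
         * low 64-bit slice of Vn and start extracting 24 bits into
         * its high half.
         */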
7257 
7258         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
7259         elt++;
7260         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
7261         elt++;
7262         if (pos != 0) {
7263             do_ext64(s, tcg_resh, tcg_resl, pos);
7264             tcg_hh = tcg_temp_new_i64();
7265             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
7266             do_ext64(s, tcg_hh, tcg_resh, pos);
7267         }
7268     }
7269 
7270     write_vec_element(s, tcg_resl, rd, 0, MO_64);
7271     if (is_q) {
7272         write_vec_element(s, tcg_resh, rd, 1, MO_64);
7273     }
7274     clear_vec_high(s, is_q, rd);
7275 }
7276 
7277 /* TBL/TBX
7278  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
7279  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7280  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
7281  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7282  */
7283 static void disas_simd_tb(DisasContext *s, uint32_t insn)
7284 {
7285     int op2 = extract32(insn, 22, 2);
7286     int is_q = extract32(insn, 30, 1);
7287     int rm = extract32(insn, 16, 5);
7288     int rn = extract32(insn, 5, 5);
7289     int rd = extract32(insn, 0, 5);
7290     int is_tbx = extract32(insn, 12, 1);
7291     int len = (extract32(insn, 13, 2) + 1) * 16;
7292 
7293     if (op2 != 0) {
7294         unallocated_encoding(s);
7295         return;
7296     }
7297 
7298     if (!fp_access_check(s)) {
7299         return;
7300     }
7301 
7302     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
7303                        vec_full_reg_offset(s, rm), cpu_env,
7304                        is_q ? 16 : 8, vec_full_reg_size(s),
7305                        (len << 6) | (is_tbx << 5) | rn,
7306                        gen_helper_simd_tblx);
7307 }
7308 
7309 /* ZIP/UZP/TRN
7310  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
7311  * +---+---+-------------+------+---+------+---+------------------+------+
7312  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
7313  * +---+---+-------------+------+---+------+---+------------------+------+
7314  */
7315 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
7316 {
7317     int rd = extract32(insn, 0, 5);
7318     int rn = extract32(insn, 5, 5);
7319     int rm = extract32(insn, 16, 5);
7320     int size = extract32(insn, 22, 2);
7321     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
7322      * bit 2 indicates 1 vs 2 variant of the insn.
7323      */
7324     int opcode = extract32(insn, 12, 2);
7325     bool part = extract32(insn, 14, 1);
7326     bool is_q = extract32(insn, 30, 1);
7327     int esize = 8 << size;
7328     int i;
7329     int datasize = is_q ? 128 : 64;
7330     int elements = datasize / esize;
7331     TCGv_i64 tcg_res[2], tcg_ele;
7332 
7333     if (opcode == 0 || (size == 3 && !is_q)) {
7334         unallocated_encoding(s);
7335         return;
7336     }
7337 
7338     if (!fp_access_check(s)) {
7339         return;
7340     }
7341 
7342     tcg_res[0] = tcg_temp_new_i64();
7343     tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
7344     tcg_ele = tcg_temp_new_i64();
7345 
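    /*
     * Worked example (illustrative): UZP1 with four elements and
     * part == 0 picks Vn[0], Vn[2], Vm[0], Vm[2], i.e. the
     * even-numbered source lanes.
     */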
7346     for (i = 0; i < elements; i++) {
7347         int o, w;
7348 
7349         switch (opcode) {
7350         case 1: /* UZP1/2 */
7351         {
7352             int midpoint = elements / 2;
7353             if (i < midpoint) {
7354                 read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
7355             } else {
7356                 read_vec_element(s, tcg_ele, rm,
7357                                  2 * (i - midpoint) + part, size);
7358             }
7359             break;
7360         }
7361         case 2: /* TRN1/2 */
7362             if (i & 1) {
7363                 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
7364             } else {
7365                 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
7366             }
7367             break;
7368         case 3: /* ZIP1/2 */
7369         {
7370             int base = part * elements / 2;
7371             if (i & 1) {
7372                 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
7373             } else {
7374                 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
7375             }
7376             break;
7377         }
7378         default:
7379             g_assert_not_reached();
7380         }
7381 
7382         w = (i * esize) / 64;
7383         o = (i * esize) % 64;
7384         if (o == 0) {
7385             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
7386         } else {
7387             tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
7388             tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
7389         }
7390     }
7391 
7392     for (i = 0; i <= is_q; ++i) {
7393         write_vec_element(s, tcg_res[i], rd, i, MO_64);
7394     }
7395     clear_vec_high(s, is_q, rd);
7396 }
7397 
7398 /*
7399  * do_reduction_op helper
7400  *
7401  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7402  * important for correct NaN propagation that we do these
7403  * operations in exactly the order specified by the pseudocode.
7404  *
7405  * This is a recursive function; TCG temps should be freed by the
7406  * calling function once it is done with the values.
7407  */
7408 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
7409                                 int esize, int size, int vmap, TCGv_ptr fpst)
7410 {
7411     if (esize == size) {
7412         int element;
7413         MemOp msize = esize == 16 ? MO_16 : MO_32;
7414         TCGv_i32 tcg_elem;
7415 
7416         /* We should have exactly one element left here */
7417         assert(ctpop8(vmap) == 1);
7418         element = ctz32(vmap);
7419         assert(element < 8);
7420 
7421         tcg_elem = tcg_temp_new_i32();
7422         read_vec_element_i32(s, tcg_elem, rn, element, msize);
7423         return tcg_elem;
7424     } else {
7425         int bits = size / 2;
7426         int shift = ctpop8(vmap) / 2;
7427         int vmap_lo = (vmap >> shift) & vmap;
7428         int vmap_hi = (vmap & ~vmap_lo);
7429         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7430 
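        /*
         * vmap_lo/vmap_hi split the element bitmap in half; e.g.
         * (illustrative) a four-element vmap of 0b1111 gives
         * shift == 2, vmap_lo == 0b0011 and vmap_hi == 0b1100.
         */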
7431         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
7432         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
7433         tcg_res = tcg_temp_new_i32();
7434 
7435         switch (fpopcode) {
7436         case 0x0c: /* fmaxnmv half-precision */
7437             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7438             break;
7439         case 0x0f: /* fmaxv half-precision */
7440             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
7441             break;
7442         case 0x1c: /* fminnmv half-precision */
7443             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7444             break;
7445         case 0x1f: /* fminv half-precision */
7446             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
7447             break;
7448         case 0x2c: /* fmaxnmv */
7449             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
7450             break;
7451         case 0x2f: /* fmaxv */
7452             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
7453             break;
7454         case 0x3c: /* fminnmv */
7455             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
7456             break;
7457         case 0x3f: /* fminv */
7458             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
7459             break;
7460         default:
7461             g_assert_not_reached();
7462         }
7463         return tcg_res;
7464     }
7465 }
7466 
7467 /* AdvSIMD across lanes
7468  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7469  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7470  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7471  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7472  */
7473 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
7474 {
7475     int rd = extract32(insn, 0, 5);
7476     int rn = extract32(insn, 5, 5);
7477     int size = extract32(insn, 22, 2);
7478     int opcode = extract32(insn, 12, 5);
7479     bool is_q = extract32(insn, 30, 1);
7480     bool is_u = extract32(insn, 29, 1);
7481     bool is_fp = false;
7482     bool is_min = false;
7483     int esize;
7484     int elements;
7485     int i;
7486     TCGv_i64 tcg_res, tcg_elt;
7487 
7488     switch (opcode) {
7489     case 0x1b: /* ADDV */
7490         if (is_u) {
7491             unallocated_encoding(s);
7492             return;
7493         }
7494         /* fall through */
7495     case 0x3: /* SADDLV, UADDLV */
7496     case 0xa: /* SMAXV, UMAXV */
7497     case 0x1a: /* SMINV, UMINV */
7498         if (size == 3 || (size == 2 && !is_q)) {
7499             unallocated_encoding(s);
7500             return;
7501         }
7502         break;
7503     case 0xc: /* FMAXNMV, FMINNMV */
7504     case 0xf: /* FMAXV, FMINV */
7505         /* Bit 1 of size field encodes min vs max and the actual size
7506          * depends on the encoding of the U bit. If U is not set (and FP16
7507          * is enabled) then we use half-precision float instead of single
7508          * precision.
7509          */
7510         is_min = extract32(size, 1, 1);
7511         is_fp = true;
7512         if (!is_u && dc_isar_feature(aa64_fp16, s)) {
7513             size = 1;
7514         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
7515             unallocated_encoding(s);
7516             return;
7517         } else {
7518             size = 2;
7519         }
7520         break;
7521     default:
7522         unallocated_encoding(s);
7523         return;
7524     }
7525 
7526     if (!fp_access_check(s)) {
7527         return;
7528     }
7529 
7530     esize = 8 << size;
7531     elements = (is_q ? 128 : 64) / esize;
7532 
7533     tcg_res = tcg_temp_new_i64();
7534     tcg_elt = tcg_temp_new_i64();
7535 
7536     /* These instructions operate across all lanes of a vector
7537      * to produce a single result. We can guarantee that a 64
7538      * bit intermediate is sufficient:
7539      *  + for [US]ADDLV the maximum element size is 32 bits, and
7540      *    the result type is 64 bits
7541      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7542      *    same as the element size, which is 32 bits at most
7543      * For the integer operations we can choose to work at 64
7544      * or 32 bits and truncate at the end; for simplicity
7545      * we use 64 bits always. The floating point
7546      * ops do require 32 bit intermediates, though.
7547      */
7548     if (!is_fp) {
7549         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
7550 
7551         for (i = 1; i < elements; i++) {
7552             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
7553 
7554             switch (opcode) {
7555             case 0x03: /* SADDLV / UADDLV */
7556             case 0x1b: /* ADDV */
7557                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
7558                 break;
7559             case 0x0a: /* SMAXV / UMAXV */
7560                 if (is_u) {
7561                     tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
7562                 } else {
7563                     tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
7564                 }
7565                 break;
7566             case 0x1a: /* SMINV / UMINV */
7567                 if (is_u) {
7568                     tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
7569                 } else {
7570                     tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
7571                 }
7572                 break;
7573             default:
7574                 g_assert_not_reached();
7575             }
7576 
7577         }
7578     } else {
7579         /* Floating point vector reduction ops which work across 32
7580          * bit (single) or 16 bit (half-precision) intermediates.
7581          * Note that correct NaN propagation requires that we do these
7582          * operations in exactly the order specified by the pseudocode.
7583          */
7584         TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7585         int fpopcode = opcode | is_min << 4 | is_u << 5;
7586         int vmap = (1 << elements) - 1;
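        /*
         * E.g. (illustrative) FMINV on four single-precision lanes:
         * opcode == 0xf with is_min and is_u set gives fpopcode == 0x3f,
         * and vmap == 0b1111 covers all four elements.
         */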
7587         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
7588                                              (is_q ? 128 : 64), vmap, fpst);
7589         tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
7590     }
7591 
7592     /* Now truncate the result to the width required for the final output */
7593     if (opcode == 0x03) {
7594         /* SADDLV, UADDLV: result is 2*esize */
7595         size++;
7596     }
7597 
7598     switch (size) {
7599     case 0:
7600         tcg_gen_ext8u_i64(tcg_res, tcg_res);
7601         break;
7602     case 1:
7603         tcg_gen_ext16u_i64(tcg_res, tcg_res);
7604         break;
7605     case 2:
7606         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7607         break;
7608     case 3:
7609         break;
7610     default:
7611         g_assert_not_reached();
7612     }
7613 
7614     write_fp_dreg(s, rd, tcg_res);
7615 }
7616 
7617 /* DUP (Element, Vector)
7618  *
7619  *  31  30   29              21 20    16 15        10  9    5 4    0
7620  * +---+---+-------------------+--------+-------------+------+------+
7621  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7622  * +---+---+-------------------+--------+-------------+------+------+
7623  *
7624  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7625  */
7626 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7627                              int imm5)
7628 {
7629     int size = ctz32(imm5);
7630     int index;
7631 
7632     if (size > 3 || (size == 3 && !is_q)) {
7633         unallocated_encoding(s);
7634         return;
7635     }
7636 
7637     if (!fp_access_check(s)) {
7638         return;
7639     }
7640 
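    /*
     * imm5 encodes both size and index; e.g. (illustrative)
     * imm5 == 0b00110 gives size == 1 (16-bit elements) and
     * index == imm5 >> 2 == 1.
     */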
7641     index = imm5 >> (size + 1);
7642     tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7643                          vec_reg_offset(s, rn, index, size),
7644                          is_q ? 16 : 8, vec_full_reg_size(s));
7645 }
7646 
7647 /* DUP (element, scalar)
7648  *  31                   21 20    16 15        10  9    5 4    0
7649  * +-----------------------+--------+-------------+------+------+
7650  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7651  * +-----------------------+--------+-------------+------+------+
7652  */
7653 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7654                               int imm5)
7655 {
7656     int size = ctz32(imm5);
7657     int index;
7658     TCGv_i64 tmp;
7659 
7660     if (size > 3) {
7661         unallocated_encoding(s);
7662         return;
7663     }
7664 
7665     if (!fp_access_check(s)) {
7666         return;
7667     }
7668 
7669     index = imm5 >> (size + 1);
7670 
7671     /* This instruction just extracts the specified element and
7672      * zero-extends it into the bottom of the destination register.
7673      */
7674     tmp = tcg_temp_new_i64();
7675     read_vec_element(s, tmp, rn, index, size);
7676     write_fp_dreg(s, rd, tmp);
7677 }
7678 
7679 /* DUP (General)
7680  *
7681  *  31  30   29              21 20    16 15        10  9    5 4    0
7682  * +---+---+-------------------+--------+-------------+------+------+
7683  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
7684  * +---+---+-------------------+--------+-------------+------+------+
7685  *
7686  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7687  */
7688 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7689                              int imm5)
7690 {
7691     int size = ctz32(imm5);
7692     uint32_t dofs, oprsz, maxsz;
7693 
7694     if (size > 3 || ((size == 3) && !is_q)) {
7695         unallocated_encoding(s);
7696         return;
7697     }
7698 
7699     if (!fp_access_check(s)) {
7700         return;
7701     }
7702 
7703     dofs = vec_full_reg_offset(s, rd);
7704     oprsz = is_q ? 16 : 8;
7705     maxsz = vec_full_reg_size(s);
7706 
7707     tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7708 }
7709 
7710 /* INS (Element)
7711  *
7712  *  31                   21 20    16 15  14    11  10 9    5 4    0
7713  * +-----------------------+--------+------------+---+------+------+
7714  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
7715  * +-----------------------+--------+------------+---+------+------+
7716  *
7717  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7718  * index: encoded in imm5<4:size+1>
7719  */
7720 static void handle_simd_inse(DisasContext *s, int rd, int rn,
7721                              int imm4, int imm5)
7722 {
7723     int size = ctz32(imm5);
7724     int src_index, dst_index;
7725     TCGv_i64 tmp;
7726 
7727     if (size > 3) {
7728         unallocated_encoding(s);
7729         return;
7730     }
7731 
7732     if (!fp_access_check(s)) {
7733         return;
7734     }
7735 
7736     dst_index = extract32(imm5, 1 + size, 5);
7737     src_index = extract32(imm4, size, 4);
7738 
7739     tmp = tcg_temp_new_i64();
7740 
7741     read_vec_element(s, tmp, rn, src_index, size);
7742     write_vec_element(s, tmp, rd, dst_index, size);
7743 
7744     /* INS is considered a 128-bit write for SVE. */
7745     clear_vec_high(s, true, rd);
7746 }
7747 
7748 
7749 /* INS (General)
7750  *
7751  *  31                   21 20    16 15        10  9    5 4    0
7752  * +-----------------------+--------+-------------+------+------+
7753  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
7754  * +-----------------------+--------+-------------+------+------+
7755  *
7756  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7757  * index: encoded in imm5<4:size+1>
7758  */
7759 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
7760 {
7761     int size = ctz32(imm5);
7762     int idx;
7763 
7764     if (size > 3) {
7765         unallocated_encoding(s);
7766         return;
7767     }
7768 
7769     if (!fp_access_check(s)) {
7770         return;
7771     }
7772 
7773     idx = extract32(imm5, 1 + size, 4 - size);
7774     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
7775 
7776     /* INS is considered a 128-bit write for SVE. */
7777     clear_vec_high(s, true, rd);
7778 }
7779 
7780 /*
7781  * UMOV (General)
7782  * SMOV (General)
7783  *
7784  *  31  30   29              21 20    16 15    12   10 9    5 4    0
7785  * +---+---+-------------------+--------+-------------+------+------+
7786  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
7787  * +---+---+-------------------+--------+-------------+------+------+
7788  *
7789  * U: unsigned when set
7790  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7791  */
7792 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
7793                                   int rn, int rd, int imm5)
7794 {
7795     int size = ctz32(imm5);
7796     int element;
7797     TCGv_i64 tcg_rd;
7798 
7799     /* Check for UnallocatedEncodings */
7800     if (is_signed) {
7801         if (size > 2 || (size == 2 && !is_q)) {
7802             unallocated_encoding(s);
7803             return;
7804         }
7805     } else {
7806         if (size > 3
7807             || (size < 3 && is_q)
7808             || (size == 3 && !is_q)) {
7809             unallocated_encoding(s);
7810             return;
7811         }
7812     }
7813 
7814     if (!fp_access_check(s)) {
7815         return;
7816     }
7817 
7818     element = extract32(imm5, 1 + size, 4);
7819 
7820     tcg_rd = cpu_reg(s, rd);
7821     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
7822     if (is_signed && !is_q) {
7823         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7824     }
7825 }
7826 
7827 /* AdvSIMD copy
7828  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
7829  * +---+---+----+-----------------+------+---+------+---+------+------+
7830  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7831  * +---+---+----+-----------------+------+---+------+---+------+------+
7832  */
7833 static void disas_simd_copy(DisasContext *s, uint32_t insn)
7834 {
7835     int rd = extract32(insn, 0, 5);
7836     int rn = extract32(insn, 5, 5);
7837     int imm4 = extract32(insn, 11, 4);
7838     int op = extract32(insn, 29, 1);
7839     int is_q = extract32(insn, 30, 1);
7840     int imm5 = extract32(insn, 16, 5);
7841 
7842     if (op) {
7843         if (is_q) {
7844             /* INS (element) */
7845             handle_simd_inse(s, rd, rn, imm4, imm5);
7846         } else {
7847             unallocated_encoding(s);
7848         }
7849     } else {
7850         switch (imm4) {
7851         case 0:
7852             /* DUP (element - vector) */
7853             handle_simd_dupe(s, is_q, rd, rn, imm5);
7854             break;
7855         case 1:
7856             /* DUP (general) */
7857             handle_simd_dupg(s, is_q, rd, rn, imm5);
7858             break;
7859         case 3:
7860             if (is_q) {
7861                 /* INS (general) */
7862                 handle_simd_insg(s, rd, rn, imm5);
7863             } else {
7864                 unallocated_encoding(s);
7865             }
7866             break;
7867         case 5:
7868         case 7:
7869             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7870             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
7871             break;
7872         default:
7873             unallocated_encoding(s);
7874             break;
7875         }
7876     }
7877 }
7878 
7879 /* AdvSIMD modified immediate
7880  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
7881  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7882  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
7883  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7884  *
7885  * There are a number of operations that can be carried out here:
7886  *   MOVI - move (shifted) imm into register
7887  *   MVNI - move inverted (shifted) imm into register
7888  *   ORR  - bitwise OR of (shifted) imm with register
7889  *   BIC  - bitwise clear of (shifted) imm with register
7890  * With ARMv8.2 we also have:
7891  *   FMOV half-precision
7892  */
7893 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
7894 {
7895     int rd = extract32(insn, 0, 5);
7896     int cmode = extract32(insn, 12, 4);
7897     int o2 = extract32(insn, 11, 1);
7898     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
7899     bool is_neg = extract32(insn, 29, 1);
7900     bool is_q = extract32(insn, 30, 1);
7901     uint64_t imm = 0;
7902 
7903     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
7904         /* Check for FMOV (vector, immediate) - half-precision */
7905         if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
7906             unallocated_encoding(s);
7907             return;
7908         }
7909     }
7910 
7911     if (!fp_access_check(s)) {
7912         return;
7913     }
7914 
7915     if (cmode == 15 && o2 && !is_neg) {
7916         /* FMOV (vector, immediate) - half-precision */
7917         imm = vfp_expand_imm(MO_16, abcdefgh);
7918         /* now duplicate across the lanes */
7919         imm = dup_const(MO_16, imm);
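        /*
         * Worked example (illustrative): abcdefgh == 0x70 expands to
         * 0x3c00 (half-precision 1.0), which dup_const() replicates
         * to 0x3c003c003c003c00.
         */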
7920     } else {
7921         imm = asimd_imm_const(abcdefgh, cmode, is_neg);
7922     }
7923 
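    /*
     * cmode values 1/3/5/7 (32-bit) and 9/11 (16-bit) are the ORR/BIC
     * forms, which read-modify-write Rd; everything else writes the
     * expanded immediate directly.
     */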
7924     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
7925         /* MOVI or MVNI, with MVNI negation handled above.  */
7926         tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
7927                              vec_full_reg_size(s), imm);
7928     } else {
7929         /* ORR or BIC, with BIC negation to AND handled above.  */
7930         if (is_neg) {
7931             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
7932         } else {
7933             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
7934         }
7935     }
7936 }
7937 
7938 /* AdvSIMD scalar copy
7939  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
7940  * +-----+----+-----------------+------+---+------+---+------+------+
7941  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7942  * +-----+----+-----------------+------+---+------+---+------+------+
7943  */
7944 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
7945 {
7946     int rd = extract32(insn, 0, 5);
7947     int rn = extract32(insn, 5, 5);
7948     int imm4 = extract32(insn, 11, 4);
7949     int imm5 = extract32(insn, 16, 5);
7950     int op = extract32(insn, 29, 1);
7951 
7952     if (op != 0 || imm4 != 0) {
7953         unallocated_encoding(s);
7954         return;
7955     }
7956 
7957     /* DUP (element, scalar) */
7958     handle_simd_dupes(s, rd, rn, imm5);
7959 }
7960 
7961 /* AdvSIMD scalar pairwise
7962  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7963  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7964  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7965  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7966  */
7967 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
7968 {
7969     int u = extract32(insn, 29, 1);
7970     int size = extract32(insn, 22, 2);
7971     int opcode = extract32(insn, 12, 5);
7972     int rn = extract32(insn, 5, 5);
7973     int rd = extract32(insn, 0, 5);
7974     TCGv_ptr fpst;
7975 
7976     /* For some ops (the FP ones), size[1] is part of the encoding.
7977      * For ADDP strictly speaking it is not, but size[1] is always 1 for valid
7978      * encodings.
7979      */
7980     opcode |= (extract32(size, 1, 1) << 5);
7981 
7982     switch (opcode) {
7983     case 0x3b: /* ADDP */
7984         if (u || size != 3) {
7985             unallocated_encoding(s);
7986             return;
7987         }
7988         if (!fp_access_check(s)) {
7989             return;
7990         }
7991 
7992         fpst = NULL;
7993         break;
7994     case 0xc: /* FMAXNMP */
7995     case 0xd: /* FADDP */
7996     case 0xf: /* FMAXP */
7997     case 0x2c: /* FMINNMP */
7998     case 0x2f: /* FMINP */
7999         /* FP op, size[0] selects 32 vs 64 bit */
8000         if (!u) {
8001             if (!dc_isar_feature(aa64_fp16, s)) {
8002                 unallocated_encoding(s);
8003                 return;
8004             } else {
8005                 size = MO_16;
8006             }
8007         } else {
8008             size = extract32(size, 0, 1) ? MO_64 : MO_32;
8009         }
8010 
8011         if (!fp_access_check(s)) {
8012             return;
8013         }
8014 
8015         fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8016         break;
8017     default:
8018         unallocated_encoding(s);
8019         return;
8020     }
8021 
8022     if (size == MO_64) {
8023         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8024         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8025         TCGv_i64 tcg_res = tcg_temp_new_i64();
8026 
8027         read_vec_element(s, tcg_op1, rn, 0, MO_64);
8028         read_vec_element(s, tcg_op2, rn, 1, MO_64);
8029 
8030         switch (opcode) {
8031         case 0x3b: /* ADDP */
8032             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
8033             break;
8034         case 0xc: /* FMAXNMP */
8035             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8036             break;
8037         case 0xd: /* FADDP */
8038             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8039             break;
8040         case 0xf: /* FMAXP */
8041             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8042             break;
8043         case 0x2c: /* FMINNMP */
8044             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8045             break;
8046         case 0x2f: /* FMINP */
8047             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8048             break;
8049         default:
8050             g_assert_not_reached();
8051         }
8052 
8053         write_fp_dreg(s, rd, tcg_res);
8054     } else {
8055         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8056         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8057         TCGv_i32 tcg_res = tcg_temp_new_i32();
8058 
8059         read_vec_element_i32(s, tcg_op1, rn, 0, size);
8060         read_vec_element_i32(s, tcg_op2, rn, 1, size);
8061 
8062         if (size == MO_16) {
8063             switch (opcode) {
8064             case 0xc: /* FMAXNMP */
8065                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8066                 break;
8067             case 0xd: /* FADDP */
8068                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
8069                 break;
8070             case 0xf: /* FMAXP */
8071                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
8072                 break;
8073             case 0x2c: /* FMINNMP */
8074                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8075                 break;
8076             case 0x2f: /* FMINP */
8077                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
8078                 break;
8079             default:
8080                 g_assert_not_reached();
8081             }
8082         } else {
8083             switch (opcode) {
8084             case 0xc: /* FMAXNMP */
8085                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8086                 break;
8087             case 0xd: /* FADDP */
8088                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8089                 break;
8090             case 0xf: /* FMAXP */
8091                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8092                 break;
8093             case 0x2c: /* FMINNMP */
8094                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8095                 break;
8096             case 0x2f: /* FMINP */
8097                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8098                 break;
8099             default:
8100                 g_assert_not_reached();
8101             }
8102         }
8103 
8104         write_fp_sreg(s, rd, tcg_res);
8105     }
8106 }
8107 
8108 /*
8109  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
8110  *
 * This handles the common shifting code and is used by both
 * the vector and scalar code.
8113  */
8114 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
8115                                     TCGv_i64 tcg_rnd, bool accumulate,
8116                                     bool is_u, int size, int shift)
8117 {
8118     bool extended_result = false;
8119     bool round = tcg_rnd != NULL;
8120     int ext_lshift = 0;
8121     TCGv_i64 tcg_src_hi;
8122 
8123     if (round && size == 3) {
8124         extended_result = true;
8125         ext_lshift = 64 - shift;
8126         tcg_src_hi = tcg_temp_new_i64();
8127     } else if (shift == 64) {
8128         if (!accumulate && is_u) {
8129             /* result is zero */
8130             tcg_gen_movi_i64(tcg_res, 0);
8131             return;
8132         }
8133     }
8134 
8135     /* Deal with the rounding step */
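    /*
     * Sketch of the arithmetic: rounding adds 1 << (shift - 1) before
     * the shift, so with shift == 3 an input of 0x0c yields
     * (0x0c + 0x04) >> 3 == 2 rather than the truncating 0x0c >> 3 == 1.
     */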
8136     if (round) {
8137         if (extended_result) {
8138             TCGv_i64 tcg_zero = tcg_constant_i64(0);
8139             if (!is_u) {
8140                 /* take care of sign extending tcg_res */
8141                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
8142                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8143                                  tcg_src, tcg_src_hi,
8144                                  tcg_rnd, tcg_zero);
8145             } else {
8146                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8147                                  tcg_src, tcg_zero,
8148                                  tcg_rnd, tcg_zero);
8149             }
8150         } else {
8151             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
8152         }
8153     }
8154 
8155     /* Now do the shift right */
8156     if (round && extended_result) {
8157         /* extended case, >64 bit precision required */
8158         if (ext_lshift == 0) {
8159             /* special case, only high bits matter */
8160             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
8161         } else {
8162             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8163             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
8164             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
8165         }
8166     } else {
8167         if (is_u) {
8168             if (shift == 64) {
8169                 /* essentially shifting in 64 zeros */
8170                 tcg_gen_movi_i64(tcg_src, 0);
8171             } else {
8172                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8173             }
8174         } else {
8175             if (shift == 64) {
8176                 /* effectively extending the sign-bit */
8177                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
8178             } else {
8179                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
8180             }
8181         }
8182     }
8183 
8184     if (accumulate) {
8185         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
8186     } else {
8187         tcg_gen_mov_i64(tcg_res, tcg_src);
8188     }
8189 }
8190 
8191 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
8192 static void handle_scalar_simd_shri(DisasContext *s,
8193                                     bool is_u, int immh, int immb,
8194                                     int opcode, int rn, int rd)
8195 {
8196     const int size = 3;
8197     int immhb = immh << 3 | immb;
8198     int shift = 2 * (8 << size) - immhb;
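    /*
     * Worked decode example: size is fixed at 3 here, so immh:immb
     * encodes 128 - shift; immh:immb == 127 gives shift == 1 and
     * immh:immb == 64 gives shift == 64.
     */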
8199     bool accumulate = false;
8200     bool round = false;
8201     bool insert = false;
8202     TCGv_i64 tcg_rn;
8203     TCGv_i64 tcg_rd;
8204     TCGv_i64 tcg_round;
8205 
8206     if (!extract32(immh, 3, 1)) {
8207         unallocated_encoding(s);
8208         return;
8209     }
8210 
8211     if (!fp_access_check(s)) {
8212         return;
8213     }
8214 
8215     switch (opcode) {
8216     case 0x02: /* SSRA / USRA (accumulate) */
8217         accumulate = true;
8218         break;
8219     case 0x04: /* SRSHR / URSHR (rounding) */
8220         round = true;
8221         break;
8222     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8223         accumulate = round = true;
8224         break;
8225     case 0x08: /* SRI */
8226         insert = true;
8227         break;
8228     }
8229 
8230     if (round) {
8231         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8232     } else {
8233         tcg_round = NULL;
8234     }
8235 
8236     tcg_rn = read_fp_dreg(s, rn);
8237     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8238 
8239     if (insert) {
        /* A shift count equal to the element size is valid but does
         * nothing; special-case it to avoid a potential shift by 64.
         */
8243         int esize = 8 << size;
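        /*
         * For illustration: SRI with shift == 8 writes Rn[63:8] into
         * Rd[55:0] via the deposit below, leaving Rd[63:56] untouched.
         */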
8244         if (shift != esize) {
8245             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
8246             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
8247         }
8248     } else {
8249         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8250                                 accumulate, is_u, size, shift);
8251     }
8252 
8253     write_fp_dreg(s, rd, tcg_rd);
8254 }
8255 
8256 /* SHL/SLI - Scalar shift left */
8257 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
8258                                     int immh, int immb, int opcode,
8259                                     int rn, int rd)
8260 {
8261     int size = 32 - clz32(immh) - 1;
8262     int immhb = immh << 3 | immb;
8263     int shift = immhb - (8 << size);
8264     TCGv_i64 tcg_rn;
8265     TCGv_i64 tcg_rd;
8266 
8267     if (!extract32(immh, 3, 1)) {
8268         unallocated_encoding(s);
8269         return;
8270     }
8271 
8272     if (!fp_access_check(s)) {
8273         return;
8274     }
8275 
8276     tcg_rn = read_fp_dreg(s, rn);
8277     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
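    /*
     * Illustrative SLI example: with shift == 8 the deposit keeps
     * Rd[7:0] and writes Rn[55:0] into Rd[63:8].
     */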
8278 
8279     if (insert) {
8280         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
8281     } else {
8282         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
8283     }
8284 
8285     write_fp_dreg(s, rd, tcg_rd);
8286 }
8287 
8288 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8289  * (signed/unsigned) narrowing */
8290 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
8291                                    bool is_u_shift, bool is_u_narrow,
8292                                    int immh, int immb, int opcode,
8293                                    int rn, int rd)
8294 {
8295     int immhb = immh << 3 | immb;
8296     int size = 32 - clz32(immh) - 1;
8297     int esize = 8 << size;
8298     int shift = (2 * esize) - immhb;
8299     int elements = is_scalar ? 1 : (64 / esize);
8300     bool round = extract32(opcode, 0, 1);
8301     MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
8302     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
8303     TCGv_i32 tcg_rd_narrowed;
8304     TCGv_i64 tcg_final;
8305 
8306     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
8307         { gen_helper_neon_narrow_sat_s8,
8308           gen_helper_neon_unarrow_sat8 },
8309         { gen_helper_neon_narrow_sat_s16,
8310           gen_helper_neon_unarrow_sat16 },
8311         { gen_helper_neon_narrow_sat_s32,
8312           gen_helper_neon_unarrow_sat32 },
8313         { NULL, NULL },
8314     };
8315     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
8316         gen_helper_neon_narrow_sat_u8,
8317         gen_helper_neon_narrow_sat_u16,
8318         gen_helper_neon_narrow_sat_u32,
8319         NULL
8320     };
8321     NeonGenNarrowEnvFn *narrowfn;
8322 
8323     int i;
8324 
8325     assert(size < 4);
8326 
8327     if (extract32(immh, 3, 1)) {
8328         unallocated_encoding(s);
8329         return;
8330     }
8331 
8332     if (!fp_access_check(s)) {
8333         return;
8334     }
8335 
8336     if (is_u_shift) {
8337         narrowfn = unsigned_narrow_fns[size];
8338     } else {
8339         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
8340     }
8341 
8342     tcg_rn = tcg_temp_new_i64();
8343     tcg_rd = tcg_temp_new_i64();
8344     tcg_rd_narrowed = tcg_temp_new_i32();
8345     tcg_final = tcg_temp_new_i64();
8346 
8347     if (round) {
8348         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8349     } else {
8350         tcg_round = NULL;
8351     }
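    /*
     * Each pass narrows one element and packs it into tcg_final; as a
     * sketch, with esize == 16 the four narrowed halfwords land at bit
     * offsets 0, 16, 32 and 48.
     */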
8352 
8353     for (i = 0; i < elements; i++) {
8354         read_vec_element(s, tcg_rn, rn, i, ldop);
8355         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8356                                 false, is_u_shift, size+1, shift);
8357         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
8358         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
8359         if (i == 0) {
8360             tcg_gen_mov_i64(tcg_final, tcg_rd);
8361         } else {
8362             tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8363         }
8364     }
8365 
8366     if (!is_q) {
8367         write_vec_element(s, tcg_final, rd, 0, MO_64);
8368     } else {
8369         write_vec_element(s, tcg_final, rd, 1, MO_64);
8370     }
8371     clear_vec_high(s, is_q, rd);
8372 }
8373 
8374 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8375 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
8376                              bool src_unsigned, bool dst_unsigned,
8377                              int immh, int immb, int rn, int rd)
8378 {
8379     int immhb = immh << 3 | immb;
8380     int size = 32 - clz32(immh) - 1;
8381     int shift = immhb - (8 << size);
8382     int pass;
8383 
8384     assert(immh != 0);
8385     assert(!(scalar && is_q));
8386 
8387     if (!scalar) {
8388         if (!is_q && extract32(immh, 3, 1)) {
8389             unallocated_encoding(s);
8390             return;
8391         }
8392 
8393         /* Since we use the variable-shift helpers we must
8394          * replicate the shift count into each element of
8395          * the tcg_shift value.
8396          */
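        /*
         * e.g. for size == 0 a shift count of 3 becomes 0x0303 and
         * then 0x03030303 below: one copy of the count per byte lane.
         */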
8397         switch (size) {
8398         case 0:
8399             shift |= shift << 8;
8400             /* fall through */
8401         case 1:
8402             shift |= shift << 16;
8403             break;
8404         case 2:
8405         case 3:
8406             break;
8407         default:
8408             g_assert_not_reached();
8409         }
8410     }
8411 
8412     if (!fp_access_check(s)) {
8413         return;
8414     }
8415 
8416     if (size == 3) {
8417         TCGv_i64 tcg_shift = tcg_constant_i64(shift);
8418         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
8419             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
8420             { NULL, gen_helper_neon_qshl_u64 },
8421         };
8422         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
8423         int maxpass = is_q ? 2 : 1;
8424 
8425         for (pass = 0; pass < maxpass; pass++) {
8426             TCGv_i64 tcg_op = tcg_temp_new_i64();
8427 
8428             read_vec_element(s, tcg_op, rn, pass, MO_64);
8429             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8430             write_vec_element(s, tcg_op, rd, pass, MO_64);
8431         }
8432         clear_vec_high(s, is_q, rd);
8433     } else {
8434         TCGv_i32 tcg_shift = tcg_constant_i32(shift);
8435         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
8436             {
8437                 { gen_helper_neon_qshl_s8,
8438                   gen_helper_neon_qshl_s16,
8439                   gen_helper_neon_qshl_s32 },
8440                 { gen_helper_neon_qshlu_s8,
8441                   gen_helper_neon_qshlu_s16,
8442                   gen_helper_neon_qshlu_s32 }
8443             }, {
8444                 { NULL, NULL, NULL },
8445                 { gen_helper_neon_qshl_u8,
8446                   gen_helper_neon_qshl_u16,
8447                   gen_helper_neon_qshl_u32 }
8448             }
8449         };
8450         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
8451         MemOp memop = scalar ? size : MO_32;
8452         int maxpass = scalar ? 1 : is_q ? 4 : 2;
8453 
8454         for (pass = 0; pass < maxpass; pass++) {
8455             TCGv_i32 tcg_op = tcg_temp_new_i32();
8456 
8457             read_vec_element_i32(s, tcg_op, rn, pass, memop);
8458             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8459             if (scalar) {
8460                 switch (size) {
8461                 case 0:
8462                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
8463                     break;
8464                 case 1:
8465                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
8466                     break;
8467                 case 2:
8468                     break;
8469                 default:
8470                     g_assert_not_reached();
8471                 }
8472                 write_fp_sreg(s, rd, tcg_op);
8473             } else {
8474                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8475             }
8476         }
8477 
8478         if (!scalar) {
8479             clear_vec_high(s, is_q, rd);
8480         }
8481     }
8482 }
8483 
8484 /* Common vector code for handling integer to FP conversion */
8485 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
8486                                    int elements, int is_signed,
8487                                    int fracbits, int size)
8488 {
8489     TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8490     TCGv_i32 tcg_shift = NULL;
8491 
8492     MemOp mop = size | (is_signed ? MO_SIGN : 0);
8493     int pass;
8494 
8495     if (fracbits || size == MO_64) {
8496         tcg_shift = tcg_constant_i32(fracbits);
8497     }
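    /*
     * Fixed-point illustration: with fracbits == 8 an integer input of
     * 0x180 converts to 0x180 / 2^8 == 1.5 in the destination format.
     */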
8498 
8499     if (size == MO_64) {
8500         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
8501         TCGv_i64 tcg_double = tcg_temp_new_i64();
8502 
8503         for (pass = 0; pass < elements; pass++) {
8504             read_vec_element(s, tcg_int64, rn, pass, mop);
8505 
8506             if (is_signed) {
8507                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
8508                                      tcg_shift, tcg_fpst);
8509             } else {
8510                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
8511                                      tcg_shift, tcg_fpst);
8512             }
8513             if (elements == 1) {
8514                 write_fp_dreg(s, rd, tcg_double);
8515             } else {
8516                 write_vec_element(s, tcg_double, rd, pass, MO_64);
8517             }
8518         }
8519     } else {
8520         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
8521         TCGv_i32 tcg_float = tcg_temp_new_i32();
8522 
8523         for (pass = 0; pass < elements; pass++) {
8524             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
8525 
8526             switch (size) {
8527             case MO_32:
8528                 if (fracbits) {
8529                     if (is_signed) {
8530                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
8531                                              tcg_shift, tcg_fpst);
8532                     } else {
8533                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
8534                                              tcg_shift, tcg_fpst);
8535                     }
8536                 } else {
8537                     if (is_signed) {
8538                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
8539                     } else {
8540                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
8541                     }
8542                 }
8543                 break;
8544             case MO_16:
8545                 if (fracbits) {
8546                     if (is_signed) {
8547                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
8548                                              tcg_shift, tcg_fpst);
8549                     } else {
8550                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
8551                                              tcg_shift, tcg_fpst);
8552                     }
8553                 } else {
8554                     if (is_signed) {
8555                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
8556                     } else {
8557                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
8558                     }
8559                 }
8560                 break;
8561             default:
8562                 g_assert_not_reached();
8563             }
8564 
8565             if (elements == 1) {
8566                 write_fp_sreg(s, rd, tcg_float);
8567             } else {
8568                 write_vec_element_i32(s, tcg_float, rd, pass, size);
8569             }
8570         }
8571     }
8572 
8573     clear_vec_high(s, elements << size == 16, rd);
8574 }
8575 
8576 /* UCVTF/SCVTF - Integer to FP conversion */
8577 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8578                                          bool is_q, bool is_u,
8579                                          int immh, int immb, int opcode,
8580                                          int rn, int rd)
8581 {
8582     int size, elements, fracbits;
8583     int immhb = immh << 3 | immb;
8584 
8585     if (immh & 8) {
8586         size = MO_64;
8587         if (!is_scalar && !is_q) {
8588             unallocated_encoding(s);
8589             return;
8590         }
8591     } else if (immh & 4) {
8592         size = MO_32;
8593     } else if (immh & 2) {
8594         size = MO_16;
8595         if (!dc_isar_feature(aa64_fp16, s)) {
8596             unallocated_encoding(s);
8597             return;
8598         }
8599     } else {
8600         /* immh == 0 would be a failure of the decode logic */
8601         g_assert(immh == 1);
8602         unallocated_encoding(s);
8603         return;
8604     }
8605 
8606     if (is_scalar) {
8607         elements = 1;
8608     } else {
8609         elements = (8 << is_q) >> size;
8610     }
8611     fracbits = (16 << size) - immhb;
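    /*
     * Sketch: for size == MO_32, immh:immb encodes 64 - fracbits, so
     * immh:immb == 63 gives fracbits == 1 and immh:immb == 32 gives
     * fracbits == 32; elements == 8 for a full 128-bit vector of
     * halfwords (is_q == 1, size == MO_16).
     */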
8612 
8613     if (!fp_access_check(s)) {
8614         return;
8615     }
8616 
8617     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8618 }
8619 
/* FCVTZS, FCVTZU - FP to fixed-point conversion */
8621 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8622                                          bool is_q, bool is_u,
8623                                          int immh, int immb, int rn, int rd)
8624 {
8625     int immhb = immh << 3 | immb;
8626     int pass, size, fracbits;
8627     TCGv_ptr tcg_fpstatus;
8628     TCGv_i32 tcg_rmode, tcg_shift;
8629 
8630     if (immh & 0x8) {
8631         size = MO_64;
8632         if (!is_scalar && !is_q) {
8633             unallocated_encoding(s);
8634             return;
8635         }
8636     } else if (immh & 0x4) {
8637         size = MO_32;
8638     } else if (immh & 0x2) {
8639         size = MO_16;
8640         if (!dc_isar_feature(aa64_fp16, s)) {
8641             unallocated_encoding(s);
8642             return;
8643         }
8644     } else {
8645         /* Should have split out AdvSIMD modified immediate earlier.  */
8646         assert(immh == 1);
8647         unallocated_encoding(s);
8648         return;
8649     }
8650 
8651     if (!fp_access_check(s)) {
8652         return;
8653     }
8654 
8655     assert(!(is_scalar && is_q));
8656 
8657     tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8658     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
8659     fracbits = (16 << size) - immhb;
8660     tcg_shift = tcg_constant_i32(fracbits);
8661 
8662     if (size == MO_64) {
8663         int maxpass = is_scalar ? 1 : 2;
8664 
8665         for (pass = 0; pass < maxpass; pass++) {
8666             TCGv_i64 tcg_op = tcg_temp_new_i64();
8667 
8668             read_vec_element(s, tcg_op, rn, pass, MO_64);
8669             if (is_u) {
8670                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8671             } else {
8672                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8673             }
8674             write_vec_element(s, tcg_op, rd, pass, MO_64);
8675         }
8676         clear_vec_high(s, is_q, rd);
8677     } else {
8678         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
8679         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
8680 
8681         switch (size) {
8682         case MO_16:
8683             if (is_u) {
8684                 fn = gen_helper_vfp_touhh;
8685             } else {
8686                 fn = gen_helper_vfp_toshh;
8687             }
8688             break;
8689         case MO_32:
8690             if (is_u) {
8691                 fn = gen_helper_vfp_touls;
8692             } else {
8693                 fn = gen_helper_vfp_tosls;
8694             }
8695             break;
8696         default:
8697             g_assert_not_reached();
8698         }
8699 
8700         for (pass = 0; pass < maxpass; pass++) {
8701             TCGv_i32 tcg_op = tcg_temp_new_i32();
8702 
8703             read_vec_element_i32(s, tcg_op, rn, pass, size);
8704             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8705             if (is_scalar) {
8706                 write_fp_sreg(s, rd, tcg_op);
8707             } else {
8708                 write_vec_element_i32(s, tcg_op, rd, pass, size);
8709             }
8710         }
8711         if (!is_scalar) {
8712             clear_vec_high(s, is_q, rd);
8713         }
8714     }
8715 
8716     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
8717 }
8718 
8719 /* AdvSIMD scalar shift by immediate
8720  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
8721  * +-----+---+-------------+------+------+--------+---+------+------+
8722  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8723  * +-----+---+-------------+------+------+--------+---+------+------+
8724  *
 * This is the scalar version, so it works on fixed-size registers.
8726  */
8727 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8728 {
8729     int rd = extract32(insn, 0, 5);
8730     int rn = extract32(insn, 5, 5);
8731     int opcode = extract32(insn, 11, 5);
8732     int immb = extract32(insn, 16, 3);
8733     int immh = extract32(insn, 19, 4);
8734     bool is_u = extract32(insn, 29, 1);
8735 
8736     if (immh == 0) {
8737         unallocated_encoding(s);
8738         return;
8739     }
8740 
8741     switch (opcode) {
8742     case 0x08: /* SRI */
8743         if (!is_u) {
8744             unallocated_encoding(s);
8745             return;
8746         }
8747         /* fall through */
8748     case 0x00: /* SSHR / USHR */
8749     case 0x02: /* SSRA / USRA */
8750     case 0x04: /* SRSHR / URSHR */
8751     case 0x06: /* SRSRA / URSRA */
8752         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8753         break;
8754     case 0x0a: /* SHL / SLI */
8755         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8756         break;
8757     case 0x1c: /* SCVTF, UCVTF */
8758         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8759                                      opcode, rn, rd);
8760         break;
8761     case 0x10: /* SQSHRUN, SQSHRUN2 */
8762     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8763         if (!is_u) {
8764             unallocated_encoding(s);
8765             return;
8766         }
8767         handle_vec_simd_sqshrn(s, true, false, false, true,
8768                                immh, immb, opcode, rn, rd);
8769         break;
8770     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
8771     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8772         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8773                                immh, immb, opcode, rn, rd);
8774         break;
8775     case 0xc: /* SQSHLU */
8776         if (!is_u) {
8777             unallocated_encoding(s);
8778             return;
8779         }
8780         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8781         break;
8782     case 0xe: /* SQSHL, UQSHL */
8783         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8784         break;
8785     case 0x1f: /* FCVTZS, FCVTZU */
8786         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8787         break;
8788     default:
8789         unallocated_encoding(s);
8790         break;
8791     }
8792 }
8793 
8794 /* AdvSIMD scalar three different
8795  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8796  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8797  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8798  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8799  */
8800 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
8801 {
8802     bool is_u = extract32(insn, 29, 1);
8803     int size = extract32(insn, 22, 2);
8804     int opcode = extract32(insn, 12, 4);
8805     int rm = extract32(insn, 16, 5);
8806     int rn = extract32(insn, 5, 5);
8807     int rd = extract32(insn, 0, 5);
8808 
8809     if (is_u) {
8810         unallocated_encoding(s);
8811         return;
8812     }
8813 
8814     switch (opcode) {
8815     case 0x9: /* SQDMLAL, SQDMLAL2 */
8816     case 0xb: /* SQDMLSL, SQDMLSL2 */
8817     case 0xd: /* SQDMULL, SQDMULL2 */
8818         if (size == 0 || size == 3) {
8819             unallocated_encoding(s);
8820             return;
8821         }
8822         break;
8823     default:
8824         unallocated_encoding(s);
8825         return;
8826     }
8827 
8828     if (!fp_access_check(s)) {
8829         return;
8830     }
8831 
8832     if (size == 2) {
8833         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8834         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8835         TCGv_i64 tcg_res = tcg_temp_new_i64();
8836 
8837         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
8838         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
8839 
8840         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
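        /*
         * The saturating add of the product to itself performs the
         * "doubling" step: e.g. 0x4000000000000000 doubled saturates
         * to INT64_MAX instead of wrapping negative.
         */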
8841         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
8842 
8843         switch (opcode) {
8844         case 0xd: /* SQDMULL, SQDMULL2 */
8845             break;
8846         case 0xb: /* SQDMLSL, SQDMLSL2 */
8847             tcg_gen_neg_i64(tcg_res, tcg_res);
8848             /* fall through */
8849         case 0x9: /* SQDMLAL, SQDMLAL2 */
8850             read_vec_element(s, tcg_op1, rd, 0, MO_64);
8851             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
8852                                               tcg_res, tcg_op1);
8853             break;
8854         default:
8855             g_assert_not_reached();
8856         }
8857 
8858         write_fp_dreg(s, rd, tcg_res);
8859     } else {
8860         TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
8861         TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
8862         TCGv_i64 tcg_res = tcg_temp_new_i64();
8863 
8864         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
8865         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
8866 
8867         switch (opcode) {
8868         case 0xd: /* SQDMULL, SQDMULL2 */
8869             break;
8870         case 0xb: /* SQDMLSL, SQDMLSL2 */
8871             gen_helper_neon_negl_u32(tcg_res, tcg_res);
8872             /* fall through */
8873         case 0x9: /* SQDMLAL, SQDMLAL2 */
8874         {
8875             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
8876             read_vec_element(s, tcg_op3, rd, 0, MO_32);
8877             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
8878                                               tcg_res, tcg_op3);
8879             break;
8880         }
8881         default:
8882             g_assert_not_reached();
8883         }
8884 
8885         tcg_gen_ext32u_i64(tcg_res, tcg_res);
8886         write_fp_dreg(s, rd, tcg_res);
8887     }
8888 }
8889 
8890 static void handle_3same_64(DisasContext *s, int opcode, bool u,
8891                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
8892 {
8893     /* Handle 64x64->64 opcodes which are shared between the scalar
8894      * and vector 3-same groups. We cover every opcode where size == 3
8895      * is valid in either the three-reg-same (integer, not pairwise)
8896      * or scalar-three-reg-same groups.
8897      */
8898     TCGCond cond;
8899 
8900     switch (opcode) {
8901     case 0x1: /* SQADD */
8902         if (u) {
8903             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8904         } else {
8905             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8906         }
8907         break;
8908     case 0x5: /* SQSUB */
8909         if (u) {
8910             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8911         } else {
8912             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8913         }
8914         break;
8915     case 0x6: /* CMGT, CMHI */
8916         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
8917          * We implement this using setcond (test) and then negating.
8918          */
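        /*
         * Sketch: setcond yields 0 or 1, and negating 1 in 64 bits
         * gives 0xffffffffffffffff, the required all-ones true result.
         */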
8919         cond = u ? TCG_COND_GTU : TCG_COND_GT;
8920     do_cmop:
8921         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8922         tcg_gen_neg_i64(tcg_rd, tcg_rd);
8923         break;
8924     case 0x7: /* CMGE, CMHS */
8925         cond = u ? TCG_COND_GEU : TCG_COND_GE;
8926         goto do_cmop;
8927     case 0x11: /* CMTST, CMEQ */
8928         if (u) {
8929             cond = TCG_COND_EQ;
8930             goto do_cmop;
8931         }
8932         gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8933         break;
8934     case 0x8: /* SSHL, USHL */
8935         if (u) {
8936             gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
8937         } else {
8938             gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
8939         }
8940         break;
8941     case 0x9: /* SQSHL, UQSHL */
8942         if (u) {
8943             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8944         } else {
8945             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8946         }
8947         break;
8948     case 0xa: /* SRSHL, URSHL */
8949         if (u) {
8950             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8951         } else {
8952             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8953         }
8954         break;
8955     case 0xb: /* SQRSHL, UQRSHL */
8956         if (u) {
8957             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8958         } else {
8959             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8960         }
8961         break;
8962     case 0x10: /* ADD, SUB */
8963         if (u) {
8964             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8965         } else {
8966             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8967         }
8968         break;
8969     default:
8970         g_assert_not_reached();
8971     }
8972 }
8973 
8974 /* Handle the 3-same-operands float operations; shared by the scalar
8975  * and vector encodings. The caller must filter out any encodings
 * not allocated for the group it is dealing with.
8977  */
8978 static void handle_3same_float(DisasContext *s, int size, int elements,
8979                                int fpopcode, int rd, int rn, int rm)
8980 {
8981     int pass;
8982     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
8983 
8984     for (pass = 0; pass < elements; pass++) {
8985         if (size) {
8986             /* Double */
8987             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8988             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8989             TCGv_i64 tcg_res = tcg_temp_new_i64();
8990 
8991             read_vec_element(s, tcg_op1, rn, pass, MO_64);
8992             read_vec_element(s, tcg_op2, rm, pass, MO_64);
8993 
8994             switch (fpopcode) {
8995             case 0x39: /* FMLS */
8996                 /* As usual for ARM, separate negation for fused multiply-add */
8997                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
8998                 /* fall through */
8999             case 0x19: /* FMLA */
9000                 read_vec_element(s, tcg_res, rd, pass, MO_64);
9001                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
9002                                        tcg_res, fpst);
9003                 break;
9004             case 0x18: /* FMAXNM */
9005                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
9006                 break;
9007             case 0x1a: /* FADD */
9008                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
9009                 break;
9010             case 0x1b: /* FMULX */
9011                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
9012                 break;
9013             case 0x1c: /* FCMEQ */
9014                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9015                 break;
9016             case 0x1e: /* FMAX */
9017                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
9018                 break;
9019             case 0x1f: /* FRECPS */
9020                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9021                 break;
9022             case 0x38: /* FMINNM */
9023                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
9024                 break;
9025             case 0x3a: /* FSUB */
9026                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
9027                 break;
9028             case 0x3e: /* FMIN */
9029                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
9030                 break;
9031             case 0x3f: /* FRSQRTS */
9032                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9033                 break;
9034             case 0x5b: /* FMUL */
9035                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
9036                 break;
9037             case 0x5c: /* FCMGE */
9038                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9039                 break;
9040             case 0x5d: /* FACGE */
9041                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9042                 break;
9043             case 0x5f: /* FDIV */
9044                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
9045                 break;
9046             case 0x7a: /* FABD */
9047                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
9048                 gen_helper_vfp_absd(tcg_res, tcg_res);
9049                 break;
9050             case 0x7c: /* FCMGT */
9051                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9052                 break;
9053             case 0x7d: /* FACGT */
9054                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9055                 break;
9056             default:
9057                 g_assert_not_reached();
9058             }
9059 
9060             write_vec_element(s, tcg_res, rd, pass, MO_64);
9061         } else {
9062             /* Single */
9063             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9064             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9065             TCGv_i32 tcg_res = tcg_temp_new_i32();
9066 
9067             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9068             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9069 
9070             switch (fpopcode) {
9071             case 0x39: /* FMLS */
9072                 /* As usual for ARM, separate negation for fused multiply-add */
9073                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
9074                 /* fall through */
9075             case 0x19: /* FMLA */
9076                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9077                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
9078                                        tcg_res, fpst);
9079                 break;
9080             case 0x1a: /* FADD */
9081                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
9082                 break;
9083             case 0x1b: /* FMULX */
9084                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
9085                 break;
9086             case 0x1c: /* FCMEQ */
9087                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9088                 break;
9089             case 0x1e: /* FMAX */
9090                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
9091                 break;
9092             case 0x1f: /* FRECPS */
9093                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9094                 break;
9095             case 0x18: /* FMAXNM */
9096                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
9097                 break;
9098             case 0x38: /* FMINNM */
9099                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
9100                 break;
9101             case 0x3a: /* FSUB */
9102                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9103                 break;
9104             case 0x3e: /* FMIN */
9105                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
9106                 break;
9107             case 0x3f: /* FRSQRTS */
9108                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9109                 break;
9110             case 0x5b: /* FMUL */
9111                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
9112                 break;
9113             case 0x5c: /* FCMGE */
9114                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9115                 break;
9116             case 0x5d: /* FACGE */
9117                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9118                 break;
9119             case 0x5f: /* FDIV */
9120                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
9121                 break;
9122             case 0x7a: /* FABD */
9123                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9124                 gen_helper_vfp_abss(tcg_res, tcg_res);
9125                 break;
9126             case 0x7c: /* FCMGT */
9127                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9128                 break;
9129             case 0x7d: /* FACGT */
9130                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9131                 break;
9132             default:
9133                 g_assert_not_reached();
9134             }
9135 
9136             if (elements == 1) {
9137                 /* scalar single so clear high part */
9138                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9139 
9140                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
9141                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
9142             } else {
9143                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9144             }
9145         }
9146     }
9147 
9148     clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
9149 }
9150 
9151 /* AdvSIMD scalar three same
9152  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
9153  * +-----+---+-----------+------+---+------+--------+---+------+------+
9154  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9155  * +-----+---+-----------+------+---+------+--------+---+------+------+
9156  */
9157 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
9158 {
9159     int rd = extract32(insn, 0, 5);
9160     int rn = extract32(insn, 5, 5);
9161     int opcode = extract32(insn, 11, 5);
9162     int rm = extract32(insn, 16, 5);
9163     int size = extract32(insn, 22, 2);
9164     bool u = extract32(insn, 29, 1);
9165     TCGv_i64 tcg_rd;
9166 
9167     if (opcode >= 0x18) {
9168         /* Floating point: U, size[1] and opcode indicate operation */
9169         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
9170         switch (fpopcode) {
9171         case 0x1b: /* FMULX */
9172         case 0x1f: /* FRECPS */
9173         case 0x3f: /* FRSQRTS */
9174         case 0x5d: /* FACGE */
9175         case 0x7d: /* FACGT */
9176         case 0x1c: /* FCMEQ */
9177         case 0x5c: /* FCMGE */
9178         case 0x7c: /* FCMGT */
9179         case 0x7a: /* FABD */
9180             break;
9181         default:
9182             unallocated_encoding(s);
9183             return;
9184         }
9185 
9186         if (!fp_access_check(s)) {
9187             return;
9188         }
9189 
9190         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
9191         return;
9192     }
9193 
9194     switch (opcode) {
9195     case 0x1: /* SQADD, UQADD */
9196     case 0x5: /* SQSUB, UQSUB */
9197     case 0x9: /* SQSHL, UQSHL */
9198     case 0xb: /* SQRSHL, UQRSHL */
9199         break;
9200     case 0x8: /* SSHL, USHL */
9201     case 0xa: /* SRSHL, URSHL */
9202     case 0x6: /* CMGT, CMHI */
9203     case 0x7: /* CMGE, CMHS */
9204     case 0x11: /* CMTST, CMEQ */
9205     case 0x10: /* ADD, SUB (vector) */
9206         if (size != 3) {
9207             unallocated_encoding(s);
9208             return;
9209         }
9210         break;
9211     case 0x16: /* SQDMULH, SQRDMULH (vector) */
9212         if (size != 1 && size != 2) {
9213             unallocated_encoding(s);
9214             return;
9215         }
9216         break;
9217     default:
9218         unallocated_encoding(s);
9219         return;
9220     }
9221 
9222     if (!fp_access_check(s)) {
9223         return;
9224     }
9225 
9226     tcg_rd = tcg_temp_new_i64();
9227 
9228     if (size == 3) {
9229         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9230         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
9231 
9232         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
9233     } else {
9234         /* Do a single operation on the lowest element in the vector.
9235          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
9236          * no side effects for all these operations.
9237          * OPTME: special-purpose helpers would avoid doing some
9238          * unnecessary work in the helper for the 8 and 16 bit cases.
9239          */
9240         NeonGenTwoOpEnvFn *genenvfn;
9241         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9242         TCGv_i32 tcg_rm = tcg_temp_new_i32();
9243         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
9244 
9245         read_vec_element_i32(s, tcg_rn, rn, 0, size);
9246         read_vec_element_i32(s, tcg_rm, rm, 0, size);
9247 
9248         switch (opcode) {
9249         case 0x1: /* SQADD, UQADD */
9250         {
9251             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9252                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9253                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9254                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9255             };
9256             genenvfn = fns[size][u];
9257             break;
9258         }
9259         case 0x5: /* SQSUB, UQSUB */
9260         {
9261             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9262                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9263                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9264                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9265             };
9266             genenvfn = fns[size][u];
9267             break;
9268         }
9269         case 0x9: /* SQSHL, UQSHL */
9270         {
9271             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9272                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9273                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9274                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9275             };
9276             genenvfn = fns[size][u];
9277             break;
9278         }
9279         case 0xb: /* SQRSHL, UQRSHL */
9280         {
9281             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9282                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9283                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9284                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9285             };
9286             genenvfn = fns[size][u];
9287             break;
9288         }
9289         case 0x16: /* SQDMULH, SQRDMULH */
9290         {
9291             static NeonGenTwoOpEnvFn * const fns[2][2] = {
9292                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9293                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9294             };
9295             assert(size == 1 || size == 2);
9296             genenvfn = fns[size - 1][u];
9297             break;
9298         }
9299         default:
9300             g_assert_not_reached();
9301         }
9302 
9303         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
9304         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
9305     }
9306 
9307     write_fp_dreg(s, rd, tcg_rd);
9308 }
9309 
9310 /* AdvSIMD scalar three same FP16
9311  *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
9312  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9313  * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
9314  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9315  * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9316  * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9317  */
9318 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9319                                                   uint32_t insn)
9320 {
9321     int rd = extract32(insn, 0, 5);
9322     int rn = extract32(insn, 5, 5);
9323     int opcode = extract32(insn, 11, 3);
9324     int rm = extract32(insn, 16, 5);
9325     bool u = extract32(insn, 29, 1);
9326     bool a = extract32(insn, 23, 1);
    int fpopcode = opcode | (a << 3) | (u << 4);
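    /*
     * e.g. FABD encodes opcode == 2 with a == 1 and u == 1, giving
     * fpopcode 0x1a below.
     */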
9328     TCGv_ptr fpst;
9329     TCGv_i32 tcg_op1;
9330     TCGv_i32 tcg_op2;
9331     TCGv_i32 tcg_res;
9332 
9333     switch (fpopcode) {
9334     case 0x03: /* FMULX */
9335     case 0x04: /* FCMEQ (reg) */
9336     case 0x07: /* FRECPS */
9337     case 0x0f: /* FRSQRTS */
9338     case 0x14: /* FCMGE (reg) */
9339     case 0x15: /* FACGE */
9340     case 0x1a: /* FABD */
9341     case 0x1c: /* FCMGT (reg) */
9342     case 0x1d: /* FACGT */
9343         break;
9344     default:
9345         unallocated_encoding(s);
9346         return;
9347     }
9348 
    if (!dc_isar_feature(aa64_fp16, s)) {
        unallocated_encoding(s);
        return;
    }
9352 
9353     if (!fp_access_check(s)) {
9354         return;
9355     }
9356 
9357     fpst = fpstatus_ptr(FPST_FPCR_F16);
9358 
9359     tcg_op1 = read_fp_hreg(s, rn);
9360     tcg_op2 = read_fp_hreg(s, rm);
9361     tcg_res = tcg_temp_new_i32();
9362 
9363     switch (fpopcode) {
9364     case 0x03: /* FMULX */
9365         gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9366         break;
9367     case 0x04: /* FCMEQ (reg) */
9368         gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9369         break;
9370     case 0x07: /* FRECPS */
9371         gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9372         break;
9373     case 0x0f: /* FRSQRTS */
9374         gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9375         break;
9376     case 0x14: /* FCMGE (reg) */
9377         gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9378         break;
9379     case 0x15: /* FACGE */
9380         gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9381         break;
9382     case 0x1a: /* FABD */
9383         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9384         tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9385         break;
9386     case 0x1c: /* FCMGT (reg) */
9387         gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9388         break;
9389     case 0x1d: /* FACGT */
9390         gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9391         break;
9392     default:
9393         g_assert_not_reached();
9394     }
9395 
9396     write_fp_sreg(s, rd, tcg_res);
9397 }
9398 
9399 /* AdvSIMD scalar three same extra
9400  *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
9401  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9402  * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
9403  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9404  */
9405 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9406                                                    uint32_t insn)
9407 {
9408     int rd = extract32(insn, 0, 5);
9409     int rn = extract32(insn, 5, 5);
9410     int opcode = extract32(insn, 11, 4);
9411     int rm = extract32(insn, 16, 5);
9412     int size = extract32(insn, 22, 2);
9413     bool u = extract32(insn, 29, 1);
9414     TCGv_i32 ele1, ele2, ele3;
9415     TCGv_i64 res;
9416     bool feature;
9417 
9418     switch (u * 16 + opcode) {
9419     case 0x10: /* SQRDMLAH (vector) */
9420     case 0x11: /* SQRDMLSH (vector) */
9421         if (size != 1 && size != 2) {
9422             unallocated_encoding(s);
9423             return;
9424         }
9425         feature = dc_isar_feature(aa64_rdm, s);
9426         break;
9427     default:
9428         unallocated_encoding(s);
9429         return;
9430     }
9431     if (!feature) {
9432         unallocated_encoding(s);
9433         return;
9434     }
9435     if (!fp_access_check(s)) {
9436         return;
9437     }
9438 
9439     /* Do a single operation on the lowest element in the vector.
9440      * We use the standard Neon helpers and rely on 0 OP 0 == 0
9441      * with no side effects for all these operations.
9442      * OPTME: special-purpose helpers would avoid doing some
9443      * unnecessary work in the helper for the 16 bit cases.
9444      */
9445     ele1 = tcg_temp_new_i32();
9446     ele2 = tcg_temp_new_i32();
9447     ele3 = tcg_temp_new_i32();
9448 
9449     read_vec_element_i32(s, ele1, rn, 0, size);
9450     read_vec_element_i32(s, ele2, rm, 0, size);
9451     read_vec_element_i32(s, ele3, rd, 0, size);
9452 
9453     switch (opcode) {
9454     case 0x0: /* SQRDMLAH */
9455         if (size == 1) {
9456             gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
9457         } else {
9458             gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
9459         }
9460         break;
9461     case 0x1: /* SQRDMLSH */
9462         if (size == 1) {
9463             gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
9464         } else {
9465             gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
9466         }
9467         break;
9468     default:
9469         g_assert_not_reached();
9470     }
9471 
9472     res = tcg_temp_new_i64();
9473     tcg_gen_extu_i32_i64(res, ele3);
9474     write_fp_dreg(s, rd, res);
9475 }
9476 
9477 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9478                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9479                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9480 {
9481     /* Handle 64->64 opcodes which are shared between the scalar and
9482      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9483      * is valid in either group and also the double-precision fp ops.
     * The caller need only provide tcg_rmode and tcg_fpstatus if the op
9485      * requires them.
9486      */
9487     TCGCond cond;
9488 
9489     switch (opcode) {
9490     case 0x4: /* CLS, CLZ */
9491         if (u) {
9492             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9493         } else {
9494             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9495         }
9496         break;
9497     case 0x5: /* NOT */
9498         /* This opcode is shared with CNT and RBIT but we have earlier
9499          * enforced that size == 3 if and only if this is the NOT insn.
9500          */
9501         tcg_gen_not_i64(tcg_rd, tcg_rn);
9502         break;
9503     case 0x7: /* SQABS, SQNEG */
9504         if (u) {
9505             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
9506         } else {
9507             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
9508         }
9509         break;
9510     case 0xa: /* CMLT */
        /* 64 bit integer comparison against zero, result is
         * test ? (2^64 - 1) : 0. We implement this using setcond (test)
         * and then negating.
         */
9515         cond = TCG_COND_LT;
9516     do_cmop:
9517         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
9518         tcg_gen_neg_i64(tcg_rd, tcg_rd);
9519         break;
9520     case 0x8: /* CMGT, CMGE */
9521         cond = u ? TCG_COND_GE : TCG_COND_GT;
9522         goto do_cmop;
9523     case 0x9: /* CMEQ, CMLE */
9524         cond = u ? TCG_COND_LE : TCG_COND_EQ;
9525         goto do_cmop;
9526     case 0xb: /* ABS, NEG */
9527         if (u) {
9528             tcg_gen_neg_i64(tcg_rd, tcg_rn);
9529         } else {
9530             tcg_gen_abs_i64(tcg_rd, tcg_rn);
9531         }
9532         break;
9533     case 0x2f: /* FABS */
9534         gen_helper_vfp_absd(tcg_rd, tcg_rn);
9535         break;
9536     case 0x6f: /* FNEG */
9537         gen_helper_vfp_negd(tcg_rd, tcg_rn);
9538         break;
9539     case 0x7f: /* FSQRT */
9540         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
9541         break;
9542     case 0x1a: /* FCVTNS */
9543     case 0x1b: /* FCVTMS */
9544     case 0x1c: /* FCVTAS */
9545     case 0x3a: /* FCVTPS */
9546     case 0x3b: /* FCVTZS */
9547         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9548         break;
9549     case 0x5a: /* FCVTNU */
9550     case 0x5b: /* FCVTMU */
9551     case 0x5c: /* FCVTAU */
9552     case 0x7a: /* FCVTPU */
9553     case 0x7b: /* FCVTZU */
9554         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9555         break;
9556     case 0x18: /* FRINTN */
9557     case 0x19: /* FRINTM */
9558     case 0x38: /* FRINTP */
9559     case 0x39: /* FRINTZ */
9560     case 0x58: /* FRINTA */
9561     case 0x79: /* FRINTI */
9562         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
9563         break;
9564     case 0x59: /* FRINTX */
9565         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
9566         break;
9567     case 0x1e: /* FRINT32Z */
9568     case 0x5e: /* FRINT32X */
9569         gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
9570         break;
9571     case 0x1f: /* FRINT64Z */
9572     case 0x5f: /* FRINT64X */
9573         gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
9574         break;
9575     default:
9576         g_assert_not_reached();
9577     }
9578 }
9579 
9580 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
9581                                    bool is_scalar, bool is_u, bool is_q,
9582                                    int size, int rn, int rd)
9583 {
9584     bool is_double = (size == MO_64);
9585     TCGv_ptr fpst;
9586 
9587     if (!fp_access_check(s)) {
9588         return;
9589     }
9590 
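    /* Half-precision ops use the separate fp16 status so that the
     * FZ16 flush-to-zero control applies rather than FZ.
     */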
9591     fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9592 
9593     if (is_double) {
9594         TCGv_i64 tcg_op = tcg_temp_new_i64();
9595         TCGv_i64 tcg_zero = tcg_constant_i64(0);
9596         TCGv_i64 tcg_res = tcg_temp_new_i64();
9597         NeonGenTwoDoubleOpFn *genfn;
9598         bool swap = false;
9599         int pass;
9600 
9601         switch (opcode) {
9602         case 0x2e: /* FCMLT (zero) */
9603             swap = true;
9604             /* fall through */
9605         case 0x2c: /* FCMGT (zero) */
9606             genfn = gen_helper_neon_cgt_f64;
9607             break;
9608         case 0x2d: /* FCMEQ (zero) */
9609             genfn = gen_helper_neon_ceq_f64;
9610             break;
9611         case 0x6d: /* FCMLE (zero) */
9612             swap = true;
9613             /* fall through */
9614         case 0x6c: /* FCMGE (zero) */
9615             genfn = gen_helper_neon_cge_f64;
9616             break;
9617         default:
9618             g_assert_not_reached();
9619         }
9620 
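        /* FCMLT (zero) and FCMLE (zero) have no helpers of their own:
         * they use the GT/GE helpers with the operands swapped, since
         * x < 0.0 iff 0.0 > x.
         */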
9621         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9622             read_vec_element(s, tcg_op, rn, pass, MO_64);
9623             if (swap) {
9624                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9625             } else {
9626                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9627             }
9628             write_vec_element(s, tcg_res, rd, pass, MO_64);
9629         }
9630 
9631         clear_vec_high(s, !is_scalar, rd);
9632     } else {
9633         TCGv_i32 tcg_op = tcg_temp_new_i32();
9634         TCGv_i32 tcg_zero = tcg_constant_i32(0);
9635         TCGv_i32 tcg_res = tcg_temp_new_i32();
9636         NeonGenTwoSingleOpFn *genfn;
9637         bool swap = false;
9638         int pass, maxpasses;
9639 
9640         if (size == MO_16) {
9641             switch (opcode) {
9642             case 0x2e: /* FCMLT (zero) */
9643                 swap = true;
9644                 /* fall through */
9645             case 0x2c: /* FCMGT (zero) */
9646                 genfn = gen_helper_advsimd_cgt_f16;
9647                 break;
9648             case 0x2d: /* FCMEQ (zero) */
9649                 genfn = gen_helper_advsimd_ceq_f16;
9650                 break;
9651             case 0x6d: /* FCMLE (zero) */
9652                 swap = true;
9653                 /* fall through */
9654             case 0x6c: /* FCMGE (zero) */
9655                 genfn = gen_helper_advsimd_cge_f16;
9656                 break;
9657             default:
9658                 g_assert_not_reached();
9659             }
9660         } else {
9661             switch (opcode) {
9662             case 0x2e: /* FCMLT (zero) */
9663                 swap = true;
9664                 /* fall through */
9665             case 0x2c: /* FCMGT (zero) */
9666                 genfn = gen_helper_neon_cgt_f32;
9667                 break;
9668             case 0x2d: /* FCMEQ (zero) */
9669                 genfn = gen_helper_neon_ceq_f32;
9670                 break;
9671             case 0x6d: /* FCMLE (zero) */
9672                 swap = true;
9673                 /* fall through */
9674             case 0x6c: /* FCMGE (zero) */
9675                 genfn = gen_helper_neon_cge_f32;
9676                 break;
9677             default:
9678                 g_assert_not_reached();
9679             }
9680         }
9681 
9682         if (is_scalar) {
9683             maxpasses = 1;
9684         } else {
9685             int vector_size = 8 << is_q;
9686             maxpasses = vector_size >> size;
9687         }
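        /* e.g. a full 128-bit vector of fp16 elements: 16 bytes with
         * size == MO_16 gives 16 >> 1 = 8 passes; a 64-bit vector of
         * fp32 gives 2.
         */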
9688 
9689         for (pass = 0; pass < maxpasses; pass++) {
9690             read_vec_element_i32(s, tcg_op, rn, pass, size);
9691             if (swap) {
9692                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9693             } else {
9694                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9695             }
9696             if (is_scalar) {
9697                 write_fp_sreg(s, rd, tcg_res);
9698             } else {
9699                 write_vec_element_i32(s, tcg_res, rd, pass, size);
9700             }
9701         }
9702 
9703         if (!is_scalar) {
9704             clear_vec_high(s, is_q, rd);
9705         }
9706     }
9707 }
9708 
9709 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
9710                                     bool is_scalar, bool is_u, bool is_q,
9711                                     int size, int rn, int rd)
9712 {
9713     bool is_double = (size == 3);
9714     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9715 
9716     if (is_double) {
9717         TCGv_i64 tcg_op = tcg_temp_new_i64();
9718         TCGv_i64 tcg_res = tcg_temp_new_i64();
9719         int pass;
9720 
9721         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9722             read_vec_element(s, tcg_op, rn, pass, MO_64);
9723             switch (opcode) {
9724             case 0x3d: /* FRECPE */
9725                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
9726                 break;
9727             case 0x3f: /* FRECPX */
9728                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
9729                 break;
9730             case 0x7d: /* FRSQRTE */
9731                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
9732                 break;
9733             default:
9734                 g_assert_not_reached();
9735             }
9736             write_vec_element(s, tcg_res, rd, pass, MO_64);
9737         }
9738         clear_vec_high(s, !is_scalar, rd);
9739     } else {
9740         TCGv_i32 tcg_op = tcg_temp_new_i32();
9741         TCGv_i32 tcg_res = tcg_temp_new_i32();
9742         int pass, maxpasses;
9743 
9744         if (is_scalar) {
9745             maxpasses = 1;
9746         } else {
9747             maxpasses = is_q ? 4 : 2;
9748         }
9749 
9750         for (pass = 0; pass < maxpasses; pass++) {
9751             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
9752 
9753             switch (opcode) {
9754             case 0x3c: /* URECPE */
9755                 gen_helper_recpe_u32(tcg_res, tcg_op);
9756                 break;
9757             case 0x3d: /* FRECPE */
9758                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
9759                 break;
9760             case 0x3f: /* FRECPX */
9761                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
9762                 break;
9763             case 0x7d: /* FRSQRTE */
9764                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
9765                 break;
9766             default:
9767                 g_assert_not_reached();
9768             }
9769 
9770             if (is_scalar) {
9771                 write_fp_sreg(s, rd, tcg_res);
9772             } else {
9773                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9774             }
9775         }
9776         if (!is_scalar) {
9777             clear_vec_high(s, is_q, rd);
9778         }
9779     }
9780 }
9781 
9782 static void handle_2misc_narrow(DisasContext *s, bool scalar,
9783                                 int opcode, bool u, bool is_q,
9784                                 int size, int rn, int rd)
9785 {
9786     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
9787      * in the source becomes a size element in the destination).
9788      */
9789     int pass;
9790     TCGv_i32 tcg_res[2];
9791     int destelt = is_q ? 2 : 0;
9792     int passes = scalar ? 1 : 2;
9793 
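    /* For the "2" variants (is_q set) the narrowed results are written
     * to the upper half of Vd (destelt == 2) and the lower half is left
     * untouched; otherwise they go in the lower half.
     */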
9794     if (scalar) {
9795         tcg_res[1] = tcg_constant_i32(0);
9796     }
9797 
9798     for (pass = 0; pass < passes; pass++) {
9799         TCGv_i64 tcg_op = tcg_temp_new_i64();
9800         NeonGenNarrowFn *genfn = NULL;
9801         NeonGenNarrowEnvFn *genenvfn = NULL;
9802 
9803         if (scalar) {
9804             read_vec_element(s, tcg_op, rn, pass, size + 1);
9805         } else {
9806             read_vec_element(s, tcg_op, rn, pass, MO_64);
9807         }
9808         tcg_res[pass] = tcg_temp_new_i32();
9809 
9810         switch (opcode) {
9811         case 0x12: /* XTN, SQXTUN */
9812         {
9813             static NeonGenNarrowFn * const xtnfns[3] = {
9814                 gen_helper_neon_narrow_u8,
9815                 gen_helper_neon_narrow_u16,
9816                 tcg_gen_extrl_i64_i32,
9817             };
9818             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
9819                 gen_helper_neon_unarrow_sat8,
9820                 gen_helper_neon_unarrow_sat16,
9821                 gen_helper_neon_unarrow_sat32,
9822             };
9823             if (u) {
9824                 genenvfn = sqxtunfns[size];
9825             } else {
9826                 genfn = xtnfns[size];
9827             }
9828             break;
9829         }
9830         case 0x14: /* SQXTN, UQXTN */
9831         {
9832             static NeonGenNarrowEnvFn * const fns[3][2] = {
9833                 { gen_helper_neon_narrow_sat_s8,
9834                   gen_helper_neon_narrow_sat_u8 },
9835                 { gen_helper_neon_narrow_sat_s16,
9836                   gen_helper_neon_narrow_sat_u16 },
9837                 { gen_helper_neon_narrow_sat_s32,
9838                   gen_helper_neon_narrow_sat_u32 },
9839             };
9840             genenvfn = fns[size][u];
9841             break;
9842         }
9843         case 0x16: /* FCVTN, FCVTN2 */
9844             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9845             if (size == 2) {
9846                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
9847             } else {
9848                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9849                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9850                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9851                 TCGv_i32 ahp = get_ahp_flag();
9852 
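                /* FPCR.AHP selects between the IEEE and Arm alternative
                 * half-precision formats for these conversions; the two
                 * f16 results are then packed into one 32-bit element.
                 */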
9853                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
9854                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9855                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9856                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9857             }
9858             break;
9859         case 0x36: /* BFCVTN, BFCVTN2 */
9860             {
9861                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9862                 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
9863             }
9864             break;
9865         case 0x56:  /* FCVTXN, FCVTXN2 */
9866             /* 64 bit to 32 bit float conversion
9867              * with von Neumann rounding (round to odd)
9868              */
9869             assert(size == 2);
9870             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
9871             break;
9872         default:
9873             g_assert_not_reached();
9874         }
9875 
9876         if (genfn) {
9877             genfn(tcg_res[pass], tcg_op);
9878         } else if (genenvfn) {
9879             genenvfn(tcg_res[pass], cpu_env, tcg_op);
9880         }
9881     }
9882 
9883     for (pass = 0; pass < 2; pass++) {
9884         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9885     }
9886     clear_vec_high(s, is_q, rd);
9887 }
9888 
9889 /* Remaining saturating accumulating ops */
9890 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9891                                 bool is_q, int size, int rn, int rd)
9892 {
9893     bool is_double = (size == 3);
9894 
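    /* USQADD is an unsigned saturating accumulate of a signed value
     * into Vd; SUQADD is a signed saturating accumulate of an
     * unsigned value.
     */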
9895     if (is_double) {
9896         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9897         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9898         int pass;
9899 
9900         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9901             read_vec_element(s, tcg_rn, rn, pass, MO_64);
9902             read_vec_element(s, tcg_rd, rd, pass, MO_64);
9903 
9904             if (is_u) { /* USQADD */
9905                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9906             } else { /* SUQADD */
9907                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9908             }
9909             write_vec_element(s, tcg_rd, rd, pass, MO_64);
9910         }
9911         clear_vec_high(s, !is_scalar, rd);
9912     } else {
9913         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9914         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9915         int pass, maxpasses;
9916 
9917         if (is_scalar) {
9918             maxpasses = 1;
9919         } else {
9920             maxpasses = is_q ? 4 : 2;
9921         }
9922 
9923         for (pass = 0; pass < maxpasses; pass++) {
9924             if (is_scalar) {
9925                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
9926                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
9927             } else {
9928                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9929                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9930             }
9931 
9932             if (is_u) { /* USQADD */
9933                 switch (size) {
9934                 case 0:
9935                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9936                     break;
9937                 case 1:
9938                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9939                     break;
9940                 case 2:
9941                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9942                     break;
9943                 default:
9944                     g_assert_not_reached();
9945                 }
9946             } else { /* SUQADD */
9947                 switch (size) {
9948                 case 0:
9949                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9950                     break;
9951                 case 1:
9952                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9953                     break;
9954                 case 2:
9955                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9956                     break;
9957                 default:
9958                     g_assert_not_reached();
9959                 }
9960             }
9961 
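            /* For the scalar case, zero the whole low 64 bits of Vd
             * first so the narrower result written below ends up
             * zero-extended.
             */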
9962             if (is_scalar) {
9963                 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64);
9964             }
9965             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9966         }
9967         clear_vec_high(s, is_q, rd);
9968     }
9969 }
9970 
9971 /* AdvSIMD scalar two reg misc
9972  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9973  * +-----+---+-----------+------+-----------+--------+-----+------+------+
9974  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9975  * +-----+---+-----------+------+-----------+--------+-----+------+------+
9976  */
9977 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
9978 {
9979     int rd = extract32(insn, 0, 5);
9980     int rn = extract32(insn, 5, 5);
9981     int opcode = extract32(insn, 12, 5);
9982     int size = extract32(insn, 22, 2);
9983     bool u = extract32(insn, 29, 1);
9984     bool is_fcvt = false;
9985     int rmode;
9986     TCGv_i32 tcg_rmode;
9987     TCGv_ptr tcg_fpstatus;
9988 
9989     switch (opcode) {
9990     case 0x3: /* USQADD / SUQADD */
9991         if (!fp_access_check(s)) {
9992             return;
9993         }
9994         handle_2misc_satacc(s, true, u, false, size, rn, rd);
9995         return;
9996     case 0x7: /* SQABS / SQNEG */
9997         break;
9998     case 0xa: /* CMLT */
9999         if (u) {
10000             unallocated_encoding(s);
10001             return;
10002         }
10003         /* fall through */
10004     case 0x8: /* CMGT, CMGE */
10005     case 0x9: /* CMEQ, CMLE */
10006     case 0xb: /* ABS, NEG */
10007         if (size != 3) {
10008             unallocated_encoding(s);
10009             return;
10010         }
10011         break;
10012     case 0x12: /* SQXTUN */
10013         if (!u) {
10014             unallocated_encoding(s);
10015             return;
10016         }
10017         /* fall through */
10018     case 0x14: /* SQXTN, UQXTN */
10019         if (size == 3) {
10020             unallocated_encoding(s);
10021             return;
10022         }
10023         if (!fp_access_check(s)) {
10024             return;
10025         }
10026         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
10027         return;
10028     case 0xc ... 0xf:
10029     case 0x16 ... 0x1d:
10030     case 0x1f:
10031         /* Floating point: U, size[1] and opcode indicate operation;
10032          * size[0] indicates single or double precision.
10033          */
10034         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10035         size = extract32(size, 0, 1) ? 3 : 2;
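        /* e.g. FCMGE (zero): opcode 0xc with size<1> = 1 and U = 1
         * becomes 0x6c in this combined opcode space.
         */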
10036         switch (opcode) {
10037         case 0x2c: /* FCMGT (zero) */
10038         case 0x2d: /* FCMEQ (zero) */
10039         case 0x2e: /* FCMLT (zero) */
10040         case 0x6c: /* FCMGE (zero) */
10041         case 0x6d: /* FCMLE (zero) */
10042             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
10043             return;
10044         case 0x1d: /* SCVTF */
10045         case 0x5d: /* UCVTF */
10046         {
10047             bool is_signed = (opcode == 0x1d);
10048             if (!fp_access_check(s)) {
10049                 return;
10050             }
10051             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
10052             return;
10053         }
10054         case 0x3d: /* FRECPE */
10055         case 0x3f: /* FRECPX */
10056         case 0x7d: /* FRSQRTE */
10057             if (!fp_access_check(s)) {
10058                 return;
10059             }
10060             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
10061             return;
10062         case 0x1a: /* FCVTNS */
10063         case 0x1b: /* FCVTMS */
10064         case 0x3a: /* FCVTPS */
10065         case 0x3b: /* FCVTZS */
10066         case 0x5a: /* FCVTNU */
10067         case 0x5b: /* FCVTMU */
10068         case 0x7a: /* FCVTPU */
10069         case 0x7b: /* FCVTZU */
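            /* Opcode bit 5 and bit 0 map directly onto the FPRounding
             * encoding: N (tie-even) = 0, P (+inf) = 1, M (-inf) = 2,
             * Z (zero) = 3.
             */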
10070             is_fcvt = true;
10071             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10072             break;
10073         case 0x1c: /* FCVTAS */
10074         case 0x5c: /* FCVTAU */
10075             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
10076             is_fcvt = true;
10077             rmode = FPROUNDING_TIEAWAY;
10078             break;
10079         case 0x56: /* FCVTXN, FCVTXN2 */
10080             if (size == 2) {
10081                 unallocated_encoding(s);
10082                 return;
10083             }
10084             if (!fp_access_check(s)) {
10085                 return;
10086             }
10087             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
10088             return;
10089         default:
10090             unallocated_encoding(s);
10091             return;
10092         }
10093         break;
10094     default:
10095         unallocated_encoding(s);
10096         return;
10097     }
10098 
10099     if (!fp_access_check(s)) {
10100         return;
10101     }
10102 
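    /* gen_set_rmode() installs the requested rounding mode in the fp
     * status and returns the previous mode so gen_restore_rmode() can
     * put it back at the end of the insn.
     */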
10103     if (is_fcvt) {
10104         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
10105         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
10106     } else {
10107         tcg_fpstatus = NULL;
10108         tcg_rmode = NULL;
10109     }
10110 
10111     if (size == 3) {
10112         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
10113         TCGv_i64 tcg_rd = tcg_temp_new_i64();
10114 
10115         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
10116         write_fp_dreg(s, rd, tcg_rd);
10117     } else {
10118         TCGv_i32 tcg_rn = tcg_temp_new_i32();
10119         TCGv_i32 tcg_rd = tcg_temp_new_i32();
10120 
10121         read_vec_element_i32(s, tcg_rn, rn, 0, size);
10122 
10123         switch (opcode) {
10124         case 0x7: /* SQABS, SQNEG */
10125         {
10126             NeonGenOneOpEnvFn *genfn;
10127             static NeonGenOneOpEnvFn * const fns[3][2] = {
10128                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10129                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10130                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
10131             };
10132             genfn = fns[size][u];
10133             genfn(tcg_rd, cpu_env, tcg_rn);
10134             break;
10135         }
10136         case 0x1a: /* FCVTNS */
10137         case 0x1b: /* FCVTMS */
10138         case 0x1c: /* FCVTAS */
10139         case 0x3a: /* FCVTPS */
10140         case 0x3b: /* FCVTZS */
10141             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10142                                  tcg_fpstatus);
10143             break;
10144         case 0x5a: /* FCVTNU */
10145         case 0x5b: /* FCVTMU */
10146         case 0x5c: /* FCVTAU */
10147         case 0x7a: /* FCVTPU */
10148         case 0x7b: /* FCVTZU */
10149             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10150                                  tcg_fpstatus);
10151             break;
10152         default:
10153             g_assert_not_reached();
10154         }
10155 
10156         write_fp_sreg(s, rd, tcg_rd);
10157     }
10158 
10159     if (is_fcvt) {
10160         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
10161     }
10162 }
10163 
10164 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
10165 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
10166                                  int immh, int immb, int opcode, int rn, int rd)
10167 {
10168     int size = 32 - clz32(immh) - 1;
10169     int immhb = immh << 3 | immb;
10170     int shift = 2 * (8 << size) - immhb;
10171     GVecGen2iFn *gvec_fn;
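    /* The shift count is encoded "backwards": e.g. for byte elements
     * (immh == 1) immhb runs from 8 to 15 and shift = 16 - immhb
     * encodes right shifts of 8 down to 1.
     */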
10172 
10173     if (extract32(immh, 3, 1) && !is_q) {
10174         unallocated_encoding(s);
10175         return;
10176     }
10177     tcg_debug_assert(size <= 3);
10178 
10179     if (!fp_access_check(s)) {
10180         return;
10181     }
10182 
10183     switch (opcode) {
10184     case 0x02: /* SSRA / USRA (accumulate) */
10185         gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
10186         break;
10187 
10188     case 0x08: /* SRI */
10189         gvec_fn = gen_gvec_sri;
10190         break;
10191 
10192     case 0x00: /* SSHR / USHR */
10193         if (is_u) {
10194             if (shift == 8 << size) {
10195                 /* A shift count equal to the element size produces zero.  */
10196                 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
10197                                      is_q ? 16 : 8, vec_full_reg_size(s), 0);
10198                 return;
10199             }
10200             gvec_fn = tcg_gen_gvec_shri;
10201         } else {
10202             /* A shift count equal to the element size produces all sign bits.  */
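            /* (The gvec sari expansion requires shift < esize; shifting
             * by esize - 1 already replicates the sign bit into every
             * bit of the lane, which is the architected result.)
             */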
10203             if (shift == 8 << size) {
10204                 shift -= 1;
10205             }
10206             gvec_fn = tcg_gen_gvec_sari;
10207         }
10208         break;
10209 
10210     case 0x04: /* SRSHR / URSHR (rounding) */
10211         gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
10212         break;
10213 
10214     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10215         gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
10216         break;
10217 
10218     default:
10219         g_assert_not_reached();
10220     }
10221 
10222     gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
10223 }
10224 
10225 /* SHL/SLI - Vector shift left */
10226 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10227                                  int immh, int immb, int opcode, int rn, int rd)
10228 {
10229     int size = 32 - clz32(immh) - 1;
10230     int immhb = immh << 3 | immb;
10231     int shift = immhb - (8 << size);
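    /* Left shifts are encoded directly: e.g. immh == 1, immb == 3 is
     * SHL #3 on byte elements (shift = 11 - 8).
     */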
10232 
10233     /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10234     assert(size >= 0 && size <= 3);
10235 
10236     if (extract32(immh, 3, 1) && !is_q) {
10237         unallocated_encoding(s);
10238         return;
10239     }
10240 
10241     if (!fp_access_check(s)) {
10242         return;
10243     }
10244 
10245     if (insert) {
10246         gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
10247     } else {
10248         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10249     }
10250 }
10251 
10252 /* SSHLL/USHLL - Vector shift left with widening */
10253 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10254                                  int immh, int immb, int opcode, int rn, int rd)
10255 {
10256     int size = 32 - clz32(immh) - 1;
10257     int immhb = immh << 3 | immb;
10258     int shift = immhb - (8 << size);
10259     int dsize = 64;
10260     int esize = 8 << size;
10261     int elements = dsize / esize;
10262     TCGv_i64 tcg_rn = tcg_temp_new_i64();
10263     TCGv_i64 tcg_rd = tcg_temp_new_i64();
10264     int i;
10265 
10266     if (size >= 3) {
10267         unallocated_encoding(s);
10268         return;
10269     }
10270 
10271     if (!fp_access_check(s)) {
10272         return;
10273     }
10274 
10275     /* For the LL variants the store is larger than the load,
10276      * so if rd == rn we would overwrite parts of our input;
10277      * load everything up front and use shifts in the main loop.
10278      */
10279     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10280 
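    /* Each pass moves the source element to the bottom, sign- or
     * zero-extends it via the extended-register helper (option is size
     * with bit 2 set for a signed widen), then shifts it into place.
     */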
10281     for (i = 0; i < elements; i++) {
10282         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
10283         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10284         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10285         write_vec_element(s, tcg_rd, rd, i, size + 1);
10286     }
10287 }
10288 
10289 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10290 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10291                                  int immh, int immb, int opcode, int rn, int rd)
10292 {
10293     int immhb = immh << 3 | immb;
10294     int size = 32 - clz32(immh) - 1;
10295     int dsize = 64;
10296     int esize = 8 << size;
10297     int elements = dsize / esize;
10298     int shift = (2 * esize) - immhb;
10299     bool round = extract32(opcode, 0, 1);
10300     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10301     TCGv_i64 tcg_round;
10302     int i;
10303 
10304     if (extract32(immh, 3, 1)) {
10305         unallocated_encoding(s);
10306         return;
10307     }
10308 
10309     if (!fp_access_check(s)) {
10310         return;
10311     }
10312 
10313     tcg_rn = tcg_temp_new_i64();
10314     tcg_rd = tcg_temp_new_i64();
10315     tcg_final = tcg_temp_new_i64();
10316     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10317 
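    /* Rounding is done by adding 2^(shift-1), i.e. half of the
     * discarded range, before the right shift.
     */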
10318     if (round) {
10319         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
10320     } else {
10321         tcg_round = NULL;
10322     }
10323 
10324     for (i = 0; i < elements; i++) {
10325         read_vec_element(s, tcg_rn, rn, i, size + 1);
10326         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10327                                 false, true, size + 1, shift);
10328 
10329         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10330     }
10331 
10332     if (!is_q) {
10333         write_vec_element(s, tcg_final, rd, 0, MO_64);
10334     } else {
10335         write_vec_element(s, tcg_final, rd, 1, MO_64);
10336     }
10337 
10338     clear_vec_high(s, is_q, rd);
10339 }
10340 
10341 
10342 /* AdvSIMD shift by immediate
10343  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10344  * +---+---+---+-------------+------+------+--------+---+------+------+
10345  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10346  * +---+---+---+-------------+------+------+--------+---+------+------+
10347  */
10348 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10349 {
10350     int rd = extract32(insn, 0, 5);
10351     int rn = extract32(insn, 5, 5);
10352     int opcode = extract32(insn, 11, 5);
10353     int immb = extract32(insn, 16, 3);
10354     int immh = extract32(insn, 19, 4);
10355     bool is_u = extract32(insn, 29, 1);
10356     bool is_q = extract32(insn, 30, 1);
10357 
10358     /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
10359     assert(immh != 0);
10360 
10361     switch (opcode) {
10362     case 0x08: /* SRI */
10363         if (!is_u) {
10364             unallocated_encoding(s);
10365             return;
10366         }
10367         /* fall through */
10368     case 0x00: /* SSHR / USHR */
10369     case 0x02: /* SSRA / USRA (accumulate) */
10370     case 0x04: /* SRSHR / URSHR (rounding) */
10371     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10372         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10373         break;
10374     case 0x0a: /* SHL / SLI */
10375         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10376         break;
10377     case 0x10: /* SHRN */
10378     case 0x11: /* RSHRN / SQRSHRUN */
10379         if (is_u) {
10380             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10381                                    opcode, rn, rd);
10382         } else {
10383             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10384         }
10385         break;
10386     case 0x12: /* SQSHRN / UQSHRN */
10387     case 0x13: /* SQRSHRN / UQRSHRN */
10388         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10389                                opcode, rn, rd);
10390         break;
10391     case 0x14: /* SSHLL / USHLL */
10392         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10393         break;
10394     case 0x1c: /* SCVTF / UCVTF */
10395         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10396                                      opcode, rn, rd);
10397         break;
10398     case 0xc: /* SQSHLU */
10399         if (!is_u) {
10400             unallocated_encoding(s);
10401             return;
10402         }
10403         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10404         break;
10405     case 0xe: /* SQSHL, UQSHL */
10406         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10407         break;
10408     case 0x1f: /* FCVTZS/ FCVTZU */
10409         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10410         return;
10411     default:
10412         unallocated_encoding(s);
10413         return;
10414     }
10415 }
10416 
10417 /* Generate code to do a "long" addition or subtraction, i.e. one done in
10418  * TCGv_i64 on vector lanes twice the width specified by size.
10419  */
10420 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10421                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10422 {
10423     static NeonGenTwo64OpFn * const fns[3][2] = {
10424         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10425         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10426         { tcg_gen_add_i64, tcg_gen_sub_i64 },
10427     };
10428     NeonGenTwo64OpFn *genfn;
10429     assert(size < 3);
10430 
10431     genfn = fns[size][is_sub];
10432     genfn(tcg_res, tcg_op1, tcg_op2);
10433 }
10434 
10435 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
10436                                 int opcode, int rd, int rn, int rm)
10437 {
10438     /* 3-reg-different widening insns: 64 x 64 -> 128 */
10439     TCGv_i64 tcg_res[2];
10440     int pass, accop;
10441 
10442     tcg_res[0] = tcg_temp_new_i64();
10443     tcg_res[1] = tcg_temp_new_i64();
10444 
10445     /* Does this op do an adding accumulate, a subtracting accumulate,
10446      * or no accumulate at all?
10447      */
10448     switch (opcode) {
10449     case 5:
10450     case 8:
10451     case 9:
10452         accop = 1;
10453         break;
10454     case 10:
10455     case 11:
10456         accop = -1;
10457         break;
10458     default:
10459         accop = 0;
10460         break;
10461     }
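    /* i.e. accop == 1 for SABAL/SMLAL/SQDMLAL, accop == -1 for
     * SMLSL/SQDMLSL, and accop == 0 for the pure widening ops.
     */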
10462 
10463     if (accop != 0) {
10464         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
10465         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
10466     }
10467 
10468     /* size == 2 means two 32x32->64 operations; this is worth special
10469      * casing because we can generally handle it inline.
10470      */
10471     if (size == 2) {
10472         for (pass = 0; pass < 2; pass++) {
10473             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10474             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10475             TCGv_i64 tcg_passres;
10476             MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
10477 
10478             int elt = pass + is_q * 2;
10479 
10480             read_vec_element(s, tcg_op1, rn, elt, memop);
10481             read_vec_element(s, tcg_op2, rm, elt, memop);
10482 
10483             if (accop == 0) {
10484                 tcg_passres = tcg_res[pass];
10485             } else {
10486                 tcg_passres = tcg_temp_new_i64();
10487             }
10488 
10489             switch (opcode) {
10490             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10491                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
10492                 break;
10493             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10494                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
10495                 break;
10496             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10497             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10498             {
10499                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
10500                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
10501 
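                /* Absolute difference: compute both op1 - op2 and
                 * op2 - op1 and use a conditional move to pick
                 * whichever is non-negative.
                 */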
10502                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
10503                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
10504                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
10505                                     tcg_passres,
10506                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
10507                 break;
10508             }
10509             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10510             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10511             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10512                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10513                 break;
10514             case 9: /* SQDMLAL, SQDMLAL2 */
10515             case 11: /* SQDMLSL, SQDMLSL2 */
10516             case 13: /* SQDMULL, SQDMULL2 */
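                /* The doubling in SQDMULL is implemented as a
                 * saturating add of the product to itself, which also
                 * sets QC if the result saturates.
                 */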
10517                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10518                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10519                                                   tcg_passres, tcg_passres);
10520                 break;
10521             default:
10522                 g_assert_not_reached();
10523             }
10524 
10525             if (opcode == 9 || opcode == 11) {
10526                 /* saturating accumulate ops */
10527                 if (accop < 0) {
10528                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10529                 }
10530                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10531                                                   tcg_res[pass], tcg_passres);
10532             } else if (accop > 0) {
10533                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10534             } else if (accop < 0) {
10535                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10536             }
10537         }
10538     } else {
10539         /* size 0 or 1, generally helper functions */
10540         for (pass = 0; pass < 2; pass++) {
10541             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10542             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10543             TCGv_i64 tcg_passres;
10544             int elt = pass + is_q * 2;
10545 
10546             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
10547             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
10548 
10549             if (accop == 0) {
10550                 tcg_passres = tcg_res[pass];
10551             } else {
10552                 tcg_passres = tcg_temp_new_i64();
10553             }
10554 
10555             switch (opcode) {
10556             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10557             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10558             {
10559                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
10560                 static NeonGenWidenFn * const widenfns[2][2] = {
10561                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10562                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10563                 };
10564                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
10565 
10566                 widenfn(tcg_op2_64, tcg_op2);
10567                 widenfn(tcg_passres, tcg_op1);
10568                 gen_neon_addl(size, (opcode == 2), tcg_passres,
10569                               tcg_passres, tcg_op2_64);
10570                 break;
10571             }
10572             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10573             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10574                 if (size == 0) {
10575                     if (is_u) {
10576                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
10577                     } else {
10578                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
10579                     }
10580                 } else {
10581                     if (is_u) {
10582                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
10583                     } else {
10584                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
10585                     }
10586                 }
10587                 break;
10588             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10589             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10590             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10591                 if (size == 0) {
10592                     if (is_u) {
10593                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
10594                     } else {
10595                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
10596                     }
10597                 } else {
10598                     if (is_u) {
10599                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
10600                     } else {
10601                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10602                     }
10603                 }
10604                 break;
10605             case 9: /* SQDMLAL, SQDMLAL2 */
10606             case 11: /* SQDMLSL, SQDMLSL2 */
10607             case 13: /* SQDMULL, SQDMULL2 */
10608                 assert(size == 1);
10609                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10610                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10611                                                   tcg_passres, tcg_passres);
10612                 break;
10613             default:
10614                 g_assert_not_reached();
10615             }
10616 
10617             if (accop != 0) {
10618                 if (opcode == 9 || opcode == 11) {
10619                     /* saturating accumulate ops */
10620                     if (accop < 0) {
10621                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10622                     }
10623                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10624                                                       tcg_res[pass],
10625                                                       tcg_passres);
10626                 } else {
10627                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
10628                                   tcg_res[pass], tcg_passres);
10629                 }
10630             }
10631         }
10632     }
10633 
10634     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
10635     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
10636 }
10637 
10638 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10639                             int opcode, int rd, int rn, int rm)
10640 {
10641     TCGv_i64 tcg_res[2];
10642     int part = is_q ? 2 : 0;
10643     int pass;
10644 
10645     for (pass = 0; pass < 2; pass++) {
10646         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10647         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10648         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10649         static NeonGenWidenFn * const widenfns[3][2] = {
10650             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10651             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10652             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10653         };
10654         NeonGenWidenFn *widenfn = widenfns[size][is_u];
10655 
10656         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10657         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10658         widenfn(tcg_op2_wide, tcg_op2);
10659         tcg_res[pass] = tcg_temp_new_i64();
10660         gen_neon_addl(size, (opcode == 3),
10661                       tcg_res[pass], tcg_op1, tcg_op2_wide);
10662     }
10663 
10664     for (pass = 0; pass < 2; pass++) {
10665         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10666     }
10667 }
10668 
10669 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
10670 {
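    /* Round by adding half of the discarded range (1 << 31), then keep
     * the high 32 bits: the rounded narrow-high step of RADDHN/RSUBHN
     * for 32-bit results.
     */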
10671     tcg_gen_addi_i64(in, in, 1U << 31);
10672     tcg_gen_extrh_i64_i32(res, in);
10673 }
10674 
10675 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10676                                  int opcode, int rd, int rn, int rm)
10677 {
10678     TCGv_i32 tcg_res[2];
10679     int part = is_q ? 2 : 0;
10680     int pass;
10681 
10682     for (pass = 0; pass < 2; pass++) {
10683         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10684         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10685         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10686         static NeonGenNarrowFn * const narrowfns[3][2] = {
10687             { gen_helper_neon_narrow_high_u8,
10688               gen_helper_neon_narrow_round_high_u8 },
10689             { gen_helper_neon_narrow_high_u16,
10690               gen_helper_neon_narrow_round_high_u16 },
10691             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10692         };
10693         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10694 
10695         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10696         read_vec_element(s, tcg_op2, rm, pass, MO_64);
10697 
10698         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10699 
10700         tcg_res[pass] = tcg_temp_new_i32();
10701         gennarrow(tcg_res[pass], tcg_wideres);
10702     }
10703 
10704     for (pass = 0; pass < 2; pass++) {
10705         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10706     }
10707     clear_vec_high(s, is_q, rd);
10708 }
10709 
10710 /* AdvSIMD three different
10711  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
10712  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10713  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
10714  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10715  */
10716 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10717 {
10718     /* Instructions in this group fall into three basic classes
10719      * (in each case with the operation working on each element in
10720      * the input vectors):
10721      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10722      *     128 bit input)
10723      * (2) wide 64 x 128 -> 128
10724      * (3) narrowing 128 x 128 -> 64
10725      * Here we do initial decode, catch unallocated cases and
10726      * dispatch to separate functions for each class.
10727      */
10728     int is_q = extract32(insn, 30, 1);
10729     int is_u = extract32(insn, 29, 1);
10730     int size = extract32(insn, 22, 2);
10731     int opcode = extract32(insn, 12, 4);
10732     int rm = extract32(insn, 16, 5);
10733     int rn = extract32(insn, 5, 5);
10734     int rd = extract32(insn, 0, 5);
10735 
10736     switch (opcode) {
10737     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10738     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10739         /* 64 x 128 -> 128 */
10740         if (size == 3) {
10741             unallocated_encoding(s);
10742             return;
10743         }
10744         if (!fp_access_check(s)) {
10745             return;
10746         }
10747         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10748         break;
10749     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10750     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10751         /* 128 x 128 -> 64 */
10752         if (size == 3) {
10753             unallocated_encoding(s);
10754             return;
10755         }
10756         if (!fp_access_check(s)) {
10757             return;
10758         }
10759         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10760         break;
10761     case 14: /* PMULL, PMULL2 */
10762         if (is_u) {
10763             unallocated_encoding(s);
10764             return;
10765         }
10766         switch (size) {
10767         case 0: /* PMULL.P8 */
10768             if (!fp_access_check(s)) {
10769                 return;
10770             }
10771             /* The Q field specifies lo/hi half input for this insn.  */
10772             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10773                              gen_helper_neon_pmull_h);
10774             break;
10775 
10776         case 3: /* PMULL.P64 */
10777             if (!dc_isar_feature(aa64_pmull, s)) {
10778                 unallocated_encoding(s);
10779                 return;
10780             }
10781             if (!fp_access_check(s)) {
10782                 return;
10783             }
10784             /* The Q field specifies lo/hi half input for this insn.  */
10785             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10786                              gen_helper_gvec_pmull_q);
10787             break;
10788 
10789         default:
10790             unallocated_encoding(s);
10791             break;
10792         }
10793         return;
10794     case 9: /* SQDMLAL, SQDMLAL2 */
10795     case 11: /* SQDMLSL, SQDMLSL2 */
10796     case 13: /* SQDMULL, SQDMULL2 */
10797         if (is_u || size == 0) {
10798             unallocated_encoding(s);
10799             return;
10800         }
10801         /* fall through */
10802     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10803     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10804     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10805     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10806     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10807     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10808     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10809         /* 64 x 64 -> 128 */
10810         if (size == 3) {
10811             unallocated_encoding(s);
10812             return;
10813         }
10814         if (!fp_access_check(s)) {
10815             return;
10816         }
10817 
10818         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10819         break;
10820     default:
10821         /* opcode 15 not allocated */
10822         unallocated_encoding(s);
10823         break;
10824     }
10825 }
10826 
10827 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10828 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10829 {
10830     int rd = extract32(insn, 0, 5);
10831     int rn = extract32(insn, 5, 5);
10832     int rm = extract32(insn, 16, 5);
10833     int size = extract32(insn, 22, 2);
10834     bool is_u = extract32(insn, 29, 1);
10835     bool is_q = extract32(insn, 30, 1);
10836 
10837     if (!fp_access_check(s)) {
10838         return;
10839     }
10840 
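    /* size (2 bits) plus U at weight 4 forms a dense 0..7 index over
     * the eight bitwise ops in this group.
     */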
10841     switch (size + 4 * is_u) {
10842     case 0: /* AND */
10843         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10844         return;
10845     case 1: /* BIC */
10846         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10847         return;
10848     case 2: /* ORR */
10849         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10850         return;
10851     case 3: /* ORN */
10852         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10853         return;
10854     case 4: /* EOR */
10855         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10856         return;
10857 
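    /* BSL, BIT and BIF are all the same bitwise-select operation, with
     * the mask, true and false operands drawn from different registers.
     */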
10858     case 5: /* BSL bitwise select */
10859         gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
10860         return;
10861     case 6: /* BIT, bitwise insert if true */
10862         gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
10863         return;
10864     case 7: /* BIF, bitwise insert if false */
10865         gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
10866         return;
10867 
10868     default:
10869         g_assert_not_reached();
10870     }
10871 }
10872 
10873 /* Pairwise op subgroup of C3.6.16.
10874  *
10875  * This is called directly or from disas_simd_3same_float for the float
10876  * pairwise operations, where the opcode and size are decoded differently.
10877  */
10878 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
10879                                    int size, int rn, int rm, int rd)
10880 {
10881     TCGv_ptr fpst;
10882     int pass;
10883 
10884     /* Floating point operations need fpst */
10885     if (opcode >= 0x58) {
10886         fpst = fpstatus_ptr(FPST_FPCR);
10887     } else {
10888         fpst = NULL;
10889     }
10890 
10891     if (!fp_access_check(s)) {
10892         return;
10893     }
10894 
10895     /* These operations work on the concatenated rm:rn, with each pair of
10896      * adjacent elements being operated on to produce an element in the result.
10897      */
10898     if (size == 3) {
10899         TCGv_i64 tcg_res[2];
10900 
10901         for (pass = 0; pass < 2; pass++) {
10902             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10903             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10904             int passreg = (pass == 0) ? rn : rm;
10905 
10906             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
10907             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
10908             tcg_res[pass] = tcg_temp_new_i64();
10909 
10910             switch (opcode) {
10911             case 0x17: /* ADDP */
10912                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10913                 break;
10914             case 0x58: /* FMAXNMP */
10915                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10916                 break;
10917             case 0x5a: /* FADDP */
10918                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10919                 break;
10920             case 0x5e: /* FMAXP */
10921                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10922                 break;
10923             case 0x78: /* FMINNMP */
10924                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10925                 break;
10926             case 0x7e: /* FMINP */
10927                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10928                 break;
10929             default:
10930                 g_assert_not_reached();
10931             }
10932         }
10933 
10934         for (pass = 0; pass < 2; pass++) {
10935             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10936         }
10937     } else {
10938         int maxpass = is_q ? 4 : 2;
10939         TCGv_i32 tcg_res[4];
10940 
10941         for (pass = 0; pass < maxpass; pass++) {
10942             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10943             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10944             NeonGenTwoOpFn *genfn = NULL;
10945             int passreg = pass < (maxpass / 2) ? rn : rm;
10946             int passelt = (is_q && (pass & 1)) ? 2 : 0;
10947 
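            /* The first half of the passes draws its pair from Rn and
             * the second half from Rm; each pass reads two adjacent
             * 32-bit chunks (sub-word lanes are handled inside the
             * helpers).
             */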
10948             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10949             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10950             tcg_res[pass] = tcg_temp_new_i32();
10951 
10952             switch (opcode) {
10953             case 0x17: /* ADDP */
10954             {
10955                 static NeonGenTwoOpFn * const fns[3] = {
10956                     gen_helper_neon_padd_u8,
10957                     gen_helper_neon_padd_u16,
10958                     tcg_gen_add_i32,
10959                 };
10960                 genfn = fns[size];
10961                 break;
10962             }
10963             case 0x14: /* SMAXP, UMAXP */
10964             {
10965                 static NeonGenTwoOpFn * const fns[3][2] = {
10966                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10967                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10968                     { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10969                 };
10970                 genfn = fns[size][u];
10971                 break;
10972             }
10973             case 0x15: /* SMINP, UMINP */
10974             {
10975                 static NeonGenTwoOpFn * const fns[3][2] = {
10976                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10977                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10978                     { tcg_gen_smin_i32, tcg_gen_umin_i32 },
10979                 };
10980                 genfn = fns[size][u];
10981                 break;
10982             }
10983             /* The FP operations are all on single floats (32 bit) */
10984             case 0x58: /* FMAXNMP */
10985                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10986                 break;
10987             case 0x5a: /* FADDP */
10988                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10989                 break;
10990             case 0x5e: /* FMAXP */
10991                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10992                 break;
10993             case 0x78: /* FMINNMP */
10994                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10995                 break;
10996             case 0x7e: /* FMINP */
10997                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10998                 break;
10999             default:
11000                 g_assert_not_reached();
11001             }
11002 
11003             /* The FP ops wrote tcg_res directly above; call the integer fn now */
11004             if (genfn) {
11005                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
11006             }
11007         }
11008 
11009         for (pass = 0; pass < maxpass; pass++) {
11010             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11011         }
11012         clear_vec_high(s, is_q, rd);
11013     }
11014 }
11015 
11016 /* Floating point op subgroup of C3.6.16. */
11017 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
11018 {
11019     /* For floating point ops, the U, size[1] and opcode bits
11020      * together indicate the operation. size[0] indicates single
11021      * or double.
11022      */
11023     int fpopcode = extract32(insn, 11, 5)
11024         | (extract32(insn, 23, 1) << 5)
11025         | (extract32(insn, 29, 1) << 6);
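    /*
     * Worked example: FADDP has U == 1, size[1] == 0 and opcode bits
     * [15:11] 0b11010, giving fpopcode 0x5a in the switch below.
     */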
11026     int is_q = extract32(insn, 30, 1);
11027     int size = extract32(insn, 22, 1);
11028     int rm = extract32(insn, 16, 5);
11029     int rn = extract32(insn, 5, 5);
11030     int rd = extract32(insn, 0, 5);
11031 
11032     int datasize = is_q ? 128 : 64;
11033     int esize = 32 << size;
11034     int elements = datasize / esize;
11035 
11036     if (size == 1 && !is_q) {
11037         unallocated_encoding(s);
11038         return;
11039     }
11040 
11041     switch (fpopcode) {
11042     case 0x58: /* FMAXNMP */
11043     case 0x5a: /* FADDP */
11044     case 0x5e: /* FMAXP */
11045     case 0x78: /* FMINNMP */
11046     case 0x7e: /* FMINP */
11047         if (size && !is_q) {
11048             unallocated_encoding(s);
11049             return;
11050         }
11051         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
11052                                rn, rm, rd);
11053         return;
11054     case 0x1b: /* FMULX */
11055     case 0x1f: /* FRECPS */
11056     case 0x3f: /* FRSQRTS */
11057     case 0x5d: /* FACGE */
11058     case 0x7d: /* FACGT */
11059     case 0x19: /* FMLA */
11060     case 0x39: /* FMLS */
11061     case 0x18: /* FMAXNM */
11062     case 0x1a: /* FADD */
11063     case 0x1c: /* FCMEQ */
11064     case 0x1e: /* FMAX */
11065     case 0x38: /* FMINNM */
11066     case 0x3a: /* FSUB */
11067     case 0x3e: /* FMIN */
11068     case 0x5b: /* FMUL */
11069     case 0x5c: /* FCMGE */
11070     case 0x5f: /* FDIV */
11071     case 0x7a: /* FABD */
11072     case 0x7c: /* FCMGT */
11073         if (!fp_access_check(s)) {
11074             return;
11075         }
11076         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
11077         return;
11078 
11079     case 0x1d: /* FMLAL  */
11080     case 0x3d: /* FMLSL  */
11081     case 0x59: /* FMLAL2 */
11082     case 0x79: /* FMLSL2 */
11083         if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
11084             unallocated_encoding(s);
11085             return;
11086         }
11087         if (fp_access_check(s)) {
11088             int is_s = extract32(insn, 23, 1);
11089             int is_2 = extract32(insn, 29, 1);
11090             int data = (is_2 << 1) | is_s;
11091             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
11092                                vec_full_reg_offset(s, rn),
11093                                vec_full_reg_offset(s, rm), cpu_env,
11094                                is_q ? 16 : 8, vec_full_reg_size(s),
11095                                data, gen_helper_gvec_fmlal_a64);
11096         }
11097         return;
11098 
11099     default:
11100         unallocated_encoding(s);
11101         return;
11102     }
11103 }
11104 
11105 /* Integer op subgroup of C3.6.16. */
11106 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
11107 {
11108     int is_q = extract32(insn, 30, 1);
11109     int u = extract32(insn, 29, 1);
11110     int size = extract32(insn, 22, 2);
11111     int opcode = extract32(insn, 11, 5);
11112     int rm = extract32(insn, 16, 5);
11113     int rn = extract32(insn, 5, 5);
11114     int rd = extract32(insn, 0, 5);
11115     int pass;
11116     TCGCond cond;
11117 
11118     switch (opcode) {
11119     case 0x13: /* MUL, PMUL */
11120         if (u && size != 0) {
11121             unallocated_encoding(s);
11122             return;
11123         }
11124         /* fall through */
11125     case 0x0: /* SHADD, UHADD */
11126     case 0x2: /* SRHADD, URHADD */
11127     case 0x4: /* SHSUB, UHSUB */
11128     case 0xc: /* SMAX, UMAX */
11129     case 0xd: /* SMIN, UMIN */
11130     case 0xe: /* SABD, UABD */
11131     case 0xf: /* SABA, UABA */
11132     case 0x12: /* MLA, MLS */
11133         if (size == 3) {
11134             unallocated_encoding(s);
11135             return;
11136         }
11137         break;
11138     case 0x16: /* SQDMULH, SQRDMULH */
11139         if (size == 0 || size == 3) {
11140             unallocated_encoding(s);
11141             return;
11142         }
11143         break;
11144     default:
11145         if (size == 3 && !is_q) {
11146             unallocated_encoding(s);
11147             return;
11148         }
11149         break;
11150     }
11151 
11152     if (!fp_access_check(s)) {
11153         return;
11154     }
11155 
11156     switch (opcode) {
11157     case 0x01: /* SQADD, UQADD */
11158         if (u) {
11159             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
11160         } else {
11161             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
11162         }
11163         return;
11164     case 0x05: /* SQSUB, UQSUB */
11165         if (u) {
11166             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
11167         } else {
11168             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
11169         }
11170         return;
11171     case 0x08: /* SSHL, USHL */
11172         if (u) {
11173             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
11174         } else {
11175             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
11176         }
11177         return;
11178     case 0x0c: /* SMAX, UMAX */
11179         if (u) {
11180             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
11181         } else {
11182             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
11183         }
11184         return;
11185     case 0x0d: /* SMIN, UMIN */
11186         if (u) {
11187             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
11188         } else {
11189             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
11190         }
11191         return;
11192     case 0xe: /* SABD, UABD */
11193         if (u) {
11194             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
11195         } else {
11196             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
11197         }
11198         return;
11199     case 0xf: /* SABA, UABA */
11200         if (u) {
11201             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
11202         } else {
11203             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
11204         }
11205         return;
11206     case 0x10: /* ADD, SUB */
11207         if (u) {
11208             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
11209         } else {
11210             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
11211         }
11212         return;
11213     case 0x13: /* MUL, PMUL */
11214         if (!u) { /* MUL */
11215             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
11216         } else {  /* PMUL */
11217             gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
11218         }
11219         return;
11220     case 0x12: /* MLA, MLS */
11221         if (u) {
11222             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
11223         } else {
11224             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
11225         }
11226         return;
11227     case 0x16: /* SQDMULH, SQRDMULH */
11228         {
11229             static gen_helper_gvec_3_ptr * const fns[2][2] = {
11230                 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
11231                 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
11232             };
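            /* size is 1 (16-bit) or 2 (32-bit) here; 0 and 3 were rejected above */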
11233             gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
11234         }
11235         return;
11236     case 0x11: /* CMTST, CMEQ */
11237         if (!u) { /* CMTST */
11238             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
11239             return;
11240         }
11241         /* else CMEQ */
11242         cond = TCG_COND_EQ;
11243         goto do_gvec_cmp;
11244     case 0x06: /* CMGT, CMHI */
11245         cond = u ? TCG_COND_GTU : TCG_COND_GT;
11246         goto do_gvec_cmp;
11247     case 0x07: /* CMGE, CMHS */
11248         cond = u ? TCG_COND_GEU : TCG_COND_GE;
11249     do_gvec_cmp:
11250         tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
11251                          vec_full_reg_offset(s, rn),
11252                          vec_full_reg_offset(s, rm),
11253                          is_q ? 16 : 8, vec_full_reg_size(s));
11254         return;
11255     }
11256 
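    /* Any size == 3 op that reaches here was forced to be Q-form above */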
11257     if (size == 3) {
11258         assert(is_q);
11259         for (pass = 0; pass < 2; pass++) {
11260             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11261             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11262             TCGv_i64 tcg_res = tcg_temp_new_i64();
11263 
11264             read_vec_element(s, tcg_op1, rn, pass, MO_64);
11265             read_vec_element(s, tcg_op2, rm, pass, MO_64);
11266 
11267             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
11268 
11269             write_vec_element(s, tcg_res, rd, pass, MO_64);
11270         }
11271     } else {
11272         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11273             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11274             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11275             TCGv_i32 tcg_res = tcg_temp_new_i32();
11276             NeonGenTwoOpFn *genfn = NULL;
11277             NeonGenTwoOpEnvFn *genenvfn = NULL;
11278 
11279             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
11280             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
11281 
11282             switch (opcode) {
11283             case 0x0: /* SHADD, UHADD */
11284             {
11285                 static NeonGenTwoOpFn * const fns[3][2] = {
11286                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
11287                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
11288                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
11289                 };
11290                 genfn = fns[size][u];
11291                 break;
11292             }
11293             case 0x2: /* SRHADD, URHADD */
11294             {
11295                 static NeonGenTwoOpFn * const fns[3][2] = {
11296                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
11297                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
11298                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
11299                 };
11300                 genfn = fns[size][u];
11301                 break;
11302             }
11303             case 0x4: /* SHSUB, UHSUB */
11304             {
11305                 static NeonGenTwoOpFn * const fns[3][2] = {
11306                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
11307                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
11308                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
11309                 };
11310                 genfn = fns[size][u];
11311                 break;
11312             }
11313             case 0x9: /* SQSHL, UQSHL */
11314             {
11315                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11316                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
11317                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
11318                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
11319                 };
11320                 genenvfn = fns[size][u];
11321                 break;
11322             }
11323             case 0xa: /* SRSHL, URSHL */
11324             {
11325                 static NeonGenTwoOpFn * const fns[3][2] = {
11326                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
11327                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
11328                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
11329                 };
11330                 genfn = fns[size][u];
11331                 break;
11332             }
11333             case 0xb: /* SQRSHL, UQRSHL */
11334             {
11335                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11336                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
11337                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
11338                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
11339                 };
11340                 genenvfn = fns[size][u];
11341                 break;
11342             }
11343             default:
11344                 g_assert_not_reached();
11345             }
11346 
11347             if (genenvfn) {
11348                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
11349             } else {
11350                 genfn(tcg_res, tcg_op1, tcg_op2);
11351             }
11352 
11353             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11354         }
11355     }
11356     clear_vec_high(s, is_q, rd);
11357 }
11358 
11359 /* AdvSIMD three same
11360  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11361  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11362  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11363  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11364  */
11365 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11366 {
11367     int opcode = extract32(insn, 11, 5);
11368 
11369     switch (opcode) {
11370     case 0x3: /* logic ops */
11371         disas_simd_3same_logic(s, insn);
11372         break;
11373     case 0x17: /* ADDP */
11374     case 0x14: /* SMAXP, UMAXP */
11375     case 0x15: /* SMINP, UMINP */
11376     {
11377         /* Pairwise operations */
11378         int is_q = extract32(insn, 30, 1);
11379         int u = extract32(insn, 29, 1);
11380         int size = extract32(insn, 22, 2);
11381         int rm = extract32(insn, 16, 5);
11382         int rn = extract32(insn, 5, 5);
11383         int rd = extract32(insn, 0, 5);
11384         if (opcode == 0x17) {
11385             if (u || (size == 3 && !is_q)) {
11386                 unallocated_encoding(s);
11387                 return;
11388             }
11389         } else {
11390             if (size == 3) {
11391                 unallocated_encoding(s);
11392                 return;
11393             }
11394         }
11395         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11396         break;
11397     }
11398     case 0x18 ... 0x31:
11399         /* floating point ops; size[1] and U are part of the opcode */
11400         disas_simd_3same_float(s, insn);
11401         break;
11402     default:
11403         disas_simd_3same_int(s, insn);
11404         break;
11405     }
11406 }
11407 
11408 /*
11409  * Advanced SIMD three same (ARMv8.2 FP16 variants)
11410  *
11411  *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11412  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11413  * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11414  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11415  *
11416  * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11417  * (register), FACGE, FABD, FCMGT (register) and FACGT.
11418  *
11419  */
11420 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11421 {
11422     int opcode = extract32(insn, 11, 3);
11423     int u = extract32(insn, 29, 1);
11424     int a = extract32(insn, 23, 1);
11425     int is_q = extract32(insn, 30, 1);
11426     int rm = extract32(insn, 16, 5);
11427     int rn = extract32(insn, 5, 5);
11428     int rd = extract32(insn, 0, 5);
11429     /*
11430      * For these floating point ops, the U, a and opcode bits
11431      * together indicate the operation.
11432      */
11433     int fpopcode = opcode | (a << 3) | (u << 4);
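    /* e.g. FADDP (pairwise) has U == 1, a == 0, opcode 0b010: fpopcode 0x12 */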
11434     int datasize = is_q ? 128 : 64;
11435     int elements = datasize / 16;
11436     bool pairwise;
11437     TCGv_ptr fpst;
11438     int pass;
11439 
11440     switch (fpopcode) {
11441     case 0x0: /* FMAXNM */
11442     case 0x1: /* FMLA */
11443     case 0x2: /* FADD */
11444     case 0x3: /* FMULX */
11445     case 0x4: /* FCMEQ */
11446     case 0x6: /* FMAX */
11447     case 0x7: /* FRECPS */
11448     case 0x8: /* FMINNM */
11449     case 0x9: /* FMLS */
11450     case 0xa: /* FSUB */
11451     case 0xe: /* FMIN */
11452     case 0xf: /* FRSQRTS */
11453     case 0x13: /* FMUL */
11454     case 0x14: /* FCMGE */
11455     case 0x15: /* FACGE */
11456     case 0x17: /* FDIV */
11457     case 0x1a: /* FABD */
11458     case 0x1c: /* FCMGT */
11459     case 0x1d: /* FACGT */
11460         pairwise = false;
11461         break;
11462     case 0x10: /* FMAXNMP */
11463     case 0x12: /* FADDP */
11464     case 0x16: /* FMAXP */
11465     case 0x18: /* FMINNMP */
11466     case 0x1e: /* FMINP */
11467         pairwise = true;
11468         break;
11469     default:
11470         unallocated_encoding(s);
11471         return;
11472     }
11473 
11474     if (!dc_isar_feature(aa64_fp16, s)) {
11475         unallocated_encoding(s);
11476         return;
11477     }
11478 
11479     if (!fp_access_check(s)) {
11480         return;
11481     }
11482 
11483     fpst = fpstatus_ptr(FPST_FPCR_F16);
11484 
11485     if (pairwise) {
11486         int maxpass = is_q ? 8 : 4;
11487         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11488         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11489         TCGv_i32 tcg_res[8];
11490 
11491         for (pass = 0; pass < maxpass; pass++) {
11492             int passreg = pass < (maxpass / 2) ? rn : rm;
11493             int passelt = (pass << 1) & (maxpass - 1);
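            /*
             * e.g. for the Q form (maxpass == 8), passes 0..3 read the
             * four adjacent element pairs of Rn and passes 4..7 read the
             * same pairs of Rm.
             */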
11494 
11495             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11496             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11497             tcg_res[pass] = tcg_temp_new_i32();
11498 
11499             switch (fpopcode) {
11500             case 0x10: /* FMAXNMP */
11501                 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11502                                            fpst);
11503                 break;
11504             case 0x12: /* FADDP */
11505                 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11506                 break;
11507             case 0x16: /* FMAXP */
11508                 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11509                 break;
11510             case 0x18: /* FMINNMP */
11511                 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11512                                            fpst);
11513                 break;
11514             case 0x1e: /* FMINP */
11515                 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11516                 break;
11517             default:
11518                 g_assert_not_reached();
11519             }
11520         }
11521 
11522         for (pass = 0; pass < maxpass; pass++) {
11523             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11524         }
11525     } else {
11526         for (pass = 0; pass < elements; pass++) {
11527             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11528             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11529             TCGv_i32 tcg_res = tcg_temp_new_i32();
11530 
11531             read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11532             read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11533 
11534             switch (fpopcode) {
11535             case 0x0: /* FMAXNM */
11536                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11537                 break;
11538             case 0x1: /* FMLA */
11539                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11540                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11541                                            fpst);
11542                 break;
11543             case 0x2: /* FADD */
11544                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11545                 break;
11546             case 0x3: /* FMULX */
11547                 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11548                 break;
11549             case 0x4: /* FCMEQ */
11550                 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11551                 break;
11552             case 0x6: /* FMAX */
11553                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11554                 break;
11555             case 0x7: /* FRECPS */
11556                 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11557                 break;
11558             case 0x8: /* FMINNM */
11559                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11560                 break;
11561             case 0x9: /* FMLS */
11562                 /* As usual for ARM, separate negation for fused multiply-add */
11563                 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11564                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11565                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11566                                            fpst);
11567                 break;
11568             case 0xa: /* FSUB */
11569                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11570                 break;
11571             case 0xe: /* FMIN */
11572                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11573                 break;
11574             case 0xf: /* FRSQRTS */
11575                 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11576                 break;
11577             case 0x13: /* FMUL */
11578                 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11579                 break;
11580             case 0x14: /* FCMGE */
11581                 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11582                 break;
11583             case 0x15: /* FACGE */
11584                 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11585                 break;
11586             case 0x17: /* FDIV */
11587                 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11588                 break;
11589             case 0x1a: /* FABD */
11590                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11591                 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
11592                 break;
11593             case 0x1c: /* FCMGT */
11594                 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11595                 break;
11596             case 0x1d: /* FACGT */
11597                 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11598                 break;
11599             default:
11600                 g_assert_not_reached();
11601             }
11602 
11603             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11604         }
11605     }
11606 
11607     clear_vec_high(s, is_q, rd);
11608 }
11609 
11610 /* AdvSIMD three same extra
11611  *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
11612  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11613  * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
11614  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11615  */
11616 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
11617 {
11618     int rd = extract32(insn, 0, 5);
11619     int rn = extract32(insn, 5, 5);
11620     int opcode = extract32(insn, 11, 4);
11621     int rm = extract32(insn, 16, 5);
11622     int size = extract32(insn, 22, 2);
11623     bool u = extract32(insn, 29, 1);
11624     bool is_q = extract32(insn, 30, 1);
11625     bool feature;
11626     int rot;
11627 
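    /* Decode on U:opcode; e.g. SQRDMLAH has U == 1, opcode 0x0, giving 0x10 */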
11628     switch (u * 16 + opcode) {
11629     case 0x10: /* SQRDMLAH (vector) */
11630     case 0x11: /* SQRDMLSH (vector) */
11631         if (size != 1 && size != 2) {
11632             unallocated_encoding(s);
11633             return;
11634         }
11635         feature = dc_isar_feature(aa64_rdm, s);
11636         break;
11637     case 0x02: /* SDOT (vector) */
11638     case 0x12: /* UDOT (vector) */
11639         if (size != MO_32) {
11640             unallocated_encoding(s);
11641             return;
11642         }
11643         feature = dc_isar_feature(aa64_dp, s);
11644         break;
11645     case 0x03: /* USDOT */
11646         if (size != MO_32) {
11647             unallocated_encoding(s);
11648             return;
11649         }
11650         feature = dc_isar_feature(aa64_i8mm, s);
11651         break;
11652     case 0x04: /* SMMLA */
11653     case 0x14: /* UMMLA */
11654     case 0x05: /* USMMLA */
11655         if (!is_q || size != MO_32) {
11656             unallocated_encoding(s);
11657             return;
11658         }
11659         feature = dc_isar_feature(aa64_i8mm, s);
11660         break;
11661     case 0x18: /* FCMLA, #0 */
11662     case 0x19: /* FCMLA, #90 */
11663     case 0x1a: /* FCMLA, #180 */
11664     case 0x1b: /* FCMLA, #270 */
11665     case 0x1c: /* FCADD, #90 */
11666     case 0x1e: /* FCADD, #270 */
11667         if (size == 0
11668             || (size == 1 && !dc_isar_feature(aa64_fp16, s))
11669             || (size == 3 && !is_q)) {
11670             unallocated_encoding(s);
11671             return;
11672         }
11673         feature = dc_isar_feature(aa64_fcma, s);
11674         break;
11675     case 0x1d: /* BFMMLA */
11676         if (size != MO_16 || !is_q) {
11677             unallocated_encoding(s);
11678             return;
11679         }
11680         feature = dc_isar_feature(aa64_bf16, s);
11681         break;
11682     case 0x1f:
11683         switch (size) {
11684         case 1: /* BFDOT */
11685         case 3: /* BFMLAL{B,T} */
11686             feature = dc_isar_feature(aa64_bf16, s);
11687             break;
11688         default:
11689             unallocated_encoding(s);
11690             return;
11691         }
11692         break;
11693     default:
11694         unallocated_encoding(s);
11695         return;
11696     }
11697     if (!feature) {
11698         unallocated_encoding(s);
11699         return;
11700     }
11701     if (!fp_access_check(s)) {
11702         return;
11703     }
11704 
11705     switch (opcode) {
11706     case 0x0: /* SQRDMLAH (vector) */
11707         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
11708         return;
11709 
11710     case 0x1: /* SQRDMLSH (vector) */
11711         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
11712         return;
11713 
11714     case 0x2: /* SDOT / UDOT */
11715         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
11716                          u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
11717         return;
11718 
11719     case 0x3: /* USDOT */
11720         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
11721         return;
11722 
11723     case 0x04: /* SMMLA, UMMLA */
11724         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
11725                          u ? gen_helper_gvec_ummla_b
11726                          : gen_helper_gvec_smmla_b);
11727         return;
11728     case 0x05: /* USMMLA */
11729         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
11730         return;
11731 
11732     case 0x8: /* FCMLA, #0 */
11733     case 0x9: /* FCMLA, #90 */
11734     case 0xa: /* FCMLA, #180 */
11735     case 0xb: /* FCMLA, #270 */
11736         rot = extract32(opcode, 0, 2);
11737         switch (size) {
11738         case 1:
11739             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
11740                               gen_helper_gvec_fcmlah);
11741             break;
11742         case 2:
11743             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11744                               gen_helper_gvec_fcmlas);
11745             break;
11746         case 3:
11747             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11748                               gen_helper_gvec_fcmlad);
11749             break;
11750         default:
11751             g_assert_not_reached();
11752         }
11753         return;
11754 
11755     case 0xc: /* FCADD, #90 */
11756     case 0xe: /* FCADD, #270 */
11757         rot = extract32(opcode, 1, 1);
11758         switch (size) {
11759         case 1:
11760             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11761                               gen_helper_gvec_fcaddh);
11762             break;
11763         case 2:
11764             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11765                               gen_helper_gvec_fcadds);
11766             break;
11767         case 3:
11768             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11769                               gen_helper_gvec_fcaddd);
11770             break;
11771         default:
11772             g_assert_not_reached();
11773         }
11774         return;
11775 
11776     case 0xd: /* BFMMLA */
11777         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
11778         return;
11779     case 0xf:
11780         switch (size) {
11781         case 1: /* BFDOT */
11782             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
11783             break;
11784         case 3: /* BFMLAL{B,T} */
11785             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
11786                               gen_helper_gvec_bfmlal);
11787             break;
11788         default:
11789             g_assert_not_reached();
11790         }
11791         return;
11792 
11793     default:
11794         g_assert_not_reached();
11795     }
11796 }
11797 
11798 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11799                                   int size, int rn, int rd)
11800 {
11801     /* Handle 2-reg-misc ops which are widening (so each size element
11802      * in the source becomes a 2*size element in the destination).
11803      * The only instruction like this is FCVTL.
11804      */
11805     int pass;
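    /* FCVTL2 (is_q set) converts the high half of the source register */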
11806 
11807     if (size == 3) {
11808         /* 32 -> 64 bit fp conversion */
11809         TCGv_i64 tcg_res[2];
11810         int srcelt = is_q ? 2 : 0;
11811 
11812         for (pass = 0; pass < 2; pass++) {
11813             TCGv_i32 tcg_op = tcg_temp_new_i32();
11814             tcg_res[pass] = tcg_temp_new_i64();
11815 
11816             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11817             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
11818         }
11819         for (pass = 0; pass < 2; pass++) {
11820             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11821         }
11822     } else {
11823         /* 16 -> 32 bit fp conversion */
11824         int srcelt = is_q ? 4 : 0;
11825         TCGv_i32 tcg_res[4];
11826         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
11827         TCGv_i32 ahp = get_ahp_flag();
11828 
11829         for (pass = 0; pass < 4; pass++) {
11830             tcg_res[pass] = tcg_temp_new_i32();
11831 
11832             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11833             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11834                                            fpst, ahp);
11835         }
11836         for (pass = 0; pass < 4; pass++) {
11837             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11838         }
11839     }
11840 }
11841 
11842 static void handle_rev(DisasContext *s, int opcode, bool u,
11843                        bool is_q, int size, int rn, int rd)
11844 {
11845     int op = (opcode << 1) | u;
11846     int opsz = op + size;
11847     int grp_size = 3 - opsz;
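    /*
     * e.g. REV64 (op == 0) of bytes (size == 0) gives grp_size == MO_64,
     * so each 64-bit group has its bytes reversed.
     */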
11848     int dsize = is_q ? 128 : 64;
11849     int i;
11850 
11851     if (opsz >= 3) {
11852         unallocated_encoding(s);
11853         return;
11854     }
11855 
11856     if (!fp_access_check(s)) {
11857         return;
11858     }
11859 
11860     if (size == 0) {
11861         /* Special case bytes, use bswap op on each group of elements */
11862         int groups = dsize / (8 << grp_size);
11863 
11864         for (i = 0; i < groups; i++) {
11865             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11866 
11867             read_vec_element(s, tcg_tmp, rn, i, grp_size);
11868             switch (grp_size) {
11869             case MO_16:
11870                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11871                 break;
11872             case MO_32:
11873                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11874                 break;
11875             case MO_64:
11876                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11877                 break;
11878             default:
11879                 g_assert_not_reached();
11880             }
11881             write_vec_element(s, tcg_tmp, rd, i, grp_size);
11882         }
11883         clear_vec_high(s, is_q, rd);
11884     } else {
11885         int revmask = (1 << grp_size) - 1;
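        /*
         * e.g. REV32 of halfwords: grp_size == 1 and revmask == 1, so
         * the two 16-bit elements within each word swap places.
         */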
11886         int esize = 8 << size;
11887         int elements = dsize / esize;
11888         TCGv_i64 tcg_rn = tcg_temp_new_i64();
11889         TCGv_i64 tcg_rd[2];
11890 
11891         for (i = 0; i < 2; i++) {
11892             tcg_rd[i] = tcg_temp_new_i64();
11893             tcg_gen_movi_i64(tcg_rd[i], 0);
11894         }
11895 
11896         for (i = 0; i < elements; i++) {
11897             int e_rev = (i & 0xf) ^ revmask;
11898             int w = (e_rev * esize) / 64;
11899             int o = (e_rev * esize) % 64;
11900 
11901             read_vec_element(s, tcg_rn, rn, i, size);
11902             tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
11903         }
11904 
11905         for (i = 0; i < 2; i++) {
11906             write_vec_element(s, tcg_rd[i], rd, i, MO_64);
11907         }
11908         clear_vec_high(s, true, rd);
11909     }
11910 }
11911 
11912 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11913                                   bool is_q, int size, int rn, int rd)
11914 {
11915     /* Implement the pairwise operations from 2-misc:
11916      * SADDLP, UADDLP, SADALP, UADALP.
11917      * These all add pairs of elements in the input to produce a
11918      * double-width result element in the output (possibly accumulating).
11919      */
11920     bool accum = (opcode == 0x6);
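    /* opcode 0x2 is SADDLP/UADDLP; opcode 0x6 is SADALP/UADALP */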
11921     int maxpass = is_q ? 2 : 1;
11922     int pass;
11923     TCGv_i64 tcg_res[2];
11924 
11925     if (size == 2) {
11926         /* 32 + 32 -> 64 op */
11927         MemOp memop = size + (u ? 0 : MO_SIGN);
11928 
11929         for (pass = 0; pass < maxpass; pass++) {
11930             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11931             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11932 
11933             tcg_res[pass] = tcg_temp_new_i64();
11934 
11935             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11936             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11937             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11938             if (accum) {
11939                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
11940                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11941             }
11942         }
11943     } else {
11944         for (pass = 0; pass < maxpass; pass++) {
11945             TCGv_i64 tcg_op = tcg_temp_new_i64();
11946             NeonGenOne64OpFn *genfn;
11947             static NeonGenOne64OpFn * const fns[2][2] = {
11948                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
11949                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
11950             };
11951 
11952             genfn = fns[size][u];
11953 
11954             tcg_res[pass] = tcg_temp_new_i64();
11955 
11956             read_vec_element(s, tcg_op, rn, pass, MO_64);
11957             genfn(tcg_res[pass], tcg_op);
11958 
11959             if (accum) {
11960                 read_vec_element(s, tcg_op, rd, pass, MO_64);
11961                 if (size == 0) {
11962                     gen_helper_neon_addl_u16(tcg_res[pass],
11963                                              tcg_res[pass], tcg_op);
11964                 } else {
11965                     gen_helper_neon_addl_u32(tcg_res[pass],
11966                                              tcg_res[pass], tcg_op);
11967                 }
11968             }
11969         }
11970     }
11971     if (!is_q) {
11972         tcg_res[1] = tcg_constant_i64(0);
11973     }
11974     for (pass = 0; pass < 2; pass++) {
11975         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11976     }
11977 }
11978 
11979 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11980 {
11981     /* Implement SHLL and SHLL2 */
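    /*
     * Each source element is widened and then shifted left by the source
     * element size (8 << size); SHLL2 takes the high half of Rn.
     */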
11982     int pass;
11983     int part = is_q ? 2 : 0;
11984     TCGv_i64 tcg_res[2];
11985 
11986     for (pass = 0; pass < 2; pass++) {
11987         static NeonGenWidenFn * const widenfns[3] = {
11988             gen_helper_neon_widen_u8,
11989             gen_helper_neon_widen_u16,
11990             tcg_gen_extu_i32_i64,
11991         };
11992         NeonGenWidenFn *widenfn = widenfns[size];
11993         TCGv_i32 tcg_op = tcg_temp_new_i32();
11994 
11995         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11996         tcg_res[pass] = tcg_temp_new_i64();
11997         widenfn(tcg_res[pass], tcg_op);
11998         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11999     }
12000 
12001     for (pass = 0; pass < 2; pass++) {
12002         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12003     }
12004 }
12005 
12006 /* AdvSIMD two reg misc
12007  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
12008  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
12009  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12010  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
12011  */
12012 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
12013 {
12014     int size = extract32(insn, 22, 2);
12015     int opcode = extract32(insn, 12, 5);
12016     bool u = extract32(insn, 29, 1);
12017     bool is_q = extract32(insn, 30, 1);
12018     int rn = extract32(insn, 5, 5);
12019     int rd = extract32(insn, 0, 5);
12020     bool need_fpstatus = false;
12021     int rmode = -1;
12022     TCGv_i32 tcg_rmode;
12023     TCGv_ptr tcg_fpstatus;
12024 
12025     switch (opcode) {
12026     case 0x0: /* REV64, REV32 */
12027     case 0x1: /* REV16 */
12028         handle_rev(s, opcode, u, is_q, size, rn, rd);
12029         return;
12030     case 0x5: /* CNT, NOT, RBIT */
12031         if (u && size == 0) {
12032             /* NOT */
12033             break;
12034         } else if (u && size == 1) {
12035             /* RBIT */
12036             break;
12037         } else if (!u && size == 0) {
12038             /* CNT */
12039             break;
12040         }
12041         unallocated_encoding(s);
12042         return;
12043     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
12044     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
12045         if (size == 3) {
12046             unallocated_encoding(s);
12047             return;
12048         }
12049         if (!fp_access_check(s)) {
12050             return;
12051         }
12052 
12053         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
12054         return;
12055     case 0x4: /* CLS, CLZ */
12056         if (size == 3) {
12057             unallocated_encoding(s);
12058             return;
12059         }
12060         break;
12061     case 0x2: /* SADDLP, UADDLP */
12062     case 0x6: /* SADALP, UADALP */
12063         if (size == 3) {
12064             unallocated_encoding(s);
12065             return;
12066         }
12067         if (!fp_access_check(s)) {
12068             return;
12069         }
12070         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
12071         return;
12072     case 0x13: /* SHLL, SHLL2 */
12073         if (u == 0 || size == 3) {
12074             unallocated_encoding(s);
12075             return;
12076         }
12077         if (!fp_access_check(s)) {
12078             return;
12079         }
12080         handle_shll(s, is_q, size, rn, rd);
12081         return;
12082     case 0xa: /* CMLT (zero) */
12083         if (u == 1) {
12084             unallocated_encoding(s);
12085             return;
12086         }
12087         /* fall through */
12088     case 0x8: /* CMGT, CMGE (zero) */
12089     case 0x9: /* CMEQ, CMLE (zero) */
12090     case 0xb: /* ABS, NEG */
12091         if (size == 3 && !is_q) {
12092             unallocated_encoding(s);
12093             return;
12094         }
12095         break;
12096     case 0x3: /* SUQADD, USQADD */
12097         if (size == 3 && !is_q) {
12098             unallocated_encoding(s);
12099             return;
12100         }
12101         if (!fp_access_check(s)) {
12102             return;
12103         }
12104         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
12105         return;
12106     case 0x7: /* SQABS, SQNEG */
12107         if (size == 3 && !is_q) {
12108             unallocated_encoding(s);
12109             return;
12110         }
12111         break;
12112     case 0xc ... 0xf:
12113     case 0x16 ... 0x1f:
12114     {
12115         /* Floating point: U, size[1] and opcode indicate operation;
12116          * size[0] indicates single or double precision.
12117          */
12118         int is_double = extract32(size, 0, 1);
12119         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
12120         size = is_double ? 3 : 2;
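        /* e.g. FNEG has U == 1, size[1] == 1, opcode 0xf: 0x6f below */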
12121         switch (opcode) {
12122         case 0x2f: /* FABS */
12123         case 0x6f: /* FNEG */
12124             if (size == 3 && !is_q) {
12125                 unallocated_encoding(s);
12126                 return;
12127             }
12128             break;
12129         case 0x1d: /* SCVTF */
12130         case 0x5d: /* UCVTF */
12131         {
12132             bool is_signed = (opcode == 0x1d);
12133             int elements = is_double ? 2 : is_q ? 4 : 2;
12134             if (is_double && !is_q) {
12135                 unallocated_encoding(s);
12136                 return;
12137             }
12138             if (!fp_access_check(s)) {
12139                 return;
12140             }
12141             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12142             return;
12143         }
12144         case 0x2c: /* FCMGT (zero) */
12145         case 0x2d: /* FCMEQ (zero) */
12146         case 0x2e: /* FCMLT (zero) */
12147         case 0x6c: /* FCMGE (zero) */
12148         case 0x6d: /* FCMLE (zero) */
12149             if (size == 3 && !is_q) {
12150                 unallocated_encoding(s);
12151                 return;
12152             }
12153             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12154             return;
12155         case 0x7f: /* FSQRT */
12156             if (size == 3 && !is_q) {
12157                 unallocated_encoding(s);
12158                 return;
12159             }
12160             break;
12161         case 0x1a: /* FCVTNS */
12162         case 0x1b: /* FCVTMS */
12163         case 0x3a: /* FCVTPS */
12164         case 0x3b: /* FCVTZS */
12165         case 0x5a: /* FCVTNU */
12166         case 0x5b: /* FCVTMU */
12167         case 0x7a: /* FCVTPU */
12168         case 0x7b: /* FCVTZU */
12169             need_fpstatus = true;
12170             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12171             if (size == 3 && !is_q) {
12172                 unallocated_encoding(s);
12173                 return;
12174             }
12175             break;
12176         case 0x5c: /* FCVTAU */
12177         case 0x1c: /* FCVTAS */
12178             need_fpstatus = true;
12179             rmode = FPROUNDING_TIEAWAY;
12180             if (size == 3 && !is_q) {
12181                 unallocated_encoding(s);
12182                 return;
12183             }
12184             break;
12185         case 0x3c: /* URECPE */
12186             if (size == 3) {
12187                 unallocated_encoding(s);
12188                 return;
12189             }
12190             /* fall through */
12191         case 0x3d: /* FRECPE */
12192         case 0x7d: /* FRSQRTE */
12193             if (size == 3 && !is_q) {
12194                 unallocated_encoding(s);
12195                 return;
12196             }
12197             if (!fp_access_check(s)) {
12198                 return;
12199             }
12200             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12201             return;
12202         case 0x56: /* FCVTXN, FCVTXN2 */
12203             if (size == 2) {
12204                 unallocated_encoding(s);
12205                 return;
12206             }
12207             /* fall through */
12208         case 0x16: /* FCVTN, FCVTN2 */
12209             /* handle_2misc_narrow does a 2*size -> size operation, but these
12210              * instructions encode the source size rather than dest size.
12211              */
12212             if (!fp_access_check(s)) {
12213                 return;
12214             }
12215             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12216             return;
12217         case 0x36: /* BFCVTN, BFCVTN2 */
12218             if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
12219                 unallocated_encoding(s);
12220                 return;
12221             }
12222             if (!fp_access_check(s)) {
12223                 return;
12224             }
12225             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12226             return;
12227         case 0x17: /* FCVTL, FCVTL2 */
12228             if (!fp_access_check(s)) {
12229                 return;
12230             }
12231             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12232             return;
12233         case 0x18: /* FRINTN */
12234         case 0x19: /* FRINTM */
12235         case 0x38: /* FRINTP */
12236         case 0x39: /* FRINTZ */
12237             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12238             /* fall through */
12239         case 0x59: /* FRINTX */
12240         case 0x79: /* FRINTI */
12241             need_fpstatus = true;
12242             if (size == 3 && !is_q) {
12243                 unallocated_encoding(s);
12244                 return;
12245             }
12246             break;
12247         case 0x58: /* FRINTA */
12248             rmode = FPROUNDING_TIEAWAY;
12249             need_fpstatus = true;
12250             if (size == 3 && !is_q) {
12251                 unallocated_encoding(s);
12252                 return;
12253             }
12254             break;
12255         case 0x7c: /* URSQRTE */
12256             if (size == 3) {
12257                 unallocated_encoding(s);
12258                 return;
12259             }
12260             break;
12261         case 0x1e: /* FRINT32Z */
12262         case 0x1f: /* FRINT64Z */
12263             rmode = FPROUNDING_ZERO;
12264             /* fall through */
12265         case 0x5e: /* FRINT32X */
12266         case 0x5f: /* FRINT64X */
12267             need_fpstatus = true;
12268             if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12269                 unallocated_encoding(s);
12270                 return;
12271             }
12272             break;
12273         default:
12274             unallocated_encoding(s);
12275             return;
12276         }
12277         break;
12278     }
12279     default:
12280         unallocated_encoding(s);
12281         return;
12282     }
12283 
12284     if (!fp_access_check(s)) {
12285         return;
12286     }
12287 
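    /*
     * Ops that need a specific rounding mode squash it into the FP status
     * here; gen_restore_rmode() at the end of the function restores it.
     */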
12288     if (need_fpstatus || rmode >= 0) {
12289         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
12290     } else {
12291         tcg_fpstatus = NULL;
12292     }
12293     if (rmode >= 0) {
12294         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12295     } else {
12296         tcg_rmode = NULL;
12297     }
12298 
12299     switch (opcode) {
12300     case 0x5:
12301         if (u && size == 0) { /* NOT */
12302             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12303             return;
12304         }
12305         break;
12306     case 0x8: /* CMGT, CMGE (zero) */
12307         if (u) {
12308             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
12309         } else {
12310             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
12311         }
12312         return;
12313     case 0x9: /* CMEQ, CMLE (zero) */
12314         if (u) {
12315             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
12316         } else {
12317             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
12318         }
12319         return;
12320     case 0xa: /* CMLT (zero) */
12321         gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
12322         return;
12323     case 0xb: /* ABS, NEG */
12324         if (u) { /* NEG */
12325             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12326         } else {
12327             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12328         }
12329         return;
12330     }
12331 
12332     if (size == 3) {
12333         /* All 64-bit element operations can be shared with scalar 2misc */
12334         int pass;
12335 
12336         /* Coverity claims (size == 3 && !is_q) has been eliminated
12337          * from all paths leading to here.
12338          */
12339         tcg_debug_assert(is_q);
12340         for (pass = 0; pass < 2; pass++) {
12341             TCGv_i64 tcg_op = tcg_temp_new_i64();
12342             TCGv_i64 tcg_res = tcg_temp_new_i64();
12343 
12344             read_vec_element(s, tcg_op, rn, pass, MO_64);
12345 
12346             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12347                             tcg_rmode, tcg_fpstatus);
12348 
12349             write_vec_element(s, tcg_res, rd, pass, MO_64);
12350         }
12351     } else {
12352         int pass;
12353 
12354         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12355             TCGv_i32 tcg_op = tcg_temp_new_i32();
12356             TCGv_i32 tcg_res = tcg_temp_new_i32();
12357 
12358             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12359 
12360             if (size == 2) {
12361                 /* Special cases for 32 bit elements */
12362                 switch (opcode) {
12363                 case 0x4: /* CLS */
12364                     if (u) {
12365                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12366                     } else {
12367                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
12368                     }
12369                     break;
12370                 case 0x7: /* SQABS, SQNEG */
12371                     if (u) {
12372                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12373                     } else {
12374                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12375                     }
12376                     break;
12377                 case 0x2f: /* FABS */
12378                     gen_helper_vfp_abss(tcg_res, tcg_op);
12379                     break;
12380                 case 0x6f: /* FNEG */
12381                     gen_helper_vfp_negs(tcg_res, tcg_op);
12382                     break;
12383                 case 0x7f: /* FSQRT */
12384                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12385                     break;
12386                 case 0x1a: /* FCVTNS */
12387                 case 0x1b: /* FCVTMS */
12388                 case 0x1c: /* FCVTAS */
12389                 case 0x3a: /* FCVTPS */
12390                 case 0x3b: /* FCVTZS */
12391                     gen_helper_vfp_tosls(tcg_res, tcg_op,
12392                                          tcg_constant_i32(0), tcg_fpstatus);
12393                     break;
12394                 case 0x5a: /* FCVTNU */
12395                 case 0x5b: /* FCVTMU */
12396                 case 0x5c: /* FCVTAU */
12397                 case 0x7a: /* FCVTPU */
12398                 case 0x7b: /* FCVTZU */
12399                     gen_helper_vfp_touls(tcg_res, tcg_op,
12400                                          tcg_constant_i32(0), tcg_fpstatus);
12401                     break;
12402                 case 0x18: /* FRINTN */
12403                 case 0x19: /* FRINTM */
12404                 case 0x38: /* FRINTP */
12405                 case 0x39: /* FRINTZ */
12406                 case 0x58: /* FRINTA */
12407                 case 0x79: /* FRINTI */
12408                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12409                     break;
12410                 case 0x59: /* FRINTX */
12411                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12412                     break;
12413                 case 0x7c: /* URSQRTE */
12414                     gen_helper_rsqrte_u32(tcg_res, tcg_op);
12415                     break;
12416                 case 0x1e: /* FRINT32Z */
12417                 case 0x5e: /* FRINT32X */
12418                     gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12419                     break;
12420                 case 0x1f: /* FRINT64Z */
12421                 case 0x5f: /* FRINT64X */
12422                     gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12423                     break;
12424                 default:
12425                     g_assert_not_reached();
12426                 }
12427             } else {
12428                 /* Use helpers for 8 and 16 bit elements */
12429                 switch (opcode) {
12430                 case 0x5: /* CNT, RBIT */
12431                     /* For these two insns size is part of the opcode specifier
12432                      * (handled earlier); they always operate on byte elements.
12433                      */
12434                     if (u) {
12435                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12436                     } else {
12437                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12438                     }
12439                     break;
12440                 case 0x7: /* SQABS, SQNEG */
12441                 {
12442                     NeonGenOneOpEnvFn *genfn;
12443                     static NeonGenOneOpEnvFn * const fns[2][2] = {
12444                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12445                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12446                     };
12447                     genfn = fns[size][u];
12448                     genfn(tcg_res, cpu_env, tcg_op);
12449                     break;
12450                 }
12451                 case 0x4: /* CLS, CLZ */
12452                     if (u) {
12453                         if (size == 0) {
12454                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
12455                         } else {
12456                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
12457                         }
12458                     } else {
12459                         if (size == 0) {
12460                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
12461                         } else {
12462                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
12463                         }
12464                     }
12465                     break;
12466                 default:
12467                     g_assert_not_reached();
12468                 }
12469             }
12470 
12471             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12472         }
12473     }
12474     clear_vec_high(s, is_q, rd);
12475 
12476     if (tcg_rmode) {
12477         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12478     }
12479 }
12480 
12481 /* AdvSIMD [scalar] two register miscellaneous (FP16)
12482  *
12483  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
12484  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12485  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12486  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12487  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12488  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12489  *
12490  * This actually covers two groups where scalar access is governed by
12491  * bit 28. Several of the instructions (the float-to-integral ones)
12492  * exist only in the vector form and are unallocated in the scalar
12493  * decode; in the scalar decode Q is always 1.
12494  */
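/*
 * Editorial worked example (hedged; the encoding is my own assembly,
 * not from the original source): FABS v0.8h, v1.8h should be
 * 0x4ef8f820.  Then 0x4ef8f820 & 0x8f7e0c00 == 0x0e780800 as required,
 * and opcode = extract32(insn, 12, 5) = 0xf with a = 1, u = 0, giving
 * fpop = 0xf | (1 << 5) | (0 << 6) = 0x2f, the FABS case below.
 */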
12495 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12496 {
12497     int fpop, opcode, a, u;
12498     int rn, rd;
12499     bool is_q;
12500     bool is_scalar;
12501     bool only_in_vector = false;
12502 
12503     int pass;
12504     TCGv_i32 tcg_rmode = NULL;
12505     TCGv_ptr tcg_fpstatus = NULL;
12506     bool need_fpst = true;
12507     int rmode = -1;
12508 
12509     if (!dc_isar_feature(aa64_fp16, s)) {
12510         unallocated_encoding(s);
12511         return;
12512     }
12513 
12514     rd = extract32(insn, 0, 5);
12515     rn = extract32(insn, 5, 5);
12516 
12517     a = extract32(insn, 23, 1);
12518     u = extract32(insn, 29, 1);
12519     is_scalar = extract32(insn, 28, 1);
12520     is_q = extract32(insn, 30, 1);
12521 
12522     opcode = extract32(insn, 12, 5);
12523     fpop = deposit32(opcode, 5, 1, a);
12524     fpop = deposit32(fpop, 6, 1, u);
12525 
12526     switch (fpop) {
12527     case 0x1d: /* SCVTF */
12528     case 0x5d: /* UCVTF */
12529     {
12530         int elements;
12531 
12532         if (is_scalar) {
12533             elements = 1;
12534         } else {
12535             elements = (is_q ? 8 : 4);
12536         }
12537 
12538         if (!fp_access_check(s)) {
12539             return;
12540         }
12541         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12542         return;
12543     }
12544     break;
12545     case 0x2c: /* FCMGT (zero) */
12546     case 0x2d: /* FCMEQ (zero) */
12547     case 0x2e: /* FCMLT (zero) */
12548     case 0x6c: /* FCMGE (zero) */
12549     case 0x6d: /* FCMLE (zero) */
12550         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12551         return;
12552     case 0x3d: /* FRECPE */
12553     case 0x3f: /* FRECPX */
12554         break;
12555     case 0x18: /* FRINTN */
12556         only_in_vector = true;
12557         rmode = FPROUNDING_TIEEVEN;
12558         break;
12559     case 0x19: /* FRINTM */
12560         only_in_vector = true;
12561         rmode = FPROUNDING_NEGINF;
12562         break;
12563     case 0x38: /* FRINTP */
12564         only_in_vector = true;
12565         rmode = FPROUNDING_POSINF;
12566         break;
12567     case 0x39: /* FRINTZ */
12568         only_in_vector = true;
12569         rmode = FPROUNDING_ZERO;
12570         break;
12571     case 0x58: /* FRINTA */
12572         only_in_vector = true;
12573         rmode = FPROUNDING_TIEAWAY;
12574         break;
12575     case 0x59: /* FRINTX */
12576     case 0x79: /* FRINTI */
12577         only_in_vector = true;
12578         /* current rounding mode */
12579         break;
12580     case 0x1a: /* FCVTNS */
12581         rmode = FPROUNDING_TIEEVEN;
12582         break;
12583     case 0x1b: /* FCVTMS */
12584         rmode = FPROUNDING_NEGINF;
12585         break;
12586     case 0x1c: /* FCVTAS */
12587         rmode = FPROUNDING_TIEAWAY;
12588         break;
12589     case 0x3a: /* FCVTPS */
12590         rmode = FPROUNDING_POSINF;
12591         break;
12592     case 0x3b: /* FCVTZS */
12593         rmode = FPROUNDING_ZERO;
12594         break;
12595     case 0x5a: /* FCVTNU */
12596         rmode = FPROUNDING_TIEEVEN;
12597         break;
12598     case 0x5b: /* FCVTMU */
12599         rmode = FPROUNDING_NEGINF;
12600         break;
12601     case 0x5c: /* FCVTAU */
12602         rmode = FPROUNDING_TIEAWAY;
12603         break;
12604     case 0x7a: /* FCVTPU */
12605         rmode = FPROUNDING_POSINF;
12606         break;
12607     case 0x7b: /* FCVTZU */
12608         rmode = FPROUNDING_ZERO;
12609         break;
12610     case 0x2f: /* FABS */
12611     case 0x6f: /* FNEG */
12612         need_fpst = false;
12613         break;
12614     case 0x7d: /* FRSQRTE */
12615     case 0x7f: /* FSQRT (vector) */
12616         break;
12617     default:
12618         unallocated_encoding(s);
12619         return;
12620     }
12621 
12622 
12623     /* Check additional constraints for the scalar encoding */
12624     if (is_scalar) {
12625         if (!is_q) {
12626             unallocated_encoding(s);
12627             return;
12628         }
12629         /* FRINTxx is only in the vector form */
12630         if (only_in_vector) {
12631             unallocated_encoding(s);
12632             return;
12633         }
12634     }
12635 
12636     if (!fp_access_check(s)) {
12637         return;
12638     }
12639 
12640     if (rmode >= 0 || need_fpst) {
12641         tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
12642     }
12643 
12644     if (rmode >= 0) {
12645         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12646     }
12647 
12648     if (is_scalar) {
12649         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12650         TCGv_i32 tcg_res = tcg_temp_new_i32();
12651 
12652         switch (fpop) {
12653         case 0x1a: /* FCVTNS */
12654         case 0x1b: /* FCVTMS */
12655         case 0x1c: /* FCVTAS */
12656         case 0x3a: /* FCVTPS */
12657         case 0x3b: /* FCVTZS */
12658             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12659             break;
12660         case 0x3d: /* FRECPE */
12661             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12662             break;
12663         case 0x3f: /* FRECPX */
12664             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12665             break;
12666         case 0x5a: /* FCVTNU */
12667         case 0x5b: /* FCVTMU */
12668         case 0x5c: /* FCVTAU */
12669         case 0x7a: /* FCVTPU */
12670         case 0x7b: /* FCVTZU */
12671             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12672             break;
12673         case 0x6f: /* FNEG */
12674             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12675             break;
12676         case 0x7d: /* FRSQRTE */
12677             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12678             break;
12679         default:
12680             g_assert_not_reached();
12681         }
12682 
12683         /* mask off any sign extension coming from the helper */
12684         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12685         write_fp_sreg(s, rd, tcg_res);
12686     } else {
12687         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12688             TCGv_i32 tcg_op = tcg_temp_new_i32();
12689             TCGv_i32 tcg_res = tcg_temp_new_i32();
12690 
12691             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12692 
12693             switch (fpop) {
12694             case 0x1a: /* FCVTNS */
12695             case 0x1b: /* FCVTMS */
12696             case 0x1c: /* FCVTAS */
12697             case 0x3a: /* FCVTPS */
12698             case 0x3b: /* FCVTZS */
12699                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12700                 break;
12701             case 0x3d: /* FRECPE */
12702                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12703                 break;
12704             case 0x5a: /* FCVTNU */
12705             case 0x5b: /* FCVTMU */
12706             case 0x5c: /* FCVTAU */
12707             case 0x7a: /* FCVTPU */
12708             case 0x7b: /* FCVTZU */
12709                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12710                 break;
12711             case 0x18: /* FRINTN */
12712             case 0x19: /* FRINTM */
12713             case 0x38: /* FRINTP */
12714             case 0x39: /* FRINTZ */
12715             case 0x58: /* FRINTA */
12716             case 0x79: /* FRINTI */
12717                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12718                 break;
12719             case 0x59: /* FRINTX */
12720                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12721                 break;
12722             case 0x2f: /* FABS */
12723                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12724                 break;
12725             case 0x6f: /* FNEG */
12726                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12727                 break;
12728             case 0x7d: /* FRSQRTE */
12729                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12730                 break;
12731             case 0x7f: /* FSQRT */
12732                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12733                 break;
12734             default:
12735                 g_assert_not_reached();
12736             }
12737 
12738             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12739         }
12740 
12741         clear_vec_high(s, is_q, rd);
12742     }
12743 
12744     if (tcg_rmode) {
12745         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12746     }
12747 }
12748 
12749 /* AdvSIMD scalar x indexed element
12750  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12751  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12752  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12753  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12754  * AdvSIMD vector x indexed element
12755  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12756  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12757  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12758  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12759  */
12760 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12761 {
12762     /* This encoding has two kinds of instruction:
12763      *  normal, where we perform elt x idxelt => elt for each
12764      *     element in the vector
12765      *  long, where we perform elt x idxelt and generate a result of
12766      *     double the width of the input element
12767      * The long ops have a 'part' specifier (i.e. they come in INSN, INSN2 pairs).
12768      */
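    /*
     * Editorial example (hedged): for a long op such as
     * SMLAL2 v0.4s, v1.8h, v2.h[0], each 16-bit source element widens
     * to a 32-bit result, and the '2' (INSN2) form takes its source
     * elements from the upper half of Vn -- see the
     * pass + (is_q * 2) element selection further down.
     */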
12769     bool is_scalar = extract32(insn, 28, 1);
12770     bool is_q = extract32(insn, 30, 1);
12771     bool u = extract32(insn, 29, 1);
12772     int size = extract32(insn, 22, 2);
12773     int l = extract32(insn, 21, 1);
12774     int m = extract32(insn, 20, 1);
12775     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12776     int rm = extract32(insn, 16, 4);
12777     int opcode = extract32(insn, 12, 4);
12778     int h = extract32(insn, 11, 1);
12779     int rn = extract32(insn, 5, 5);
12780     int rd = extract32(insn, 0, 5);
12781     bool is_long = false;
12782     int is_fp = 0;
12783     bool is_fp16 = false;
12784     int index;
12785     TCGv_ptr fpst;
12786 
12787     switch (16 * u + opcode) {
12788     case 0x08: /* MUL */
12789     case 0x10: /* MLA */
12790     case 0x14: /* MLS */
12791         if (is_scalar) {
12792             unallocated_encoding(s);
12793             return;
12794         }
12795         break;
12796     case 0x02: /* SMLAL, SMLAL2 */
12797     case 0x12: /* UMLAL, UMLAL2 */
12798     case 0x06: /* SMLSL, SMLSL2 */
12799     case 0x16: /* UMLSL, UMLSL2 */
12800     case 0x0a: /* SMULL, SMULL2 */
12801     case 0x1a: /* UMULL, UMULL2 */
12802         if (is_scalar) {
12803             unallocated_encoding(s);
12804             return;
12805         }
12806         is_long = true;
12807         break;
12808     case 0x03: /* SQDMLAL, SQDMLAL2 */
12809     case 0x07: /* SQDMLSL, SQDMLSL2 */
12810     case 0x0b: /* SQDMULL, SQDMULL2 */
12811         is_long = true;
12812         break;
12813     case 0x0c: /* SQDMULH */
12814     case 0x0d: /* SQRDMULH */
12815         break;
12816     case 0x01: /* FMLA */
12817     case 0x05: /* FMLS */
12818     case 0x09: /* FMUL */
12819     case 0x19: /* FMULX */
12820         is_fp = 1;
12821         break;
12822     case 0x1d: /* SQRDMLAH */
12823     case 0x1f: /* SQRDMLSH */
12824         if (!dc_isar_feature(aa64_rdm, s)) {
12825             unallocated_encoding(s);
12826             return;
12827         }
12828         break;
12829     case 0x0e: /* SDOT */
12830     case 0x1e: /* UDOT */
12831         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12832             unallocated_encoding(s);
12833             return;
12834         }
12835         break;
12836     case 0x0f:
12837         switch (size) {
12838         case 0: /* SUDOT */
12839         case 2: /* USDOT */
12840             if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
12841                 unallocated_encoding(s);
12842                 return;
12843             }
12844             size = MO_32;
12845             break;
12846         case 1: /* BFDOT */
12847             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12848                 unallocated_encoding(s);
12849                 return;
12850             }
12851             size = MO_32;
12852             break;
12853         case 3: /* BFMLAL{B,T} */
12854             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12855                 unallocated_encoding(s);
12856                 return;
12857             }
12858             /* we can't set is_fp without tripping the wrong size checks below */
12859             size = MO_16;
12860             break;
12861         default:
12862             unallocated_encoding(s);
12863             return;
12864         }
12865         break;
12866     case 0x11: /* FCMLA #0 */
12867     case 0x13: /* FCMLA #90 */
12868     case 0x15: /* FCMLA #180 */
12869     case 0x17: /* FCMLA #270 */
12870         if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
12871             unallocated_encoding(s);
12872             return;
12873         }
12874         is_fp = 2;
12875         break;
12876     case 0x00: /* FMLAL */
12877     case 0x04: /* FMLSL */
12878     case 0x18: /* FMLAL2 */
12879     case 0x1c: /* FMLSL2 */
12880         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
12881             unallocated_encoding(s);
12882             return;
12883         }
12884         size = MO_16;
12885         /* is_fp, but we pass cpu_env not fp_status.  */
12886         break;
12887     default:
12888         unallocated_encoding(s);
12889         return;
12890     }
12891 
12892     switch (is_fp) {
12893     case 1: /* normal fp */
12894         /* convert insn encoded size to MemOp size */
12895         switch (size) {
12896         case 0: /* half-precision */
12897             size = MO_16;
12898             is_fp16 = true;
12899             break;
12900         case MO_32: /* single precision */
12901         case MO_64: /* double precision */
12902             break;
12903         default:
12904             unallocated_encoding(s);
12905             return;
12906         }
12907         break;
12908 
12909     case 2: /* complex fp */
12910         /* Each indexable element is a complex pair.  */
12911         size += 1;
12912         switch (size) {
12913         case MO_32:
12914             if (h && !is_q) {
12915                 unallocated_encoding(s);
12916                 return;
12917             }
12918             is_fp16 = true;
12919             break;
12920         case MO_64:
12921             break;
12922         default:
12923             unallocated_encoding(s);
12924             return;
12925         }
12926         break;
12927 
12928     default: /* integer */
12929         switch (size) {
12930         case MO_8:
12931         case MO_64:
12932             unallocated_encoding(s);
12933             return;
12934         }
12935         break;
12936     }
12937     if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
12938         unallocated_encoding(s);
12939         return;
12940     }
12941 
12942     /* Given MemOp size, adjust register and indexing.  */
12943     switch (size) {
12944     case MO_16:
12945         index = h << 2 | l << 1 | m;
12946         break;
12947     case MO_32:
12948         index = h << 1 | l;
12949         rm |= m << 4;
12950         break;
12951     case MO_64:
12952         if (l || !is_q) {
12953             unallocated_encoding(s);
12954             return;
12955         }
12956         index = h;
12957         rm |= m << 4;
12958         break;
12959     default:
12960         g_assert_not_reached();
12961     }
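    /*
     * Editorial example (assumed encoding): for FMLA v0.4s, v1.4s,
     * v2.s[3] we have size = MO_32, so the switch above yields
     * index = (h << 1) | l = 3 and m supplies bit 4 of rm, making
     * V16-V31 indexable; for MO_16, m is needed for the element index,
     * so only V0-V15 are reachable.
     */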
12962 
12963     if (!fp_access_check(s)) {
12964         return;
12965     }
12966 
12967     if (is_fp) {
12968         fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
12969     } else {
12970         fpst = NULL;
12971     }
12972 
12973     switch (16 * u + opcode) {
12974     case 0x0e: /* SDOT */
12975     case 0x1e: /* UDOT */
12976         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12977                          u ? gen_helper_gvec_udot_idx_b
12978                          : gen_helper_gvec_sdot_idx_b);
12979         return;
12980     case 0x0f:
12981         switch (extract32(insn, 22, 2)) {
12982         case 0: /* SUDOT */
12983             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12984                              gen_helper_gvec_sudot_idx_b);
12985             return;
12986         case 1: /* BFDOT */
12987             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12988                              gen_helper_gvec_bfdot_idx);
12989             return;
12990         case 2: /* USDOT */
12991             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12992                              gen_helper_gvec_usdot_idx_b);
12993             return;
12994         case 3: /* BFMLAL{B,T} */
12995             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
12996                               gen_helper_gvec_bfmlal_idx);
12997             return;
12998         }
12999         g_assert_not_reached();
13000     case 0x11: /* FCMLA #0 */
13001     case 0x13: /* FCMLA #90 */
13002     case 0x15: /* FCMLA #180 */
13003     case 0x17: /* FCMLA #270 */
13004         {
13005             int rot = extract32(insn, 13, 2);
13006             int data = (index << 2) | rot;
13007             tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
13008                                vec_full_reg_offset(s, rn),
13009                                vec_full_reg_offset(s, rm),
13010                                vec_full_reg_offset(s, rd), fpst,
13011                                is_q ? 16 : 8, vec_full_reg_size(s), data,
13012                                size == MO_64
13013                                ? gen_helper_gvec_fcmlas_idx
13014                                : gen_helper_gvec_fcmlah_idx);
13015         }
13016         return;
13017 
13018     case 0x00: /* FMLAL */
13019     case 0x04: /* FMLSL */
13020     case 0x18: /* FMLAL2 */
13021     case 0x1c: /* FMLSL2 */
13022         {
13023             int is_s = extract32(opcode, 2, 1);
13024             int is_2 = u;
13025             int data = (index << 2) | (is_2 << 1) | is_s;
13026             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
13027                                vec_full_reg_offset(s, rn),
13028                                vec_full_reg_offset(s, rm), cpu_env,
13029                                is_q ? 16 : 8, vec_full_reg_size(s),
13030                                data, gen_helper_gvec_fmlal_idx_a64);
13031         }
13032         return;
13033 
13034     case 0x08: /* MUL */
13035         if (!is_long && !is_scalar) {
13036             static gen_helper_gvec_3 * const fns[3] = {
13037                 gen_helper_gvec_mul_idx_h,
13038                 gen_helper_gvec_mul_idx_s,
13039                 gen_helper_gvec_mul_idx_d,
13040             };
13041             tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
13042                                vec_full_reg_offset(s, rn),
13043                                vec_full_reg_offset(s, rm),
13044                                is_q ? 16 : 8, vec_full_reg_size(s),
13045                                index, fns[size - 1]);
13046             return;
13047         }
13048         break;
13049 
13050     case 0x10: /* MLA */
13051         if (!is_long && !is_scalar) {
13052             static gen_helper_gvec_4 * const fns[3] = {
13053                 gen_helper_gvec_mla_idx_h,
13054                 gen_helper_gvec_mla_idx_s,
13055                 gen_helper_gvec_mla_idx_d,
13056             };
13057             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13058                                vec_full_reg_offset(s, rn),
13059                                vec_full_reg_offset(s, rm),
13060                                vec_full_reg_offset(s, rd),
13061                                is_q ? 16 : 8, vec_full_reg_size(s),
13062                                index, fns[size - 1]);
13063             return;
13064         }
13065         break;
13066 
13067     case 0x14: /* MLS */
13068         if (!is_long && !is_scalar) {
13069             static gen_helper_gvec_4 * const fns[3] = {
13070                 gen_helper_gvec_mls_idx_h,
13071                 gen_helper_gvec_mls_idx_s,
13072                 gen_helper_gvec_mls_idx_d,
13073             };
13074             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13075                                vec_full_reg_offset(s, rn),
13076                                vec_full_reg_offset(s, rm),
13077                                vec_full_reg_offset(s, rd),
13078                                is_q ? 16 : 8, vec_full_reg_size(s),
13079                                index, fns[size - 1]);
13080             return;
13081         }
13082         break;
13083     }
13084 
13085     if (size == 3) {
13086         TCGv_i64 tcg_idx = tcg_temp_new_i64();
13087         int pass;
13088 
13089         assert(is_fp && is_q && !is_long);
13090 
13091         read_vec_element(s, tcg_idx, rm, index, MO_64);
13092 
13093         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13094             TCGv_i64 tcg_op = tcg_temp_new_i64();
13095             TCGv_i64 tcg_res = tcg_temp_new_i64();
13096 
13097             read_vec_element(s, tcg_op, rn, pass, MO_64);
13098 
13099             switch (16 * u + opcode) {
13100             case 0x05: /* FMLS */
13101                 /* As usual for ARM, separate negation for fused multiply-add */
13102                 gen_helper_vfp_negd(tcg_op, tcg_op);
13103                 /* fall through */
13104             case 0x01: /* FMLA */
13105                 read_vec_element(s, tcg_res, rd, pass, MO_64);
13106                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
13107                 break;
13108             case 0x09: /* FMUL */
13109                 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
13110                 break;
13111             case 0x19: /* FMULX */
13112                 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
13113                 break;
13114             default:
13115                 g_assert_not_reached();
13116             }
13117 
13118             write_vec_element(s, tcg_res, rd, pass, MO_64);
13119         }
13120 
13121         clear_vec_high(s, !is_scalar, rd);
13122     } else if (!is_long) {
13123         /* 32 bit floating point, or 16 or 32 bit integer.
13124          * For the 16 bit scalar case we use the usual Neon helpers and
13125          * rely on the fact that 0 op 0 == 0 with no side effects.
13126          */
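        /*
         * Editorial note: concretely, a scalar MO_16 read leaves the
         * upper 16 bits of both tcg_op and tcg_idx zero, so when a
         * 2x16 helper such as gen_helper_neon_mul_u16 processes both
         * halves, the upper lane computes 0 * 0 = 0 and the low-lane
         * scalar result is unaffected.
         */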
13127         TCGv_i32 tcg_idx = tcg_temp_new_i32();
13128         int pass, maxpasses;
13129 
13130         if (is_scalar) {
13131             maxpasses = 1;
13132         } else {
13133             maxpasses = is_q ? 4 : 2;
13134         }
13135 
13136         read_vec_element_i32(s, tcg_idx, rm, index, size);
13137 
13138         if (size == 1 && !is_scalar) {
13139             /* The simplest way to handle the 16x16 indexed ops is to duplicate
13140              * the index into both halves of the 32 bit tcg_idx and then use
13141              * the usual Neon helpers.
13142              */
13143             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13144         }
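        /*
         * Editorial example: if the 16-bit index element is 0x1234, the
         * deposit above turns tcg_idx into 0x12341234, so the 2x16
         * helpers see the same scalar operand in both lanes.
         */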
13145 
13146         for (pass = 0; pass < maxpasses; pass++) {
13147             TCGv_i32 tcg_op = tcg_temp_new_i32();
13148             TCGv_i32 tcg_res = tcg_temp_new_i32();
13149 
13150             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13151 
13152             switch (16 * u + opcode) {
13153             case 0x08: /* MUL */
13154             case 0x10: /* MLA */
13155             case 0x14: /* MLS */
13156             {
13157                 static NeonGenTwoOpFn * const fns[2][2] = {
13158                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13159                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
13160                 };
13161                 NeonGenTwoOpFn *genfn;
13162                 bool is_sub = opcode == 0x4;
13163 
13164                 if (size == 1) {
13165                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13166                 } else {
13167                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13168                 }
13169                 if (opcode == 0x8) {
13170                     break;
13171                 }
13172                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13173                 genfn = fns[size - 1][is_sub];
13174                 genfn(tcg_res, tcg_op, tcg_res);
13175                 break;
13176             }
13177             case 0x05: /* FMLS */
13178             case 0x01: /* FMLA */
13179                 read_vec_element_i32(s, tcg_res, rd, pass,
13180                                      is_scalar ? size : MO_32);
13181                 switch (size) {
13182                 case 1:
13183                     if (opcode == 0x5) {
13184                         /* As usual for ARM, separate negation for fused
13185                          * multiply-add */
13186                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13187                     }
13188                     if (is_scalar) {
13189                         gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13190                                                    tcg_res, fpst);
13191                     } else {
13192                         gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13193                                                     tcg_res, fpst);
13194                     }
13195                     break;
13196                 case 2:
13197                     if (opcode == 0x5) {
13198                         /* As usual for ARM, separate negation for
13199                          * fused multiply-add */
13200                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13201                     }
13202                     gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13203                                            tcg_res, fpst);
13204                     break;
13205                 default:
13206                     g_assert_not_reached();
13207                 }
13208                 break;
13209             case 0x09: /* FMUL */
13210                 switch (size) {
13211                 case 1:
13212                     if (is_scalar) {
13213                         gen_helper_advsimd_mulh(tcg_res, tcg_op,
13214                                                 tcg_idx, fpst);
13215                     } else {
13216                         gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13217                                                  tcg_idx, fpst);
13218                     }
13219                     break;
13220                 case 2:
13221                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13222                     break;
13223                 default:
13224                     g_assert_not_reached();
13225                 }
13226                 break;
13227             case 0x19: /* FMULX */
13228                 switch (size) {
13229                 case 1:
13230                     if (is_scalar) {
13231                         gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13232                                                  tcg_idx, fpst);
13233                     } else {
13234                         gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13235                                                   tcg_idx, fpst);
13236                     }
13237                     break;
13238                 case 2:
13239                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13240                     break;
13241                 default:
13242                     g_assert_not_reached();
13243                 }
13244                 break;
13245             case 0x0c: /* SQDMULH */
13246                 if (size == 1) {
13247                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
13248                                                tcg_op, tcg_idx);
13249                 } else {
13250                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
13251                                                tcg_op, tcg_idx);
13252                 }
13253                 break;
13254             case 0x0d: /* SQRDMULH */
13255                 if (size == 1) {
13256                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
13257                                                 tcg_op, tcg_idx);
13258                 } else {
13259                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
13260                                                 tcg_op, tcg_idx);
13261                 }
13262                 break;
13263             case 0x1d: /* SQRDMLAH */
13264                 read_vec_element_i32(s, tcg_res, rd, pass,
13265                                      is_scalar ? size : MO_32);
13266                 if (size == 1) {
13267                     gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
13268                                                 tcg_op, tcg_idx, tcg_res);
13269                 } else {
13270                     gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
13271                                                 tcg_op, tcg_idx, tcg_res);
13272                 }
13273                 break;
13274             case 0x1f: /* SQRDMLSH */
13275                 read_vec_element_i32(s, tcg_res, rd, pass,
13276                                      is_scalar ? size : MO_32);
13277                 if (size == 1) {
13278                     gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
13279                                                 tcg_op, tcg_idx, tcg_res);
13280                 } else {
13281                     gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
13282                                                 tcg_op, tcg_idx, tcg_res);
13283                 }
13284                 break;
13285             default:
13286                 g_assert_not_reached();
13287             }
13288 
13289             if (is_scalar) {
13290                 write_fp_sreg(s, rd, tcg_res);
13291             } else {
13292                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13293             }
13294         }
13295 
13296         clear_vec_high(s, is_q, rd);
13297     } else {
13298         /* long ops: 16x16->32 or 32x32->64 */
13299         TCGv_i64 tcg_res[2];
13300         int pass;
13301         bool satop = extract32(opcode, 0, 1);
13302         MemOp memop = MO_32;
13303 
13304         if (satop || !u) {
13305             memop |= MO_SIGN;
13306         }
13307 
13308         if (size == 2) {
13309             TCGv_i64 tcg_idx = tcg_temp_new_i64();
13310 
13311             read_vec_element(s, tcg_idx, rm, index, memop);
13312 
13313             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13314                 TCGv_i64 tcg_op = tcg_temp_new_i64();
13315                 TCGv_i64 tcg_passres;
13316                 int passelt;
13317 
13318                 if (is_scalar) {
13319                     passelt = 0;
13320                 } else {
13321                     passelt = pass + (is_q * 2);
13322                 }
13323 
13324                 read_vec_element(s, tcg_op, rn, passelt, memop);
13325 
13326                 tcg_res[pass] = tcg_temp_new_i64();
13327 
13328                 if (opcode == 0xa || opcode == 0xb) {
13329                     /* Non-accumulating ops */
13330                     tcg_passres = tcg_res[pass];
13331                 } else {
13332                     tcg_passres = tcg_temp_new_i64();
13333                 }
13334 
13335                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13336 
13337                 if (satop) {
13338                     /* saturating, doubling */
13339                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
13340                                                       tcg_passres, tcg_passres);
13341                 }
13342 
13343                 if (opcode == 0xa || opcode == 0xb) {
13344                     continue;
13345                 }
13346 
13347                 /* Accumulating op: handle accumulate step */
13348                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13349 
13350                 switch (opcode) {
13351                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13352                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13353                     break;
13354                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13355                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13356                     break;
13357                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13358                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
13359                     /* fall through */
13360                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13361                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
13362                                                       tcg_res[pass],
13363                                                       tcg_passres);
13364                     break;
13365                 default:
13366                     g_assert_not_reached();
13367                 }
13368             }
13369 
13370             clear_vec_high(s, !is_scalar, rd);
13371         } else {
13372             TCGv_i32 tcg_idx = tcg_temp_new_i32();
13373 
13374             assert(size == 1);
13375             read_vec_element_i32(s, tcg_idx, rm, index, size);
13376 
13377             if (!is_scalar) {
13378                 /* The simplest way to handle the 16x16 indexed ops is to
13379                  * duplicate the index into both halves of the 32 bit tcg_idx
13380                  * and then use the usual Neon helpers.
13381                  */
13382                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13383             }
13384 
13385             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13386                 TCGv_i32 tcg_op = tcg_temp_new_i32();
13387                 TCGv_i64 tcg_passres;
13388 
13389                 if (is_scalar) {
13390                     read_vec_element_i32(s, tcg_op, rn, pass, size);
13391                 } else {
13392                     read_vec_element_i32(s, tcg_op, rn,
13393                                          pass + (is_q * 2), MO_32);
13394                 }
13395 
13396                 tcg_res[pass] = tcg_temp_new_i64();
13397 
13398                 if (opcode == 0xa || opcode == 0xb) {
13399                     /* Non-accumulating ops */
13400                     tcg_passres = tcg_res[pass];
13401                 } else {
13402                     tcg_passres = tcg_temp_new_i64();
13403                 }
13404 
13405                 if (memop & MO_SIGN) {
13406                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13407                 } else {
13408                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13409                 }
13410                 if (satop) {
13411                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13412                                                       tcg_passres, tcg_passres);
13413                 }
13414 
13415                 if (opcode == 0xa || opcode == 0xb) {
13416                     continue;
13417                 }
13418 
13419                 /* Accumulating op: handle accumulate step */
13420                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13421 
13422                 switch (opcode) {
13423                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13424                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13425                                              tcg_passres);
13426                     break;
13427                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13428                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13429                                              tcg_passres);
13430                     break;
13431                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13432                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13433                     /* fall through */
13434                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13435                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13436                                                       tcg_res[pass],
13437                                                       tcg_passres);
13438                     break;
13439                 default:
13440                     g_assert_not_reached();
13441                 }
13442             }
13443 
13444             if (is_scalar) {
13445                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13446             }
13447         }
13448 
13449         if (is_scalar) {
13450             tcg_res[1] = tcg_constant_i64(0);
13451         }
13452 
13453         for (pass = 0; pass < 2; pass++) {
13454             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13455         }
13456     }
13457 }
13458 
13459 /* Crypto AES
13460  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13461  * +-----------------+------+-----------+--------+-----+------+------+
13462  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13463  * +-----------------+------+-----------+--------+-----+------+------+
13464  */
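/*
 * Note that encryption and decryption share helpers here: AESE/AESD
 * both expand to gen_helper_crypto_aese and AESMC/AESIMC both expand
 * to gen_helper_crypto_aesmc, with the direction selected by the
 * 'decrypt' flag passed through as the simd_data immediate.
 */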
13465 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13466 {
13467     int size = extract32(insn, 22, 2);
13468     int opcode = extract32(insn, 12, 5);
13469     int rn = extract32(insn, 5, 5);
13470     int rd = extract32(insn, 0, 5);
13471     int decrypt;
13472     gen_helper_gvec_2 *genfn2 = NULL;
13473     gen_helper_gvec_3 *genfn3 = NULL;
13474 
13475     if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13476         unallocated_encoding(s);
13477         return;
13478     }
13479 
13480     switch (opcode) {
13481     case 0x4: /* AESE */
13482         decrypt = 0;
13483         genfn3 = gen_helper_crypto_aese;
13484         break;
13485     case 0x6: /* AESMC */
13486         decrypt = 0;
13487         genfn2 = gen_helper_crypto_aesmc;
13488         break;
13489     case 0x5: /* AESD */
13490         decrypt = 1;
13491         genfn3 = gen_helper_crypto_aese;
13492         break;
13493     case 0x7: /* AESIMC */
13494         decrypt = 1;
13495         genfn2 = gen_helper_crypto_aesmc;
13496         break;
13497     default:
13498         unallocated_encoding(s);
13499         return;
13500     }
13501 
13502     if (!fp_access_check(s)) {
13503         return;
13504     }
13505     if (genfn2) {
13506         gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2);
13507     } else {
13508         gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3);
13509     }
13510 }
13511 
13512 /* Crypto three-reg SHA
13513  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
13514  * +-----------------+------+---+------+---+--------+-----+------+------+
13515  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
13516  * +-----------------+------+---+------+---+--------+-----+------+------+
13517  */
13518 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13519 {
13520     int size = extract32(insn, 22, 2);
13521     int opcode = extract32(insn, 12, 3);
13522     int rm = extract32(insn, 16, 5);
13523     int rn = extract32(insn, 5, 5);
13524     int rd = extract32(insn, 0, 5);
13525     gen_helper_gvec_3 *genfn;
13526     bool feature;
13527 
13528     if (size != 0) {
13529         unallocated_encoding(s);
13530         return;
13531     }
13532 
13533     switch (opcode) {
13534     case 0: /* SHA1C */
13535         genfn = gen_helper_crypto_sha1c;
13536         feature = dc_isar_feature(aa64_sha1, s);
13537         break;
13538     case 1: /* SHA1P */
13539         genfn = gen_helper_crypto_sha1p;
13540         feature = dc_isar_feature(aa64_sha1, s);
13541         break;
13542     case 2: /* SHA1M */
13543         genfn = gen_helper_crypto_sha1m;
13544         feature = dc_isar_feature(aa64_sha1, s);
13545         break;
13546     case 3: /* SHA1SU0 */
13547         genfn = gen_helper_crypto_sha1su0;
13548         feature = dc_isar_feature(aa64_sha1, s);
13549         break;
13550     case 4: /* SHA256H */
13551         genfn = gen_helper_crypto_sha256h;
13552         feature = dc_isar_feature(aa64_sha256, s);
13553         break;
13554     case 5: /* SHA256H2 */
13555         genfn = gen_helper_crypto_sha256h2;
13556         feature = dc_isar_feature(aa64_sha256, s);
13557         break;
13558     case 6: /* SHA256SU1 */
13559         genfn = gen_helper_crypto_sha256su1;
13560         feature = dc_isar_feature(aa64_sha256, s);
13561         break;
13562     default:
13563         unallocated_encoding(s);
13564         return;
13565     }
13566 
13567     if (!feature) {
13568         unallocated_encoding(s);
13569         return;
13570     }
13571 
13572     if (!fp_access_check(s)) {
13573         return;
13574     }
13575     gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
13576 }
13577 
13578 /* Crypto two-reg SHA
13579  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13580  * +-----------------+------+-----------+--------+-----+------+------+
13581  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13582  * +-----------------+------+-----------+--------+-----+------+------+
13583  */
13584 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13585 {
13586     int size = extract32(insn, 22, 2);
13587     int opcode = extract32(insn, 12, 5);
13588     int rn = extract32(insn, 5, 5);
13589     int rd = extract32(insn, 0, 5);
13590     gen_helper_gvec_2 *genfn;
13591     bool feature;
13592 
13593     if (size != 0) {
13594         unallocated_encoding(s);
13595         return;
13596     }
13597 
13598     switch (opcode) {
13599     case 0: /* SHA1H */
13600         feature = dc_isar_feature(aa64_sha1, s);
13601         genfn = gen_helper_crypto_sha1h;
13602         break;
13603     case 1: /* SHA1SU1 */
13604         feature = dc_isar_feature(aa64_sha1, s);
13605         genfn = gen_helper_crypto_sha1su1;
13606         break;
13607     case 2: /* SHA256SU0 */
13608         feature = dc_isar_feature(aa64_sha256, s);
13609         genfn = gen_helper_crypto_sha256su0;
13610         break;
13611     default:
13612         unallocated_encoding(s);
13613         return;
13614     }
13615 
13616     if (!feature) {
13617         unallocated_encoding(s);
13618         return;
13619     }
13620 
13621     if (!fp_access_check(s)) {
13622         return;
13623     }
13624     gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
13625 }
13626 
13627 static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
13628 {
13629     tcg_gen_rotli_i64(d, m, 1);
13630     tcg_gen_xor_i64(d, d, n);
13631 }
13632 
13633 static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
13634 {
13635     tcg_gen_rotli_vec(vece, d, m, 1);
13636     tcg_gen_xor_vec(vece, d, d, n);
13637 }
13638 
13639 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
13640                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
13641 {
13642     static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
13643     static const GVecGen3 op = {
13644         .fni8 = gen_rax1_i64,
13645         .fniv = gen_rax1_vec,
13646         .opt_opc = vecop_list,
13647         .fno = gen_helper_crypto_rax1,
13648         .vece = MO_64,
13649     };
13650     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
13651 }
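/*
 * Editorial sketch of how gen_gvec_rax1 above expands (assuming the
 * usual gvec expansion rules): tcg_gen_gvec_3 prefers .fniv when the
 * host supports the opcodes listed in .opt_opc (here rotli_vec),
 * falls back to the per-64-bit-lane .fni8 expansion, and otherwise
 * calls the out-of-line helper .fno.  All three compute RAX1:
 * d = n ^ rol64(m, 1) for each 64-bit lane.
 */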
13652 
13653 /* Crypto three-reg SHA512
13654  *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
13655  * +-----------------------+------+---+---+-----+--------+------+------+
13656  * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
13657  * +-----------------------+------+---+---+-----+--------+------+------+
13658  */
13659 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13660 {
13661     int opcode = extract32(insn, 10, 2);
13662     int o = extract32(insn, 14, 1);
13663     int rm = extract32(insn, 16, 5);
13664     int rn = extract32(insn, 5, 5);
13665     int rd = extract32(insn, 0, 5);
13666     bool feature;
13667     gen_helper_gvec_3 *oolfn = NULL;
13668     GVecGen3Fn *gvecfn = NULL;
13669 
13670     if (o == 0) {
13671         switch (opcode) {
13672         case 0: /* SHA512H */
13673             feature = dc_isar_feature(aa64_sha512, s);
13674             oolfn = gen_helper_crypto_sha512h;
13675             break;
13676         case 1: /* SHA512H2 */
13677             feature = dc_isar_feature(aa64_sha512, s);
13678             oolfn = gen_helper_crypto_sha512h2;
13679             break;
13680         case 2: /* SHA512SU1 */
13681             feature = dc_isar_feature(aa64_sha512, s);
13682             oolfn = gen_helper_crypto_sha512su1;
13683             break;
13684         case 3: /* RAX1 */
13685             feature = dc_isar_feature(aa64_sha3, s);
13686             gvecfn = gen_gvec_rax1;
13687             break;
13688         default:
13689             g_assert_not_reached();
13690         }
13691     } else {
13692         switch (opcode) {
13693         case 0: /* SM3PARTW1 */
13694             feature = dc_isar_feature(aa64_sm3, s);
13695             oolfn = gen_helper_crypto_sm3partw1;
13696             break;
13697         case 1: /* SM3PARTW2 */
13698             feature = dc_isar_feature(aa64_sm3, s);
13699             oolfn = gen_helper_crypto_sm3partw2;
13700             break;
13701         case 2: /* SM4EKEY */
13702             feature = dc_isar_feature(aa64_sm4, s);
13703             oolfn = gen_helper_crypto_sm4ekey;
13704             break;
13705         default:
13706             unallocated_encoding(s);
13707             return;
13708         }
13709     }
13710 
13711     if (!feature) {
13712         unallocated_encoding(s);
13713         return;
13714     }
13715 
13716     if (!fp_access_check(s)) {
13717         return;
13718     }
13719 
13720     if (oolfn) {
13721         gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
13722     } else {
13723         gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
13724     }
13725 }
13726 
13727 /* Crypto two-reg SHA512
13728  *  31                                     12  11  10  9    5 4    0
13729  * +-----------------------------------------+--------+------+------+
13730  * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
13731  * +-----------------------------------------+--------+------+------+
13732  */
13733 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13734 {
13735     int opcode = extract32(insn, 10, 2);
13736     int rn = extract32(insn, 5, 5);
13737     int rd = extract32(insn, 0, 5);
13738     bool feature;
13739 
13740     switch (opcode) {
13741     case 0: /* SHA512SU0 */
13742         feature = dc_isar_feature(aa64_sha512, s);
13743         break;
13744     case 1: /* SM4E */
13745         feature = dc_isar_feature(aa64_sm4, s);
13746         break;
13747     default:
13748         unallocated_encoding(s);
13749         return;
13750     }
13751 
13752     if (!feature) {
13753         unallocated_encoding(s);
13754         return;
13755     }
13756 
13757     if (!fp_access_check(s)) {
13758         return;
13759     }
13760 
13761     switch (opcode) {
13762     case 0: /* SHA512SU0 */
13763         gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
13764         break;
13765     case 1: /* SM4E */
13766         gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
13767         break;
13768     default:
13769         g_assert_not_reached();
13770     }
13771 }
13772 
13773 /* Crypto four-register
13774  *  31               23 22 21 20  16 15  14  10 9    5 4    0
13775  * +-------------------+-----+------+---+------+------+------+
13776  * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
13777  * +-------------------+-----+------+---+------+------+------+
13778  */
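/*
 * Per the code below, the three op0 cases compute:
 *   EOR3:   Vd = Vn ^ Vm ^ Va           (per 64-bit lane)
 *   BCAX:   Vd = Vn ^ (Vm & ~Va)        (per 64-bit lane)
 *   SM3SS1: lane 3 of Vd = ror32(ror32(Vn, 20) + Vm + Va, 25),
 *           with lanes 0-2 zeroed.
 */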
13779 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13780 {
13781     int op0 = extract32(insn, 21, 2);
13782     int rm = extract32(insn, 16, 5);
13783     int ra = extract32(insn, 10, 5);
13784     int rn = extract32(insn, 5, 5);
13785     int rd = extract32(insn, 0, 5);
13786     bool feature;
13787 
13788     switch (op0) {
13789     case 0: /* EOR3 */
13790     case 1: /* BCAX */
13791         feature = dc_isar_feature(aa64_sha3, s);
13792         break;
13793     case 2: /* SM3SS1 */
13794         feature = dc_isar_feature(aa64_sm3, s);
13795         break;
13796     default:
13797         unallocated_encoding(s);
13798         return;
13799     }
13800 
13801     if (!feature) {
13802         unallocated_encoding(s);
13803         return;
13804     }
13805 
13806     if (!fp_access_check(s)) {
13807         return;
13808     }
13809 
13810     if (op0 < 2) {
13811         TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13812         int pass;
13813 
13814         tcg_op1 = tcg_temp_new_i64();
13815         tcg_op2 = tcg_temp_new_i64();
13816         tcg_op3 = tcg_temp_new_i64();
13817         tcg_res[0] = tcg_temp_new_i64();
13818         tcg_res[1] = tcg_temp_new_i64();
13819 
13820         for (pass = 0; pass < 2; pass++) {
13821             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13822             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13823             read_vec_element(s, tcg_op3, ra, pass, MO_64);
13824 
13825             if (op0 == 0) {
13826                 /* EOR3 */
13827                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13828             } else {
13829                 /* BCAX */
13830                 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13831             }
13832             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13833         }
13834         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13835         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13836     } else {
13837         TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13838 
13839         tcg_op1 = tcg_temp_new_i32();
13840         tcg_op2 = tcg_temp_new_i32();
13841         tcg_op3 = tcg_temp_new_i32();
13842         tcg_res = tcg_temp_new_i32();
13843         tcg_zero = tcg_constant_i32(0);
13844 
13845         read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13846         read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13847         read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13848 
13849         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13850         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13851         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13852         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13853 
13854         write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13855         write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13856         write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13857         write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13858     }
13859 }
13860 
13861 /* Crypto XAR
13862  *  31                   21 20  16 15    10 9    5 4    0
13863  * +-----------------------+------+--------+------+------+
13864  * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13865  * +-----------------------+------+--------+------+------+
13866  */
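/*
 * XAR is a rotate of an exclusive-or: each 64-bit lane of Vd becomes
 * ror64(Vn ^ Vm, imm6), which gen_gvec_xar expands below.
 */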
13867 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13868 {
13869     int rm = extract32(insn, 16, 5);
13870     int imm6 = extract32(insn, 10, 6);
13871     int rn = extract32(insn, 5, 5);
13872     int rd = extract32(insn, 0, 5);
13873 
13874     if (!dc_isar_feature(aa64_sha3, s)) {
13875         unallocated_encoding(s);
13876         return;
13877     }
13878 
13879     if (!fp_access_check(s)) {
13880         return;
13881     }
13882 
13883     gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
13884                  vec_full_reg_offset(s, rn),
13885                  vec_full_reg_offset(s, rm), imm6, 16,
13886                  vec_full_reg_size(s));
13887 }
13888 
13889 /* Crypto three-reg imm2
13890  *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
13891  * +-----------------------+------+-----+------+--------+------+------+
13892  * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
13893  * +-----------------------+------+-----+------+--------+------+------+
13894  */
13895 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13896 {
13897     static gen_helper_gvec_3 * const fns[4] = {
13898         gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
13899         gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
13900     };
13901     int opcode = extract32(insn, 10, 2);
13902     int imm2 = extract32(insn, 12, 2);
13903     int rm = extract32(insn, 16, 5);
13904     int rn = extract32(insn, 5, 5);
13905     int rd = extract32(insn, 0, 5);
13906 
13907     if (!dc_isar_feature(aa64_sm3, s)) {
13908         unallocated_encoding(s);
13909         return;
13910     }
13911 
13912     if (!fp_access_check(s)) {
13913         return;
13914     }
13915 
13916     gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
13917 }
13918 
13919 /* C3.6 Data processing - SIMD, inc Crypto
13920  *
13921  * As the decode gets a little complex we are using a table based
13922  * approach for this part of the decode.
13923  */
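/*
 * Editorial example of the match rule: the lookup (lookup_disas_fn)
 * returns the first entry for which (insn & mask) == pattern, with the
 * all-zero sentinel terminating the table.  For instance, assuming the
 * encoding is right, AESE v0.16b, v1.16b is 0x4e284820, and
 * 0x4e284820 & 0xff3e0c00 == 0x4e280800, selecting disas_crypto_aes.
 */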
13924 static const AArch64DecodeTable data_proc_simd[] = {
13925     /* pattern  ,  mask     ,  fn                        */
13926     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
13927     { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
13928     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
13929     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
13930     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
13931     { 0x0e000400, 0x9fe08400, disas_simd_copy },
13932     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
13933     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
13934     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
13935     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
13936     { 0x0e000000, 0xbf208c00, disas_simd_tb },
13937     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
13938     { 0x2e000000, 0xbf208400, disas_simd_ext },
13939     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
13940     { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
13941     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
13942     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
13943     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
13944     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
13945     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
13946     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
13947     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
13948     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
13949     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
13950     { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
13951     { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
13952     { 0xce000000, 0xff808000, disas_crypto_four_reg },
13953     { 0xce800000, 0xffe00000, disas_crypto_xar },
13954     { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
13955     { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
13956     { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
13957     { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
13958     { 0x00000000, 0x00000000, NULL }
13959 };
13960 
13961 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13962 {
13963     /* Note that this is called with all non-FP cases from
13964      * table C3-6, so it must UNDEF for entries not specifically
13965      * allocated to instructions in that table.
13966      */
13967     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13968     if (fn) {
13969         fn(s, insn);
13970     } else {
13971         unallocated_encoding(s);
13972     }
13973 }
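
/*
 * For illustration, a minimal sketch of the walk that lookup_disas_fn
 * performs over the table above (the real helper lives in
 * translate-a64.h; the _sketch name below is hypothetical).  The first
 * matching entry wins, which is why simd_mod_imm must precede
 * simd_shift_imm, and the all-zero sentinel entry terminates the walk:
 *
 *   static AArch64DecodeFn *lookup_disas_fn_sketch(const AArch64DecodeTable *tp,
 *                                                  uint32_t insn)
 *   {
 *       for (; tp->mask != 0; tp++) {
 *           if ((insn & tp->mask) == tp->pattern) {
 *               return tp->disas_fn;
 *           }
 *       }
 *       return NULL;
 *   }
 */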
13974 
13975 /* C3.6 Data processing - SIMD and floating point */
13976 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13977 {
13978     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13979         disas_data_proc_fp(s, insn);
13980     } else {
13981         /* SIMD, including crypto */
13982         disas_data_proc_simd(s, insn);
13983     }
13984 }
13985 
13986 static bool trans_OK(DisasContext *s, arg_OK *a)
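/*
 * Decode stubs for the generated SME FA64 decoder
 * (decode-sme-fa64.c.inc): encodings it accepts are legal in streaming
 * SVE mode (trans_OK), while trans_FAIL marks the insn as
 * non-streaming so that the SME trap for non-streaming insns can be
 * raised later (see the use of s->is_nonstreaming in
 * aarch64_tr_translate_insn below).
 */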
13987 {
13988     return true;
13989 }
13990 
13991 static bool trans_FAIL(DisasContext *s, arg_OK *a)
13992 {
13993     s->is_nonstreaming = true;
13994     return true;
13995 }
13996 
13997 /**
13998  * is_guarded_page:
13999  * @env: The cpu environment
14000  * @s: The DisasContext
14001  *
14002  * Return true if the page is guarded.
14003  */
14004 static bool is_guarded_page(CPUARMState *env, DisasContext *s)
14005 {
14006     uint64_t addr = s->base.pc_first;
14007 #ifdef CONFIG_USER_ONLY
14008     return page_get_flags(addr) & PAGE_BTI;
14009 #else
14010     CPUTLBEntryFull *full;
14011     void *host;
14012     int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
14013     int flags;
14014 
14015     /*
14016      * We test this immediately after reading an insn, which means
14017      * that the TLB entry must be present and valid, and thus this
14018      * access will never raise an exception.
14019      */
14020     flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
14021                               false, &host, &full, 0);
14022     assert(!(flags & TLB_INVALID_MASK));
14023 
14024     return full->guarded;
14025 #endif
14026 }
14027 
14028 /**
14029  * btype_destination_ok:
14030  * @insn: The instruction at the branch destination
14031  * @bt: SCTLR_ELx.BT
14032  * @btype: PSTATE.BTYPE, which is known to be non-zero
14033  *
14034  * On a guarded page, only a limited set of insns may be
14035  * present at the branch target:
14036  *   - branch target identifiers,
14037  *   - PACIASP, PACIBSP,
14038  *   - the BRK insn,
14039  *   - the HLT insn.
14040  * Anything else causes a Branch Target Exception.
14041  *
14042  * Return true if the branch is compatible, false to raise BTITRAP.
14043  */
14044 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
14045 {
14046     if ((insn & 0xfffff01fu) == 0xd503201fu) {
14047         /* HINT space */
14048         switch (extract32(insn, 5, 7)) {
14049         case 0b011001: /* PACIASP */
14050         case 0b011011: /* PACIBSP */
14051             /*
14052              * If SCTLR_ELx.BT, then PACI*SP are not compatible
14053              * with btype == 3.  Otherwise all btype are ok.
14054              */
14055             return !bt || btype != 3;
14056         case 0b100000: /* BTI */
14057             /* Not compatible with any btype.  */
14058             return false;
14059         case 0b100010: /* BTI c */
14060             /* Not compatible with btype == 3 */
14061             return btype != 3;
14062         case 0b100100: /* BTI j */
14063             /* Not compatible with btype == 2 */
14064             return btype != 2;
14065         case 0b100110: /* BTI jc */
14066             /* Compatible with any btype.  */
14067             return true;
14068         }
14069     } else {
14070         switch (insn & 0xffe0001fu) {
14071         case 0xd4200000u: /* BRK */
14072         case 0xd4400000u: /* HLT */
14073             /* Give priority to the breakpoint exception.  */
14074             return true;
14075         }
14076     }
14077     return false;
14078 }
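
/*
 * The checks above, summarized as a compatibility matrix
 * (ok == no Branch Target Exception):
 *
 *                       btype=1  btype=2  btype=3
 *   PACIASP / PACIBSP     ok       ok     ok unless SCTLR_ELx.BT
 *   BTI                   --       --     --
 *   BTI c                 ok       ok     --
 *   BTI j                 ok       --     ok
 *   BTI jc                ok       ok     ok
 *   BRK / HLT             ok       ok     ok   (breakpoint wins)
 *   anything else         --       --     --
 */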
14079 
14080 /* C3.1 A64 instruction index by encoding */
14081 static void disas_a64_legacy(DisasContext *s, uint32_t insn)
14082 {
14083     switch (extract32(insn, 25, 4)) {
14084     case 0xa: case 0xb: /* Branch, exception generation and system insns */
14085         disas_b_exc_sys(s, insn);
14086         break;
14087     case 0x4:
14088     case 0x6:
14089     case 0xc:
14090     case 0xe:      /* Loads and stores */
14091         disas_ldst(s, insn);
14092         break;
14093     case 0x5:
14094     case 0xd:      /* Data processing - register */
14095         disas_data_proc_reg(s, insn);
14096         break;
14097     case 0x7:
14098     case 0xf:      /* Data processing - SIMD and floating point */
14099         disas_data_proc_simd_fp(s, insn);
14100         break;
14101     default:
14102         unallocated_encoding(s);
14103         break;
14104     }
14105 }
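
/*
 * Worked example: LDR X0, [X0] encodes as 0xf9400000;
 * extract32(0xf9400000, 25, 4) == 0xc, so it is dispatched to
 * disas_ldst above.
 */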
14106 
14107 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
14108                                           CPUState *cpu)
14109 {
14110     DisasContext *dc = container_of(dcbase, DisasContext, base);
14111     CPUARMState *env = cpu->env_ptr;
14112     ARMCPU *arm_cpu = env_archcpu(env);
14113     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
14114     int bound, core_mmu_idx;
14115 
14116     dc->isar = &arm_cpu->isar;
14117     dc->condjmp = 0;
14118     dc->pc_save = dc->base.pc_first;
14119     dc->aarch64 = true;
14120     dc->thumb = false;
14121     dc->sctlr_b = 0;
14122     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
14123     dc->condexec_mask = 0;
14124     dc->condexec_cond = 0;
14125     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
14126     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
14127     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
14128     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
14129     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
14130     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
14131 #if !defined(CONFIG_USER_ONLY)
14132     dc->user = (dc->current_el == 0);
14133 #endif
14134     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
14135     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
14136     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
14137     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
14138     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
14139     dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET);
14140     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
14141     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
14142     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
14143     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
14144     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
14145     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
14146     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
14147     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
14148     dc->ata = EX_TBFLAG_A64(tb_flags, ATA);
14149     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
14150     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
14151     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
14152     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
14153     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
14154     dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
14155     dc->vec_len = 0;
14156     dc->vec_stride = 0;
14157     dc->cp_regs = arm_cpu->cp_regs;
14158     dc->features = env->features;
14159     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
14160 
14161 #ifdef CONFIG_USER_ONLY
14162     /* In sve_probe_page, we assume TBI is enabled. */
14163     tcg_debug_assert(dc->tbid & 1);
14164 #endif
14165 
14166     dc->lse2 = dc_isar_feature(aa64_lse2, dc);
14167 
14168     /* Single step state. The code-generation logic here is:
14169      *  SS_ACTIVE == 0:
14170      *   generate code with no special handling for single-stepping (except
14171      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
14172      *   this happens anyway because those changes are all system register or
14173      *   PSTATE writes).
14174      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
14175      *   emit code for one insn
14176      *   emit code to clear PSTATE.SS
14177      *   emit code to generate software step exception for completed step
14178      *   end TB (as usual for having generated an exception)
14179      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
14180      *   emit code to generate a software step exception
14181      *   end the TB
14182      */
14183     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
14184     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
14185     dc->is_ldex = false;
14186 
14187     /* Bound the number of insns to execute to those left on the page.  */
14188     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
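    /*
     * Worked example: with 4KiB pages (TARGET_PAGE_MASK == -4096),
     * a pc_first ending in 0xff0 gives (pc_first | TARGET_PAGE_MASK)
     * == -16, so bound == 16 / 4 == 4 insns left on this page.
     */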
14189 
14190     /* If architectural single step active, limit to 1.  */
14191     if (dc->ss_active) {
14192         bound = 1;
14193     }
14194     dc->base.max_insns = MIN(dc->base.max_insns, bound);
14195 }
14196 
14197 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
14198 {
14199 }
14200 
14201 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
14202 {
14203     DisasContext *dc = container_of(dcbase, DisasContext, base);
14204     target_ulong pc_arg = dc->base.pc_next;
14205 
14206     if (tb_cflags(dcbase->tb) & CF_PCREL) {
14207         pc_arg &= ~TARGET_PAGE_MASK;
14208     }
14209     tcg_gen_insn_start(pc_arg, 0, 0);
14210     dc->insn_start = tcg_last_op();
14211 }
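
/*
 * With CF_PCREL above, only the in-page offset of the PC is recorded:
 * e.g. with 4KiB pages, pc_next == 0x40001008 is recorded as 0x008,
 * since a TB translated with CF_PCREL may be reused at a different
 * virtual address with the same page offset.
 */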
14212 
14213 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
14214 {
14215     DisasContext *s = container_of(dcbase, DisasContext, base);
14216     CPUARMState *env = cpu->env_ptr;
14217     uint64_t pc = s->base.pc_next;
14218     uint32_t insn;
14219 
14220     /* Singlestep exceptions have the highest priority. */
14221     if (s->ss_active && !s->pstate_ss) {
14222         /* Singlestep state is Active-pending.
14223          * If we're in this state at the start of a TB then either
14224          *  a) we just took an exception to an EL which is being debugged
14225          *     and this is the first insn in the exception handler
14226          *  b) debug exceptions were masked and we just unmasked them
14227          *     without changing EL (eg by clearing PSTATE.D)
14228          * In either case we're going to take a swstep exception in the
14229          * "did not step an insn" case, and so the syndrome ISV and EX
14230          * bits should be zero.
14231          */
14232         assert(s->base.num_insns == 1);
14233         gen_swstep_exception(s, 0, 0);
14234         s->base.is_jmp = DISAS_NORETURN;
14235         s->base.pc_next = pc + 4;
14236         return;
14237     }
14238 
14239     if (pc & 3) {
14240         /*
14241          * PC alignment fault.  This has priority over the instruction abort
14242          * that we would receive from a translation fault via arm_ldl_code.
14243          * This should only be possible after an indirect branch, at the
14244          * start of the TB.
14245          */
14246         assert(s->base.num_insns == 1);
14247         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
14248         s->base.is_jmp = DISAS_NORETURN;
14249         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
14250         return;
14251     }
14252 
14253     s->pc_curr = pc;
14254     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
14255     s->insn = insn;
14256     s->base.pc_next = pc + 4;
14257 
14258     s->fp_access_checked = false;
14259     s->sve_access_checked = false;
14260 
14261     if (s->pstate_il) {
14262         /*
14263          * Illegal execution state. This has priority over BTI
14264          * exceptions, but comes after instruction abort exceptions.
14265          */
14266         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
14267         return;
14268     }
14269 
14270     if (dc_isar_feature(aa64_bti, s)) {
14271         if (s->base.num_insns == 1) {
14272             /*
14273              * At the first insn of the TB, compute s->guarded_page.
14274              * We delayed computing this until successfully reading
14275              * the first insn of the TB, above.  This (mostly) ensures
14276              * that the softmmu tlb entry has been populated, and the
14277              * page table GP bit is available.
14278              *
14279              * Note that we need to compute this even if btype == 0,
14280              * because this value is used for BR instructions later
14281              * where ENV is not available.
14282              */
14283             s->guarded_page = is_guarded_page(env, s);
14284 
14285             /* First insn can have btype set to non-zero.  */
14286             tcg_debug_assert(s->btype >= 0);
14287 
14288             /*
14289              * Note that the Branch Target Exception has fairly high
14290              * priority -- below debugging exceptions but above almost
14291              * everything else.  This lets us handle it here, instead of
14292              * waiting until the insn is otherwise decoded.
14293              */
14294             if (s->btype != 0
14295                 && s->guarded_page
14296                 && !btype_destination_ok(insn, s->bt, s->btype)) {
14297                 gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
14298                 return;
14299             }
14300         } else {
14301             /* Not the first insn: btype must be 0.  */
14302             tcg_debug_assert(s->btype == 0);
14303         }
14304     }
14305 
14306     s->is_nonstreaming = false;
14307     if (s->sme_trap_nonstreaming) {
14308         disas_sme_fa64(s, insn);
14309     }
14310 
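    /*
     * Decoder precedence: the generated A64 decoder is tried first,
     * then the SME and SVE decoders; only when none of them claims
     * the insn do we fall back to the hand-written legacy decoder,
     * which UNDEFs anything left over.
     */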
14311     if (!disas_a64(s, insn) &&
14312         !disas_sme(s, insn) &&
14313         !disas_sve(s, insn)) {
14314         disas_a64_legacy(s, insn);
14315     }
14316 
14317     /*
14318      * After execution of most insns, btype is reset to 0.
14319      * Note that we set btype == -1 when the insn sets btype.
14320      */
14321     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
14322         reset_btype(s);
14323     }
14324 }
14325 
14326 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
14327 {
14328     DisasContext *dc = container_of(dcbase, DisasContext, base);
14329 
14330     if (unlikely(dc->ss_active)) {
14331         /* Note that this means single-stepping a WFI doesn't halt the CPU.
14332          * For conditional branch insns this is harmless unreachable code, as
14333          * gen_goto_tb() has already handled emitting the debug exception
14334          * (and thus a tb-jump is not possible when single-stepping).
14335          */
14336         switch (dc->base.is_jmp) {
14337         default:
14338             gen_a64_update_pc(dc, 4);
14339             /* fall through */
14340         case DISAS_EXIT:
14341         case DISAS_JUMP:
14342             gen_step_complete_exception(dc);
14343             break;
14344         case DISAS_NORETURN:
14345             break;
14346         }
14347     } else {
14348         switch (dc->base.is_jmp) {
14349         case DISAS_NEXT:
14350         case DISAS_TOO_MANY:
14351             gen_goto_tb(dc, 1, 4);
14352             break;
14353         default:
14354         case DISAS_UPDATE_EXIT:
14355             gen_a64_update_pc(dc, 4);
14356             /* fall through */
14357         case DISAS_EXIT:
14358             tcg_gen_exit_tb(NULL, 0);
14359             break;
14360         case DISAS_UPDATE_NOCHAIN:
14361             gen_a64_update_pc(dc, 4);
14362             /* fall through */
14363         case DISAS_JUMP:
14364             tcg_gen_lookup_and_goto_ptr();
14365             break;
14366         case DISAS_NORETURN:
14367         case DISAS_SWI:
14368             break;
14369         case DISAS_WFE:
14370             gen_a64_update_pc(dc, 4);
14371             gen_helper_wfe(cpu_env);
14372             break;
14373         case DISAS_YIELD:
14374             gen_a64_update_pc(dc, 4);
14375             gen_helper_yield(cpu_env);
14376             break;
14377         case DISAS_WFI:
14378             /*
14379              * This is a special case because we don't want to just halt
14380              * the CPU if trying to debug across a WFI.
14381              */
14382             gen_a64_update_pc(dc, 4);
14383             gen_helper_wfi(cpu_env, tcg_constant_i32(4));
14384             /*
14385              * The helper doesn't necessarily throw an exception, but we
14386              * must go back to the main loop to check for interrupts anyway.
14387              */
14388             tcg_gen_exit_tb(NULL, 0);
14389             break;
14390         }
14391     }
14392 }
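
/*
 * Summary of the non-single-step dispositions handled above:
 *   DISAS_NEXT / DISAS_TOO_MANY -> chain to the next TB (gen_goto_tb)
 *   DISAS_UPDATE_EXIT           -> write back the PC, then exit to the
 *                                  main loop
 *   DISAS_EXIT                  -> exit to the main loop (PC already
 *                                  written back)
 *   DISAS_UPDATE_NOCHAIN        -> write back the PC, then indirect TB
 *                                  lookup
 *   DISAS_JUMP                  -> indirect TB lookup (PC already
 *                                  written back)
 *   DISAS_WFE / DISAS_YIELD     -> write back the PC and call the helper
 *   DISAS_WFI                   -> as above, then exit to the main loop
 *   DISAS_NORETURN / DISAS_SWI  -> nothing to do; an exception has
 *                                  already been generated
 */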
14393 
14394 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
14395                                  CPUState *cpu, FILE *logfile)
14396 {
14397     DisasContext *dc = container_of(dcbase, DisasContext, base);
14398 
14399     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
14400     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
14401 }
14402 
14403 const TranslatorOps aarch64_translator_ops = {
14404     .init_disas_context = aarch64_tr_init_disas_context,
14405     .tb_start           = aarch64_tr_tb_start,
14406     .insn_start         = aarch64_tr_insn_start,
14407     .translate_insn     = aarch64_tr_translate_insn,
14408     .tb_stop            = aarch64_tr_tb_stop,
14409     .disas_log          = aarch64_tr_disas_log,
14410 };
14411