xref: /openbmc/qemu/target/arm/tcg/translate-a64.c (revision 6c1e3906)
1 /*
2  *  AArch64 translation
3  *
4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg/tcg-op.h"
24 #include "tcg/tcg-op-gvec.h"
25 #include "qemu/log.h"
26 #include "arm_ldst.h"
27 #include "translate.h"
28 #include "internals.h"
29 #include "qemu/host-utils.h"
30 #include "semihosting/semihost.h"
31 #include "exec/gen-icount.h"
32 #include "exec/helper-proto.h"
33 #include "exec/helper-gen.h"
34 #include "exec/log.h"
35 #include "cpregs.h"
36 #include "translate-a64.h"
37 #include "qemu/atomic128.h"
38 
39 static TCGv_i64 cpu_X[32];
40 static TCGv_i64 cpu_pc;
41 
42 /* Load/store exclusive handling */
43 static TCGv_i64 cpu_exclusive_high;
44 
/*
 * Names given to the TCG globals that back the 32 general-purpose register
 * slots, indexed by slot number.  Slots 30 and 31 are named "lr" and "sp"
 * instead of "x30"/"x31".
 *
 * Both the table and the strings it points at are read-only.
 */
static const char * const regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};
51 
/*
 * Kinds of shift that can be applied to a register operand.
 * The numeric values are fixed explicitly and must not be renumbered.
 */
enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,     /* logical shift left */
    A64_SHIFT_TYPE_LSR = 1,     /* logical shift right */
    A64_SHIFT_TYPE_ASR = 2,     /* arithmetic shift right */
    A64_SHIFT_TYPE_ROR = 3,     /* rotate right */
};
58 
59 /*
60  * Include the generated decoders.
61  */
62 
63 #include "decode-sme-fa64.c.inc"
64 #include "decode-a64.c.inc"
65 
66 /* Table based decoder typedefs - used when the relevant bits for decode
67  * are too awkwardly scattered across the instruction (eg SIMD).
68  */
69 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
70 
71 typedef struct AArch64DecodeTable {
72     uint32_t pattern;
73     uint32_t mask;
74     AArch64DecodeFn *disas_fn;
75 } AArch64DecodeTable;
76 
77 /* initialize TCG globals.  */
78 void a64_translate_init(void)
79 {
80     int i;
81 
82     cpu_pc = tcg_global_mem_new_i64(cpu_env,
83                                     offsetof(CPUARMState, pc),
84                                     "pc");
85     for (i = 0; i < 32; i++) {
86         cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
87                                           offsetof(CPUARMState, xregs[i]),
88                                           regnames[i]);
89     }
90 
91     cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
92         offsetof(CPUARMState, exclusive_high), "exclusive_high");
93 }
94 
95 /*
96  * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
97  */
98 static int get_a64_user_mem_index(DisasContext *s)
99 {
100     /*
101      * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
102      * which is the usual mmu_idx for this cpu state.
103      */
104     ARMMMUIdx useridx = s->mmu_idx;
105 
106     if (s->unpriv) {
107         /*
108          * We have pre-computed the condition for AccType_UNPRIV.
109          * Therefore we should never get here with a mmu_idx for
110          * which we do not know the corresponding user mmu_idx.
111          */
112         switch (useridx) {
113         case ARMMMUIdx_E10_1:
114         case ARMMMUIdx_E10_1_PAN:
115             useridx = ARMMMUIdx_E10_0;
116             break;
117         case ARMMMUIdx_E20_2:
118         case ARMMMUIdx_E20_2_PAN:
119             useridx = ARMMMUIdx_E20_0;
120             break;
121         default:
122             g_assert_not_reached();
123         }
124     }
125     return arm_to_core_mmu_idx(useridx);
126 }
127 
128 static void set_btype_raw(int val)
129 {
130     tcg_gen_st_i32(tcg_constant_i32(val), cpu_env,
131                    offsetof(CPUARMState, btype));
132 }
133 
134 static void set_btype(DisasContext *s, int val)
135 {
136     /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
137     tcg_debug_assert(val >= 1 && val <= 3);
138     set_btype_raw(val);
139     s->btype = -1;
140 }
141 
142 static void reset_btype(DisasContext *s)
143 {
144     if (s->btype != 0) {
145         set_btype_raw(0);
146         s->btype = 0;
147     }
148 }
149 
150 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
151 {
152     assert(s->pc_save != -1);
153     if (tb_cflags(s->base.tb) & CF_PCREL) {
154         tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
155     } else {
156         tcg_gen_movi_i64(dest, s->pc_curr + diff);
157     }
158 }
159 
160 void gen_a64_update_pc(DisasContext *s, target_long diff)
161 {
162     gen_pc_plus_diff(s, cpu_pc, diff);
163     s->pc_save = s->pc_curr + diff;
164 }
165 
166 /*
167  * Handle Top Byte Ignore (TBI) bits.
168  *
169  * If address tagging is enabled via the TCR TBI bits:
170  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
171  *    then the address is zero-extended, clearing bits [63:56]
172  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
173  *    and TBI1 controls addressses with bit 55 == 1.
174  *    If the appropriate TBI bit is set for the address then
175  *    the address is sign-extended from bit 55 into bits [63:56]
176  *
177  * Here We have concatenated TBI{1,0} into tbi.
178  */
179 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
180                                 TCGv_i64 src, int tbi)
181 {
182     if (tbi == 0) {
183         /* Load unmodified address */
184         tcg_gen_mov_i64(dst, src);
185     } else if (!regime_has_2_ranges(s->mmu_idx)) {
186         /* Force tag byte to all zero */
187         tcg_gen_extract_i64(dst, src, 0, 56);
188     } else {
189         /* Sign-extend from bit 55.  */
190         tcg_gen_sextract_i64(dst, src, 0, 56);
191 
192         switch (tbi) {
193         case 1:
194             /* tbi0 but !tbi1: only use the extension if positive */
195             tcg_gen_and_i64(dst, dst, src);
196             break;
197         case 2:
198             /* !tbi0 but tbi1: only use the extension if negative */
199             tcg_gen_or_i64(dst, dst, src);
200             break;
201         case 3:
202             /* tbi0 and tbi1: always use the extension */
203             break;
204         default:
205             g_assert_not_reached();
206         }
207     }
208 }
209 
210 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
211 {
212     /*
213      * If address tagging is enabled for instructions via the TCR TBI bits,
214      * then loading an address into the PC will clear out any tag.
215      */
216     gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
217     s->pc_save = -1;
218 }
219 
220 /*
221  * Handle MTE and/or TBI.
222  *
223  * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
224  * for the tag to be present in the FAR_ELx register.  But for user-only
225  * mode we do not have a TLB with which to implement this, so we must
226  * remove the top byte now.
227  *
228  * Always return a fresh temporary that we can increment independently
229  * of the write-back address.
230  */
231 
232 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
233 {
234     TCGv_i64 clean = tcg_temp_new_i64();
235 #ifdef CONFIG_USER_ONLY
236     gen_top_byte_ignore(s, clean, addr, s->tbid);
237 #else
238     tcg_gen_mov_i64(clean, addr);
239 #endif
240     return clean;
241 }
242 
243 /* Insert a zero tag into src, with the result at dst. */
244 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
245 {
246     tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
247 }
248 
249 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
250                              MMUAccessType acc, int log2_size)
251 {
252     gen_helper_probe_access(cpu_env, ptr,
253                             tcg_constant_i32(acc),
254                             tcg_constant_i32(get_mem_index(s)),
255                             tcg_constant_i32(1 << log2_size));
256 }
257 
258 /*
259  * For MTE, check a single logical or atomic access.  This probes a single
260  * address, the exact one specified.  The size and alignment of the access
261  * is not relevant to MTE, per se, but watchpoints do require the size,
262  * and we want to recognize those before making any other changes to state.
263  */
264 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
265                                       bool is_write, bool tag_checked,
266                                       int log2_size, bool is_unpriv,
267                                       int core_idx)
268 {
269     if (tag_checked && s->mte_active[is_unpriv]) {
270         TCGv_i64 ret;
271         int desc = 0;
272 
273         desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
274         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
275         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
276         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
277         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1);
278 
279         ret = tcg_temp_new_i64();
280         gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);
281 
282         return ret;
283     }
284     return clean_data_tbi(s, addr);
285 }
286 
287 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
288                         bool tag_checked, int log2_size)
289 {
290     return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
291                                  false, get_mem_index(s));
292 }
293 
294 /*
295  * For MTE, check multiple logical sequential accesses.
296  */
297 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
298                         bool tag_checked, int size)
299 {
300     if (tag_checked && s->mte_active[0]) {
301         TCGv_i64 ret;
302         int desc = 0;
303 
304         desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
305         desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
306         desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
307         desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
308         desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1);
309 
310         ret = tcg_temp_new_i64();
311         gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);
312 
313         return ret;
314     }
315     return clean_data_tbi(s, addr);
316 }
317 
318 typedef struct DisasCompare64 {
319     TCGCond cond;
320     TCGv_i64 value;
321 } DisasCompare64;
322 
323 static void a64_test_cc(DisasCompare64 *c64, int cc)
324 {
325     DisasCompare c32;
326 
327     arm_test_cc(&c32, cc);
328 
329     /*
330      * Sign-extend the 32-bit value so that the GE/LT comparisons work
331      * properly.  The NE/EQ comparisons are also fine with this choice.
332       */
333     c64->cond = c32.cond;
334     c64->value = tcg_temp_new_i64();
335     tcg_gen_ext_i32_i64(c64->value, c32.value);
336 }
337 
338 static void gen_rebuild_hflags(DisasContext *s)
339 {
340     gen_helper_rebuild_hflags_a64(cpu_env, tcg_constant_i32(s->current_el));
341 }
342 
343 static void gen_exception_internal(int excp)
344 {
345     assert(excp_is_internal(excp));
346     gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
347 }
348 
349 static void gen_exception_internal_insn(DisasContext *s, int excp)
350 {
351     gen_a64_update_pc(s, 0);
352     gen_exception_internal(excp);
353     s->base.is_jmp = DISAS_NORETURN;
354 }
355 
356 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
357 {
358     gen_a64_update_pc(s, 0);
359     gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syndrome));
360     s->base.is_jmp = DISAS_NORETURN;
361 }
362 
363 static void gen_step_complete_exception(DisasContext *s)
364 {
365     /* We just completed step of an insn. Move from Active-not-pending
366      * to Active-pending, and then also take the swstep exception.
367      * This corresponds to making the (IMPDEF) choice to prioritize
368      * swstep exceptions over asynchronous exceptions taken to an exception
369      * level where debug is disabled. This choice has the advantage that
370      * we do not need to maintain internal state corresponding to the
371      * ISV/EX syndrome bits between completion of the step and generation
372      * of the exception, and our syndrome information is always correct.
373      */
374     gen_ss_advance(s);
375     gen_swstep_exception(s, 1, s->is_ldex);
376     s->base.is_jmp = DISAS_NORETURN;
377 }
378 
379 static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
380 {
381     if (s->ss_active) {
382         return false;
383     }
384     return translator_use_goto_tb(&s->base, dest);
385 }
386 
387 static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
388 {
389     if (use_goto_tb(s, s->pc_curr + diff)) {
390         /*
391          * For pcrel, the pc must always be up-to-date on entry to
392          * the linked TB, so that it can use simple additions for all
393          * further adjustments.  For !pcrel, the linked TB is compiled
394          * to know its full virtual address, so we can delay the
395          * update to pc to the unlinked path.  A long chain of links
396          * can thus avoid many updates to the PC.
397          */
398         if (tb_cflags(s->base.tb) & CF_PCREL) {
399             gen_a64_update_pc(s, diff);
400             tcg_gen_goto_tb(n);
401         } else {
402             tcg_gen_goto_tb(n);
403             gen_a64_update_pc(s, diff);
404         }
405         tcg_gen_exit_tb(s->base.tb, n);
406         s->base.is_jmp = DISAS_NORETURN;
407     } else {
408         gen_a64_update_pc(s, diff);
409         if (s->ss_active) {
410             gen_step_complete_exception(s);
411         } else {
412             tcg_gen_lookup_and_goto_ptr();
413             s->base.is_jmp = DISAS_NORETURN;
414         }
415     }
416 }
417 
418 /*
419  * Register access functions
420  *
421  * These functions are used for directly accessing a register in where
422  * changes to the final register value are likely to be made. If you
423  * need to use a register for temporary calculation (e.g. index type
424  * operations) use the read_* form.
425  *
426  * B1.2.1 Register mappings
427  *
428  * In instruction register encoding 31 can refer to ZR (zero register) or
429  * the SP (stack pointer) depending on context. In QEMU's case we map SP
430  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
431  * This is the point of the _sp forms.
432  */
433 TCGv_i64 cpu_reg(DisasContext *s, int reg)
434 {
435     if (reg == 31) {
436         TCGv_i64 t = tcg_temp_new_i64();
437         tcg_gen_movi_i64(t, 0);
438         return t;
439     } else {
440         return cpu_X[reg];
441     }
442 }
443 
444 /* register access for when 31 == SP */
445 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
446 {
447     return cpu_X[reg];
448 }
449 
450 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
451  * representing the register contents. This TCGv is an auto-freed
452  * temporary so it need not be explicitly freed, and may be modified.
453  */
454 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
455 {
456     TCGv_i64 v = tcg_temp_new_i64();
457     if (reg != 31) {
458         if (sf) {
459             tcg_gen_mov_i64(v, cpu_X[reg]);
460         } else {
461             tcg_gen_ext32u_i64(v, cpu_X[reg]);
462         }
463     } else {
464         tcg_gen_movi_i64(v, 0);
465     }
466     return v;
467 }
468 
469 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
470 {
471     TCGv_i64 v = tcg_temp_new_i64();
472     if (sf) {
473         tcg_gen_mov_i64(v, cpu_X[reg]);
474     } else {
475         tcg_gen_ext32u_i64(v, cpu_X[reg]);
476     }
477     return v;
478 }
479 
480 /* Return the offset into CPUARMState of a slice (from
481  * the least significant end) of FP register Qn (ie
482  * Dn, Sn, Hn or Bn).
483  * (Note that this is not the same mapping as for A32; see cpu.h)
484  */
485 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
486 {
487     return vec_reg_offset(s, regno, 0, size);
488 }
489 
490 /* Offset of the high half of the 128 bit vector Qn */
491 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
492 {
493     return vec_reg_offset(s, regno, 1, MO_64);
494 }
495 
496 /* Convenience accessors for reading and writing single and double
497  * FP registers. Writing clears the upper parts of the associated
498  * 128 bit vector register, as required by the architecture.
499  * Note that unlike the GP register accessors, the values returned
500  * by the read functions must be manually freed.
501  */
502 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
503 {
504     TCGv_i64 v = tcg_temp_new_i64();
505 
506     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
507     return v;
508 }
509 
510 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
511 {
512     TCGv_i32 v = tcg_temp_new_i32();
513 
514     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
515     return v;
516 }
517 
518 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
519 {
520     TCGv_i32 v = tcg_temp_new_i32();
521 
522     tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
523     return v;
524 }
525 
526 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
527  * If SVE is not enabled, then there are only 128 bits in the vector.
528  */
529 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
530 {
531     unsigned ofs = fp_reg_offset(s, rd, MO_64);
532     unsigned vsz = vec_full_reg_size(s);
533 
534     /* Nop move, with side effect of clearing the tail. */
535     tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
536 }
537 
538 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
539 {
540     unsigned ofs = fp_reg_offset(s, reg, MO_64);
541 
542     tcg_gen_st_i64(v, cpu_env, ofs);
543     clear_vec_high(s, false, reg);
544 }
545 
546 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
547 {
548     TCGv_i64 tmp = tcg_temp_new_i64();
549 
550     tcg_gen_extu_i32_i64(tmp, v);
551     write_fp_dreg(s, reg, tmp);
552 }
553 
554 /* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
555 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
556                          GVecGen2Fn *gvec_fn, int vece)
557 {
558     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
559             is_q ? 16 : 8, vec_full_reg_size(s));
560 }
561 
562 /* Expand a 2-operand + immediate AdvSIMD vector operation using
563  * an expander function.
564  */
565 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
566                           int64_t imm, GVecGen2iFn *gvec_fn, int vece)
567 {
568     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
569             imm, is_q ? 16 : 8, vec_full_reg_size(s));
570 }
571 
572 /* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
573 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
574                          GVecGen3Fn *gvec_fn, int vece)
575 {
576     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
577             vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
578 }
579 
580 /* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
581 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
582                          int rx, GVecGen4Fn *gvec_fn, int vece)
583 {
584     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
585             vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
586             is_q ? 16 : 8, vec_full_reg_size(s));
587 }
588 
589 /* Expand a 2-operand operation using an out-of-line helper.  */
590 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
591                              int rn, int data, gen_helper_gvec_2 *fn)
592 {
593     tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
594                        vec_full_reg_offset(s, rn),
595                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
596 }
597 
598 /* Expand a 3-operand operation using an out-of-line helper.  */
599 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
600                              int rn, int rm, int data, gen_helper_gvec_3 *fn)
601 {
602     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
603                        vec_full_reg_offset(s, rn),
604                        vec_full_reg_offset(s, rm),
605                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
606 }
607 
608 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
609  * an out-of-line helper.
610  */
611 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
612                               int rm, bool is_fp16, int data,
613                               gen_helper_gvec_3_ptr *fn)
614 {
615     TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
616     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
617                        vec_full_reg_offset(s, rn),
618                        vec_full_reg_offset(s, rm), fpst,
619                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
620 }
621 
622 /* Expand a 3-operand + qc + operation using an out-of-line helper.  */
623 static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
624                             int rm, gen_helper_gvec_3_ptr *fn)
625 {
626     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
627 
628     tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
629     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
630                        vec_full_reg_offset(s, rn),
631                        vec_full_reg_offset(s, rm), qc_ptr,
632                        is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
633 }
634 
635 /* Expand a 4-operand operation using an out-of-line helper.  */
636 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
637                              int rm, int ra, int data, gen_helper_gvec_4 *fn)
638 {
639     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
640                        vec_full_reg_offset(s, rn),
641                        vec_full_reg_offset(s, rm),
642                        vec_full_reg_offset(s, ra),
643                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
644 }
645 
646 /*
647  * Expand a 4-operand + fpstatus pointer + simd data value operation using
648  * an out-of-line helper.
649  */
650 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
651                               int rm, int ra, bool is_fp16, int data,
652                               gen_helper_gvec_4_ptr *fn)
653 {
654     TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
655     tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
656                        vec_full_reg_offset(s, rn),
657                        vec_full_reg_offset(s, rm),
658                        vec_full_reg_offset(s, ra), fpst,
659                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
660 }
661 
662 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
663  * than the 32 bit equivalent.
664  */
665 static inline void gen_set_NZ64(TCGv_i64 result)
666 {
667     tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
668     tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
669 }
670 
671 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
672 static inline void gen_logic_CC(int sf, TCGv_i64 result)
673 {
674     if (sf) {
675         gen_set_NZ64(result);
676     } else {
677         tcg_gen_extrl_i64_i32(cpu_ZF, result);
678         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
679     }
680     tcg_gen_movi_i32(cpu_CF, 0);
681     tcg_gen_movi_i32(cpu_VF, 0);
682 }
683 
684 /* dest = T0 + T1; compute C, N, V and Z flags */
685 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
686 {
687     TCGv_i64 result, flag, tmp;
688     result = tcg_temp_new_i64();
689     flag = tcg_temp_new_i64();
690     tmp = tcg_temp_new_i64();
691 
692     tcg_gen_movi_i64(tmp, 0);
693     tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
694 
695     tcg_gen_extrl_i64_i32(cpu_CF, flag);
696 
697     gen_set_NZ64(result);
698 
699     tcg_gen_xor_i64(flag, result, t0);
700     tcg_gen_xor_i64(tmp, t0, t1);
701     tcg_gen_andc_i64(flag, flag, tmp);
702     tcg_gen_extrh_i64_i32(cpu_VF, flag);
703 
704     tcg_gen_mov_i64(dest, result);
705 }
706 
707 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
708 {
709     TCGv_i32 t0_32 = tcg_temp_new_i32();
710     TCGv_i32 t1_32 = tcg_temp_new_i32();
711     TCGv_i32 tmp = tcg_temp_new_i32();
712 
713     tcg_gen_movi_i32(tmp, 0);
714     tcg_gen_extrl_i64_i32(t0_32, t0);
715     tcg_gen_extrl_i64_i32(t1_32, t1);
716     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
717     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
718     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
719     tcg_gen_xor_i32(tmp, t0_32, t1_32);
720     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
721     tcg_gen_extu_i32_i64(dest, cpu_NF);
722 }
723 
724 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
725 {
726     if (sf) {
727         gen_add64_CC(dest, t0, t1);
728     } else {
729         gen_add32_CC(dest, t0, t1);
730     }
731 }
732 
733 /* dest = T0 - T1; compute C, N, V and Z flags */
734 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
735 {
736     /* 64 bit arithmetic */
737     TCGv_i64 result, flag, tmp;
738 
739     result = tcg_temp_new_i64();
740     flag = tcg_temp_new_i64();
741     tcg_gen_sub_i64(result, t0, t1);
742 
743     gen_set_NZ64(result);
744 
745     tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
746     tcg_gen_extrl_i64_i32(cpu_CF, flag);
747 
748     tcg_gen_xor_i64(flag, result, t0);
749     tmp = tcg_temp_new_i64();
750     tcg_gen_xor_i64(tmp, t0, t1);
751     tcg_gen_and_i64(flag, flag, tmp);
752     tcg_gen_extrh_i64_i32(cpu_VF, flag);
753     tcg_gen_mov_i64(dest, result);
754 }
755 
756 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
757 {
758     /* 32 bit arithmetic */
759     TCGv_i32 t0_32 = tcg_temp_new_i32();
760     TCGv_i32 t1_32 = tcg_temp_new_i32();
761     TCGv_i32 tmp;
762 
763     tcg_gen_extrl_i64_i32(t0_32, t0);
764     tcg_gen_extrl_i64_i32(t1_32, t1);
765     tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
766     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
767     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
768     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
769     tmp = tcg_temp_new_i32();
770     tcg_gen_xor_i32(tmp, t0_32, t1_32);
771     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
772     tcg_gen_extu_i32_i64(dest, cpu_NF);
773 }
774 
775 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
776 {
777     if (sf) {
778         gen_sub64_CC(dest, t0, t1);
779     } else {
780         gen_sub32_CC(dest, t0, t1);
781     }
782 }
783 
784 /* dest = T0 + T1 + CF; do not compute flags. */
785 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
786 {
787     TCGv_i64 flag = tcg_temp_new_i64();
788     tcg_gen_extu_i32_i64(flag, cpu_CF);
789     tcg_gen_add_i64(dest, t0, t1);
790     tcg_gen_add_i64(dest, dest, flag);
791 
792     if (!sf) {
793         tcg_gen_ext32u_i64(dest, dest);
794     }
795 }
796 
797 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
798 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
799 {
800     if (sf) {
801         TCGv_i64 result = tcg_temp_new_i64();
802         TCGv_i64 cf_64 = tcg_temp_new_i64();
803         TCGv_i64 vf_64 = tcg_temp_new_i64();
804         TCGv_i64 tmp = tcg_temp_new_i64();
805         TCGv_i64 zero = tcg_constant_i64(0);
806 
807         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
808         tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
809         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
810         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
811         gen_set_NZ64(result);
812 
813         tcg_gen_xor_i64(vf_64, result, t0);
814         tcg_gen_xor_i64(tmp, t0, t1);
815         tcg_gen_andc_i64(vf_64, vf_64, tmp);
816         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
817 
818         tcg_gen_mov_i64(dest, result);
819     } else {
820         TCGv_i32 t0_32 = tcg_temp_new_i32();
821         TCGv_i32 t1_32 = tcg_temp_new_i32();
822         TCGv_i32 tmp = tcg_temp_new_i32();
823         TCGv_i32 zero = tcg_constant_i32(0);
824 
825         tcg_gen_extrl_i64_i32(t0_32, t0);
826         tcg_gen_extrl_i64_i32(t1_32, t1);
827         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
828         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);
829 
830         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
831         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
832         tcg_gen_xor_i32(tmp, t0_32, t1_32);
833         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
834         tcg_gen_extu_i32_i64(dest, cpu_NF);
835     }
836 }
837 
838 /*
839  * Load/Store generators
840  */
841 
842 /*
843  * Store from GPR register to memory.
844  */
845 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
846                              TCGv_i64 tcg_addr, MemOp memop, int memidx,
847                              bool iss_valid,
848                              unsigned int iss_srt,
849                              bool iss_sf, bool iss_ar)
850 {
851     memop = finalize_memop(s, memop);
852     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
853 
854     if (iss_valid) {
855         uint32_t syn;
856 
857         syn = syn_data_abort_with_iss(0,
858                                       (memop & MO_SIZE),
859                                       false,
860                                       iss_srt,
861                                       iss_sf,
862                                       iss_ar,
863                                       0, 0, 0, 0, 0, false);
864         disas_set_insn_syndrome(s, syn);
865     }
866 }
867 
868 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
869                       TCGv_i64 tcg_addr, MemOp memop,
870                       bool iss_valid,
871                       unsigned int iss_srt,
872                       bool iss_sf, bool iss_ar)
873 {
874     do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
875                      iss_valid, iss_srt, iss_sf, iss_ar);
876 }
877 
878 /*
879  * Load from memory to GPR register
880  */
881 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
882                              MemOp memop, bool extend, int memidx,
883                              bool iss_valid, unsigned int iss_srt,
884                              bool iss_sf, bool iss_ar)
885 {
886     memop = finalize_memop(s, memop);
887     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
888 
889     if (extend && (memop & MO_SIGN)) {
890         g_assert((memop & MO_SIZE) <= MO_32);
891         tcg_gen_ext32u_i64(dest, dest);
892     }
893 
894     if (iss_valid) {
895         uint32_t syn;
896 
897         syn = syn_data_abort_with_iss(0,
898                                       (memop & MO_SIZE),
899                                       (memop & MO_SIGN) != 0,
900                                       iss_srt,
901                                       iss_sf,
902                                       iss_ar,
903                                       0, 0, 0, 0, 0, false);
904         disas_set_insn_syndrome(s, syn);
905     }
906 }
907 
908 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
909                       MemOp memop, bool extend,
910                       bool iss_valid, unsigned int iss_srt,
911                       bool iss_sf, bool iss_ar)
912 {
913     do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
914                      iss_valid, iss_srt, iss_sf, iss_ar);
915 }
916 
917 /*
918  * Store from FP register to memory
919  */
920 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
921 {
922     /* This writes the bottom N bits of a 128 bit wide vector to memory */
923     TCGv_i64 tmplo = tcg_temp_new_i64();
924     MemOp mop;
925 
926     tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));
927 
928     if (size < 4) {
929         mop = finalize_memop(s, size);
930         tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
931     } else {
932         bool be = s->be_data == MO_BE;
933         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
934         TCGv_i64 tmphi = tcg_temp_new_i64();
935 
936         tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));
937 
938         mop = s->be_data | MO_UQ;
939         tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
940                             mop | (s->align_mem ? MO_ALIGN_16 : 0));
941         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
942         tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr,
943                             get_mem_index(s), mop);
944     }
945 }
946 
947 /*
948  * Load from memory to FP register
949  */
950 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
951 {
952     /* This always zero-extends and writes to a full 128 bit wide vector */
953     TCGv_i64 tmplo = tcg_temp_new_i64();
954     TCGv_i64 tmphi = NULL;
955     MemOp mop;
956 
957     if (size < 4) {
958         mop = finalize_memop(s, size);
959         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
960     } else {
961         bool be = s->be_data == MO_BE;
962         TCGv_i64 tcg_hiaddr;
963 
964         tmphi = tcg_temp_new_i64();
965         tcg_hiaddr = tcg_temp_new_i64();
966 
967         mop = s->be_data | MO_UQ;
968         tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
969                             mop | (s->align_mem ? MO_ALIGN_16 : 0));
970         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
971         tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr,
972                             get_mem_index(s), mop);
973     }
974 
975     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
976 
977     if (tmphi) {
978         tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
979     }
980     clear_vec_high(s, tmphi != NULL, destidx);
981 }
982 
983 /*
984  * Vector load/store helpers.
985  *
986  * The principal difference between this and a FP load is that we don't
987  * zero extend as we are filling a partial chunk of the vector register.
988  * These functions don't support 128 bit loads/stores, which would be
989  * normal load/store operations.
990  *
991  * The _i32 versions are useful when operating on 32 bit quantities
992  * (eg for floating point single or using Neon helper functions).
993  */
994 
995 /* Get value of an element within a vector register */
996 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
997                              int element, MemOp memop)
998 {
999     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1000     switch ((unsigned)memop) {
1001     case MO_8:
1002         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
1003         break;
1004     case MO_16:
1005         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
1006         break;
1007     case MO_32:
1008         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
1009         break;
1010     case MO_8|MO_SIGN:
1011         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
1012         break;
1013     case MO_16|MO_SIGN:
1014         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
1015         break;
1016     case MO_32|MO_SIGN:
1017         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
1018         break;
1019     case MO_64:
1020     case MO_64|MO_SIGN:
1021         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
1022         break;
1023     default:
1024         g_assert_not_reached();
1025     }
1026 }
1027 
1028 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1029                                  int element, MemOp memop)
1030 {
1031     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1032     switch (memop) {
1033     case MO_8:
1034         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
1035         break;
1036     case MO_16:
1037         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
1038         break;
1039     case MO_8|MO_SIGN:
1040         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
1041         break;
1042     case MO_16|MO_SIGN:
1043         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
1044         break;
1045     case MO_32:
1046     case MO_32|MO_SIGN:
1047         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
1048         break;
1049     default:
1050         g_assert_not_reached();
1051     }
1052 }
1053 
1054 /* Set value of an element within a vector register */
1055 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1056                               int element, MemOp memop)
1057 {
1058     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1059     switch (memop) {
1060     case MO_8:
1061         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1062         break;
1063     case MO_16:
1064         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1065         break;
1066     case MO_32:
1067         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1068         break;
1069     case MO_64:
1070         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1071         break;
1072     default:
1073         g_assert_not_reached();
1074     }
1075 }
1076 
1077 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1078                                   int destidx, int element, MemOp memop)
1079 {
1080     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1081     switch (memop) {
1082     case MO_8:
1083         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1084         break;
1085     case MO_16:
1086         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1087         break;
1088     case MO_32:
1089         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1090         break;
1091     default:
1092         g_assert_not_reached();
1093     }
1094 }
1095 
1096 /* Store from vector register to memory */
1097 static void do_vec_st(DisasContext *s, int srcidx, int element,
1098                       TCGv_i64 tcg_addr, MemOp mop)
1099 {
1100     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1101 
1102     read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
1103     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1104 }
1105 
1106 /* Load from memory to vector register */
1107 static void do_vec_ld(DisasContext *s, int destidx, int element,
1108                       TCGv_i64 tcg_addr, MemOp mop)
1109 {
1110     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1111 
1112     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1113     write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
1114 }
1115 
1116 /* Check that FP/Neon access is enabled. If it is, return
1117  * true. If not, emit code to generate an appropriate exception,
1118  * and return false; the caller should not emit any code for
1119  * the instruction. Note that this check must happen after all
1120  * unallocated-encoding checks (otherwise the syndrome information
1121  * for the resulting exception will be incorrect).
1122  */
1123 static bool fp_access_check_only(DisasContext *s)
1124 {
1125     if (s->fp_excp_el) {
1126         assert(!s->fp_access_checked);
1127         s->fp_access_checked = true;
1128 
1129         gen_exception_insn_el(s, 0, EXCP_UDEF,
1130                               syn_fp_access_trap(1, 0xe, false, 0),
1131                               s->fp_excp_el);
1132         return false;
1133     }
1134     s->fp_access_checked = true;
1135     return true;
1136 }
1137 
1138 static bool fp_access_check(DisasContext *s)
1139 {
1140     if (!fp_access_check_only(s)) {
1141         return false;
1142     }
1143     if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
1144         gen_exception_insn(s, 0, EXCP_UDEF,
1145                            syn_smetrap(SME_ET_Streaming, false));
1146         return false;
1147     }
1148     return true;
1149 }
1150 
1151 /*
1152  * Check that SVE access is enabled.  If it is, return true.
1153  * If not, emit code to generate an appropriate exception and return false.
1154  * This function corresponds to CheckSVEEnabled().
1155  */
1156 bool sve_access_check(DisasContext *s)
1157 {
1158     if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
1159         assert(dc_isar_feature(aa64_sme, s));
1160         if (!sme_sm_enabled_check(s)) {
1161             goto fail_exit;
1162         }
1163     } else if (s->sve_excp_el) {
1164         gen_exception_insn_el(s, 0, EXCP_UDEF,
1165                               syn_sve_access_trap(), s->sve_excp_el);
1166         goto fail_exit;
1167     }
1168     s->sve_access_checked = true;
1169     return fp_access_check(s);
1170 
1171  fail_exit:
1172     /* Assert that we only raise one exception per instruction. */
1173     assert(!s->sve_access_checked);
1174     s->sve_access_checked = true;
1175     return false;
1176 }
1177 
1178 /*
1179  * Check that SME access is enabled, raise an exception if not.
1180  * Note that this function corresponds to CheckSMEAccess and is
1181  * only used directly for cpregs.
1182  */
1183 static bool sme_access_check(DisasContext *s)
1184 {
1185     if (s->sme_excp_el) {
1186         gen_exception_insn_el(s, 0, EXCP_UDEF,
1187                               syn_smetrap(SME_ET_AccessTrap, false),
1188                               s->sme_excp_el);
1189         return false;
1190     }
1191     return true;
1192 }
1193 
1194 /* This function corresponds to CheckSMEEnabled. */
1195 bool sme_enabled_check(DisasContext *s)
1196 {
1197     /*
1198      * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1199      * to be zero when fp_excp_el has priority.  This is because we need
1200      * sme_excp_el by itself for cpregs access checks.
1201      */
1202     if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
1203         s->fp_access_checked = true;
1204         return sme_access_check(s);
1205     }
1206     return fp_access_check_only(s);
1207 }
1208 
1209 /* Common subroutine for CheckSMEAnd*Enabled. */
1210 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
1211 {
1212     if (!sme_enabled_check(s)) {
1213         return false;
1214     }
1215     if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
1216         gen_exception_insn(s, 0, EXCP_UDEF,
1217                            syn_smetrap(SME_ET_NotStreaming, false));
1218         return false;
1219     }
1220     if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
1221         gen_exception_insn(s, 0, EXCP_UDEF,
1222                            syn_smetrap(SME_ET_InactiveZA, false));
1223         return false;
1224     }
1225     return true;
1226 }
1227 
1228 /*
1229  * This utility function is for doing register extension with an
1230  * optional shift. You will likely want to pass a temporary for the
1231  * destination register. See DecodeRegExtend() in the ARM ARM.
1232  */
1233 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1234                               int option, unsigned int shift)
1235 {
1236     int extsize = extract32(option, 0, 2);
1237     bool is_signed = extract32(option, 2, 1);
1238 
1239     if (is_signed) {
1240         switch (extsize) {
1241         case 0:
1242             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1243             break;
1244         case 1:
1245             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1246             break;
1247         case 2:
1248             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1249             break;
1250         case 3:
1251             tcg_gen_mov_i64(tcg_out, tcg_in);
1252             break;
1253         }
1254     } else {
1255         switch (extsize) {
1256         case 0:
1257             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1258             break;
1259         case 1:
1260             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1261             break;
1262         case 2:
1263             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1264             break;
1265         case 3:
1266             tcg_gen_mov_i64(tcg_out, tcg_in);
1267             break;
1268         }
1269     }
1270 
1271     if (shift) {
1272         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1273     }
1274 }
1275 
1276 static inline void gen_check_sp_alignment(DisasContext *s)
1277 {
1278     /* The AArch64 architecture mandates that (if enabled via PSTATE
1279      * or SCTLR bits) there is a check that SP is 16-aligned on every
1280      * SP-relative load or store (with an exception generated if it is not).
1281      * In line with general QEMU practice regarding misaligned accesses,
1282      * we omit these checks for the sake of guest program performance.
1283      * This function is provided as a hook so we can more easily add these
1284      * checks in future (possibly as a "favour catching guest program bugs
1285      * over speed" user selectable option).
1286      */
1287 }
1288 
1289 /*
1290  * This provides a simple table based table lookup decoder. It is
1291  * intended to be used when the relevant bits for decode are too
1292  * awkwardly placed and switch/if based logic would be confusing and
1293  * deeply nested. Since it's a linear search through the table, tables
1294  * should be kept small.
1295  *
1296  * It returns the first handler where insn & mask == pattern, or
1297  * NULL if there is no match.
1298  * The table is terminated by an empty mask (i.e. 0)
1299  */
1300 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1301                                                uint32_t insn)
1302 {
1303     const AArch64DecodeTable *tptr = table;
1304 
1305     while (tptr->mask) {
1306         if ((insn & tptr->mask) == tptr->pattern) {
1307             return tptr->disas_fn;
1308         }
1309         tptr++;
1310     }
1311     return NULL;
1312 }
1313 
1314 /*
1315  * The instruction disassembly implemented here matches
1316  * the instruction encoding classifications in chapter C4
1317  * of the ARM Architecture Reference Manual (DDI0487B_a);
1318  * classification names and decode diagrams here should generally
1319  * match up with those in the manual.
1320  */
1321 
1322 static bool trans_B(DisasContext *s, arg_i *a)
1323 {
1324     reset_btype(s);
1325     gen_goto_tb(s, 0, a->imm);
1326     return true;
1327 }
1328 
1329 static bool trans_BL(DisasContext *s, arg_i *a)
1330 {
1331     gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
1332     reset_btype(s);
1333     gen_goto_tb(s, 0, a->imm);
1334     return true;
1335 }
1336 
1337 
1338 static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1339 {
1340     DisasLabel match;
1341     TCGv_i64 tcg_cmp;
1342 
1343     tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1344     reset_btype(s);
1345 
1346     match = gen_disas_label(s);
1347     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1348                         tcg_cmp, 0, match.label);
1349     gen_goto_tb(s, 0, 4);
1350     set_disas_label(s, match);
1351     gen_goto_tb(s, 1, a->imm);
1352     return true;
1353 }
1354 
1355 static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1356 {
1357     DisasLabel match;
1358     TCGv_i64 tcg_cmp;
1359 
1360     tcg_cmp = tcg_temp_new_i64();
1361     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1362 
1363     reset_btype(s);
1364 
1365     match = gen_disas_label(s);
1366     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1367                         tcg_cmp, 0, match.label);
1368     gen_goto_tb(s, 0, 4);
1369     set_disas_label(s, match);
1370     gen_goto_tb(s, 1, a->imm);
1371     return true;
1372 }
1373 
1374 static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1375 {
1376     reset_btype(s);
1377     if (a->cond < 0x0e) {
1378         /* genuinely conditional branches */
1379         DisasLabel match = gen_disas_label(s);
1380         arm_gen_test_cc(a->cond, match.label);
1381         gen_goto_tb(s, 0, 4);
1382         set_disas_label(s, match);
1383         gen_goto_tb(s, 1, a->imm);
1384     } else {
1385         /* 0xe and 0xf are both "always" conditions */
1386         gen_goto_tb(s, 0, a->imm);
1387     }
1388     return true;
1389 }
1390 
1391 static void set_btype_for_br(DisasContext *s, int rn)
1392 {
1393     if (dc_isar_feature(aa64_bti, s)) {
1394         /* BR to {x16,x17} or !guard -> 1, else 3.  */
1395         set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
1396     }
1397 }
1398 
1399 static void set_btype_for_blr(DisasContext *s)
1400 {
1401     if (dc_isar_feature(aa64_bti, s)) {
1402         /* BLR sets BTYPE to 2, regardless of source guarded page.  */
1403         set_btype(s, 2);
1404     }
1405 }
1406 
1407 static bool trans_BR(DisasContext *s, arg_r *a)
1408 {
1409     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1410     set_btype_for_br(s, a->rn);
1411     s->base.is_jmp = DISAS_JUMP;
1412     return true;
1413 }
1414 
1415 static bool trans_BLR(DisasContext *s, arg_r *a)
1416 {
1417     TCGv_i64 dst = cpu_reg(s, a->rn);
1418     TCGv_i64 lr = cpu_reg(s, 30);
1419     if (dst == lr) {
1420         TCGv_i64 tmp = tcg_temp_new_i64();
1421         tcg_gen_mov_i64(tmp, dst);
1422         dst = tmp;
1423     }
1424     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1425     gen_a64_set_pc(s, dst);
1426     set_btype_for_blr(s);
1427     s->base.is_jmp = DISAS_JUMP;
1428     return true;
1429 }
1430 
1431 static bool trans_RET(DisasContext *s, arg_r *a)
1432 {
1433     gen_a64_set_pc(s, cpu_reg(s, a->rn));
1434     s->base.is_jmp = DISAS_JUMP;
1435     return true;
1436 }
1437 
1438 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1439                                    TCGv_i64 modifier, bool use_key_a)
1440 {
1441     TCGv_i64 truedst;
1442     /*
1443      * Return the branch target for a BRAA/RETA/etc, which is either
1444      * just the destination dst, or that value with the pauth check
1445      * done and the code removed from the high bits.
1446      */
1447     if (!s->pauth_active) {
1448         return dst;
1449     }
1450 
1451     truedst = tcg_temp_new_i64();
1452     if (use_key_a) {
1453         gen_helper_autia(truedst, cpu_env, dst, modifier);
1454     } else {
1455         gen_helper_autib(truedst, cpu_env, dst, modifier);
1456     }
1457     return truedst;
1458 }
1459 
1460 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1461 {
1462     TCGv_i64 dst;
1463 
1464     if (!dc_isar_feature(aa64_pauth, s)) {
1465         return false;
1466     }
1467 
1468     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1469     gen_a64_set_pc(s, dst);
1470     set_btype_for_br(s, a->rn);
1471     s->base.is_jmp = DISAS_JUMP;
1472     return true;
1473 }
1474 
1475 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1476 {
1477     TCGv_i64 dst, lr;
1478 
1479     if (!dc_isar_feature(aa64_pauth, s)) {
1480         return false;
1481     }
1482 
1483     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1484     lr = cpu_reg(s, 30);
1485     if (dst == lr) {
1486         TCGv_i64 tmp = tcg_temp_new_i64();
1487         tcg_gen_mov_i64(tmp, dst);
1488         dst = tmp;
1489     }
1490     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1491     gen_a64_set_pc(s, dst);
1492     set_btype_for_blr(s);
1493     s->base.is_jmp = DISAS_JUMP;
1494     return true;
1495 }
1496 
1497 static bool trans_RETA(DisasContext *s, arg_reta *a)
1498 {
1499     TCGv_i64 dst;
1500 
1501     dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1502     gen_a64_set_pc(s, dst);
1503     s->base.is_jmp = DISAS_JUMP;
1504     return true;
1505 }
1506 
1507 static bool trans_BRA(DisasContext *s, arg_bra *a)
1508 {
1509     TCGv_i64 dst;
1510 
1511     if (!dc_isar_feature(aa64_pauth, s)) {
1512         return false;
1513     }
1514     dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m);
1515     gen_a64_set_pc(s, dst);
1516     set_btype_for_br(s, a->rn);
1517     s->base.is_jmp = DISAS_JUMP;
1518     return true;
1519 }
1520 
1521 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1522 {
1523     TCGv_i64 dst, lr;
1524 
1525     if (!dc_isar_feature(aa64_pauth, s)) {
1526         return false;
1527     }
1528     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1529     lr = cpu_reg(s, 30);
1530     if (dst == lr) {
1531         TCGv_i64 tmp = tcg_temp_new_i64();
1532         tcg_gen_mov_i64(tmp, dst);
1533         dst = tmp;
1534     }
1535     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1536     gen_a64_set_pc(s, dst);
1537     set_btype_for_blr(s);
1538     s->base.is_jmp = DISAS_JUMP;
1539     return true;
1540 }
1541 
1542 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1543 {
1544     TCGv_i64 dst;
1545 
1546     if (s->current_el == 0) {
1547         return false;
1548     }
1549     if (s->fgt_eret) {
1550         gen_exception_insn_el(s, 0, EXCP_UDEF, 0, 2);
1551         return true;
1552     }
1553     dst = tcg_temp_new_i64();
1554     tcg_gen_ld_i64(dst, cpu_env,
1555                    offsetof(CPUARMState, elr_el[s->current_el]));
1556 
1557     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
1558         gen_io_start();
1559     }
1560 
1561     gen_helper_exception_return(cpu_env, dst);
1562     /* Must exit loop to check un-masked IRQs */
1563     s->base.is_jmp = DISAS_EXIT;
1564     return true;
1565 }
1566 
1567 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1568 {
1569     TCGv_i64 dst;
1570 
1571     if (!dc_isar_feature(aa64_pauth, s)) {
1572         return false;
1573     }
1574     if (s->current_el == 0) {
1575         return false;
1576     }
1577     /* The FGT trap takes precedence over an auth trap. */
1578     if (s->fgt_eret) {
1579         gen_exception_insn_el(s, 0, EXCP_UDEF, a->m ? 3 : 2, 2);
1580         return true;
1581     }
1582     dst = tcg_temp_new_i64();
1583     tcg_gen_ld_i64(dst, cpu_env,
1584                    offsetof(CPUARMState, elr_el[s->current_el]));
1585 
1586     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1587     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
1588         gen_io_start();
1589     }
1590 
1591     gen_helper_exception_return(cpu_env, dst);
1592     /* Must exit loop to check un-masked IRQs */
1593     s->base.is_jmp = DISAS_EXIT;
1594     return true;
1595 }
1596 
1597 /* HINT instruction group, including various allocated HINTs */
1598 static void handle_hint(DisasContext *s, uint32_t insn,
1599                         unsigned int op1, unsigned int op2, unsigned int crm)
1600 {
1601     unsigned int selector = crm << 3 | op2;
1602 
1603     if (op1 != 3) {
1604         unallocated_encoding(s);
1605         return;
1606     }
1607 
1608     switch (selector) {
1609     case 0b00000: /* NOP */
1610         break;
1611     case 0b00011: /* WFI */
1612         s->base.is_jmp = DISAS_WFI;
1613         break;
1614     case 0b00001: /* YIELD */
1615         /* When running in MTTCG we don't generate jumps to the yield and
1616          * WFE helpers as it won't affect the scheduling of other vCPUs.
1617          * If we wanted to more completely model WFE/SEV so we don't busy
1618          * spin unnecessarily we would need to do something more involved.
1619          */
1620         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1621             s->base.is_jmp = DISAS_YIELD;
1622         }
1623         break;
1624     case 0b00010: /* WFE */
1625         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1626             s->base.is_jmp = DISAS_WFE;
1627         }
1628         break;
1629     case 0b00100: /* SEV */
1630     case 0b00101: /* SEVL */
1631     case 0b00110: /* DGH */
1632         /* we treat all as NOP at least for now */
1633         break;
1634     case 0b00111: /* XPACLRI */
1635         if (s->pauth_active) {
1636             gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
1637         }
1638         break;
1639     case 0b01000: /* PACIA1716 */
1640         if (s->pauth_active) {
1641             gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1642         }
1643         break;
1644     case 0b01010: /* PACIB1716 */
1645         if (s->pauth_active) {
1646             gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1647         }
1648         break;
1649     case 0b01100: /* AUTIA1716 */
1650         if (s->pauth_active) {
1651             gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1652         }
1653         break;
1654     case 0b01110: /* AUTIB1716 */
1655         if (s->pauth_active) {
1656             gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1657         }
1658         break;
1659     case 0b10000: /* ESB */
1660         /* Without RAS, we must implement this as NOP. */
1661         if (dc_isar_feature(aa64_ras, s)) {
1662             /*
1663              * QEMU does not have a source of physical SErrors,
1664              * so we are only concerned with virtual SErrors.
1665              * The pseudocode in the ARM for this case is
1666              *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
1667              *      AArch64.vESBOperation();
1668              * Most of the condition can be evaluated at translation time.
1669              * Test for EL2 present, and defer test for SEL2 to runtime.
1670              */
1671             if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
1672                 gen_helper_vesb(cpu_env);
1673             }
1674         }
1675         break;
1676     case 0b11000: /* PACIAZ */
1677         if (s->pauth_active) {
1678             gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
1679                              tcg_constant_i64(0));
1680         }
1681         break;
1682     case 0b11001: /* PACIASP */
1683         if (s->pauth_active) {
1684             gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1685         }
1686         break;
1687     case 0b11010: /* PACIBZ */
1688         if (s->pauth_active) {
1689             gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
1690                              tcg_constant_i64(0));
1691         }
1692         break;
1693     case 0b11011: /* PACIBSP */
1694         if (s->pauth_active) {
1695             gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1696         }
1697         break;
1698     case 0b11100: /* AUTIAZ */
1699         if (s->pauth_active) {
1700             gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
1701                              tcg_constant_i64(0));
1702         }
1703         break;
1704     case 0b11101: /* AUTIASP */
1705         if (s->pauth_active) {
1706             gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1707         }
1708         break;
1709     case 0b11110: /* AUTIBZ */
1710         if (s->pauth_active) {
1711             gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
1712                              tcg_constant_i64(0));
1713         }
1714         break;
1715     case 0b11111: /* AUTIBSP */
1716         if (s->pauth_active) {
1717             gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1718         }
1719         break;
1720     default:
1721         /* default specified as NOP equivalent */
1722         break;
1723     }
1724 }
1725 
1726 static void gen_clrex(DisasContext *s, uint32_t insn)
1727 {
1728     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1729 }
1730 
1731 /* CLREX, DSB, DMB, ISB */
1732 static void handle_sync(DisasContext *s, uint32_t insn,
1733                         unsigned int op1, unsigned int op2, unsigned int crm)
1734 {
1735     TCGBar bar;
1736 
1737     if (op1 != 3) {
1738         unallocated_encoding(s);
1739         return;
1740     }
1741 
1742     switch (op2) {
1743     case 2: /* CLREX */
1744         gen_clrex(s, insn);
1745         return;
1746     case 4: /* DSB */
1747     case 5: /* DMB */
1748         switch (crm & 3) {
1749         case 1: /* MBReqTypes_Reads */
1750             bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1751             break;
1752         case 2: /* MBReqTypes_Writes */
1753             bar = TCG_BAR_SC | TCG_MO_ST_ST;
1754             break;
1755         default: /* MBReqTypes_All */
1756             bar = TCG_BAR_SC | TCG_MO_ALL;
1757             break;
1758         }
1759         tcg_gen_mb(bar);
1760         return;
1761     case 6: /* ISB */
1762         /* We need to break the TB after this insn to execute
1763          * a self-modified code correctly and also to take
1764          * any pending interrupts immediately.
1765          */
1766         reset_btype(s);
1767         gen_goto_tb(s, 0, 4);
1768         return;
1769 
1770     case 7: /* SB */
1771         if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
1772             goto do_unallocated;
1773         }
1774         /*
1775          * TODO: There is no speculation barrier opcode for TCG;
1776          * MB and end the TB instead.
1777          */
1778         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1779         gen_goto_tb(s, 0, 4);
1780         return;
1781 
1782     default:
1783     do_unallocated:
1784         unallocated_encoding(s);
1785         return;
1786     }
1787 }
1788 
1789 static void gen_xaflag(void)
1790 {
1791     TCGv_i32 z = tcg_temp_new_i32();
1792 
1793     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1794 
1795     /*
1796      * (!C & !Z) << 31
1797      * (!(C | Z)) << 31
1798      * ~((C | Z) << 31)
1799      * ~-(C | Z)
1800      * (C | Z) - 1
1801      */
1802     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1803     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1804 
1805     /* !(Z & C) */
1806     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1807     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1808 
1809     /* (!C & Z) << 31 -> -(Z & ~C) */
1810     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1811     tcg_gen_neg_i32(cpu_VF, cpu_VF);
1812 
1813     /* C | Z */
1814     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1815 }
1816 
1817 static void gen_axflag(void)
1818 {
1819     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
1820     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
1821 
1822     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1823     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1824 
1825     tcg_gen_movi_i32(cpu_NF, 0);
1826     tcg_gen_movi_i32(cpu_VF, 0);
1827 }
1828 
1829 /* MSR (immediate) - move immediate to processor state field */
1830 static void handle_msr_i(DisasContext *s, uint32_t insn,
1831                          unsigned int op1, unsigned int op2, unsigned int crm)
1832 {
1833     int op = op1 << 3 | op2;
1834 
1835     /* End the TB by default, chaining is ok.  */
1836     s->base.is_jmp = DISAS_TOO_MANY;
1837 
1838     switch (op) {
1839     case 0x00: /* CFINV */
1840         if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
1841             goto do_unallocated;
1842         }
1843         tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1844         s->base.is_jmp = DISAS_NEXT;
1845         break;
1846 
1847     case 0x01: /* XAFlag */
1848         if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1849             goto do_unallocated;
1850         }
1851         gen_xaflag();
1852         s->base.is_jmp = DISAS_NEXT;
1853         break;
1854 
1855     case 0x02: /* AXFlag */
1856         if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1857             goto do_unallocated;
1858         }
1859         gen_axflag();
1860         s->base.is_jmp = DISAS_NEXT;
1861         break;
1862 
1863     case 0x03: /* UAO */
1864         if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
1865             goto do_unallocated;
1866         }
1867         if (crm & 1) {
1868             set_pstate_bits(PSTATE_UAO);
1869         } else {
1870             clear_pstate_bits(PSTATE_UAO);
1871         }
1872         gen_rebuild_hflags(s);
1873         break;
1874 
1875     case 0x04: /* PAN */
1876         if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
1877             goto do_unallocated;
1878         }
1879         if (crm & 1) {
1880             set_pstate_bits(PSTATE_PAN);
1881         } else {
1882             clear_pstate_bits(PSTATE_PAN);
1883         }
1884         gen_rebuild_hflags(s);
1885         break;
1886 
1887     case 0x05: /* SPSel */
1888         if (s->current_el == 0) {
1889             goto do_unallocated;
1890         }
1891         gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(crm & PSTATE_SP));
1892         break;
1893 
1894     case 0x19: /* SSBS */
1895         if (!dc_isar_feature(aa64_ssbs, s)) {
1896             goto do_unallocated;
1897         }
1898         if (crm & 1) {
1899             set_pstate_bits(PSTATE_SSBS);
1900         } else {
1901             clear_pstate_bits(PSTATE_SSBS);
1902         }
1903         /* Don't need to rebuild hflags since SSBS is a nop */
1904         break;
1905 
1906     case 0x1a: /* DIT */
1907         if (!dc_isar_feature(aa64_dit, s)) {
1908             goto do_unallocated;
1909         }
1910         if (crm & 1) {
1911             set_pstate_bits(PSTATE_DIT);
1912         } else {
1913             clear_pstate_bits(PSTATE_DIT);
1914         }
1915         /* There's no need to rebuild hflags because DIT is a nop */
1916         break;
1917 
1918     case 0x1e: /* DAIFSet */
1919         gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(crm));
1920         break;
1921 
1922     case 0x1f: /* DAIFClear */
1923         gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(crm));
1924         /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
1925         s->base.is_jmp = DISAS_UPDATE_EXIT;
1926         break;
1927 
1928     case 0x1c: /* TCO */
1929         if (dc_isar_feature(aa64_mte, s)) {
1930             /* Full MTE is enabled -- set the TCO bit as directed. */
1931             if (crm & 1) {
1932                 set_pstate_bits(PSTATE_TCO);
1933             } else {
1934                 clear_pstate_bits(PSTATE_TCO);
1935             }
1936             gen_rebuild_hflags(s);
1937             /* Many factors, including TCO, go into MTE_ACTIVE. */
1938             s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
1939         } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
1940             /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
1941             s->base.is_jmp = DISAS_NEXT;
1942         } else {
1943             goto do_unallocated;
1944         }
1945         break;
1946 
1947     case 0x1b: /* SVCR* */
1948         if (!dc_isar_feature(aa64_sme, s) || crm < 2 || crm > 7) {
1949             goto do_unallocated;
1950         }
1951         if (sme_access_check(s)) {
1952             int old = s->pstate_sm | (s->pstate_za << 1);
1953             int new = (crm & 1) * 3;
1954             int msk = (crm >> 1) & 3;
1955 
1956             if ((old ^ new) & msk) {
1957                 /* At least one bit changes. */
1958                 gen_helper_set_svcr(cpu_env, tcg_constant_i32(new),
1959                                     tcg_constant_i32(msk));
1960             } else {
1961                 s->base.is_jmp = DISAS_NEXT;
1962             }
1963         }
1964         break;
1965 
1966     default:
1967     do_unallocated:
1968         unallocated_encoding(s);
1969         return;
1970     }
1971 }
1972 
1973 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1974 {
1975     TCGv_i32 tmp = tcg_temp_new_i32();
1976     TCGv_i32 nzcv = tcg_temp_new_i32();
1977 
1978     /* build bit 31, N */
1979     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1980     /* build bit 30, Z */
1981     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1982     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1983     /* build bit 29, C */
1984     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1985     /* build bit 28, V */
1986     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1987     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1988     /* generate result */
1989     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1990 }
1991 
1992 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1993 {
1994     TCGv_i32 nzcv = tcg_temp_new_i32();
1995 
1996     /* take NZCV from R[t] */
1997     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1998 
1999     /* bit 31, N */
2000     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2001     /* bit 30, Z */
2002     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2003     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2004     /* bit 29, C */
2005     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2006     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2007     /* bit 28, V */
2008     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2009     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2010 }
2011 
2012 static void gen_sysreg_undef(DisasContext *s, bool isread,
2013                              uint8_t op0, uint8_t op1, uint8_t op2,
2014                              uint8_t crn, uint8_t crm, uint8_t rt)
2015 {
2016     /*
2017      * Generate code to emit an UNDEF with correct syndrome
2018      * information for a failed system register access.
2019      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2020      * but if FEAT_IDST is implemented then read accesses to registers
2021      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2022      * syndrome.
2023      */
2024     uint32_t syndrome;
2025 
2026     if (isread && dc_isar_feature(aa64_ids, s) &&
2027         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2028         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2029     } else {
2030         syndrome = syn_uncategorized();
2031     }
2032     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2033 }
2034 
2035 /* MRS - move from system register
2036  * MSR (register) - move to system register
2037  * SYS
2038  * SYSL
2039  * These are all essentially the same insn in 'read' and 'write'
2040  * versions, with varying op0 fields.
2041  */
2042 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
2043                        unsigned int op0, unsigned int op1, unsigned int op2,
2044                        unsigned int crn, unsigned int crm, unsigned int rt)
2045 {
2046     uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2047                                       crn, crm, op0, op1, op2);
2048     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2049     TCGv_ptr tcg_ri = NULL;
2050     TCGv_i64 tcg_rt;
2051 
2052     if (!ri) {
2053         /* Unknown register; this might be a guest error or a QEMU
2054          * unimplemented feature.
2055          */
2056         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2057                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2058                       isread ? "read" : "write", op0, op1, crn, crm, op2);
2059         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2060         return;
2061     }
2062 
2063     /* Check access permissions */
2064     if (!cp_access_ok(s->current_el, ri, isread)) {
2065         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2066         return;
2067     }
2068 
2069     if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2070         /* Emit code to perform further access permissions checks at
2071          * runtime; this may result in an exception.
2072          */
2073         uint32_t syndrome;
2074 
2075         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2076         gen_a64_update_pc(s, 0);
2077         tcg_ri = tcg_temp_new_ptr();
2078         gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
2079                                        tcg_constant_i32(key),
2080                                        tcg_constant_i32(syndrome),
2081                                        tcg_constant_i32(isread));
2082     } else if (ri->type & ARM_CP_RAISES_EXC) {
2083         /*
2084          * The readfn or writefn might raise an exception;
2085          * synchronize the CPU state in case it does.
2086          */
2087         gen_a64_update_pc(s, 0);
2088     }
2089 
2090     /* Handle special cases first */
2091     switch (ri->type & ARM_CP_SPECIAL_MASK) {
2092     case 0:
2093         break;
2094     case ARM_CP_NOP:
2095         return;
2096     case ARM_CP_NZCV:
2097         tcg_rt = cpu_reg(s, rt);
2098         if (isread) {
2099             gen_get_nzcv(tcg_rt);
2100         } else {
2101             gen_set_nzcv(tcg_rt);
2102         }
2103         return;
2104     case ARM_CP_CURRENTEL:
2105         /* Reads as current EL value from pstate, which is
2106          * guaranteed to be constant by the tb flags.
2107          */
2108         tcg_rt = cpu_reg(s, rt);
2109         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
2110         return;
2111     case ARM_CP_DC_ZVA:
2112         /* Writes clear the aligned block of memory which rt points into. */
2113         if (s->mte_active[0]) {
2114             int desc = 0;
2115 
2116             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2117             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2118             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2119 
2120             tcg_rt = tcg_temp_new_i64();
2121             gen_helper_mte_check_zva(tcg_rt, cpu_env,
2122                                      tcg_constant_i32(desc), cpu_reg(s, rt));
2123         } else {
2124             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2125         }
2126         gen_helper_dc_zva(cpu_env, tcg_rt);
2127         return;
2128     case ARM_CP_DC_GVA:
2129         {
2130             TCGv_i64 clean_addr, tag;
2131 
2132             /*
2133              * DC_GVA, like DC_ZVA, requires that we supply the original
2134              * pointer for an invalid page.  Probe that address first.
2135              */
2136             tcg_rt = cpu_reg(s, rt);
2137             clean_addr = clean_data_tbi(s, tcg_rt);
2138             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2139 
2140             if (s->ata) {
2141                 /* Extract the tag from the register to match STZGM.  */
2142                 tag = tcg_temp_new_i64();
2143                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2144                 gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
2145             }
2146         }
2147         return;
2148     case ARM_CP_DC_GZVA:
2149         {
2150             TCGv_i64 clean_addr, tag;
2151 
2152             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2153             tcg_rt = cpu_reg(s, rt);
2154             clean_addr = clean_data_tbi(s, tcg_rt);
2155             gen_helper_dc_zva(cpu_env, clean_addr);
2156 
2157             if (s->ata) {
2158                 /* Extract the tag from the register to match STZGM.  */
2159                 tag = tcg_temp_new_i64();
2160                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2161                 gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
2162             }
2163         }
2164         return;
2165     default:
2166         g_assert_not_reached();
2167     }
2168     if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2169         return;
2170     } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2171         return;
2172     } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2173         return;
2174     }
2175 
2176     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
2177         gen_io_start();
2178     }
2179 
2180     tcg_rt = cpu_reg(s, rt);
2181 
2182     if (isread) {
2183         if (ri->type & ARM_CP_CONST) {
2184             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2185         } else if (ri->readfn) {
2186             if (!tcg_ri) {
2187                 tcg_ri = gen_lookup_cp_reg(key);
2188             }
2189             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tcg_ri);
2190         } else {
2191             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
2192         }
2193     } else {
2194         if (ri->type & ARM_CP_CONST) {
2195             /* If not forbidden by access permissions, treat as WI */
2196             return;
2197         } else if (ri->writefn) {
2198             if (!tcg_ri) {
2199                 tcg_ri = gen_lookup_cp_reg(key);
2200             }
2201             gen_helper_set_cp_reg64(cpu_env, tcg_ri, tcg_rt);
2202         } else {
2203             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
2204         }
2205     }
2206 
2207     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
2208         /* I/O operations must end the TB here (whether read or write) */
2209         s->base.is_jmp = DISAS_UPDATE_EXIT;
2210     }
2211     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2212         /*
2213          * A write to any coprocessor regiser that ends a TB
2214          * must rebuild the hflags for the next TB.
2215          */
2216         gen_rebuild_hflags(s);
2217         /*
2218          * We default to ending the TB on a coprocessor register write,
2219          * but allow this to be suppressed by the register definition
2220          * (usually only necessary to work around guest bugs).
2221          */
2222         s->base.is_jmp = DISAS_UPDATE_EXIT;
2223     }
2224 }
2225 
2226 /* System
2227  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
2228  * +---------------------+---+-----+-----+-------+-------+-----+------+
2229  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
2230  * +---------------------+---+-----+-----+-------+-------+-----+------+
2231  */
2232 static void disas_system(DisasContext *s, uint32_t insn)
2233 {
2234     unsigned int l, op0, op1, crn, crm, op2, rt;
2235     l = extract32(insn, 21, 1);
2236     op0 = extract32(insn, 19, 2);
2237     op1 = extract32(insn, 16, 3);
2238     crn = extract32(insn, 12, 4);
2239     crm = extract32(insn, 8, 4);
2240     op2 = extract32(insn, 5, 3);
2241     rt = extract32(insn, 0, 5);
2242 
2243     if (op0 == 0) {
2244         if (l || rt != 31) {
2245             unallocated_encoding(s);
2246             return;
2247         }
2248         switch (crn) {
2249         case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
2250             handle_hint(s, insn, op1, op2, crm);
2251             break;
2252         case 3: /* CLREX, DSB, DMB, ISB */
2253             handle_sync(s, insn, op1, op2, crm);
2254             break;
2255         case 4: /* MSR (immediate) */
2256             handle_msr_i(s, insn, op1, op2, crm);
2257             break;
2258         default:
2259             unallocated_encoding(s);
2260             break;
2261         }
2262         return;
2263     }
2264     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
2265 }
2266 
2267 /* Exception generation
2268  *
2269  *  31             24 23 21 20                     5 4   2 1  0
2270  * +-----------------+-----+------------------------+-----+----+
2271  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
2272  * +-----------------------+------------------------+----------+
2273  */
2274 static void disas_exc(DisasContext *s, uint32_t insn)
2275 {
2276     int opc = extract32(insn, 21, 3);
2277     int op2_ll = extract32(insn, 0, 5);
2278     int imm16 = extract32(insn, 5, 16);
2279     uint32_t syndrome;
2280 
2281     switch (opc) {
2282     case 0:
2283         /* For SVC, HVC and SMC we advance the single-step state
2284          * machine before taking the exception. This is architecturally
2285          * mandated, to ensure that single-stepping a system call
2286          * instruction works properly.
2287          */
2288         switch (op2_ll) {
2289         case 1:                                                     /* SVC */
2290             syndrome = syn_aa64_svc(imm16);
2291             if (s->fgt_svc) {
2292                 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2293                 break;
2294             }
2295             gen_ss_advance(s);
2296             gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2297             break;
2298         case 2:                                                     /* HVC */
2299             if (s->current_el == 0) {
2300                 unallocated_encoding(s);
2301                 break;
2302             }
2303             /* The pre HVC helper handles cases when HVC gets trapped
2304              * as an undefined insn by runtime configuration.
2305              */
2306             gen_a64_update_pc(s, 0);
2307             gen_helper_pre_hvc(cpu_env);
2308             gen_ss_advance(s);
2309             gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(imm16), 2);
2310             break;
2311         case 3:                                                     /* SMC */
2312             if (s->current_el == 0) {
2313                 unallocated_encoding(s);
2314                 break;
2315             }
2316             gen_a64_update_pc(s, 0);
2317             gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(imm16)));
2318             gen_ss_advance(s);
2319             gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(imm16), 3);
2320             break;
2321         default:
2322             unallocated_encoding(s);
2323             break;
2324         }
2325         break;
2326     case 1:
2327         if (op2_ll != 0) {
2328             unallocated_encoding(s);
2329             break;
2330         }
2331         /* BRK */
2332         gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
2333         break;
2334     case 2:
2335         if (op2_ll != 0) {
2336             unallocated_encoding(s);
2337             break;
2338         }
2339         /* HLT. This has two purposes.
2340          * Architecturally, it is an external halting debug instruction.
2341          * Since QEMU doesn't implement external debug, we treat this as
2342          * it is required for halting debug disabled: it will UNDEF.
2343          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2344          */
2345         if (semihosting_enabled(s->current_el == 0) && imm16 == 0xf000) {
2346             gen_exception_internal_insn(s, EXCP_SEMIHOST);
2347         } else {
2348             unallocated_encoding(s);
2349         }
2350         break;
2351     case 5:
2352         if (op2_ll < 1 || op2_ll > 3) {
2353             unallocated_encoding(s);
2354             break;
2355         }
2356         /* DCPS1, DCPS2, DCPS3 */
2357         unallocated_encoding(s);
2358         break;
2359     default:
2360         unallocated_encoding(s);
2361         break;
2362     }
2363 }
2364 
2365 /* Branches, exception generating and system instructions */
2366 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2367 {
2368     switch (extract32(insn, 25, 7)) {
2369     case 0x6a: /* Exception generation / System */
2370         if (insn & (1 << 24)) {
2371             if (extract32(insn, 22, 2) == 0) {
2372                 disas_system(s, insn);
2373             } else {
2374                 unallocated_encoding(s);
2375             }
2376         } else {
2377             disas_exc(s, insn);
2378         }
2379         break;
2380     default:
2381         unallocated_encoding(s);
2382         break;
2383     }
2384 }
2385 
2386 /*
2387  * Load/Store exclusive instructions are implemented by remembering
2388  * the value/address loaded, and seeing if these are the same
2389  * when the store is performed. This is not actually the architecturally
2390  * mandated semantics, but it works for typical guest code sequences
2391  * and avoids having to monitor regular stores.
2392  *
2393  * The store exclusive uses the atomic cmpxchg primitives to avoid
2394  * races in multi-threaded linux-user and when MTTCG softmmu is
2395  * enabled.
2396  */
2397 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
2398                                TCGv_i64 addr, int size, bool is_pair)
2399 {
2400     int idx = get_mem_index(s);
2401     MemOp memop = s->be_data;
2402 
2403     g_assert(size <= 3);
2404     if (is_pair) {
2405         g_assert(size >= 2);
2406         if (size == 2) {
2407             /* The pair must be single-copy atomic for the doubleword.  */
2408             memop |= MO_64 | MO_ALIGN;
2409             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2410             if (s->be_data == MO_LE) {
2411                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2412                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2413             } else {
2414                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2415                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2416             }
2417         } else {
2418             /* The pair must be single-copy atomic for *each* doubleword, not
2419                the entire quadword, however it must be quadword aligned.  */
2420             memop |= MO_64;
2421             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
2422                                 memop | MO_ALIGN_16);
2423 
2424             TCGv_i64 addr2 = tcg_temp_new_i64();
2425             tcg_gen_addi_i64(addr2, addr, 8);
2426             tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
2427 
2428             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2429             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2430         }
2431     } else {
2432         memop |= size | MO_ALIGN;
2433         tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2434         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2435     }
2436     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
2437 }
2438 
2439 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2440                                 TCGv_i64 addr, int size, int is_pair)
2441 {
2442     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2443      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2444      *     [addr] = {Rt};
2445      *     if (is_pair) {
2446      *         [addr + datasize] = {Rt2};
2447      *     }
2448      *     {Rd} = 0;
2449      * } else {
2450      *     {Rd} = 1;
2451      * }
2452      * env->exclusive_addr = -1;
2453      */
2454     TCGLabel *fail_label = gen_new_label();
2455     TCGLabel *done_label = gen_new_label();
2456     TCGv_i64 tmp;
2457 
2458     tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
2459 
2460     tmp = tcg_temp_new_i64();
2461     if (is_pair) {
2462         if (size == 2) {
2463             if (s->be_data == MO_LE) {
2464                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2465             } else {
2466                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2467             }
2468             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2469                                        cpu_exclusive_val, tmp,
2470                                        get_mem_index(s),
2471                                        MO_64 | MO_ALIGN | s->be_data);
2472             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2473         } else {
2474             TCGv_i128 t16 = tcg_temp_new_i128();
2475             TCGv_i128 c16 = tcg_temp_new_i128();
2476             TCGv_i64 a, b;
2477 
2478             if (s->be_data == MO_LE) {
2479                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2480                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2481                                         cpu_exclusive_high);
2482             } else {
2483                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2484                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2485                                         cpu_exclusive_val);
2486             }
2487 
2488             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
2489                                         get_mem_index(s),
2490                                         MO_128 | MO_ALIGN | s->be_data);
2491 
2492             a = tcg_temp_new_i64();
2493             b = tcg_temp_new_i64();
2494             if (s->be_data == MO_LE) {
2495                 tcg_gen_extr_i128_i64(a, b, t16);
2496             } else {
2497                 tcg_gen_extr_i128_i64(b, a, t16);
2498             }
2499 
2500             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
2501             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
2502             tcg_gen_or_i64(tmp, a, b);
2503 
2504             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
2505         }
2506     } else {
2507         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2508                                    cpu_reg(s, rt), get_mem_index(s),
2509                                    size | MO_ALIGN | s->be_data);
2510         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2511     }
2512     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2513     tcg_gen_br(done_label);
2514 
2515     gen_set_label(fail_label);
2516     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2517     gen_set_label(done_label);
2518     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2519 }
2520 
2521 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2522                                  int rn, int size)
2523 {
2524     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2525     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2526     int memidx = get_mem_index(s);
2527     TCGv_i64 clean_addr;
2528 
2529     if (rn == 31) {
2530         gen_check_sp_alignment(s);
2531     }
2532     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size);
2533     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
2534                                size | MO_ALIGN | s->be_data);
2535 }
2536 
2537 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2538                                       int rn, int size)
2539 {
2540     TCGv_i64 s1 = cpu_reg(s, rs);
2541     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2542     TCGv_i64 t1 = cpu_reg(s, rt);
2543     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2544     TCGv_i64 clean_addr;
2545     int memidx = get_mem_index(s);
2546 
2547     if (rn == 31) {
2548         gen_check_sp_alignment(s);
2549     }
2550 
2551     /* This is a single atomic access, despite the "pair". */
2552     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1);
2553 
2554     if (size == 2) {
2555         TCGv_i64 cmp = tcg_temp_new_i64();
2556         TCGv_i64 val = tcg_temp_new_i64();
2557 
2558         if (s->be_data == MO_LE) {
2559             tcg_gen_concat32_i64(val, t1, t2);
2560             tcg_gen_concat32_i64(cmp, s1, s2);
2561         } else {
2562             tcg_gen_concat32_i64(val, t2, t1);
2563             tcg_gen_concat32_i64(cmp, s2, s1);
2564         }
2565 
2566         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
2567                                    MO_64 | MO_ALIGN | s->be_data);
2568 
2569         if (s->be_data == MO_LE) {
2570             tcg_gen_extr32_i64(s1, s2, cmp);
2571         } else {
2572             tcg_gen_extr32_i64(s2, s1, cmp);
2573         }
2574     } else {
2575         TCGv_i128 cmp = tcg_temp_new_i128();
2576         TCGv_i128 val = tcg_temp_new_i128();
2577 
2578         if (s->be_data == MO_LE) {
2579             tcg_gen_concat_i64_i128(val, t1, t2);
2580             tcg_gen_concat_i64_i128(cmp, s1, s2);
2581         } else {
2582             tcg_gen_concat_i64_i128(val, t2, t1);
2583             tcg_gen_concat_i64_i128(cmp, s2, s1);
2584         }
2585 
2586         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx,
2587                                     MO_128 | MO_ALIGN | s->be_data);
2588 
2589         if (s->be_data == MO_LE) {
2590             tcg_gen_extr_i128_i64(s1, s2, cmp);
2591         } else {
2592             tcg_gen_extr_i128_i64(s2, s1, cmp);
2593         }
2594     }
2595 }
2596 
/*
 * Compute the Sixty-Four bit (SF) register-size flag. This logic is
 * derived from the ARMv8 specs for LDR (Shared decode for all encodings).
 *
 * Returns true when the register size is 64 bits:
 *  - signed accesses: 64 bits unless opc<0> is set (then 32 bits);
 *  - unsigned accesses: 64 bits only when size == 3.
 */
static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
{
    if (is_signed) {
        return (opc & 1) == 0;
    }
    return size == 3;
}
2612 
2613 /* Load/store exclusive
2614  *
2615  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2616  * +-----+-------------+----+---+----+------+----+-------+------+------+
2617  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2618  * +-----+-------------+----+---+----+------+----+-------+------+------+
2619  *
2620  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2621  *   L: 0 -> store, 1 -> load
2622  *  o2: 0 -> exclusive, 1 -> not
2623  *  o1: 0 -> single register, 1 -> register pair
2624  *  o0: 1 -> load-acquire/store-release, 0 -> not
2625  */
2626 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2627 {
2628     int rt = extract32(insn, 0, 5);
2629     int rn = extract32(insn, 5, 5);
2630     int rt2 = extract32(insn, 10, 5);
2631     int rs = extract32(insn, 16, 5);
2632     int is_lasr = extract32(insn, 15, 1);
2633     int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2634     int size = extract32(insn, 30, 2);
2635     TCGv_i64 clean_addr;
2636 
2637     switch (o2_L_o1_o0) {
2638     case 0x0: /* STXR */
2639     case 0x1: /* STLXR */
2640         if (rn == 31) {
2641             gen_check_sp_alignment(s);
2642         }
2643         if (is_lasr) {
2644             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2645         }
2646         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2647                                     true, rn != 31, size);
2648         gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
2649         return;
2650 
2651     case 0x4: /* LDXR */
2652     case 0x5: /* LDAXR */
2653         if (rn == 31) {
2654             gen_check_sp_alignment(s);
2655         }
2656         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2657                                     false, rn != 31, size);
2658         s->is_ldex = true;
2659         gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
2660         if (is_lasr) {
2661             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2662         }
2663         return;
2664 
2665     case 0x8: /* STLLR */
2666         if (!dc_isar_feature(aa64_lor, s)) {
2667             break;
2668         }
2669         /* StoreLORelease is the same as Store-Release for QEMU.  */
2670         /* fall through */
2671     case 0x9: /* STLR */
2672         /* Generate ISS for non-exclusive accesses including LASR.  */
2673         if (rn == 31) {
2674             gen_check_sp_alignment(s);
2675         }
2676         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2677         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2678                                     true, rn != 31, size);
2679         /* TODO: ARMv8.4-LSE SCTLR.nAA */
2680         do_gpr_st(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, true, rt,
2681                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2682         return;
2683 
2684     case 0xc: /* LDLAR */
2685         if (!dc_isar_feature(aa64_lor, s)) {
2686             break;
2687         }
2688         /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2689         /* fall through */
2690     case 0xd: /* LDAR */
2691         /* Generate ISS for non-exclusive accesses including LASR.  */
2692         if (rn == 31) {
2693             gen_check_sp_alignment(s);
2694         }
2695         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2696                                     false, rn != 31, size);
2697         /* TODO: ARMv8.4-LSE SCTLR.nAA */
2698         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, false, true,
2699                   rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2700         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2701         return;
2702 
2703     case 0x2: case 0x3: /* CASP / STXP */
2704         if (size & 2) { /* STXP / STLXP */
2705             if (rn == 31) {
2706                 gen_check_sp_alignment(s);
2707             }
2708             if (is_lasr) {
2709                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2710             }
2711             clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2712                                         true, rn != 31, size);
2713             gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
2714             return;
2715         }
2716         if (rt2 == 31
2717             && ((rt | rs) & 1) == 0
2718             && dc_isar_feature(aa64_atomics, s)) {
2719             /* CASP / CASPL */
2720             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2721             return;
2722         }
2723         break;
2724 
2725     case 0x6: case 0x7: /* CASPA / LDXP */
2726         if (size & 2) { /* LDXP / LDAXP */
2727             if (rn == 31) {
2728                 gen_check_sp_alignment(s);
2729             }
2730             clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2731                                         false, rn != 31, size);
2732             s->is_ldex = true;
2733             gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
2734             if (is_lasr) {
2735                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2736             }
2737             return;
2738         }
2739         if (rt2 == 31
2740             && ((rt | rs) & 1) == 0
2741             && dc_isar_feature(aa64_atomics, s)) {
2742             /* CASPA / CASPAL */
2743             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2744             return;
2745         }
2746         break;
2747 
2748     case 0xa: /* CAS */
2749     case 0xb: /* CASL */
2750     case 0xe: /* CASA */
2751     case 0xf: /* CASAL */
2752         if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
2753             gen_compare_and_swap(s, rs, rt, rn, size);
2754             return;
2755         }
2756         break;
2757     }
2758     unallocated_encoding(s);
2759 }
2760 
2761 /*
2762  * Load register (literal)
2763  *
2764  *  31 30 29   27  26 25 24 23                5 4     0
2765  * +-----+-------+---+-----+-------------------+-------+
2766  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2767  * +-----+-------+---+-----+-------------------+-------+
2768  *
2769  * V: 1 -> vector (simd/fp)
2770  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2771  *                   10-> 32 bit signed, 11 -> prefetch
2772  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2773  */
2774 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2775 {
2776     int rt = extract32(insn, 0, 5);
2777     int64_t imm = sextract32(insn, 5, 19) << 2;
2778     bool is_vector = extract32(insn, 26, 1);
2779     int opc = extract32(insn, 30, 2);
2780     bool is_signed = false;
2781     int size = 2;
2782     TCGv_i64 tcg_rt, clean_addr;
2783 
2784     if (is_vector) {
2785         if (opc == 3) {
2786             unallocated_encoding(s);
2787             return;
2788         }
2789         size = 2 + opc;
2790         if (!fp_access_check(s)) {
2791             return;
2792         }
2793     } else {
2794         if (opc == 3) {
2795             /* PRFM (literal) : prefetch */
2796             return;
2797         }
2798         size = 2 + extract32(opc, 0, 1);
2799         is_signed = extract32(opc, 1, 1);
2800     }
2801 
2802     tcg_rt = cpu_reg(s, rt);
2803 
2804     clean_addr = tcg_temp_new_i64();
2805     gen_pc_plus_diff(s, clean_addr, imm);
2806     if (is_vector) {
2807         do_fp_ld(s, rt, clean_addr, size);
2808     } else {
2809         /* Only unsigned 32bit loads target 32bit registers.  */
2810         bool iss_sf = opc != 0;
2811 
2812         do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
2813                   false, true, rt, iss_sf, false);
2814     }
2815 }
2816 
2817 /*
2818  * LDNP (Load Pair - non-temporal hint)
2819  * LDP (Load Pair - non vector)
2820  * LDPSW (Load Pair Signed Word - non vector)
2821  * STNP (Store Pair - non-temporal hint)
2822  * STP (Store Pair - non vector)
2823  * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2824  * LDP (Load Pair of SIMD&FP)
2825  * STNP (Store Pair of SIMD&FP - non-temporal hint)
2826  * STP (Store Pair of SIMD&FP)
2827  *
2828  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2829  * +-----+-------+---+---+-------+---+-----------------------------+
2830  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2831  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2832  *
2833  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2834  *      LDPSW/STGP               01
2835  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2836  *   V: 0 -> GPR, 1 -> Vector
2837  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2838  *      10 -> signed offset, 11 -> pre-index
2839  *   L: 0 -> Store 1 -> Load
2840  *
2841  * Rt, Rt2 = GPR or SIMD registers to be stored
2842  * Rn = general purpose register containing address
2843  * imm7 = signed offset (multiple of 4 or 8 depending on size)
2844  */
2845 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2846 {
2847     int rt = extract32(insn, 0, 5);
2848     int rn = extract32(insn, 5, 5);
2849     int rt2 = extract32(insn, 10, 5);
2850     uint64_t offset = sextract64(insn, 15, 7);
2851     int index = extract32(insn, 23, 2);
2852     bool is_vector = extract32(insn, 26, 1);
2853     bool is_load = extract32(insn, 22, 1);
2854     int opc = extract32(insn, 30, 2);
2855 
2856     bool is_signed = false;
2857     bool postindex = false;
2858     bool wback = false;
2859     bool set_tag = false;
2860 
2861     TCGv_i64 clean_addr, dirty_addr;
2862 
2863     int size;
2864 
2865     if (opc == 3) {
2866         unallocated_encoding(s);
2867         return;
2868     }
2869 
2870     if (is_vector) {
2871         size = 2 + opc;
2872     } else if (opc == 1 && !is_load) {
2873         /* STGP */
2874         if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) {
2875             unallocated_encoding(s);
2876             return;
2877         }
2878         size = 3;
2879         set_tag = true;
2880     } else {
2881         size = 2 + extract32(opc, 1, 1);
2882         is_signed = extract32(opc, 0, 1);
2883         if (!is_load && is_signed) {
2884             unallocated_encoding(s);
2885             return;
2886         }
2887     }
2888 
2889     switch (index) {
2890     case 1: /* post-index */
2891         postindex = true;
2892         wback = true;
2893         break;
2894     case 0:
2895         /* signed offset with "non-temporal" hint. Since we don't emulate
2896          * caches we don't care about hints to the cache system about
2897          * data access patterns, and handle this identically to plain
2898          * signed offset.
2899          */
2900         if (is_signed) {
2901             /* There is no non-temporal-hint version of LDPSW */
2902             unallocated_encoding(s);
2903             return;
2904         }
2905         postindex = false;
2906         break;
2907     case 2: /* signed offset, rn not updated */
2908         postindex = false;
2909         break;
2910     case 3: /* pre-index */
2911         postindex = false;
2912         wback = true;
2913         break;
2914     }
2915 
2916     if (is_vector && !fp_access_check(s)) {
2917         return;
2918     }
2919 
2920     offset <<= (set_tag ? LOG2_TAG_GRANULE : size);
2921 
2922     if (rn == 31) {
2923         gen_check_sp_alignment(s);
2924     }
2925 
2926     dirty_addr = read_cpu_reg_sp(s, rn, 1);
2927     if (!postindex) {
2928         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2929     }
2930 
2931     if (set_tag) {
2932         if (!s->ata) {
2933             /*
2934              * TODO: We could rely on the stores below, at least for
2935              * system mode, if we arrange to add MO_ALIGN_16.
2936              */
2937             gen_helper_stg_stub(cpu_env, dirty_addr);
2938         } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2939             gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
2940         } else {
2941             gen_helper_stg(cpu_env, dirty_addr, dirty_addr);
2942         }
2943     }
2944 
2945     clean_addr = gen_mte_checkN(s, dirty_addr, !is_load,
2946                                 (wback || rn != 31) && !set_tag, 2 << size);
2947 
2948     if (is_vector) {
2949         if (is_load) {
2950             do_fp_ld(s, rt, clean_addr, size);
2951         } else {
2952             do_fp_st(s, rt, clean_addr, size);
2953         }
2954         tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2955         if (is_load) {
2956             do_fp_ld(s, rt2, clean_addr, size);
2957         } else {
2958             do_fp_st(s, rt2, clean_addr, size);
2959         }
2960     } else {
2961         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2962         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2963 
2964         if (is_load) {
2965             TCGv_i64 tmp = tcg_temp_new_i64();
2966 
2967             /* Do not modify tcg_rt before recognizing any exception
2968              * from the second load.
2969              */
2970             do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN,
2971                       false, false, 0, false, false);
2972             tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2973             do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN,
2974                       false, false, 0, false, false);
2975 
2976             tcg_gen_mov_i64(tcg_rt, tmp);
2977         } else {
2978             do_gpr_st(s, tcg_rt, clean_addr, size,
2979                       false, 0, false, false);
2980             tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2981             do_gpr_st(s, tcg_rt2, clean_addr, size,
2982                       false, 0, false, false);
2983         }
2984     }
2985 
2986     if (wback) {
2987         if (postindex) {
2988             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2989         }
2990         tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
2991     }
2992 }
2993 
2994 /*
2995  * Load/store (immediate post-indexed)
2996  * Load/store (immediate pre-indexed)
2997  * Load/store (unscaled immediate)
2998  *
2999  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
3000  * +----+-------+---+-----+-----+---+--------+-----+------+------+
3001  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
3002  * +----+-------+---+-----+-----+---+--------+-----+------+------+
3003  *
3004  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
3005          10 -> unprivileged
3006  * V = 0 -> non-vector
3007  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
3008  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3009  */
3010 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
3011                                 int opc,
3012                                 int size,
3013                                 int rt,
3014                                 bool is_vector)
3015 {
3016     int rn = extract32(insn, 5, 5);
3017     int imm9 = sextract32(insn, 12, 9);
3018     int idx = extract32(insn, 10, 2);
3019     bool is_signed = false;
3020     bool is_store = false;
3021     bool is_extended = false;
3022     bool is_unpriv = (idx == 2);
3023     bool iss_valid;
3024     bool post_index;
3025     bool writeback;
3026     int memidx;
3027 
3028     TCGv_i64 clean_addr, dirty_addr;
3029 
3030     if (is_vector) {
3031         size |= (opc & 2) << 1;
3032         if (size > 4 || is_unpriv) {
3033             unallocated_encoding(s);
3034             return;
3035         }
3036         is_store = ((opc & 1) == 0);
3037         if (!fp_access_check(s)) {
3038             return;
3039         }
3040     } else {
3041         if (size == 3 && opc == 2) {
3042             /* PRFM - prefetch */
3043             if (idx != 0) {
3044                 unallocated_encoding(s);
3045                 return;
3046             }
3047             return;
3048         }
3049         if (opc == 3 && size > 1) {
3050             unallocated_encoding(s);
3051             return;
3052         }
3053         is_store = (opc == 0);
3054         is_signed = extract32(opc, 1, 1);
3055         is_extended = (size < 3) && extract32(opc, 0, 1);
3056     }
3057 
3058     switch (idx) {
3059     case 0:
3060     case 2:
3061         post_index = false;
3062         writeback = false;
3063         break;
3064     case 1:
3065         post_index = true;
3066         writeback = true;
3067         break;
3068     case 3:
3069         post_index = false;
3070         writeback = true;
3071         break;
3072     default:
3073         g_assert_not_reached();
3074     }
3075 
3076     iss_valid = !is_vector && !writeback;
3077 
3078     if (rn == 31) {
3079         gen_check_sp_alignment(s);
3080     }
3081 
3082     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3083     if (!post_index) {
3084         tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3085     }
3086 
3087     memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
3088     clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store,
3089                                        writeback || rn != 31,
3090                                        size, is_unpriv, memidx);
3091 
3092     if (is_vector) {
3093         if (is_store) {
3094             do_fp_st(s, rt, clean_addr, size);
3095         } else {
3096             do_fp_ld(s, rt, clean_addr, size);
3097         }
3098     } else {
3099         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3100         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3101 
3102         if (is_store) {
3103             do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
3104                              iss_valid, rt, iss_sf, false);
3105         } else {
3106             do_gpr_ld_memidx(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
3107                              is_extended, memidx,
3108                              iss_valid, rt, iss_sf, false);
3109         }
3110     }
3111 
3112     if (writeback) {
3113         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3114         if (post_index) {
3115             tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3116         }
3117         tcg_gen_mov_i64(tcg_rn, dirty_addr);
3118     }
3119 }
3120 
3121 /*
3122  * Load/store (register offset)
3123  *
3124  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
3125  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3126  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
3127  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3128  *
3129  * For non-vector:
3130  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3131  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3132  * For vector:
3133  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3134  *   opc<0>: 0 -> store, 1 -> load
3135  * V: 1 -> vector/simd
3136  * opt: extend encoding (see DecodeRegExtend)
3137  * S: if S=1 then scale (essentially index by sizeof(size))
3138  * Rt: register to transfer into/out of
3139  * Rn: address register or SP for base
3140  * Rm: offset register or ZR for offset
3141  */
3142 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
3143                                    int opc,
3144                                    int size,
3145                                    int rt,
3146                                    bool is_vector)
3147 {
3148     int rn = extract32(insn, 5, 5);
3149     int shift = extract32(insn, 12, 1);
3150     int rm = extract32(insn, 16, 5);
3151     int opt = extract32(insn, 13, 3);
3152     bool is_signed = false;
3153     bool is_store = false;
3154     bool is_extended = false;
3155 
3156     TCGv_i64 tcg_rm, clean_addr, dirty_addr;
3157 
3158     if (extract32(opt, 1, 1) == 0) {
3159         unallocated_encoding(s);
3160         return;
3161     }
3162 
3163     if (is_vector) {
3164         size |= (opc & 2) << 1;
3165         if (size > 4) {
3166             unallocated_encoding(s);
3167             return;
3168         }
3169         is_store = !extract32(opc, 0, 1);
3170         if (!fp_access_check(s)) {
3171             return;
3172         }
3173     } else {
3174         if (size == 3 && opc == 2) {
3175             /* PRFM - prefetch */
3176             return;
3177         }
3178         if (opc == 3 && size > 1) {
3179             unallocated_encoding(s);
3180             return;
3181         }
3182         is_store = (opc == 0);
3183         is_signed = extract32(opc, 1, 1);
3184         is_extended = (size < 3) && extract32(opc, 0, 1);
3185     }
3186 
3187     if (rn == 31) {
3188         gen_check_sp_alignment(s);
3189     }
3190     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3191 
3192     tcg_rm = read_cpu_reg(s, rm, 1);
3193     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
3194 
3195     tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
3196     clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size);
3197 
3198     if (is_vector) {
3199         if (is_store) {
3200             do_fp_st(s, rt, clean_addr, size);
3201         } else {
3202             do_fp_ld(s, rt, clean_addr, size);
3203         }
3204     } else {
3205         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3206         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3207         if (is_store) {
3208             do_gpr_st(s, tcg_rt, clean_addr, size,
3209                       true, rt, iss_sf, false);
3210         } else {
3211             do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
3212                       is_extended, true, rt, iss_sf, false);
3213         }
3214     }
3215 }
3216 
3217 /*
3218  * Load/store (unsigned immediate)
3219  *
3220  * 31 30 29   27  26 25 24 23 22 21        10 9     5
3221  * +----+-------+---+-----+-----+------------+-------+------+
3222  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
3223  * +----+-------+---+-----+-----+------------+-------+------+
3224  *
3225  * For non-vector:
3226  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3227  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3228  * For vector:
3229  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3230  *   opc<0>: 0 -> store, 1 -> load
3231  * Rn: base address register (inc SP)
3232  * Rt: target register
3233  */
3234 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
3235                                         int opc,
3236                                         int size,
3237                                         int rt,
3238                                         bool is_vector)
3239 {
3240     int rn = extract32(insn, 5, 5);
3241     unsigned int imm12 = extract32(insn, 10, 12);
3242     unsigned int offset;
3243 
3244     TCGv_i64 clean_addr, dirty_addr;
3245 
3246     bool is_store;
3247     bool is_signed = false;
3248     bool is_extended = false;
3249 
3250     if (is_vector) {
3251         size |= (opc & 2) << 1;
3252         if (size > 4) {
3253             unallocated_encoding(s);
3254             return;
3255         }
3256         is_store = !extract32(opc, 0, 1);
3257         if (!fp_access_check(s)) {
3258             return;
3259         }
3260     } else {
3261         if (size == 3 && opc == 2) {
3262             /* PRFM - prefetch */
3263             return;
3264         }
3265         if (opc == 3 && size > 1) {
3266             unallocated_encoding(s);
3267             return;
3268         }
3269         is_store = (opc == 0);
3270         is_signed = extract32(opc, 1, 1);
3271         is_extended = (size < 3) && extract32(opc, 0, 1);
3272     }
3273 
3274     if (rn == 31) {
3275         gen_check_sp_alignment(s);
3276     }
3277     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3278     offset = imm12 << size;
3279     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3280     clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size);
3281 
3282     if (is_vector) {
3283         if (is_store) {
3284             do_fp_st(s, rt, clean_addr, size);
3285         } else {
3286             do_fp_ld(s, rt, clean_addr, size);
3287         }
3288     } else {
3289         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3290         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3291         if (is_store) {
3292             do_gpr_st(s, tcg_rt, clean_addr, size,
3293                       true, rt, iss_sf, false);
3294         } else {
3295             do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
3296                       is_extended, true, rt, iss_sf, false);
3297         }
3298     }
3299 }
3300 
3301 /* Atomic memory operations
3302  *
3303  *  31  30      27  26    24    22  21   16   15    12    10    5     0
3304  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3305  * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn |  Rt |
3306  * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+
3307  *
3308  * Rt: the result register
3309  * Rn: base address or SP
3310  * Rs: the source register for the operation
3311  * V: vector flag (always 0 as of v8.3)
3312  * A: acquire flag
3313  * R: release flag
3314  */
3315 static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
3316                               int size, int rt, bool is_vector)
3317 {
3318     int rs = extract32(insn, 16, 5);
3319     int rn = extract32(insn, 5, 5);
3320     int o3_opc = extract32(insn, 12, 4);
3321     bool r = extract32(insn, 22, 1);
3322     bool a = extract32(insn, 23, 1);
3323     TCGv_i64 tcg_rs, tcg_rt, clean_addr;
3324     AtomicThreeOpFn *fn = NULL;
3325     MemOp mop = s->be_data | size | MO_ALIGN;
3326 
3327     if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
3328         unallocated_encoding(s);
3329         return;
3330     }
3331     switch (o3_opc) {
3332     case 000: /* LDADD */
3333         fn = tcg_gen_atomic_fetch_add_i64;
3334         break;
3335     case 001: /* LDCLR */
3336         fn = tcg_gen_atomic_fetch_and_i64;
3337         break;
3338     case 002: /* LDEOR */
3339         fn = tcg_gen_atomic_fetch_xor_i64;
3340         break;
3341     case 003: /* LDSET */
3342         fn = tcg_gen_atomic_fetch_or_i64;
3343         break;
3344     case 004: /* LDSMAX */
3345         fn = tcg_gen_atomic_fetch_smax_i64;
3346         mop |= MO_SIGN;
3347         break;
3348     case 005: /* LDSMIN */
3349         fn = tcg_gen_atomic_fetch_smin_i64;
3350         mop |= MO_SIGN;
3351         break;
3352     case 006: /* LDUMAX */
3353         fn = tcg_gen_atomic_fetch_umax_i64;
3354         break;
3355     case 007: /* LDUMIN */
3356         fn = tcg_gen_atomic_fetch_umin_i64;
3357         break;
3358     case 010: /* SWP */
3359         fn = tcg_gen_atomic_xchg_i64;
3360         break;
3361     case 014: /* LDAPR, LDAPRH, LDAPRB */
3362         if (!dc_isar_feature(aa64_rcpc_8_3, s) ||
3363             rs != 31 || a != 1 || r != 0) {
3364             unallocated_encoding(s);
3365             return;
3366         }
3367         break;
3368     default:
3369         unallocated_encoding(s);
3370         return;
3371     }
3372 
3373     if (rn == 31) {
3374         gen_check_sp_alignment(s);
3375     }
3376     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size);
3377 
3378     if (o3_opc == 014) {
3379         /*
3380          * LDAPR* are a special case because they are a simple load, not a
3381          * fetch-and-do-something op.
3382          * The architectural consistency requirements here are weaker than
3383          * full load-acquire (we only need "load-acquire processor consistent"),
3384          * but we choose to implement them as full LDAQ.
3385          */
3386         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false,
3387                   true, rt, disas_ldst_compute_iss_sf(size, false, 0), true);
3388         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3389         return;
3390     }
3391 
3392     tcg_rs = read_cpu_reg(s, rs, true);
3393     tcg_rt = cpu_reg(s, rt);
3394 
3395     if (o3_opc == 1) { /* LDCLR */
3396         tcg_gen_not_i64(tcg_rs, tcg_rs);
3397     }
3398 
3399     /* The tcg atomic primitives are all full barriers.  Therefore we
3400      * can ignore the Acquire and Release bits of this instruction.
3401      */
3402     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3403 
3404     if ((mop & MO_SIGN) && size != MO_64) {
3405         tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3406     }
3407 }
3408 
3409 /*
3410  * PAC memory operations
3411  *
3412  *  31  30      27  26    24    22  21       12  11  10    5     0
3413  * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3414  * | size | 1 1 1 | V | 0 0 | M S | 1 |  imm9  | W | 1 | Rn |  Rt |
3415  * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3416  *
3417  * Rt: the result register
3418  * Rn: base address or SP
3419  * V: vector flag (always 0 as of v8.3)
3420  * M: clear for key DA, set for key DB
3421  * W: pre-indexing flag
3422  * S: sign for imm9.
3423  */
3424 static void disas_ldst_pac(DisasContext *s, uint32_t insn,
3425                            int size, int rt, bool is_vector)
3426 {
3427     int rn = extract32(insn, 5, 5);
3428     bool is_wback = extract32(insn, 11, 1);
3429     bool use_key_a = !extract32(insn, 23, 1);
3430     int offset;
3431     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3432 
3433     if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
3434         unallocated_encoding(s);
3435         return;
3436     }
3437 
3438     if (rn == 31) {
3439         gen_check_sp_alignment(s);
3440     }
3441     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3442 
3443     if (s->pauth_active) {
3444         if (use_key_a) {
3445             gen_helper_autda(dirty_addr, cpu_env, dirty_addr,
3446                              tcg_constant_i64(0));
3447         } else {
3448             gen_helper_autdb(dirty_addr, cpu_env, dirty_addr,
3449                              tcg_constant_i64(0));
3450         }
3451     }
3452 
3453     /* Form the 10-bit signed, scaled offset.  */
3454     offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
3455     offset = sextract32(offset << size, 0, 10 + size);
3456     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3457 
3458     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3459     clean_addr = gen_mte_check1(s, dirty_addr, false,
3460                                 is_wback || rn != 31, size);
3461 
3462     tcg_rt = cpu_reg(s, rt);
3463     do_gpr_ld(s, tcg_rt, clean_addr, size,
3464               /* extend */ false, /* iss_valid */ !is_wback,
3465               /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
3466 
3467     if (is_wback) {
3468         tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3469     }
3470 }
3471 
3472 /*
3473  * LDAPR/STLR (unscaled immediate)
3474  *
3475  *  31  30            24    22  21       12    10    5     0
3476  * +------+-------------+-----+---+--------+-----+----+-----+
3477  * | size | 0 1 1 0 0 1 | opc | 0 |  imm9  | 0 0 | Rn |  Rt |
3478  * +------+-------------+-----+---+--------+-----+----+-----+
3479  *
3480  * Rt: source or destination register
3481  * Rn: base register
3482  * imm9: unscaled immediate offset
3483  * opc: 00: STLUR*, 01/10/11: various LDAPUR*
3484  * size: size of load/store
3485  */
3486 static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn)
3487 {
3488     int rt = extract32(insn, 0, 5);
3489     int rn = extract32(insn, 5, 5);
3490     int offset = sextract32(insn, 12, 9);
3491     int opc = extract32(insn, 22, 2);
3492     int size = extract32(insn, 30, 2);
3493     TCGv_i64 clean_addr, dirty_addr;
3494     bool is_store = false;
3495     bool extend = false;
3496     bool iss_sf;
3497     MemOp mop;
3498 
3499     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3500         unallocated_encoding(s);
3501         return;
3502     }
3503 
3504     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3505     mop = size | MO_ALIGN;
3506 
3507     switch (opc) {
3508     case 0: /* STLURB */
3509         is_store = true;
3510         break;
3511     case 1: /* LDAPUR* */
3512         break;
3513     case 2: /* LDAPURS* 64-bit variant */
3514         if (size == 3) {
3515             unallocated_encoding(s);
3516             return;
3517         }
3518         mop |= MO_SIGN;
3519         break;
3520     case 3: /* LDAPURS* 32-bit variant */
3521         if (size > 1) {
3522             unallocated_encoding(s);
3523             return;
3524         }
3525         mop |= MO_SIGN;
3526         extend = true; /* zero-extend 32->64 after signed load */
3527         break;
3528     default:
3529         g_assert_not_reached();
3530     }
3531 
3532     iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc);
3533 
3534     if (rn == 31) {
3535         gen_check_sp_alignment(s);
3536     }
3537 
3538     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3539     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3540     clean_addr = clean_data_tbi(s, dirty_addr);
3541 
3542     if (is_store) {
3543         /* Store-Release semantics */
3544         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3545         do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true);
3546     } else {
3547         /*
3548          * Load-AcquirePC semantics; we implement as the slightly more
3549          * restrictive Load-Acquire.
3550          */
3551         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop,
3552                   extend, true, rt, iss_sf, true);
3553         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3554     }
3555 }
3556 
3557 /* Load/store register (all forms) */
3558 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
3559 {
3560     int rt = extract32(insn, 0, 5);
3561     int opc = extract32(insn, 22, 2);
3562     bool is_vector = extract32(insn, 26, 1);
3563     int size = extract32(insn, 30, 2);
3564 
3565     switch (extract32(insn, 24, 2)) {
3566     case 0:
3567         if (extract32(insn, 21, 1) == 0) {
3568             /* Load/store register (unscaled immediate)
3569              * Load/store immediate pre/post-indexed
3570              * Load/store register unprivileged
3571              */
3572             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
3573             return;
3574         }
3575         switch (extract32(insn, 10, 2)) {
3576         case 0:
3577             disas_ldst_atomic(s, insn, size, rt, is_vector);
3578             return;
3579         case 2:
3580             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
3581             return;
3582         default:
3583             disas_ldst_pac(s, insn, size, rt, is_vector);
3584             return;
3585         }
3586         break;
3587     case 1:
3588         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
3589         return;
3590     }
3591     unallocated_encoding(s);
3592 }
3593 
3594 /* AdvSIMD load/store multiple structures
3595  *
3596  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
3597  * +---+---+---------------+---+-------------+--------+------+------+------+
3598  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
3599  * +---+---+---------------+---+-------------+--------+------+------+------+
3600  *
3601  * AdvSIMD load/store multiple structures (post-indexed)
3602  *
3603  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
3604  * +---+---+---------------+---+---+---------+--------+------+------+------+
3605  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
3606  * +---+---+---------------+---+---+---------+--------+------+------+------+
3607  *
3608  * Rt: first (or only) SIMD&FP register to be transferred
3609  * Rn: base address or SP
3610  * Rm (post-index only): post-index register (when !31) or size dependent #imm
3611  */
3612 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
3613 {
3614     int rt = extract32(insn, 0, 5);
3615     int rn = extract32(insn, 5, 5);
3616     int rm = extract32(insn, 16, 5);
3617     int size = extract32(insn, 10, 2);
3618     int opcode = extract32(insn, 12, 4);
3619     bool is_store = !extract32(insn, 22, 1);
3620     bool is_postidx = extract32(insn, 23, 1);
3621     bool is_q = extract32(insn, 30, 1);
3622     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3623     MemOp endian, align, mop;
3624 
3625     int total;    /* total bytes */
3626     int elements; /* elements per vector */
3627     int rpt;    /* num iterations */
3628     int selem;  /* structure elements */
3629     int r;
3630 
3631     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
3632         unallocated_encoding(s);
3633         return;
3634     }
3635 
3636     if (!is_postidx && rm != 0) {
3637         unallocated_encoding(s);
3638         return;
3639     }
3640 
3641     /* From the shared decode logic */
3642     switch (opcode) {
3643     case 0x0:
3644         rpt = 1;
3645         selem = 4;
3646         break;
3647     case 0x2:
3648         rpt = 4;
3649         selem = 1;
3650         break;
3651     case 0x4:
3652         rpt = 1;
3653         selem = 3;
3654         break;
3655     case 0x6:
3656         rpt = 3;
3657         selem = 1;
3658         break;
3659     case 0x7:
3660         rpt = 1;
3661         selem = 1;
3662         break;
3663     case 0x8:
3664         rpt = 1;
3665         selem = 2;
3666         break;
3667     case 0xa:
3668         rpt = 2;
3669         selem = 1;
3670         break;
3671     default:
3672         unallocated_encoding(s);
3673         return;
3674     }
3675 
3676     if (size == 3 && !is_q && selem != 1) {
3677         /* reserved */
3678         unallocated_encoding(s);
3679         return;
3680     }
3681 
3682     if (!fp_access_check(s)) {
3683         return;
3684     }
3685 
3686     if (rn == 31) {
3687         gen_check_sp_alignment(s);
3688     }
3689 
3690     /* For our purposes, bytes are always little-endian.  */
3691     endian = s->be_data;
3692     if (size == 0) {
3693         endian = MO_LE;
3694     }
3695 
3696     total = rpt * selem * (is_q ? 16 : 8);
3697     tcg_rn = cpu_reg_sp(s, rn);
3698 
3699     /*
3700      * Issue the MTE check vs the logical repeat count, before we
3701      * promote consecutive little-endian elements below.
3702      */
3703     clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31,
3704                                 total);
3705 
3706     /*
3707      * Consecutive little-endian elements from a single register
3708      * can be promoted to a larger little-endian operation.
3709      */
3710     align = MO_ALIGN;
3711     if (selem == 1 && endian == MO_LE) {
3712         align = pow2_align(size);
3713         size = 3;
3714     }
3715     if (!s->align_mem) {
3716         align = 0;
3717     }
3718     mop = endian | size | align;
3719 
3720     elements = (is_q ? 16 : 8) >> size;
3721     tcg_ebytes = tcg_constant_i64(1 << size);
3722     for (r = 0; r < rpt; r++) {
3723         int e;
3724         for (e = 0; e < elements; e++) {
3725             int xs;
3726             for (xs = 0; xs < selem; xs++) {
3727                 int tt = (rt + r + xs) % 32;
3728                 if (is_store) {
3729                     do_vec_st(s, tt, e, clean_addr, mop);
3730                 } else {
3731                     do_vec_ld(s, tt, e, clean_addr, mop);
3732                 }
3733                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3734             }
3735         }
3736     }
3737 
3738     if (!is_store) {
3739         /* For non-quad operations, setting a slice of the low
3740          * 64 bits of the register clears the high 64 bits (in
3741          * the ARM ARM pseudocode this is implicit in the fact
3742          * that 'rval' is a 64 bit wide variable).
3743          * For quad operations, we might still need to zero the
3744          * high bits of SVE.
3745          */
3746         for (r = 0; r < rpt * selem; r++) {
3747             int tt = (rt + r) % 32;
3748             clear_vec_high(s, is_q, tt);
3749         }
3750     }
3751 
3752     if (is_postidx) {
3753         if (rm == 31) {
3754             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3755         } else {
3756             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3757         }
3758     }
3759 }
3760 
3761 /* AdvSIMD load/store single structure
3762  *
3763  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3764  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3765  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
3766  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3767  *
3768  * AdvSIMD load/store single structure (post-indexed)
3769  *
3770  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3771  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3772  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
3773  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3774  *
3775  * Rt: first (or only) SIMD&FP register to be transferred
3776  * Rn: base address or SP
3777  * Rm (post-index only): post-index register (when !31) or size dependent #imm
3778  * index = encoded in Q:S:size dependent on size
3779  *
3780  * lane_size = encoded in R, opc
3781  * transfer width = encoded in opc, S, size
3782  */
3783 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
3784 {
3785     int rt = extract32(insn, 0, 5);
3786     int rn = extract32(insn, 5, 5);
3787     int rm = extract32(insn, 16, 5);
3788     int size = extract32(insn, 10, 2);
3789     int S = extract32(insn, 12, 1);
3790     int opc = extract32(insn, 13, 3);
3791     int R = extract32(insn, 21, 1);
3792     int is_load = extract32(insn, 22, 1);
3793     int is_postidx = extract32(insn, 23, 1);
3794     int is_q = extract32(insn, 30, 1);
3795 
3796     int scale = extract32(opc, 1, 2);
3797     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
3798     bool replicate = false;
3799     int index = is_q << 3 | S << 2 | size;
3800     int xs, total;
3801     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3802     MemOp mop;
3803 
3804     if (extract32(insn, 31, 1)) {
3805         unallocated_encoding(s);
3806         return;
3807     }
3808     if (!is_postidx && rm != 0) {
3809         unallocated_encoding(s);
3810         return;
3811     }
3812 
3813     switch (scale) {
3814     case 3:
3815         if (!is_load || S) {
3816             unallocated_encoding(s);
3817             return;
3818         }
3819         scale = size;
3820         replicate = true;
3821         break;
3822     case 0:
3823         break;
3824     case 1:
3825         if (extract32(size, 0, 1)) {
3826             unallocated_encoding(s);
3827             return;
3828         }
3829         index >>= 1;
3830         break;
3831     case 2:
3832         if (extract32(size, 1, 1)) {
3833             unallocated_encoding(s);
3834             return;
3835         }
3836         if (!extract32(size, 0, 1)) {
3837             index >>= 2;
3838         } else {
3839             if (S) {
3840                 unallocated_encoding(s);
3841                 return;
3842             }
3843             index >>= 3;
3844             scale = 3;
3845         }
3846         break;
3847     default:
3848         g_assert_not_reached();
3849     }
3850 
3851     if (!fp_access_check(s)) {
3852         return;
3853     }
3854 
3855     if (rn == 31) {
3856         gen_check_sp_alignment(s);
3857     }
3858 
3859     total = selem << scale;
3860     tcg_rn = cpu_reg_sp(s, rn);
3861 
3862     clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31,
3863                                 total);
3864     mop = finalize_memop(s, scale);
3865 
3866     tcg_ebytes = tcg_constant_i64(1 << scale);
3867     for (xs = 0; xs < selem; xs++) {
3868         if (replicate) {
3869             /* Load and replicate to all elements */
3870             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3871 
3872             tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
3873             tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
3874                                  (is_q + 1) * 8, vec_full_reg_size(s),
3875                                  tcg_tmp);
3876         } else {
3877             /* Load/store one element per register */
3878             if (is_load) {
3879                 do_vec_ld(s, rt, index, clean_addr, mop);
3880             } else {
3881                 do_vec_st(s, rt, index, clean_addr, mop);
3882             }
3883         }
3884         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3885         rt = (rt + 1) % 32;
3886     }
3887 
3888     if (is_postidx) {
3889         if (rm == 31) {
3890             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3891         } else {
3892             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3893         }
3894     }
3895 }
3896 
3897 /*
3898  * Load/Store memory tags
3899  *
3900  *  31 30 29         24     22  21     12    10      5      0
3901  * +-----+-------------+-----+---+------+-----+------+------+
3902  * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 |  Rn  |  Rt  |
3903  * +-----+-------------+-----+---+------+-----+------+------+
3904  */
3905 static void disas_ldst_tag(DisasContext *s, uint32_t insn)
3906 {
3907     int rt = extract32(insn, 0, 5);
3908     int rn = extract32(insn, 5, 5);
3909     uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
3910     int op2 = extract32(insn, 10, 2);
3911     int op1 = extract32(insn, 22, 2);
3912     bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
3913     int index = 0;
3914     TCGv_i64 addr, clean_addr, tcg_rt;
3915 
3916     /* We checked insn bits [29:24,21] in the caller.  */
3917     if (extract32(insn, 30, 2) != 3) {
3918         goto do_unallocated;
3919     }
3920 
3921     /*
3922      * @index is a tri-state variable which has 3 states:
3923      * < 0 : post-index, writeback
3924      * = 0 : signed offset
3925      * > 0 : pre-index, writeback
3926      */
3927     switch (op1) {
3928     case 0:
3929         if (op2 != 0) {
3930             /* STG */
3931             index = op2 - 2;
3932         } else {
3933             /* STZGM */
3934             if (s->current_el == 0 || offset != 0) {
3935                 goto do_unallocated;
3936             }
3937             is_mult = is_zero = true;
3938         }
3939         break;
3940     case 1:
3941         if (op2 != 0) {
3942             /* STZG */
3943             is_zero = true;
3944             index = op2 - 2;
3945         } else {
3946             /* LDG */
3947             is_load = true;
3948         }
3949         break;
3950     case 2:
3951         if (op2 != 0) {
3952             /* ST2G */
3953             is_pair = true;
3954             index = op2 - 2;
3955         } else {
3956             /* STGM */
3957             if (s->current_el == 0 || offset != 0) {
3958                 goto do_unallocated;
3959             }
3960             is_mult = true;
3961         }
3962         break;
3963     case 3:
3964         if (op2 != 0) {
3965             /* STZ2G */
3966             is_pair = is_zero = true;
3967             index = op2 - 2;
3968         } else {
3969             /* LDGM */
3970             if (s->current_el == 0 || offset != 0) {
3971                 goto do_unallocated;
3972             }
3973             is_mult = is_load = true;
3974         }
3975         break;
3976 
3977     default:
3978     do_unallocated:
3979         unallocated_encoding(s);
3980         return;
3981     }
3982 
3983     if (is_mult
3984         ? !dc_isar_feature(aa64_mte, s)
3985         : !dc_isar_feature(aa64_mte_insn_reg, s)) {
3986         goto do_unallocated;
3987     }
3988 
3989     if (rn == 31) {
3990         gen_check_sp_alignment(s);
3991     }
3992 
3993     addr = read_cpu_reg_sp(s, rn, true);
3994     if (index >= 0) {
3995         /* pre-index or signed offset */
3996         tcg_gen_addi_i64(addr, addr, offset);
3997     }
3998 
3999     if (is_mult) {
4000         tcg_rt = cpu_reg(s, rt);
4001 
4002         if (is_zero) {
4003             int size = 4 << s->dcz_blocksize;
4004 
4005             if (s->ata) {
4006                 gen_helper_stzgm_tags(cpu_env, addr, tcg_rt);
4007             }
4008             /*
4009              * The non-tags portion of STZGM is mostly like DC_ZVA,
4010              * except the alignment happens before the access.
4011              */
4012             clean_addr = clean_data_tbi(s, addr);
4013             tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4014             gen_helper_dc_zva(cpu_env, clean_addr);
4015         } else if (s->ata) {
4016             if (is_load) {
4017                 gen_helper_ldgm(tcg_rt, cpu_env, addr);
4018             } else {
4019                 gen_helper_stgm(cpu_env, addr, tcg_rt);
4020             }
4021         } else {
4022             MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
4023             int size = 4 << GMID_EL1_BS;
4024 
4025             clean_addr = clean_data_tbi(s, addr);
4026             tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4027             gen_probe_access(s, clean_addr, acc, size);
4028 
4029             if (is_load) {
4030                 /* The result tags are zeros.  */
4031                 tcg_gen_movi_i64(tcg_rt, 0);
4032             }
4033         }
4034         return;
4035     }
4036 
4037     if (is_load) {
4038         tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4039         tcg_rt = cpu_reg(s, rt);
4040         if (s->ata) {
4041             gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt);
4042         } else {
4043             clean_addr = clean_data_tbi(s, addr);
4044             gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4045             gen_address_with_allocation_tag0(tcg_rt, addr);
4046         }
4047     } else {
4048         tcg_rt = cpu_reg_sp(s, rt);
4049         if (!s->ata) {
4050             /*
4051              * For STG and ST2G, we need to check alignment and probe memory.
4052              * TODO: For STZG and STZ2G, we could rely on the stores below,
4053              * at least for system mode; user-only won't enforce alignment.
4054              */
4055             if (is_pair) {
4056                 gen_helper_st2g_stub(cpu_env, addr);
4057             } else {
4058                 gen_helper_stg_stub(cpu_env, addr);
4059             }
4060         } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4061             if (is_pair) {
4062                 gen_helper_st2g_parallel(cpu_env, addr, tcg_rt);
4063             } else {
4064                 gen_helper_stg_parallel(cpu_env, addr, tcg_rt);
4065             }
4066         } else {
4067             if (is_pair) {
4068                 gen_helper_st2g(cpu_env, addr, tcg_rt);
4069             } else {
4070                 gen_helper_stg(cpu_env, addr, tcg_rt);
4071             }
4072         }
4073     }
4074 
4075     if (is_zero) {
4076         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4077         TCGv_i64 tcg_zero = tcg_constant_i64(0);
4078         int mem_index = get_mem_index(s);
4079         int i, n = (1 + is_pair) << LOG2_TAG_GRANULE;
4080 
4081         tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index,
4082                             MO_UQ | MO_ALIGN_16);
4083         for (i = 8; i < n; i += 8) {
4084             tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4085             tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_UQ);
4086         }
4087     }
4088 
4089     if (index != 0) {
4090         /* pre-index or post-index */
4091         if (index < 0) {
4092             /* post-index */
4093             tcg_gen_addi_i64(addr, addr, offset);
4094         }
4095         tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr);
4096     }
4097 }
4098 
4099 /* Loads and stores */
4100 static void disas_ldst(DisasContext *s, uint32_t insn)
4101 {
4102     switch (extract32(insn, 24, 6)) {
4103     case 0x08: /* Load/store exclusive */
4104         disas_ldst_excl(s, insn);
4105         break;
4106     case 0x18: case 0x1c: /* Load register (literal) */
4107         disas_ld_lit(s, insn);
4108         break;
4109     case 0x28: case 0x29:
4110     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
4111         disas_ldst_pair(s, insn);
4112         break;
4113     case 0x38: case 0x39:
4114     case 0x3c: case 0x3d: /* Load/store register (all forms) */
4115         disas_ldst_reg(s, insn);
4116         break;
4117     case 0x0c: /* AdvSIMD load/store multiple structures */
4118         disas_ldst_multiple_struct(s, insn);
4119         break;
4120     case 0x0d: /* AdvSIMD load/store single structure */
4121         disas_ldst_single_struct(s, insn);
4122         break;
4123     case 0x19:
4124         if (extract32(insn, 21, 1) != 0) {
4125             disas_ldst_tag(s, insn);
4126         } else if (extract32(insn, 10, 2) == 0) {
4127             disas_ldst_ldapr_stlr(s, insn);
4128         } else {
4129             unallocated_encoding(s);
4130         }
4131         break;
4132     default:
4133         unallocated_encoding(s);
4134         break;
4135     }
4136 }
4137 
4138 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4139 
4140 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4141                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4142 {
4143     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4144     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4145     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4146 
4147     fn(tcg_rd, tcg_rn, tcg_imm);
4148     if (!a->sf) {
4149         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4150     }
4151     return true;
4152 }
4153 
4154 /*
4155  * PC-rel. addressing
4156  */
4157 
4158 static bool trans_ADR(DisasContext *s, arg_ri *a)
4159 {
4160     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4161     return true;
4162 }
4163 
4164 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4165 {
4166     int64_t offset = (int64_t)a->imm << 12;
4167 
4168     /* The page offset is ok for CF_PCREL. */
4169     offset -= s->pc_curr & 0xfff;
4170     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4171     return true;
4172 }
4173 
4174 /*
4175  * Add/subtract (immediate)
4176  */
4177 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4178 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4179 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4180 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4181 
4182 /*
4183  * Add/subtract (immediate, with tags)
4184  */
4185 
4186 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4187                                       bool sub_op)
4188 {
4189     TCGv_i64 tcg_rn, tcg_rd;
4190     int imm;
4191 
4192     imm = a->uimm6 << LOG2_TAG_GRANULE;
4193     if (sub_op) {
4194         imm = -imm;
4195     }
4196 
4197     tcg_rn = cpu_reg_sp(s, a->rn);
4198     tcg_rd = cpu_reg_sp(s, a->rd);
4199 
4200     if (s->ata) {
4201         gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn,
4202                            tcg_constant_i32(imm),
4203                            tcg_constant_i32(a->uimm4));
4204     } else {
4205         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4206         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4207     }
4208     return true;
4209 }
4210 
4211 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4212 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4213 
/*
 * Replicate an element across a 64-bit value.
 *
 * @mask holds the element in its bottom @e bits (higher bits zero).
 * The populated width is doubled on each pass until it reaches 64 bits,
 * so the returned value has the element in every @e-bit slot.
 * @e must be non-zero.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    for (width = e; width < 64; width <<= 1) {
        mask |= mask << width;
    }
    return mask;
}
4227 
/*
 * Logical (immediate)
 */

/*
 * Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask. Returns false if the imms/immr/immn are a reserved
 * value (ie should cause a guest UNDEF exception), and true if they are
 * valid, in which case the decoded bit pattern is written to result.
 *
 * The bit patterns we create here are 64 bit patterns which
 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
 * 64 bits each. Each element contains the same value: a run
 * of between 1 and e-1 non-zero bits, rotated within the
 * element by between 0 and e-1 bits.
 *
 * The element size and run length are encoded into immn (1 bit)
 * and imms (6 bits) as follows:
 * 64 bit elements: immn = 1, imms = <length of run - 1>
 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
 *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
 *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
 *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
 * Notice that immn = 0, imms = 11111x is the only combination
 * not covered by one of the above options; this is reserved.
 * Further, <length of run - 1> all-ones is a reserved pattern.
 *
 * In all cases the rotation is by immr % e (and immr is 6 bits).
 */
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                            unsigned int imms, unsigned int immr)
{
    unsigned esize, field_mask, run, rot;
    uint64_t elem;
    int len;

    assert(immn < 2 && imms < 64 && immr < 64);

    /*
     * First determine the element size: log2 of it is the position of
     * the highest set bit in immn:NOT(imms).
     */
    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (len < 1) {
        /* This is the immn == 0, imms == 0b11111x case */
        return false;
    }
    esize = 1 << len;
    field_mask = esize - 1;

    run = imms & field_mask;
    rot = immr & field_mask;
    if (run == field_mask) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /*
     * Create the value of one element: run+1 set bits rotated
     * right by rot within the element (which is esize bits wide)...
     */
    elem = MAKE_64BIT_MASK(0, run + 1);
    if (rot != 0) {
        elem = (elem >> rot) | (elem << (esize - rot));
        elem &= MAKE_64BIT_MASK(0, esize);
    }

    /* ...then replicate the element over the whole 64 bit value */
    *result = bitfield_replicate(elem, esize);
    return true;
}
4298 
4299 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4300                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4301 {
4302     TCGv_i64 tcg_rd, tcg_rn;
4303     uint64_t imm;
4304 
4305     /* Some immediate field values are reserved. */
4306     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4307                                 extract32(a->dbm, 0, 6),
4308                                 extract32(a->dbm, 6, 6))) {
4309         return false;
4310     }
4311     if (!a->sf) {
4312         imm &= 0xffffffffull;
4313     }
4314 
4315     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4316     tcg_rn = cpu_reg(s, a->rn);
4317 
4318     fn(tcg_rd, tcg_rn, imm);
4319     if (set_cc) {
4320         gen_logic_CC(a->sf, tcg_rd);
4321     }
4322     if (!a->sf) {
4323         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4324     }
4325     return true;
4326 }
4327 
4328 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4329 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4330 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4331 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4332 
4333 /*
4334  * Move wide (immediate)
4335  */
4336 
4337 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4338 {
4339     int pos = a->hw << 4;
4340     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4341     return true;
4342 }
4343 
4344 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4345 {
4346     int pos = a->hw << 4;
4347     uint64_t imm = a->imm;
4348 
4349     imm = ~(imm << pos);
4350     if (!a->sf) {
4351         imm = (uint32_t)imm;
4352     }
4353     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4354     return true;
4355 }
4356 
4357 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4358 {
4359     int pos = a->hw << 4;
4360     TCGv_i64 tcg_rd, tcg_im;
4361 
4362     tcg_rd = cpu_reg(s, a->rd);
4363     tcg_im = tcg_constant_i64(a->imm);
4364     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4365     if (!a->sf) {
4366         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4367     }
4368     return true;
4369 }
4370 
4371 /*
4372  * Bitfield
4373  */
4374 
4375 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4376 {
4377     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4378     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4379     unsigned int bitsize = a->sf ? 64 : 32;
4380     unsigned int ri = a->immr;
4381     unsigned int si = a->imms;
4382     unsigned int pos, len;
4383 
4384     if (si >= ri) {
4385         /* Wd<s-r:0> = Wn<s:r> */
4386         len = (si - ri) + 1;
4387         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4388         if (!a->sf) {
4389             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4390         }
4391     } else {
4392         /* Wd<32+s-r,32-r> = Wn<s:0> */
4393         len = si + 1;
4394         pos = (bitsize - ri) & (bitsize - 1);
4395 
4396         if (len < ri) {
4397             /*
4398              * Sign extend the destination field from len to fill the
4399              * balance of the word.  Let the deposit below insert all
4400              * of those sign bits.
4401              */
4402             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4403             len = ri;
4404         }
4405 
4406         /*
4407          * We start with zero, and we haven't modified any bits outside
4408          * bitsize, therefore no final zero-extension is unneeded for !sf.
4409          */
4410         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4411     }
4412     return true;
4413 }
4414 
4415 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4416 {
4417     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4418     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4419     unsigned int bitsize = a->sf ? 64 : 32;
4420     unsigned int ri = a->immr;
4421     unsigned int si = a->imms;
4422     unsigned int pos, len;
4423 
4424     tcg_rd = cpu_reg(s, a->rd);
4425     tcg_tmp = read_cpu_reg(s, a->rn, 1);
4426 
4427     if (si >= ri) {
4428         /* Wd<s-r:0> = Wn<s:r> */
4429         len = (si - ri) + 1;
4430         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4431     } else {
4432         /* Wd<32+s-r,32-r> = Wn<s:0> */
4433         len = si + 1;
4434         pos = (bitsize - ri) & (bitsize - 1);
4435         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4436     }
4437     return true;
4438 }
4439 
4440 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4441 {
4442     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4443     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4444     unsigned int bitsize = a->sf ? 64 : 32;
4445     unsigned int ri = a->immr;
4446     unsigned int si = a->imms;
4447     unsigned int pos, len;
4448 
4449     tcg_rd = cpu_reg(s, a->rd);
4450     tcg_tmp = read_cpu_reg(s, a->rn, 1);
4451 
4452     if (si >= ri) {
4453         /* Wd<s-r:0> = Wn<s:r> */
4454         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4455         len = (si - ri) + 1;
4456         pos = 0;
4457     } else {
4458         /* Wd<32+s-r,32-r> = Wn<s:0> */
4459         len = si + 1;
4460         pos = (bitsize - ri) & (bitsize - 1);
4461     }
4462 
4463     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4464     if (!a->sf) {
4465         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4466     }
4467     return true;
4468 }
4469 
4470 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4471 {
4472     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4473 
4474     tcg_rd = cpu_reg(s, a->rd);
4475 
4476     if (unlikely(a->imm == 0)) {
4477         /*
4478          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4479          * so an extract from bit 0 is a special case.
4480          */
4481         if (a->sf) {
4482             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4483         } else {
4484             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4485         }
4486     } else {
4487         tcg_rm = cpu_reg(s, a->rm);
4488         tcg_rn = cpu_reg(s, a->rn);
4489 
4490         if (a->sf) {
4491             /* Specialization to ROR happens in EXTRACT2.  */
4492             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4493         } else {
4494             TCGv_i32 t0 = tcg_temp_new_i32();
4495 
4496             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4497             if (a->rm == a->rn) {
4498                 tcg_gen_rotri_i32(t0, t0, a->imm);
4499             } else {
4500                 TCGv_i32 t1 = tcg_temp_new_i32();
4501                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4502                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4503             }
4504             tcg_gen_extu_i32_i64(tcg_rd, t0);
4505         }
4506     }
4507     return true;
4508 }
4509 
4510 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
4511  * Note that it is the caller's responsibility to ensure that the
4512  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4513  * mandated semantics for out of range shifts.
4514  */
4515 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4516                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4517 {
4518     switch (shift_type) {
4519     case A64_SHIFT_TYPE_LSL:
4520         tcg_gen_shl_i64(dst, src, shift_amount);
4521         break;
4522     case A64_SHIFT_TYPE_LSR:
4523         tcg_gen_shr_i64(dst, src, shift_amount);
4524         break;
4525     case A64_SHIFT_TYPE_ASR:
4526         if (!sf) {
4527             tcg_gen_ext32s_i64(dst, src);
4528         }
4529         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4530         break;
4531     case A64_SHIFT_TYPE_ROR:
4532         if (sf) {
4533             tcg_gen_rotr_i64(dst, src, shift_amount);
4534         } else {
4535             TCGv_i32 t0, t1;
4536             t0 = tcg_temp_new_i32();
4537             t1 = tcg_temp_new_i32();
4538             tcg_gen_extrl_i64_i32(t0, src);
4539             tcg_gen_extrl_i64_i32(t1, shift_amount);
4540             tcg_gen_rotr_i32(t0, t0, t1);
4541             tcg_gen_extu_i32_i64(dst, t0);
4542         }
4543         break;
4544     default:
4545         assert(FALSE); /* all shift types should be handled */
4546         break;
4547     }
4548 
4549     if (!sf) { /* zero extend final result */
4550         tcg_gen_ext32u_i64(dst, dst);
4551     }
4552 }
4553 
4554 /* Shift a TCGv src by immediate, put result in dst.
4555  * The shift amount must be in range (this should always be true as the
4556  * relevant instructions will UNDEF on bad shift immediates).
4557  */
4558 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4559                           enum a64_shift_type shift_type, unsigned int shift_i)
4560 {
4561     assert(shift_i < (sf ? 64 : 32));
4562 
4563     if (shift_i == 0) {
4564         tcg_gen_mov_i64(dst, src);
4565     } else {
4566         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
4567     }
4568 }
4569 
4570 /* Logical (shifted register)
4571  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4572  * +----+-----+-----------+-------+---+------+--------+------+------+
4573  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4574  * +----+-----+-----------+-------+---+------+--------+------+------+
4575  */
4576 static void disas_logic_reg(DisasContext *s, uint32_t insn)
4577 {
4578     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4579     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4580 
4581     sf = extract32(insn, 31, 1);
4582     opc = extract32(insn, 29, 2);
4583     shift_type = extract32(insn, 22, 2);
4584     invert = extract32(insn, 21, 1);
4585     rm = extract32(insn, 16, 5);
4586     shift_amount = extract32(insn, 10, 6);
4587     rn = extract32(insn, 5, 5);
4588     rd = extract32(insn, 0, 5);
4589 
4590     if (!sf && (shift_amount & (1 << 5))) {
4591         unallocated_encoding(s);
4592         return;
4593     }
4594 
4595     tcg_rd = cpu_reg(s, rd);
4596 
4597     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4598         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
4599          * register-register MOV and MVN, so it is worth special casing.
4600          */
4601         tcg_rm = cpu_reg(s, rm);
4602         if (invert) {
4603             tcg_gen_not_i64(tcg_rd, tcg_rm);
4604             if (!sf) {
4605                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4606             }
4607         } else {
4608             if (sf) {
4609                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
4610             } else {
4611                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4612             }
4613         }
4614         return;
4615     }
4616 
4617     tcg_rm = read_cpu_reg(s, rm, sf);
4618 
4619     if (shift_amount) {
4620         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4621     }
4622 
4623     tcg_rn = cpu_reg(s, rn);
4624 
4625     switch (opc | (invert << 2)) {
4626     case 0: /* AND */
4627     case 3: /* ANDS */
4628         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4629         break;
4630     case 1: /* ORR */
4631         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4632         break;
4633     case 2: /* EOR */
4634         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4635         break;
4636     case 4: /* BIC */
4637     case 7: /* BICS */
4638         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4639         break;
4640     case 5: /* ORN */
4641         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4642         break;
4643     case 6: /* EON */
4644         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4645         break;
4646     default:
4647         assert(FALSE);
4648         break;
4649     }
4650 
4651     if (!sf) {
4652         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4653     }
4654 
4655     if (opc == 3) {
4656         gen_logic_CC(sf, tcg_rd);
4657     }
4658 }
4659 
4660 /*
4661  * Add/subtract (extended register)
4662  *
4663  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4664  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4665  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4666  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4667  *
4668  *  sf: 0 -> 32bit, 1 -> 64bit
4669  *  op: 0 -> add  , 1 -> sub
4670  *   S: 1 -> set flags
4671  * opt: 00
4672  * option: extension type (see DecodeRegExtend)
4673  * imm3: optional shift to Rm
4674  *
4675  * Rd = Rn + LSL(extend(Rm), amount)
4676  */
4677 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4678 {
4679     int rd = extract32(insn, 0, 5);
4680     int rn = extract32(insn, 5, 5);
4681     int imm3 = extract32(insn, 10, 3);
4682     int option = extract32(insn, 13, 3);
4683     int rm = extract32(insn, 16, 5);
4684     int opt = extract32(insn, 22, 2);
4685     bool setflags = extract32(insn, 29, 1);
4686     bool sub_op = extract32(insn, 30, 1);
4687     bool sf = extract32(insn, 31, 1);
4688 
4689     TCGv_i64 tcg_rm, tcg_rn; /* temps */
4690     TCGv_i64 tcg_rd;
4691     TCGv_i64 tcg_result;
4692 
4693     if (imm3 > 4 || opt != 0) {
4694         unallocated_encoding(s);
4695         return;
4696     }
4697 
4698     /* non-flag setting ops may use SP */
4699     if (!setflags) {
4700         tcg_rd = cpu_reg_sp(s, rd);
4701     } else {
4702         tcg_rd = cpu_reg(s, rd);
4703     }
4704     tcg_rn = read_cpu_reg_sp(s, rn, sf);
4705 
4706     tcg_rm = read_cpu_reg(s, rm, sf);
4707     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4708 
4709     tcg_result = tcg_temp_new_i64();
4710 
4711     if (!setflags) {
4712         if (sub_op) {
4713             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4714         } else {
4715             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4716         }
4717     } else {
4718         if (sub_op) {
4719             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4720         } else {
4721             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4722         }
4723     }
4724 
4725     if (sf) {
4726         tcg_gen_mov_i64(tcg_rd, tcg_result);
4727     } else {
4728         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4729     }
4730 }
4731 
4732 /*
4733  * Add/subtract (shifted register)
4734  *
4735  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4736  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4737  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4738  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4739  *
4740  *    sf: 0 -> 32bit, 1 -> 64bit
4741  *    op: 0 -> add  , 1 -> sub
4742  *     S: 1 -> set flags
4743  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4744  *  imm6: Shift amount to apply to Rm before the add/sub
4745  */
4746 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4747 {
4748     int rd = extract32(insn, 0, 5);
4749     int rn = extract32(insn, 5, 5);
4750     int imm6 = extract32(insn, 10, 6);
4751     int rm = extract32(insn, 16, 5);
4752     int shift_type = extract32(insn, 22, 2);
4753     bool setflags = extract32(insn, 29, 1);
4754     bool sub_op = extract32(insn, 30, 1);
4755     bool sf = extract32(insn, 31, 1);
4756 
4757     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4758     TCGv_i64 tcg_rn, tcg_rm;
4759     TCGv_i64 tcg_result;
4760 
4761     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4762         unallocated_encoding(s);
4763         return;
4764     }
4765 
4766     tcg_rn = read_cpu_reg(s, rn, sf);
4767     tcg_rm = read_cpu_reg(s, rm, sf);
4768 
4769     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4770 
4771     tcg_result = tcg_temp_new_i64();
4772 
4773     if (!setflags) {
4774         if (sub_op) {
4775             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4776         } else {
4777             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4778         }
4779     } else {
4780         if (sub_op) {
4781             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4782         } else {
4783             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4784         }
4785     }
4786 
4787     if (sf) {
4788         tcg_gen_mov_i64(tcg_rd, tcg_result);
4789     } else {
4790         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4791     }
4792 }
4793 
4794 /* Data-processing (3 source)
4795  *
4796  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
4797  *  +--+------+-----------+------+------+----+------+------+------+
4798  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4799  *  +--+------+-----------+------+------+----+------+------+------+
4800  */
4801 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4802 {
4803     int rd = extract32(insn, 0, 5);
4804     int rn = extract32(insn, 5, 5);
4805     int ra = extract32(insn, 10, 5);
4806     int rm = extract32(insn, 16, 5);
4807     int op_id = (extract32(insn, 29, 3) << 4) |
4808         (extract32(insn, 21, 3) << 1) |
4809         extract32(insn, 15, 1);
4810     bool sf = extract32(insn, 31, 1);
4811     bool is_sub = extract32(op_id, 0, 1);
4812     bool is_high = extract32(op_id, 2, 1);
4813     bool is_signed = false;
4814     TCGv_i64 tcg_op1;
4815     TCGv_i64 tcg_op2;
4816     TCGv_i64 tcg_tmp;
4817 
4818     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4819     switch (op_id) {
4820     case 0x42: /* SMADDL */
4821     case 0x43: /* SMSUBL */
4822     case 0x44: /* SMULH */
4823         is_signed = true;
4824         break;
4825     case 0x0: /* MADD (32bit) */
4826     case 0x1: /* MSUB (32bit) */
4827     case 0x40: /* MADD (64bit) */
4828     case 0x41: /* MSUB (64bit) */
4829     case 0x4a: /* UMADDL */
4830     case 0x4b: /* UMSUBL */
4831     case 0x4c: /* UMULH */
4832         break;
4833     default:
4834         unallocated_encoding(s);
4835         return;
4836     }
4837 
4838     if (is_high) {
4839         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4840         TCGv_i64 tcg_rd = cpu_reg(s, rd);
4841         TCGv_i64 tcg_rn = cpu_reg(s, rn);
4842         TCGv_i64 tcg_rm = cpu_reg(s, rm);
4843 
4844         if (is_signed) {
4845             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4846         } else {
4847             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4848         }
4849         return;
4850     }
4851 
4852     tcg_op1 = tcg_temp_new_i64();
4853     tcg_op2 = tcg_temp_new_i64();
4854     tcg_tmp = tcg_temp_new_i64();
4855 
4856     if (op_id < 0x42) {
4857         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4858         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4859     } else {
4860         if (is_signed) {
4861             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4862             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4863         } else {
4864             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4865             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4866         }
4867     }
4868 
4869     if (ra == 31 && !is_sub) {
4870         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4871         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4872     } else {
4873         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4874         if (is_sub) {
4875             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4876         } else {
4877             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4878         }
4879     }
4880 
4881     if (!sf) {
4882         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4883     }
4884 }
4885 
4886 /* Add/subtract (with carry)
4887  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
4888  * +--+--+--+------------------------+------+-------------+------+-----+
4889  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
4890  * +--+--+--+------------------------+------+-------------+------+-----+
4891  */
4892 
4893 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4894 {
4895     unsigned int sf, op, setflags, rm, rn, rd;
4896     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4897 
4898     sf = extract32(insn, 31, 1);
4899     op = extract32(insn, 30, 1);
4900     setflags = extract32(insn, 29, 1);
4901     rm = extract32(insn, 16, 5);
4902     rn = extract32(insn, 5, 5);
4903     rd = extract32(insn, 0, 5);
4904 
4905     tcg_rd = cpu_reg(s, rd);
4906     tcg_rn = cpu_reg(s, rn);
4907 
4908     if (op) {
4909         tcg_y = tcg_temp_new_i64();
4910         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4911     } else {
4912         tcg_y = cpu_reg(s, rm);
4913     }
4914 
4915     if (setflags) {
4916         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4917     } else {
4918         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4919     }
4920 }
4921 
4922 /*
4923  * Rotate right into flags
4924  *  31 30 29                21       15          10      5  4      0
4925  * +--+--+--+-----------------+--------+-----------+------+--+------+
4926  * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
4927  * +--+--+--+-----------------+--------+-----------+------+--+------+
4928  */
4929 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
4930 {
4931     int mask = extract32(insn, 0, 4);
4932     int o2 = extract32(insn, 4, 1);
4933     int rn = extract32(insn, 5, 5);
4934     int imm6 = extract32(insn, 15, 6);
4935     int sf_op_s = extract32(insn, 29, 3);
4936     TCGv_i64 tcg_rn;
4937     TCGv_i32 nzcv;
4938 
4939     if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
4940         unallocated_encoding(s);
4941         return;
4942     }
4943 
4944     tcg_rn = read_cpu_reg(s, rn, 1);
4945     tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
4946 
4947     nzcv = tcg_temp_new_i32();
4948     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
4949 
4950     if (mask & 8) { /* N */
4951         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
4952     }
4953     if (mask & 4) { /* Z */
4954         tcg_gen_not_i32(cpu_ZF, nzcv);
4955         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
4956     }
4957     if (mask & 2) { /* C */
4958         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
4959     }
4960     if (mask & 1) { /* V */
4961         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
4962     }
4963 }
4964 
4965 /*
4966  * Evaluate into flags
4967  *  31 30 29                21        15   14        10      5  4      0
4968  * +--+--+--+-----------------+---------+----+---------+------+--+------+
4969  * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
4970  * +--+--+--+-----------------+---------+----+---------+------+--+------+
4971  */
4972 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
4973 {
4974     int o3_mask = extract32(insn, 0, 5);
4975     int rn = extract32(insn, 5, 5);
4976     int o2 = extract32(insn, 15, 6);
4977     int sz = extract32(insn, 14, 1);
4978     int sf_op_s = extract32(insn, 29, 3);
4979     TCGv_i32 tmp;
4980     int shift;
4981 
4982     if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
4983         !dc_isar_feature(aa64_condm_4, s)) {
4984         unallocated_encoding(s);
4985         return;
4986     }
4987     shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
4988 
4989     tmp = tcg_temp_new_i32();
4990     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
4991     tcg_gen_shli_i32(cpu_NF, tmp, shift);
4992     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
4993     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
4994     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
4995 }
4996 
4997 /* Conditional compare (immediate / register)
4998  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
4999  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
5000  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
5001  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
5002  *        [1]                             y                [0]       [0]
5003  */
5004 static void disas_cc(DisasContext *s, uint32_t insn)
5005 {
5006     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
5007     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
5008     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
5009     DisasCompare c;
5010 
5011     if (!extract32(insn, 29, 1)) {
5012         unallocated_encoding(s);
5013         return;
5014     }
5015     if (insn & (1 << 10 | 1 << 4)) {
5016         unallocated_encoding(s);
5017         return;
5018     }
5019     sf = extract32(insn, 31, 1);
5020     op = extract32(insn, 30, 1);
5021     is_imm = extract32(insn, 11, 1);
5022     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
5023     cond = extract32(insn, 12, 4);
5024     rn = extract32(insn, 5, 5);
5025     nzcv = extract32(insn, 0, 4);
5026 
5027     /* Set T0 = !COND.  */
5028     tcg_t0 = tcg_temp_new_i32();
5029     arm_test_cc(&c, cond);
5030     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
5031 
5032     /* Load the arguments for the new comparison.  */
5033     if (is_imm) {
5034         tcg_y = tcg_temp_new_i64();
5035         tcg_gen_movi_i64(tcg_y, y);
5036     } else {
5037         tcg_y = cpu_reg(s, y);
5038     }
5039     tcg_rn = cpu_reg(s, rn);
5040 
5041     /* Set the flags for the new comparison.  */
5042     tcg_tmp = tcg_temp_new_i64();
5043     if (op) {
5044         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5045     } else {
5046         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5047     }
5048 
5049     /* If COND was false, force the flags to #nzcv.  Compute two masks
5050      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
5051      * For tcg hosts that support ANDC, we can make do with just T1.
5052      * In either case, allow the tcg optimizer to delete any unused mask.
5053      */
5054     tcg_t1 = tcg_temp_new_i32();
5055     tcg_t2 = tcg_temp_new_i32();
5056     tcg_gen_neg_i32(tcg_t1, tcg_t0);
5057     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
5058 
5059     if (nzcv & 8) { /* N */
5060         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
5061     } else {
5062         if (TCG_TARGET_HAS_andc_i32) {
5063             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
5064         } else {
5065             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
5066         }
5067     }
5068     if (nzcv & 4) { /* Z */
5069         if (TCG_TARGET_HAS_andc_i32) {
5070             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
5071         } else {
5072             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
5073         }
5074     } else {
5075         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
5076     }
5077     if (nzcv & 2) { /* C */
5078         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
5079     } else {
5080         if (TCG_TARGET_HAS_andc_i32) {
5081             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
5082         } else {
5083             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
5084         }
5085     }
5086     if (nzcv & 1) { /* V */
5087         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
5088     } else {
5089         if (TCG_TARGET_HAS_andc_i32) {
5090             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
5091         } else {
5092             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
5093         }
5094     }
5095 }
5096 
5097 /* Conditional select
5098  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
5099  * +----+----+---+-----------------+------+------+-----+------+------+
5100  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
5101  * +----+----+---+-----------------+------+------+-----+------+------+
5102  */
5103 static void disas_cond_select(DisasContext *s, uint32_t insn)
5104 {
5105     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
5106     TCGv_i64 tcg_rd, zero;
5107     DisasCompare64 c;
5108 
5109     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
5110         /* S == 1 or op2<1> == 1 */
5111         unallocated_encoding(s);
5112         return;
5113     }
5114     sf = extract32(insn, 31, 1);
5115     else_inv = extract32(insn, 30, 1);
5116     rm = extract32(insn, 16, 5);
5117     cond = extract32(insn, 12, 4);
5118     else_inc = extract32(insn, 10, 1);
5119     rn = extract32(insn, 5, 5);
5120     rd = extract32(insn, 0, 5);
5121 
5122     tcg_rd = cpu_reg(s, rd);
5123 
5124     a64_test_cc(&c, cond);
5125     zero = tcg_constant_i64(0);
5126 
5127     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
5128         /* CSET & CSETM.  */
5129         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
5130         if (else_inv) {
5131             tcg_gen_neg_i64(tcg_rd, tcg_rd);
5132         }
5133     } else {
5134         TCGv_i64 t_true = cpu_reg(s, rn);
5135         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
5136         if (else_inv && else_inc) {
5137             tcg_gen_neg_i64(t_false, t_false);
5138         } else if (else_inv) {
5139             tcg_gen_not_i64(t_false, t_false);
5140         } else if (else_inc) {
5141             tcg_gen_addi_i64(t_false, t_false, 1);
5142         }
5143         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
5144     }
5145 
5146     if (!sf) {
5147         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5148     }
5149 }
5150 
5151 static void handle_clz(DisasContext *s, unsigned int sf,
5152                        unsigned int rn, unsigned int rd)
5153 {
5154     TCGv_i64 tcg_rd, tcg_rn;
5155     tcg_rd = cpu_reg(s, rd);
5156     tcg_rn = cpu_reg(s, rn);
5157 
5158     if (sf) {
5159         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
5160     } else {
5161         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5162         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5163         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
5164         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5165     }
5166 }
5167 
5168 static void handle_cls(DisasContext *s, unsigned int sf,
5169                        unsigned int rn, unsigned int rd)
5170 {
5171     TCGv_i64 tcg_rd, tcg_rn;
5172     tcg_rd = cpu_reg(s, rd);
5173     tcg_rn = cpu_reg(s, rn);
5174 
5175     if (sf) {
5176         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
5177     } else {
5178         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5179         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5180         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
5181         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5182     }
5183 }
5184 
5185 static void handle_rbit(DisasContext *s, unsigned int sf,
5186                         unsigned int rn, unsigned int rd)
5187 {
5188     TCGv_i64 tcg_rd, tcg_rn;
5189     tcg_rd = cpu_reg(s, rd);
5190     tcg_rn = cpu_reg(s, rn);
5191 
5192     if (sf) {
5193         gen_helper_rbit64(tcg_rd, tcg_rn);
5194     } else {
5195         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5196         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5197         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
5198         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5199     }
5200 }
5201 
5202 /* REV with sf==1, opcode==3 ("REV64") */
5203 static void handle_rev64(DisasContext *s, unsigned int sf,
5204                          unsigned int rn, unsigned int rd)
5205 {
5206     if (!sf) {
5207         unallocated_encoding(s);
5208         return;
5209     }
5210     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
5211 }
5212 
5213 /* REV with sf==0, opcode==2
5214  * REV32 (sf==1, opcode==2)
5215  */
5216 static void handle_rev32(DisasContext *s, unsigned int sf,
5217                          unsigned int rn, unsigned int rd)
5218 {
5219     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5220     TCGv_i64 tcg_rn = cpu_reg(s, rn);
5221 
5222     if (sf) {
5223         tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
5224         tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
5225     } else {
5226         tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
5227     }
5228 }
5229 
5230 /* REV16 (opcode==1) */
5231 static void handle_rev16(DisasContext *s, unsigned int sf,
5232                          unsigned int rn, unsigned int rd)
5233 {
5234     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5235     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5236     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5237     TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
5238 
5239     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
5240     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
5241     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
5242     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
5243     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
5244 }
5245 
5246 /* Data-processing (1 source)
5247  *   31  30  29  28             21 20     16 15    10 9    5 4    0
5248  * +----+---+---+-----------------+---------+--------+------+------+
5249  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
5250  * +----+---+---+-----------------+---------+--------+------+------+
5251  */
5252 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
5253 {
5254     unsigned int sf, opcode, opcode2, rn, rd;
5255     TCGv_i64 tcg_rd;
5256 
5257     if (extract32(insn, 29, 1)) {
5258         unallocated_encoding(s);
5259         return;
5260     }
5261 
5262     sf = extract32(insn, 31, 1);
5263     opcode = extract32(insn, 10, 6);
5264     opcode2 = extract32(insn, 16, 5);
5265     rn = extract32(insn, 5, 5);
5266     rd = extract32(insn, 0, 5);
5267 
5268 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
5269 
5270     switch (MAP(sf, opcode2, opcode)) {
5271     case MAP(0, 0x00, 0x00): /* RBIT */
5272     case MAP(1, 0x00, 0x00):
5273         handle_rbit(s, sf, rn, rd);
5274         break;
5275     case MAP(0, 0x00, 0x01): /* REV16 */
5276     case MAP(1, 0x00, 0x01):
5277         handle_rev16(s, sf, rn, rd);
5278         break;
5279     case MAP(0, 0x00, 0x02): /* REV/REV32 */
5280     case MAP(1, 0x00, 0x02):
5281         handle_rev32(s, sf, rn, rd);
5282         break;
5283     case MAP(1, 0x00, 0x03): /* REV64 */
5284         handle_rev64(s, sf, rn, rd);
5285         break;
5286     case MAP(0, 0x00, 0x04): /* CLZ */
5287     case MAP(1, 0x00, 0x04):
5288         handle_clz(s, sf, rn, rd);
5289         break;
5290     case MAP(0, 0x00, 0x05): /* CLS */
5291     case MAP(1, 0x00, 0x05):
5292         handle_cls(s, sf, rn, rd);
5293         break;
5294     case MAP(1, 0x01, 0x00): /* PACIA */
5295         if (s->pauth_active) {
5296             tcg_rd = cpu_reg(s, rd);
5297             gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5298         } else if (!dc_isar_feature(aa64_pauth, s)) {
5299             goto do_unallocated;
5300         }
5301         break;
5302     case MAP(1, 0x01, 0x01): /* PACIB */
5303         if (s->pauth_active) {
5304             tcg_rd = cpu_reg(s, rd);
5305             gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5306         } else if (!dc_isar_feature(aa64_pauth, s)) {
5307             goto do_unallocated;
5308         }
5309         break;
5310     case MAP(1, 0x01, 0x02): /* PACDA */
5311         if (s->pauth_active) {
5312             tcg_rd = cpu_reg(s, rd);
5313             gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5314         } else if (!dc_isar_feature(aa64_pauth, s)) {
5315             goto do_unallocated;
5316         }
5317         break;
5318     case MAP(1, 0x01, 0x03): /* PACDB */
5319         if (s->pauth_active) {
5320             tcg_rd = cpu_reg(s, rd);
5321             gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5322         } else if (!dc_isar_feature(aa64_pauth, s)) {
5323             goto do_unallocated;
5324         }
5325         break;
5326     case MAP(1, 0x01, 0x04): /* AUTIA */
5327         if (s->pauth_active) {
5328             tcg_rd = cpu_reg(s, rd);
5329             gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5330         } else if (!dc_isar_feature(aa64_pauth, s)) {
5331             goto do_unallocated;
5332         }
5333         break;
5334     case MAP(1, 0x01, 0x05): /* AUTIB */
5335         if (s->pauth_active) {
5336             tcg_rd = cpu_reg(s, rd);
5337             gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5338         } else if (!dc_isar_feature(aa64_pauth, s)) {
5339             goto do_unallocated;
5340         }
5341         break;
5342     case MAP(1, 0x01, 0x06): /* AUTDA */
5343         if (s->pauth_active) {
5344             tcg_rd = cpu_reg(s, rd);
5345             gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5346         } else if (!dc_isar_feature(aa64_pauth, s)) {
5347             goto do_unallocated;
5348         }
5349         break;
5350     case MAP(1, 0x01, 0x07): /* AUTDB */
5351         if (s->pauth_active) {
5352             tcg_rd = cpu_reg(s, rd);
5353             gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5354         } else if (!dc_isar_feature(aa64_pauth, s)) {
5355             goto do_unallocated;
5356         }
5357         break;
5358     case MAP(1, 0x01, 0x08): /* PACIZA */
5359         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5360             goto do_unallocated;
5361         } else if (s->pauth_active) {
5362             tcg_rd = cpu_reg(s, rd);
5363             gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5364         }
5365         break;
5366     case MAP(1, 0x01, 0x09): /* PACIZB */
5367         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5368             goto do_unallocated;
5369         } else if (s->pauth_active) {
5370             tcg_rd = cpu_reg(s, rd);
5371             gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5372         }
5373         break;
5374     case MAP(1, 0x01, 0x0a): /* PACDZA */
5375         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5376             goto do_unallocated;
5377         } else if (s->pauth_active) {
5378             tcg_rd = cpu_reg(s, rd);
5379             gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5380         }
5381         break;
5382     case MAP(1, 0x01, 0x0b): /* PACDZB */
5383         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5384             goto do_unallocated;
5385         } else if (s->pauth_active) {
5386             tcg_rd = cpu_reg(s, rd);
5387             gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5388         }
5389         break;
5390     case MAP(1, 0x01, 0x0c): /* AUTIZA */
5391         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5392             goto do_unallocated;
5393         } else if (s->pauth_active) {
5394             tcg_rd = cpu_reg(s, rd);
5395             gen_helper_autia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5396         }
5397         break;
5398     case MAP(1, 0x01, 0x0d): /* AUTIZB */
5399         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5400             goto do_unallocated;
5401         } else if (s->pauth_active) {
5402             tcg_rd = cpu_reg(s, rd);
5403             gen_helper_autib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5404         }
5405         break;
5406     case MAP(1, 0x01, 0x0e): /* AUTDZA */
5407         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5408             goto do_unallocated;
5409         } else if (s->pauth_active) {
5410             tcg_rd = cpu_reg(s, rd);
5411             gen_helper_autda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5412         }
5413         break;
5414     case MAP(1, 0x01, 0x0f): /* AUTDZB */
5415         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5416             goto do_unallocated;
5417         } else if (s->pauth_active) {
5418             tcg_rd = cpu_reg(s, rd);
5419             gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5420         }
5421         break;
5422     case MAP(1, 0x01, 0x10): /* XPACI */
5423         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5424             goto do_unallocated;
5425         } else if (s->pauth_active) {
5426             tcg_rd = cpu_reg(s, rd);
5427             gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
5428         }
5429         break;
5430     case MAP(1, 0x01, 0x11): /* XPACD */
5431         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5432             goto do_unallocated;
5433         } else if (s->pauth_active) {
5434             tcg_rd = cpu_reg(s, rd);
5435             gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
5436         }
5437         break;
5438     default:
5439     do_unallocated:
5440         unallocated_encoding(s);
5441         break;
5442     }
5443 
5444 #undef MAP
5445 }
5446 
5447 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5448                        unsigned int rm, unsigned int rn, unsigned int rd)
5449 {
5450     TCGv_i64 tcg_n, tcg_m, tcg_rd;
5451     tcg_rd = cpu_reg(s, rd);
5452 
5453     if (!sf && is_signed) {
5454         tcg_n = tcg_temp_new_i64();
5455         tcg_m = tcg_temp_new_i64();
5456         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5457         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5458     } else {
5459         tcg_n = read_cpu_reg(s, rn, sf);
5460         tcg_m = read_cpu_reg(s, rm, sf);
5461     }
5462 
5463     if (is_signed) {
5464         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5465     } else {
5466         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5467     }
5468 
5469     if (!sf) { /* zero extend final result */
5470         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5471     }
5472 }
5473 
5474 /* LSLV, LSRV, ASRV, RORV */
5475 static void handle_shift_reg(DisasContext *s,
5476                              enum a64_shift_type shift_type, unsigned int sf,
5477                              unsigned int rm, unsigned int rn, unsigned int rd)
5478 {
5479     TCGv_i64 tcg_shift = tcg_temp_new_i64();
5480     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5481     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5482 
5483     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5484     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5485 }
5486 
5487 /* CRC32[BHWX], CRC32C[BHWX] */
5488 static void handle_crc32(DisasContext *s,
5489                          unsigned int sf, unsigned int sz, bool crc32c,
5490                          unsigned int rm, unsigned int rn, unsigned int rd)
5491 {
5492     TCGv_i64 tcg_acc, tcg_val;
5493     TCGv_i32 tcg_bytes;
5494 
5495     if (!dc_isar_feature(aa64_crc32, s)
5496         || (sf == 1 && sz != 3)
5497         || (sf == 0 && sz == 3)) {
5498         unallocated_encoding(s);
5499         return;
5500     }
5501 
5502     if (sz == 3) {
5503         tcg_val = cpu_reg(s, rm);
5504     } else {
5505         uint64_t mask;
5506         switch (sz) {
5507         case 0:
5508             mask = 0xFF;
5509             break;
5510         case 1:
5511             mask = 0xFFFF;
5512             break;
5513         case 2:
5514             mask = 0xFFFFFFFF;
5515             break;
5516         default:
5517             g_assert_not_reached();
5518         }
5519         tcg_val = tcg_temp_new_i64();
5520         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5521     }
5522 
5523     tcg_acc = cpu_reg(s, rn);
5524     tcg_bytes = tcg_constant_i32(1 << sz);
5525 
5526     if (crc32c) {
5527         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5528     } else {
5529         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5530     }
5531 }
5532 
5533 /* Data-processing (2 source)
5534  *   31   30  29 28             21 20  16 15    10 9    5 4    0
5535  * +----+---+---+-----------------+------+--------+------+------+
5536  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
5537  * +----+---+---+-----------------+------+--------+------+------+
5538  */
5539 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5540 {
5541     unsigned int sf, rm, opcode, rn, rd, setflag;
5542     sf = extract32(insn, 31, 1);
5543     setflag = extract32(insn, 29, 1);
5544     rm = extract32(insn, 16, 5);
5545     opcode = extract32(insn, 10, 6);
5546     rn = extract32(insn, 5, 5);
5547     rd = extract32(insn, 0, 5);
5548 
5549     if (setflag && opcode != 0) {
5550         unallocated_encoding(s);
5551         return;
5552     }
5553 
5554     switch (opcode) {
5555     case 0: /* SUBP(S) */
5556         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5557             goto do_unallocated;
5558         } else {
5559             TCGv_i64 tcg_n, tcg_m, tcg_d;
5560 
5561             tcg_n = read_cpu_reg_sp(s, rn, true);
5562             tcg_m = read_cpu_reg_sp(s, rm, true);
5563             tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
5564             tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
5565             tcg_d = cpu_reg(s, rd);
5566 
5567             if (setflag) {
5568                 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
5569             } else {
5570                 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
5571             }
5572         }
5573         break;
5574     case 2: /* UDIV */
5575         handle_div(s, false, sf, rm, rn, rd);
5576         break;
5577     case 3: /* SDIV */
5578         handle_div(s, true, sf, rm, rn, rd);
5579         break;
5580     case 4: /* IRG */
5581         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5582             goto do_unallocated;
5583         }
5584         if (s->ata) {
5585             gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
5586                            cpu_reg_sp(s, rn), cpu_reg(s, rm));
5587         } else {
5588             gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
5589                                              cpu_reg_sp(s, rn));
5590         }
5591         break;
5592     case 5: /* GMI */
5593         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5594             goto do_unallocated;
5595         } else {
5596             TCGv_i64 t = tcg_temp_new_i64();
5597 
5598             tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
5599             tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
5600             tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
5601         }
5602         break;
5603     case 8: /* LSLV */
5604         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5605         break;
5606     case 9: /* LSRV */
5607         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5608         break;
5609     case 10: /* ASRV */
5610         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5611         break;
5612     case 11: /* RORV */
5613         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5614         break;
5615     case 12: /* PACGA */
5616         if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5617             goto do_unallocated;
5618         }
5619         gen_helper_pacga(cpu_reg(s, rd), cpu_env,
5620                          cpu_reg(s, rn), cpu_reg_sp(s, rm));
5621         break;
5622     case 16:
5623     case 17:
5624     case 18:
5625     case 19:
5626     case 20:
5627     case 21:
5628     case 22:
5629     case 23: /* CRC32 */
5630     {
5631         int sz = extract32(opcode, 0, 2);
5632         bool crc32c = extract32(opcode, 2, 1);
5633         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5634         break;
5635     }
5636     default:
5637     do_unallocated:
5638         unallocated_encoding(s);
5639         break;
5640     }
5641 }
5642 
5643 /*
5644  * Data processing - register
5645  *  31  30 29  28      25    21  20  16      10         0
5646  * +--+---+--+---+-------+-----+-------+-------+---------+
5647  * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
5648  * +--+---+--+---+-------+-----+-------+-------+---------+
5649  */
5650 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5651 {
5652     int op0 = extract32(insn, 30, 1);
5653     int op1 = extract32(insn, 28, 1);
5654     int op2 = extract32(insn, 21, 4);
5655     int op3 = extract32(insn, 10, 6);
5656 
5657     if (!op1) {
5658         if (op2 & 8) {
5659             if (op2 & 1) {
5660                 /* Add/sub (extended register) */
5661                 disas_add_sub_ext_reg(s, insn);
5662             } else {
5663                 /* Add/sub (shifted register) */
5664                 disas_add_sub_reg(s, insn);
5665             }
5666         } else {
5667             /* Logical (shifted register) */
5668             disas_logic_reg(s, insn);
5669         }
5670         return;
5671     }
5672 
5673     switch (op2) {
5674     case 0x0:
5675         switch (op3) {
5676         case 0x00: /* Add/subtract (with carry) */
5677             disas_adc_sbc(s, insn);
5678             break;
5679 
5680         case 0x01: /* Rotate right into flags */
5681         case 0x21:
5682             disas_rotate_right_into_flags(s, insn);
5683             break;
5684 
5685         case 0x02: /* Evaluate into flags */
5686         case 0x12:
5687         case 0x22:
5688         case 0x32:
5689             disas_evaluate_into_flags(s, insn);
5690             break;
5691 
5692         default:
5693             goto do_unallocated;
5694         }
5695         break;
5696 
5697     case 0x2: /* Conditional compare */
5698         disas_cc(s, insn); /* both imm and reg forms */
5699         break;
5700 
5701     case 0x4: /* Conditional select */
5702         disas_cond_select(s, insn);
5703         break;
5704 
5705     case 0x6: /* Data-processing */
5706         if (op0) {    /* (1 source) */
5707             disas_data_proc_1src(s, insn);
5708         } else {      /* (2 source) */
5709             disas_data_proc_2src(s, insn);
5710         }
5711         break;
5712     case 0x8 ... 0xf: /* (3 source) */
5713         disas_data_proc_3src(s, insn);
5714         break;
5715 
5716     default:
5717     do_unallocated:
5718         unallocated_encoding(s);
5719         break;
5720     }
5721 }
5722 
5723 static void handle_fp_compare(DisasContext *s, int size,
5724                               unsigned int rn, unsigned int rm,
5725                               bool cmp_with_zero, bool signal_all_nans)
5726 {
5727     TCGv_i64 tcg_flags = tcg_temp_new_i64();
5728     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
5729 
5730     if (size == MO_64) {
5731         TCGv_i64 tcg_vn, tcg_vm;
5732 
5733         tcg_vn = read_fp_dreg(s, rn);
5734         if (cmp_with_zero) {
5735             tcg_vm = tcg_constant_i64(0);
5736         } else {
5737             tcg_vm = read_fp_dreg(s, rm);
5738         }
5739         if (signal_all_nans) {
5740             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5741         } else {
5742             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5743         }
5744     } else {
5745         TCGv_i32 tcg_vn = tcg_temp_new_i32();
5746         TCGv_i32 tcg_vm = tcg_temp_new_i32();
5747 
5748         read_vec_element_i32(s, tcg_vn, rn, 0, size);
5749         if (cmp_with_zero) {
5750             tcg_gen_movi_i32(tcg_vm, 0);
5751         } else {
5752             read_vec_element_i32(s, tcg_vm, rm, 0, size);
5753         }
5754 
5755         switch (size) {
5756         case MO_32:
5757             if (signal_all_nans) {
5758                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5759             } else {
5760                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5761             }
5762             break;
5763         case MO_16:
5764             if (signal_all_nans) {
5765                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5766             } else {
5767                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5768             }
5769             break;
5770         default:
5771             g_assert_not_reached();
5772         }
5773     }
5774 
5775     gen_set_nzcv(tcg_flags);
5776 }
5777 
5778 /* Floating point compare
5779  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
5780  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5781  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
5782  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5783  */
5784 static void disas_fp_compare(DisasContext *s, uint32_t insn)
5785 {
5786     unsigned int mos, type, rm, op, rn, opc, op2r;
5787     int size;
5788 
5789     mos = extract32(insn, 29, 3);
5790     type = extract32(insn, 22, 2);
5791     rm = extract32(insn, 16, 5);
5792     op = extract32(insn, 14, 2);
5793     rn = extract32(insn, 5, 5);
5794     opc = extract32(insn, 3, 2);
5795     op2r = extract32(insn, 0, 3);
5796 
5797     if (mos || op || op2r) {
5798         unallocated_encoding(s);
5799         return;
5800     }
5801 
5802     switch (type) {
5803     case 0:
5804         size = MO_32;
5805         break;
5806     case 1:
5807         size = MO_64;
5808         break;
5809     case 3:
5810         size = MO_16;
5811         if (dc_isar_feature(aa64_fp16, s)) {
5812             break;
5813         }
5814         /* fallthru */
5815     default:
5816         unallocated_encoding(s);
5817         return;
5818     }
5819 
5820     if (!fp_access_check(s)) {
5821         return;
5822     }
5823 
5824     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
5825 }
5826 
5827 /* Floating point conditional compare
5828  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
5829  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5830  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
5831  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5832  */
5833 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
5834 {
5835     unsigned int mos, type, rm, cond, rn, op, nzcv;
5836     TCGLabel *label_continue = NULL;
5837     int size;
5838 
5839     mos = extract32(insn, 29, 3);
5840     type = extract32(insn, 22, 2);
5841     rm = extract32(insn, 16, 5);
5842     cond = extract32(insn, 12, 4);
5843     rn = extract32(insn, 5, 5);
5844     op = extract32(insn, 4, 1);
5845     nzcv = extract32(insn, 0, 4);
5846 
5847     if (mos) {
5848         unallocated_encoding(s);
5849         return;
5850     }
5851 
5852     switch (type) {
5853     case 0:
5854         size = MO_32;
5855         break;
5856     case 1:
5857         size = MO_64;
5858         break;
5859     case 3:
5860         size = MO_16;
5861         if (dc_isar_feature(aa64_fp16, s)) {
5862             break;
5863         }
5864         /* fallthru */
5865     default:
5866         unallocated_encoding(s);
5867         return;
5868     }
5869 
5870     if (!fp_access_check(s)) {
5871         return;
5872     }
5873 
5874     if (cond < 0x0e) { /* not always */
5875         TCGLabel *label_match = gen_new_label();
5876         label_continue = gen_new_label();
5877         arm_gen_test_cc(cond, label_match);
5878         /* nomatch: */
5879         gen_set_nzcv(tcg_constant_i64(nzcv << 28));
5880         tcg_gen_br(label_continue);
5881         gen_set_label(label_match);
5882     }
5883 
5884     handle_fp_compare(s, size, rn, rm, false, op);
5885 
5886     if (cond < 0x0e) {
5887         gen_set_label(label_continue);
5888     }
5889 }
5890 
5891 /* Floating point conditional select
5892  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
5893  * +---+---+---+-----------+------+---+------+------+-----+------+------+
5894  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
5895  * +---+---+---+-----------+------+---+------+------+-----+------+------+
5896  */
5897 static void disas_fp_csel(DisasContext *s, uint32_t insn)
5898 {
5899     unsigned int mos, type, rm, cond, rn, rd;
5900     TCGv_i64 t_true, t_false;
5901     DisasCompare64 c;
5902     MemOp sz;
5903 
5904     mos = extract32(insn, 29, 3);
5905     type = extract32(insn, 22, 2);
5906     rm = extract32(insn, 16, 5);
5907     cond = extract32(insn, 12, 4);
5908     rn = extract32(insn, 5, 5);
5909     rd = extract32(insn, 0, 5);
5910 
5911     if (mos) {
5912         unallocated_encoding(s);
5913         return;
5914     }
5915 
5916     switch (type) {
5917     case 0:
5918         sz = MO_32;
5919         break;
5920     case 1:
5921         sz = MO_64;
5922         break;
5923     case 3:
5924         sz = MO_16;
5925         if (dc_isar_feature(aa64_fp16, s)) {
5926             break;
5927         }
5928         /* fallthru */
5929     default:
5930         unallocated_encoding(s);
5931         return;
5932     }
5933 
5934     if (!fp_access_check(s)) {
5935         return;
5936     }
5937 
5938     /* Zero extend sreg & hreg inputs to 64 bits now.  */
5939     t_true = tcg_temp_new_i64();
5940     t_false = tcg_temp_new_i64();
5941     read_vec_element(s, t_true, rn, 0, sz);
5942     read_vec_element(s, t_false, rm, 0, sz);
5943 
5944     a64_test_cc(&c, cond);
5945     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
5946                         t_true, t_false);
5947 
5948     /* Note that sregs & hregs write back zeros to the high bits,
5949        and we've already done the zero-extension.  */
5950     write_fp_dreg(s, rd, t_true);
5951 }
5952 
5953 /* Floating-point data-processing (1 source) - half precision */
5954 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
5955 {
5956     TCGv_ptr fpst = NULL;
5957     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
5958     TCGv_i32 tcg_res = tcg_temp_new_i32();
5959 
5960     switch (opcode) {
5961     case 0x0: /* FMOV */
5962         tcg_gen_mov_i32(tcg_res, tcg_op);
5963         break;
5964     case 0x1: /* FABS */
5965         tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
5966         break;
5967     case 0x2: /* FNEG */
5968         tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
5969         break;
5970     case 0x3: /* FSQRT */
5971         fpst = fpstatus_ptr(FPST_FPCR_F16);
5972         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
5973         break;
5974     case 0x8: /* FRINTN */
5975     case 0x9: /* FRINTP */
5976     case 0xa: /* FRINTM */
5977     case 0xb: /* FRINTZ */
5978     case 0xc: /* FRINTA */
5979     {
5980         TCGv_i32 tcg_rmode;
5981 
5982         fpst = fpstatus_ptr(FPST_FPCR_F16);
5983         tcg_rmode = gen_set_rmode(opcode & 7, fpst);
5984         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5985         gen_restore_rmode(tcg_rmode, fpst);
5986         break;
5987     }
5988     case 0xe: /* FRINTX */
5989         fpst = fpstatus_ptr(FPST_FPCR_F16);
5990         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
5991         break;
5992     case 0xf: /* FRINTI */
5993         fpst = fpstatus_ptr(FPST_FPCR_F16);
5994         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5995         break;
5996     default:
5997         g_assert_not_reached();
5998     }
5999 
6000     write_fp_sreg(s, rd, tcg_res);
6001 }
6002 
6003 /* Floating-point data-processing (1 source) - single precision */
6004 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
6005 {
6006     void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
6007     TCGv_i32 tcg_op, tcg_res;
6008     TCGv_ptr fpst;
6009     int rmode = -1;
6010 
6011     tcg_op = read_fp_sreg(s, rn);
6012     tcg_res = tcg_temp_new_i32();
6013 
6014     switch (opcode) {
6015     case 0x0: /* FMOV */
6016         tcg_gen_mov_i32(tcg_res, tcg_op);
6017         goto done;
6018     case 0x1: /* FABS */
6019         gen_helper_vfp_abss(tcg_res, tcg_op);
6020         goto done;
6021     case 0x2: /* FNEG */
6022         gen_helper_vfp_negs(tcg_res, tcg_op);
6023         goto done;
6024     case 0x3: /* FSQRT */
6025         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
6026         goto done;
6027     case 0x6: /* BFCVT */
6028         gen_fpst = gen_helper_bfcvt;
6029         break;
6030     case 0x8: /* FRINTN */
6031     case 0x9: /* FRINTP */
6032     case 0xa: /* FRINTM */
6033     case 0xb: /* FRINTZ */
6034     case 0xc: /* FRINTA */
6035         rmode = opcode & 7;
6036         gen_fpst = gen_helper_rints;
6037         break;
6038     case 0xe: /* FRINTX */
6039         gen_fpst = gen_helper_rints_exact;
6040         break;
6041     case 0xf: /* FRINTI */
6042         gen_fpst = gen_helper_rints;
6043         break;
6044     case 0x10: /* FRINT32Z */
6045         rmode = FPROUNDING_ZERO;
6046         gen_fpst = gen_helper_frint32_s;
6047         break;
6048     case 0x11: /* FRINT32X */
6049         gen_fpst = gen_helper_frint32_s;
6050         break;
6051     case 0x12: /* FRINT64Z */
6052         rmode = FPROUNDING_ZERO;
6053         gen_fpst = gen_helper_frint64_s;
6054         break;
6055     case 0x13: /* FRINT64X */
6056         gen_fpst = gen_helper_frint64_s;
6057         break;
6058     default:
6059         g_assert_not_reached();
6060     }
6061 
6062     fpst = fpstatus_ptr(FPST_FPCR);
6063     if (rmode >= 0) {
6064         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6065         gen_fpst(tcg_res, tcg_op, fpst);
6066         gen_restore_rmode(tcg_rmode, fpst);
6067     } else {
6068         gen_fpst(tcg_res, tcg_op, fpst);
6069     }
6070 
6071  done:
6072     write_fp_sreg(s, rd, tcg_res);
6073 }
6074 
6075 /* Floating-point data-processing (1 source) - double precision */
6076 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
6077 {
6078     void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
6079     TCGv_i64 tcg_op, tcg_res;
6080     TCGv_ptr fpst;
6081     int rmode = -1;
6082 
6083     switch (opcode) {
6084     case 0x0: /* FMOV */
6085         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
6086         return;
6087     }
6088 
6089     tcg_op = read_fp_dreg(s, rn);
6090     tcg_res = tcg_temp_new_i64();
6091 
6092     switch (opcode) {
6093     case 0x1: /* FABS */
6094         gen_helper_vfp_absd(tcg_res, tcg_op);
6095         goto done;
6096     case 0x2: /* FNEG */
6097         gen_helper_vfp_negd(tcg_res, tcg_op);
6098         goto done;
6099     case 0x3: /* FSQRT */
6100         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
6101         goto done;
6102     case 0x8: /* FRINTN */
6103     case 0x9: /* FRINTP */
6104     case 0xa: /* FRINTM */
6105     case 0xb: /* FRINTZ */
6106     case 0xc: /* FRINTA */
6107         rmode = opcode & 7;
6108         gen_fpst = gen_helper_rintd;
6109         break;
6110     case 0xe: /* FRINTX */
6111         gen_fpst = gen_helper_rintd_exact;
6112         break;
6113     case 0xf: /* FRINTI */
6114         gen_fpst = gen_helper_rintd;
6115         break;
6116     case 0x10: /* FRINT32Z */
6117         rmode = FPROUNDING_ZERO;
6118         gen_fpst = gen_helper_frint32_d;
6119         break;
6120     case 0x11: /* FRINT32X */
6121         gen_fpst = gen_helper_frint32_d;
6122         break;
6123     case 0x12: /* FRINT64Z */
6124         rmode = FPROUNDING_ZERO;
6125         gen_fpst = gen_helper_frint64_d;
6126         break;
6127     case 0x13: /* FRINT64X */
6128         gen_fpst = gen_helper_frint64_d;
6129         break;
6130     default:
6131         g_assert_not_reached();
6132     }
6133 
6134     fpst = fpstatus_ptr(FPST_FPCR);
6135     if (rmode >= 0) {
6136         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6137         gen_fpst(tcg_res, tcg_op, fpst);
6138         gen_restore_rmode(tcg_rmode, fpst);
6139     } else {
6140         gen_fpst(tcg_res, tcg_op, fpst);
6141     }
6142 
6143  done:
6144     write_fp_dreg(s, rd, tcg_res);
6145 }
6146 
6147 static void handle_fp_fcvt(DisasContext *s, int opcode,
6148                            int rd, int rn, int dtype, int ntype)
6149 {
6150     switch (ntype) {
6151     case 0x0:
6152     {
6153         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6154         if (dtype == 1) {
6155             /* Single to double */
6156             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6157             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
6158             write_fp_dreg(s, rd, tcg_rd);
6159         } else {
6160             /* Single to half */
6161             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6162             TCGv_i32 ahp = get_ahp_flag();
6163             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6164 
6165             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6166             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6167             write_fp_sreg(s, rd, tcg_rd);
6168         }
6169         break;
6170     }
6171     case 0x1:
6172     {
6173         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
6174         TCGv_i32 tcg_rd = tcg_temp_new_i32();
6175         if (dtype == 0) {
6176             /* Double to single */
6177             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
6178         } else {
6179             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6180             TCGv_i32 ahp = get_ahp_flag();
6181             /* Double to half */
6182             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6183             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6184         }
6185         write_fp_sreg(s, rd, tcg_rd);
6186         break;
6187     }
6188     case 0x3:
6189     {
6190         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6191         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
6192         TCGv_i32 tcg_ahp = get_ahp_flag();
6193         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
6194         if (dtype == 0) {
6195             /* Half to single */
6196             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6197             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6198             write_fp_sreg(s, rd, tcg_rd);
6199         } else {
6200             /* Half to double */
6201             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6202             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6203             write_fp_dreg(s, rd, tcg_rd);
6204         }
6205         break;
6206     }
6207     default:
6208         g_assert_not_reached();
6209     }
6210 }
6211 
6212 /* Floating point data-processing (1 source)
6213  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
6214  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6215  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
6216  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6217  */
6218 static void disas_fp_1src(DisasContext *s, uint32_t insn)
6219 {
6220     int mos = extract32(insn, 29, 3);
6221     int type = extract32(insn, 22, 2);
6222     int opcode = extract32(insn, 15, 6);
6223     int rn = extract32(insn, 5, 5);
6224     int rd = extract32(insn, 0, 5);
6225 
6226     if (mos) {
6227         goto do_unallocated;
6228     }
6229 
6230     switch (opcode) {
6231     case 0x4: case 0x5: case 0x7:
6232     {
6233         /* FCVT between half, single and double precision */
6234         int dtype = extract32(opcode, 0, 2);
6235         if (type == 2 || dtype == type) {
6236             goto do_unallocated;
6237         }
6238         if (!fp_access_check(s)) {
6239             return;
6240         }
6241 
6242         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
6243         break;
6244     }
6245 
6246     case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
6247         if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
6248             goto do_unallocated;
6249         }
6250         /* fall through */
6251     case 0x0 ... 0x3:
6252     case 0x8 ... 0xc:
6253     case 0xe ... 0xf:
6254         /* 32-to-32 and 64-to-64 ops */
6255         switch (type) {
6256         case 0:
6257             if (!fp_access_check(s)) {
6258                 return;
6259             }
6260             handle_fp_1src_single(s, opcode, rd, rn);
6261             break;
6262         case 1:
6263             if (!fp_access_check(s)) {
6264                 return;
6265             }
6266             handle_fp_1src_double(s, opcode, rd, rn);
6267             break;
6268         case 3:
6269             if (!dc_isar_feature(aa64_fp16, s)) {
6270                 goto do_unallocated;
6271             }
6272 
6273             if (!fp_access_check(s)) {
6274                 return;
6275             }
6276             handle_fp_1src_half(s, opcode, rd, rn);
6277             break;
6278         default:
6279             goto do_unallocated;
6280         }
6281         break;
6282 
6283     case 0x6:
6284         switch (type) {
6285         case 1: /* BFCVT */
6286             if (!dc_isar_feature(aa64_bf16, s)) {
6287                 goto do_unallocated;
6288             }
6289             if (!fp_access_check(s)) {
6290                 return;
6291             }
6292             handle_fp_1src_single(s, opcode, rd, rn);
6293             break;
6294         default:
6295             goto do_unallocated;
6296         }
6297         break;
6298 
6299     default:
6300     do_unallocated:
6301         unallocated_encoding(s);
6302         break;
6303     }
6304 }
6305 
6306 /* Floating-point data-processing (2 source) - single precision */
6307 static void handle_fp_2src_single(DisasContext *s, int opcode,
6308                                   int rd, int rn, int rm)
6309 {
6310     TCGv_i32 tcg_op1;
6311     TCGv_i32 tcg_op2;
6312     TCGv_i32 tcg_res;
6313     TCGv_ptr fpst;
6314 
6315     tcg_res = tcg_temp_new_i32();
6316     fpst = fpstatus_ptr(FPST_FPCR);
6317     tcg_op1 = read_fp_sreg(s, rn);
6318     tcg_op2 = read_fp_sreg(s, rm);
6319 
6320     switch (opcode) {
6321     case 0x0: /* FMUL */
6322         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6323         break;
6324     case 0x1: /* FDIV */
6325         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
6326         break;
6327     case 0x2: /* FADD */
6328         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6329         break;
6330     case 0x3: /* FSUB */
6331         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
6332         break;
6333     case 0x4: /* FMAX */
6334         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6335         break;
6336     case 0x5: /* FMIN */
6337         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6338         break;
6339     case 0x6: /* FMAXNM */
6340         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6341         break;
6342     case 0x7: /* FMINNM */
6343         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6344         break;
6345     case 0x8: /* FNMUL */
6346         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6347         gen_helper_vfp_negs(tcg_res, tcg_res);
6348         break;
6349     }
6350 
6351     write_fp_sreg(s, rd, tcg_res);
6352 }
6353 
6354 /* Floating-point data-processing (2 source) - double precision */
6355 static void handle_fp_2src_double(DisasContext *s, int opcode,
6356                                   int rd, int rn, int rm)
6357 {
6358     TCGv_i64 tcg_op1;
6359     TCGv_i64 tcg_op2;
6360     TCGv_i64 tcg_res;
6361     TCGv_ptr fpst;
6362 
6363     tcg_res = tcg_temp_new_i64();
6364     fpst = fpstatus_ptr(FPST_FPCR);
6365     tcg_op1 = read_fp_dreg(s, rn);
6366     tcg_op2 = read_fp_dreg(s, rm);
6367 
6368     switch (opcode) {
6369     case 0x0: /* FMUL */
6370         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6371         break;
6372     case 0x1: /* FDIV */
6373         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
6374         break;
6375     case 0x2: /* FADD */
6376         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6377         break;
6378     case 0x3: /* FSUB */
6379         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
6380         break;
6381     case 0x4: /* FMAX */
6382         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6383         break;
6384     case 0x5: /* FMIN */
6385         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6386         break;
6387     case 0x6: /* FMAXNM */
6388         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6389         break;
6390     case 0x7: /* FMINNM */
6391         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6392         break;
6393     case 0x8: /* FNMUL */
6394         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6395         gen_helper_vfp_negd(tcg_res, tcg_res);
6396         break;
6397     }
6398 
6399     write_fp_dreg(s, rd, tcg_res);
6400 }
6401 
6402 /* Floating-point data-processing (2 source) - half precision */
6403 static void handle_fp_2src_half(DisasContext *s, int opcode,
6404                                 int rd, int rn, int rm)
6405 {
6406     TCGv_i32 tcg_op1;
6407     TCGv_i32 tcg_op2;
6408     TCGv_i32 tcg_res;
6409     TCGv_ptr fpst;
6410 
6411     tcg_res = tcg_temp_new_i32();
6412     fpst = fpstatus_ptr(FPST_FPCR_F16);
6413     tcg_op1 = read_fp_hreg(s, rn);
6414     tcg_op2 = read_fp_hreg(s, rm);
6415 
6416     switch (opcode) {
6417     case 0x0: /* FMUL */
6418         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6419         break;
6420     case 0x1: /* FDIV */
6421         gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
6422         break;
6423     case 0x2: /* FADD */
6424         gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6425         break;
6426     case 0x3: /* FSUB */
6427         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6428         break;
6429     case 0x4: /* FMAX */
6430         gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6431         break;
6432     case 0x5: /* FMIN */
6433         gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6434         break;
6435     case 0x6: /* FMAXNM */
6436         gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6437         break;
6438     case 0x7: /* FMINNM */
6439         gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6440         break;
6441     case 0x8: /* FNMUL */
6442         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6443         tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6444         break;
6445     default:
6446         g_assert_not_reached();
6447     }
6448 
6449     write_fp_sreg(s, rd, tcg_res);
6450 }
6451 
6452 /* Floating point data-processing (2 source)
6453  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6454  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6455  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
6456  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6457  */
6458 static void disas_fp_2src(DisasContext *s, uint32_t insn)
6459 {
6460     int mos = extract32(insn, 29, 3);
6461     int type = extract32(insn, 22, 2);
6462     int rd = extract32(insn, 0, 5);
6463     int rn = extract32(insn, 5, 5);
6464     int rm = extract32(insn, 16, 5);
6465     int opcode = extract32(insn, 12, 4);
6466 
6467     if (opcode > 8 || mos) {
6468         unallocated_encoding(s);
6469         return;
6470     }
6471 
6472     switch (type) {
6473     case 0:
6474         if (!fp_access_check(s)) {
6475             return;
6476         }
6477         handle_fp_2src_single(s, opcode, rd, rn, rm);
6478         break;
6479     case 1:
6480         if (!fp_access_check(s)) {
6481             return;
6482         }
6483         handle_fp_2src_double(s, opcode, rd, rn, rm);
6484         break;
6485     case 3:
6486         if (!dc_isar_feature(aa64_fp16, s)) {
6487             unallocated_encoding(s);
6488             return;
6489         }
6490         if (!fp_access_check(s)) {
6491             return;
6492         }
6493         handle_fp_2src_half(s, opcode, rd, rn, rm);
6494         break;
6495     default:
6496         unallocated_encoding(s);
6497     }
6498 }
6499 
6500 /* Floating-point data-processing (3 source) - single precision */
6501 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6502                                   int rd, int rn, int rm, int ra)
6503 {
6504     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6505     TCGv_i32 tcg_res = tcg_temp_new_i32();
6506     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6507 
6508     tcg_op1 = read_fp_sreg(s, rn);
6509     tcg_op2 = read_fp_sreg(s, rm);
6510     tcg_op3 = read_fp_sreg(s, ra);
6511 
6512     /* These are fused multiply-add, and must be done as one
6513      * floating point operation with no rounding between the
6514      * multiplication and addition steps.
6515      * NB that doing the negations here as separate steps is
6516      * correct : an input NaN should come out with its sign bit
6517      * flipped if it is a negated-input.
6518      */
6519     if (o1 == true) {
6520         gen_helper_vfp_negs(tcg_op3, tcg_op3);
6521     }
6522 
6523     if (o0 != o1) {
6524         gen_helper_vfp_negs(tcg_op1, tcg_op1);
6525     }
6526 
6527     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6528 
6529     write_fp_sreg(s, rd, tcg_res);
6530 }
6531 
6532 /* Floating-point data-processing (3 source) - double precision */
6533 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6534                                   int rd, int rn, int rm, int ra)
6535 {
6536     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6537     TCGv_i64 tcg_res = tcg_temp_new_i64();
6538     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6539 
6540     tcg_op1 = read_fp_dreg(s, rn);
6541     tcg_op2 = read_fp_dreg(s, rm);
6542     tcg_op3 = read_fp_dreg(s, ra);
6543 
6544     /* These are fused multiply-add, and must be done as one
6545      * floating point operation with no rounding between the
6546      * multiplication and addition steps.
6547      * NB that doing the negations here as separate steps is
6548      * correct : an input NaN should come out with its sign bit
6549      * flipped if it is a negated-input.
6550      */
6551     if (o1 == true) {
6552         gen_helper_vfp_negd(tcg_op3, tcg_op3);
6553     }
6554 
6555     if (o0 != o1) {
6556         gen_helper_vfp_negd(tcg_op1, tcg_op1);
6557     }
6558 
6559     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6560 
6561     write_fp_dreg(s, rd, tcg_res);
6562 }
6563 
6564 /* Floating-point data-processing (3 source) - half precision */
6565 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6566                                 int rd, int rn, int rm, int ra)
6567 {
6568     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6569     TCGv_i32 tcg_res = tcg_temp_new_i32();
6570     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
6571 
6572     tcg_op1 = read_fp_hreg(s, rn);
6573     tcg_op2 = read_fp_hreg(s, rm);
6574     tcg_op3 = read_fp_hreg(s, ra);
6575 
6576     /* These are fused multiply-add, and must be done as one
6577      * floating point operation with no rounding between the
6578      * multiplication and addition steps.
6579      * NB that doing the negations here as separate steps is
6580      * correct : an input NaN should come out with its sign bit
6581      * flipped if it is a negated-input.
6582      */
6583     if (o1 == true) {
6584         tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6585     }
6586 
6587     if (o0 != o1) {
6588         tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6589     }
6590 
6591     gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6592 
6593     write_fp_sreg(s, rd, tcg_res);
6594 }
6595 
6596 /* Floating point data-processing (3 source)
6597  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
6598  * +---+---+---+-----------+------+----+------+----+------+------+------+
6599  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6600  * +---+---+---+-----------+------+----+------+----+------+------+------+
6601  */
6602 static void disas_fp_3src(DisasContext *s, uint32_t insn)
6603 {
6604     int mos = extract32(insn, 29, 3);
6605     int type = extract32(insn, 22, 2);
6606     int rd = extract32(insn, 0, 5);
6607     int rn = extract32(insn, 5, 5);
6608     int ra = extract32(insn, 10, 5);
6609     int rm = extract32(insn, 16, 5);
6610     bool o0 = extract32(insn, 15, 1);
6611     bool o1 = extract32(insn, 21, 1);
6612 
6613     if (mos) {
6614         unallocated_encoding(s);
6615         return;
6616     }
6617 
6618     switch (type) {
6619     case 0:
6620         if (!fp_access_check(s)) {
6621             return;
6622         }
6623         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6624         break;
6625     case 1:
6626         if (!fp_access_check(s)) {
6627             return;
6628         }
6629         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6630         break;
6631     case 3:
6632         if (!dc_isar_feature(aa64_fp16, s)) {
6633             unallocated_encoding(s);
6634             return;
6635         }
6636         if (!fp_access_check(s)) {
6637             return;
6638         }
6639         handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6640         break;
6641     default:
6642         unallocated_encoding(s);
6643     }
6644 }
6645 
6646 /* Floating point immediate
6647  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
6648  * +---+---+---+-----------+------+---+------------+-------+------+------+
6649  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
6650  * +---+---+---+-----------+------+---+------------+-------+------+------+
6651  */
6652 static void disas_fp_imm(DisasContext *s, uint32_t insn)
6653 {
6654     int rd = extract32(insn, 0, 5);
6655     int imm5 = extract32(insn, 5, 5);
6656     int imm8 = extract32(insn, 13, 8);
6657     int type = extract32(insn, 22, 2);
6658     int mos = extract32(insn, 29, 3);
6659     uint64_t imm;
6660     MemOp sz;
6661 
6662     if (mos || imm5) {
6663         unallocated_encoding(s);
6664         return;
6665     }
6666 
6667     switch (type) {
6668     case 0:
6669         sz = MO_32;
6670         break;
6671     case 1:
6672         sz = MO_64;
6673         break;
6674     case 3:
6675         sz = MO_16;
6676         if (dc_isar_feature(aa64_fp16, s)) {
6677             break;
6678         }
6679         /* fallthru */
6680     default:
6681         unallocated_encoding(s);
6682         return;
6683     }
6684 
6685     if (!fp_access_check(s)) {
6686         return;
6687     }
6688 
6689     imm = vfp_expand_imm(sz, imm8);
6690     write_fp_dreg(s, rd, tcg_constant_i64(imm));
6691 }
6692 
/* Handle floating point <=> fixed point conversions. Note that we can
 * also deal with fp <=> integer conversions as a special case (scale == 64)
 * OPTME: consider handling that special case specially or at least skipping
 * the call to scalbn in the helpers for zero shifts.
 *
 * @rd, @rn: destination and source register numbers.  For @itof the
 *           source is a general-purpose register and the destination an
 *           FP register; otherwise it is the other way round.
 * @opcode:  bit 0 clear selects the signed variant; in the fp -> int
 *           direction bit 2 set forces FPROUNDING_TIEAWAY.
 * @itof:    true for integer -> fp, false for fp -> integer.
 * @rmode:   rounding mode, used only in the fp -> int direction.
 * @scale:   the helpers are passed a shift of (64 - scale).
 * @sf:      zero selects a 32-bit integer operand/result.
 * @type:    0 = float32, 1 = float64, 3 = float16; anything else asserts.
 */
static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
                           bool itof, int rmode, int scale, int sf, int type)
{
    /* Even opcodes are the signed variants, odd ones unsigned. */
    bool is_signed = !(opcode & 1);
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_shift, tcg_single;
    TCGv_i64 tcg_double;

    /* Half precision uses its own float status. */
    tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);

    /* scale == 64 (plain integer conversion) gives a shift of zero. */
    tcg_shift = tcg_constant_i32(64 - scale);

    if (itof) {
        /* Integer (or fixed point) in a GPR -> floating point register. */
        TCGv_i64 tcg_int = cpu_reg(s, rn);
        if (!sf) {
            /*
             * 32-bit source: widen the low 32 bits into a fresh temp,
             * since the helpers below take a 64-bit integer input.
             */
            TCGv_i64 tcg_extend = tcg_temp_new_i64();

            if (is_signed) {
                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
            } else {
                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
            }

            tcg_int = tcg_extend;
        }

        switch (type) {
        case 1: /* float64 */
            tcg_double = tcg_temp_new_i64();
            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_dreg(s, rd, tcg_double);
            break;

        case 0: /* float32 */
            tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_sreg(s, rd, tcg_single);
            break;

        case 3: /* float16 */
            tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtoh(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtoh(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            /* The half-precision result is stored via the 32-bit writer. */
            write_fp_sreg(s, rd, tcg_single);
            break;

        default:
            g_assert_not_reached();
        }
    } else {
        /* Floating point register -> integer (or fixed point) in a GPR. */
        TCGv_i64 tcg_int = cpu_reg(s, rd);
        TCGv_i32 tcg_rmode;

        if (extract32(opcode, 2, 1)) {
            /* There are too many rounding modes to all fit into rmode,
             * so FCVTA[US] is a special case.
             */
            rmode = FPROUNDING_TIEAWAY;
        }

        /*
         * Switch rounding mode for the conversion; the value returned
         * here is handed to gen_restore_rmode() once it has been emitted.
         */
        tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);

        switch (type) {
        case 1: /* float64 */
            tcg_double = read_fp_dreg(s, rn);
            if (is_signed) {
                if (!sf) {
                    gen_helper_vfp_tosld(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                if (!sf) {
                    gen_helper_vfp_tould(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            }
            if (!sf) {
                /* 32-bit result: clear the upper half of the GPR. */
                tcg_gen_ext32u_i64(tcg_int, tcg_int);
            }
            break;

        case 0: /* float32 */
            tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                /* 32-bit result goes through an i32 temp, then zero-extends. */
                TCGv_i32 tcg_dest = tcg_temp_new_i32();
                if (is_signed) {
                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
            }
            break;

        case 3: /* float16 */
            /*
             * NOTE(review): the source is fetched with the 32-bit reader;
             * presumably the *h helpers only look at the low 16 bits --
             * confirm against the helper definitions.
             */
            tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqh(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqh(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                /* 32-bit result goes through an i32 temp, then zero-extends. */
                TCGv_i32 tcg_dest = tcg_temp_new_i32();
                if (is_signed) {
                    gen_helper_vfp_toslh(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_toulh(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
            }
            break;

        default:
            g_assert_not_reached();
        }

        /* Undo the rounding mode change made by gen_set_rmode() above. */
        gen_restore_rmode(tcg_rmode, tcg_fpstatus);
    }
}
6855 
6856 /* Floating point <-> fixed point conversions
6857  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
6858  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6859  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
6860  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6861  */
6862 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
6863 {
6864     int rd = extract32(insn, 0, 5);
6865     int rn = extract32(insn, 5, 5);
6866     int scale = extract32(insn, 10, 6);
6867     int opcode = extract32(insn, 16, 3);
6868     int rmode = extract32(insn, 19, 2);
6869     int type = extract32(insn, 22, 2);
6870     bool sbit = extract32(insn, 29, 1);
6871     bool sf = extract32(insn, 31, 1);
6872     bool itof;
6873 
6874     if (sbit || (!sf && scale < 32)) {
6875         unallocated_encoding(s);
6876         return;
6877     }
6878 
6879     switch (type) {
6880     case 0: /* float32 */
6881     case 1: /* float64 */
6882         break;
6883     case 3: /* float16 */
6884         if (dc_isar_feature(aa64_fp16, s)) {
6885             break;
6886         }
6887         /* fallthru */
6888     default:
6889         unallocated_encoding(s);
6890         return;
6891     }
6892 
6893     switch ((rmode << 3) | opcode) {
6894     case 0x2: /* SCVTF */
6895     case 0x3: /* UCVTF */
6896         itof = true;
6897         break;
6898     case 0x18: /* FCVTZS */
6899     case 0x19: /* FCVTZU */
6900         itof = false;
6901         break;
6902     default:
6903         unallocated_encoding(s);
6904         return;
6905     }
6906 
6907     if (!fp_access_check(s)) {
6908         return;
6909     }
6910 
6911     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
6912 }
6913 
6914 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
6915 {
6916     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
6917      * without conversion.
6918      */
6919 
6920     if (itof) {
6921         TCGv_i64 tcg_rn = cpu_reg(s, rn);
6922         TCGv_i64 tmp;
6923 
6924         switch (type) {
6925         case 0:
6926             /* 32 bit */
6927             tmp = tcg_temp_new_i64();
6928             tcg_gen_ext32u_i64(tmp, tcg_rn);
6929             write_fp_dreg(s, rd, tmp);
6930             break;
6931         case 1:
6932             /* 64 bit */
6933             write_fp_dreg(s, rd, tcg_rn);
6934             break;
6935         case 2:
6936             /* 64 bit to top half. */
6937             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
6938             clear_vec_high(s, true, rd);
6939             break;
6940         case 3:
6941             /* 16 bit */
6942             tmp = tcg_temp_new_i64();
6943             tcg_gen_ext16u_i64(tmp, tcg_rn);
6944             write_fp_dreg(s, rd, tmp);
6945             break;
6946         default:
6947             g_assert_not_reached();
6948         }
6949     } else {
6950         TCGv_i64 tcg_rd = cpu_reg(s, rd);
6951 
6952         switch (type) {
6953         case 0:
6954             /* 32 bit */
6955             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
6956             break;
6957         case 1:
6958             /* 64 bit */
6959             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
6960             break;
6961         case 2:
6962             /* 64 bits from top half */
6963             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
6964             break;
6965         case 3:
6966             /* 16 bit */
6967             tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
6968             break;
6969         default:
6970             g_assert_not_reached();
6971         }
6972     }
6973 }
6974 
6975 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
6976 {
6977     TCGv_i64 t = read_fp_dreg(s, rn);
6978     TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
6979 
6980     gen_helper_fjcvtzs(t, t, fpstatus);
6981 
6982     tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
6983     tcg_gen_extrh_i64_i32(cpu_ZF, t);
6984     tcg_gen_movi_i32(cpu_CF, 0);
6985     tcg_gen_movi_i32(cpu_NF, 0);
6986     tcg_gen_movi_i32(cpu_VF, 0);
6987 }
6988 
6989 /* Floating point <-> integer conversions
6990  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
6991  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6992  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
6993  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6994  */
6995 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
6996 {
6997     int rd = extract32(insn, 0, 5);
6998     int rn = extract32(insn, 5, 5);
6999     int opcode = extract32(insn, 16, 3);
7000     int rmode = extract32(insn, 19, 2);
7001     int type = extract32(insn, 22, 2);
7002     bool sbit = extract32(insn, 29, 1);
7003     bool sf = extract32(insn, 31, 1);
7004     bool itof = false;
7005 
7006     if (sbit) {
7007         goto do_unallocated;
7008     }
7009 
7010     switch (opcode) {
7011     case 2: /* SCVTF */
7012     case 3: /* UCVTF */
7013         itof = true;
7014         /* fallthru */
7015     case 4: /* FCVTAS */
7016     case 5: /* FCVTAU */
7017         if (rmode != 0) {
7018             goto do_unallocated;
7019         }
7020         /* fallthru */
7021     case 0: /* FCVT[NPMZ]S */
7022     case 1: /* FCVT[NPMZ]U */
7023         switch (type) {
7024         case 0: /* float32 */
7025         case 1: /* float64 */
7026             break;
7027         case 3: /* float16 */
7028             if (!dc_isar_feature(aa64_fp16, s)) {
7029                 goto do_unallocated;
7030             }
7031             break;
7032         default:
7033             goto do_unallocated;
7034         }
7035         if (!fp_access_check(s)) {
7036             return;
7037         }
7038         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
7039         break;
7040 
7041     default:
7042         switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
7043         case 0b01100110: /* FMOV half <-> 32-bit int */
7044         case 0b01100111:
7045         case 0b11100110: /* FMOV half <-> 64-bit int */
7046         case 0b11100111:
7047             if (!dc_isar_feature(aa64_fp16, s)) {
7048                 goto do_unallocated;
7049             }
7050             /* fallthru */
7051         case 0b00000110: /* FMOV 32-bit */
7052         case 0b00000111:
7053         case 0b10100110: /* FMOV 64-bit */
7054         case 0b10100111:
7055         case 0b11001110: /* FMOV top half of 128-bit */
7056         case 0b11001111:
7057             if (!fp_access_check(s)) {
7058                 return;
7059             }
7060             itof = opcode & 1;
7061             handle_fmov(s, rd, rn, type, itof);
7062             break;
7063 
7064         case 0b00111110: /* FJCVTZS */
7065             if (!dc_isar_feature(aa64_jscvt, s)) {
7066                 goto do_unallocated;
7067             } else if (fp_access_check(s)) {
7068                 handle_fjcvtzs(s, rd, rn);
7069             }
7070             break;
7071 
7072         default:
7073         do_unallocated:
7074             unallocated_encoding(s);
7075             return;
7076         }
7077         break;
7078     }
7079 }
7080 
7081 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
7082  *   31  30  29 28     25 24                          0
7083  * +---+---+---+---------+-----------------------------+
7084  * |   | 0 |   | 1 1 1 1 |                             |
7085  * +---+---+---+---------+-----------------------------+
7086  */
7087 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
7088 {
7089     if (extract32(insn, 24, 1)) {
7090         /* Floating point data-processing (3 source) */
7091         disas_fp_3src(s, insn);
7092     } else if (extract32(insn, 21, 1) == 0) {
7093         /* Floating point to fixed point conversions */
7094         disas_fp_fixed_conv(s, insn);
7095     } else {
7096         switch (extract32(insn, 10, 2)) {
7097         case 1:
7098             /* Floating point conditional compare */
7099             disas_fp_ccomp(s, insn);
7100             break;
7101         case 2:
7102             /* Floating point data-processing (2 source) */
7103             disas_fp_2src(s, insn);
7104             break;
7105         case 3:
7106             /* Floating point conditional select */
7107             disas_fp_csel(s, insn);
7108             break;
7109         case 0:
7110             switch (ctz32(extract32(insn, 12, 4))) {
7111             case 0: /* [15:12] == xxx1 */
7112                 /* Floating point immediate */
7113                 disas_fp_imm(s, insn);
7114                 break;
7115             case 1: /* [15:12] == xx10 */
7116                 /* Floating point compare */
7117                 disas_fp_compare(s, insn);
7118                 break;
7119             case 2: /* [15:12] == x100 */
7120                 /* Floating point data-processing (1 source) */
7121                 disas_fp_1src(s, insn);
7122                 break;
7123             case 3: /* [15:12] == 1000 */
7124                 unallocated_encoding(s);
7125                 break;
7126             default: /* [15:12] == 0000 */
7127                 /* Floating point <-> integer conversions */
7128                 disas_fp_int_conv(s, insn);
7129                 break;
7130             }
7131             break;
7132         }
7133     }
7134 }
7135 
7136 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
7137                      int pos)
7138 {
7139     /* Extract 64 bits from the middle of two concatenated 64 bit
7140      * vector register slices left:right. The extracted bits start
7141      * at 'pos' bits into the right (least significant) side.
7142      * We return the result in tcg_right, and guarantee not to
7143      * trash tcg_left.
7144      */
7145     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7146     assert(pos > 0 && pos < 64);
7147 
7148     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
7149     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
7150     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
7151 }
7152 
7153 /* EXT
7154  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
7155  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7156  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
7157  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7158  */
7159 static void disas_simd_ext(DisasContext *s, uint32_t insn)
7160 {
7161     int is_q = extract32(insn, 30, 1);
7162     int op2 = extract32(insn, 22, 2);
7163     int imm4 = extract32(insn, 11, 4);
7164     int rm = extract32(insn, 16, 5);
7165     int rn = extract32(insn, 5, 5);
7166     int rd = extract32(insn, 0, 5);
7167     int pos = imm4 << 3;
7168     TCGv_i64 tcg_resl, tcg_resh;
7169 
7170     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
7171         unallocated_encoding(s);
7172         return;
7173     }
7174 
7175     if (!fp_access_check(s)) {
7176         return;
7177     }
7178 
7179     tcg_resh = tcg_temp_new_i64();
7180     tcg_resl = tcg_temp_new_i64();
7181 
7182     /* Vd gets bits starting at pos bits into Vm:Vn. This is
7183      * either extracting 128 bits from a 128:128 concatenation, or
7184      * extracting 64 bits from a 64:64 concatenation.
7185      */
7186     if (!is_q) {
7187         read_vec_element(s, tcg_resl, rn, 0, MO_64);
7188         if (pos != 0) {
7189             read_vec_element(s, tcg_resh, rm, 0, MO_64);
7190             do_ext64(s, tcg_resh, tcg_resl, pos);
7191         }
7192     } else {
7193         TCGv_i64 tcg_hh;
7194         typedef struct {
7195             int reg;
7196             int elt;
7197         } EltPosns;
7198         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
7199         EltPosns *elt = eltposns;
7200 
7201         if (pos >= 64) {
7202             elt++;
7203             pos -= 64;
7204         }
7205 
7206         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
7207         elt++;
7208         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
7209         elt++;
7210         if (pos != 0) {
7211             do_ext64(s, tcg_resh, tcg_resl, pos);
7212             tcg_hh = tcg_temp_new_i64();
7213             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
7214             do_ext64(s, tcg_hh, tcg_resh, pos);
7215         }
7216     }
7217 
7218     write_vec_element(s, tcg_resl, rd, 0, MO_64);
7219     if (is_q) {
7220         write_vec_element(s, tcg_resh, rd, 1, MO_64);
7221     }
7222     clear_vec_high(s, is_q, rd);
7223 }
7224 
7225 /* TBL/TBX
7226  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
7227  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7228  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
7229  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7230  */
7231 static void disas_simd_tb(DisasContext *s, uint32_t insn)
7232 {
7233     int op2 = extract32(insn, 22, 2);
7234     int is_q = extract32(insn, 30, 1);
7235     int rm = extract32(insn, 16, 5);
7236     int rn = extract32(insn, 5, 5);
7237     int rd = extract32(insn, 0, 5);
7238     int is_tbx = extract32(insn, 12, 1);
7239     int len = (extract32(insn, 13, 2) + 1) * 16;
7240 
7241     if (op2 != 0) {
7242         unallocated_encoding(s);
7243         return;
7244     }
7245 
7246     if (!fp_access_check(s)) {
7247         return;
7248     }
7249 
7250     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
7251                        vec_full_reg_offset(s, rm), cpu_env,
7252                        is_q ? 16 : 8, vec_full_reg_size(s),
7253                        (len << 6) | (is_tbx << 5) | rn,
7254                        gen_helper_simd_tblx);
7255 }
7256 
7257 /* ZIP/UZP/TRN
7258  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
7259  * +---+---+-------------+------+---+------+---+------------------+------+
7260  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
7261  * +---+---+-------------+------+---+------+---+------------------+------+
7262  */
7263 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
7264 {
7265     int rd = extract32(insn, 0, 5);
7266     int rn = extract32(insn, 5, 5);
7267     int rm = extract32(insn, 16, 5);
7268     int size = extract32(insn, 22, 2);
7269     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
7270      * bit 2 indicates 1 vs 2 variant of the insn.
7271      */
7272     int opcode = extract32(insn, 12, 2);
7273     bool part = extract32(insn, 14, 1);
7274     bool is_q = extract32(insn, 30, 1);
7275     int esize = 8 << size;
7276     int i;
7277     int datasize = is_q ? 128 : 64;
7278     int elements = datasize / esize;
7279     TCGv_i64 tcg_res[2], tcg_ele;
7280 
7281     if (opcode == 0 || (size == 3 && !is_q)) {
7282         unallocated_encoding(s);
7283         return;
7284     }
7285 
7286     if (!fp_access_check(s)) {
7287         return;
7288     }
7289 
7290     tcg_res[0] = tcg_temp_new_i64();
7291     tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
7292     tcg_ele = tcg_temp_new_i64();
7293 
7294     for (i = 0; i < elements; i++) {
7295         int o, w;
7296 
7297         switch (opcode) {
7298         case 1: /* UZP1/2 */
7299         {
7300             int midpoint = elements / 2;
7301             if (i < midpoint) {
7302                 read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
7303             } else {
7304                 read_vec_element(s, tcg_ele, rm,
7305                                  2 * (i - midpoint) + part, size);
7306             }
7307             break;
7308         }
7309         case 2: /* TRN1/2 */
7310             if (i & 1) {
7311                 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
7312             } else {
7313                 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
7314             }
7315             break;
7316         case 3: /* ZIP1/2 */
7317         {
7318             int base = part * elements / 2;
7319             if (i & 1) {
7320                 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
7321             } else {
7322                 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
7323             }
7324             break;
7325         }
7326         default:
7327             g_assert_not_reached();
7328         }
7329 
7330         w = (i * esize) / 64;
7331         o = (i * esize) % 64;
7332         if (o == 0) {
7333             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
7334         } else {
7335             tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
7336             tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
7337         }
7338     }
7339 
7340     for (i = 0; i <= is_q; ++i) {
7341         write_vec_element(s, tcg_res[i], rd, i, MO_64);
7342     }
7343     clear_vec_high(s, is_q, rd);
7344 }
7345 
7346 /*
7347  * do_reduction_op helper
7348  *
7349  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7350  * important for correct NaN propagation that we do these
7351  * operations in exactly the order specified by the pseudocode.
7352  *
7353  * This is a recursive function, TCG temps should be freed by the
7354  * calling function once it is done with the values.
7355  */
7356 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
7357                                 int esize, int size, int vmap, TCGv_ptr fpst)
7358 {
7359     if (esize == size) {
7360         int element;
7361         MemOp msize = esize == 16 ? MO_16 : MO_32;
7362         TCGv_i32 tcg_elem;
7363 
7364         /* We should have one register left here */
7365         assert(ctpop8(vmap) == 1);
7366         element = ctz32(vmap);
7367         assert(element < 8);
7368 
7369         tcg_elem = tcg_temp_new_i32();
7370         read_vec_element_i32(s, tcg_elem, rn, element, msize);
7371         return tcg_elem;
7372     } else {
7373         int bits = size / 2;
7374         int shift = ctpop8(vmap) / 2;
7375         int vmap_lo = (vmap >> shift) & vmap;
7376         int vmap_hi = (vmap & ~vmap_lo);
7377         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7378 
7379         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
7380         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
7381         tcg_res = tcg_temp_new_i32();
7382 
7383         switch (fpopcode) {
7384         case 0x0c: /* fmaxnmv half-precision */
7385             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7386             break;
7387         case 0x0f: /* fmaxv half-precision */
7388             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
7389             break;
7390         case 0x1c: /* fminnmv half-precision */
7391             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7392             break;
7393         case 0x1f: /* fminv half-precision */
7394             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
7395             break;
7396         case 0x2c: /* fmaxnmv */
7397             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
7398             break;
7399         case 0x2f: /* fmaxv */
7400             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
7401             break;
7402         case 0x3c: /* fminnmv */
7403             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
7404             break;
7405         case 0x3f: /* fminv */
7406             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
7407             break;
7408         default:
7409             g_assert_not_reached();
7410         }
7411         return tcg_res;
7412     }
7413 }
7414 
7415 /* AdvSIMD across lanes
7416  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7417  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7418  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7419  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7420  */
7421 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
7422 {
7423     int rd = extract32(insn, 0, 5);
7424     int rn = extract32(insn, 5, 5);
7425     int size = extract32(insn, 22, 2);
7426     int opcode = extract32(insn, 12, 5);
7427     bool is_q = extract32(insn, 30, 1);
7428     bool is_u = extract32(insn, 29, 1);
7429     bool is_fp = false;
7430     bool is_min = false;
7431     int esize;
7432     int elements;
7433     int i;
7434     TCGv_i64 tcg_res, tcg_elt;
7435 
7436     switch (opcode) {
7437     case 0x1b: /* ADDV */
7438         if (is_u) {
7439             unallocated_encoding(s);
7440             return;
7441         }
7442         /* fall through */
7443     case 0x3: /* SADDLV, UADDLV */
7444     case 0xa: /* SMAXV, UMAXV */
7445     case 0x1a: /* SMINV, UMINV */
7446         if (size == 3 || (size == 2 && !is_q)) {
7447             unallocated_encoding(s);
7448             return;
7449         }
7450         break;
7451     case 0xc: /* FMAXNMV, FMINNMV */
7452     case 0xf: /* FMAXV, FMINV */
7453         /* Bit 1 of size field encodes min vs max and the actual size
7454          * depends on the encoding of the U bit. If not set (and FP16
7455          * enabled) then we do half-precision float instead of single
7456          * precision.
7457          */
7458         is_min = extract32(size, 1, 1);
7459         is_fp = true;
7460         if (!is_u && dc_isar_feature(aa64_fp16, s)) {
7461             size = 1;
7462         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
7463             unallocated_encoding(s);
7464             return;
7465         } else {
7466             size = 2;
7467         }
7468         break;
7469     default:
7470         unallocated_encoding(s);
7471         return;
7472     }
7473 
7474     if (!fp_access_check(s)) {
7475         return;
7476     }
7477 
7478     esize = 8 << size;
7479     elements = (is_q ? 128 : 64) / esize;
7480 
7481     tcg_res = tcg_temp_new_i64();
7482     tcg_elt = tcg_temp_new_i64();
7483 
7484     /* These instructions operate across all lanes of a vector
7485      * to produce a single result. We can guarantee that a 64
7486      * bit intermediate is sufficient:
7487      *  + for [US]ADDLV the maximum element size is 32 bits, and
7488      *    the result type is 64 bits
7489      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7490      *    same as the element size, which is 32 bits at most
7491      * For the integer operations we can choose to work at 64
7492      * or 32 bits and truncate at the end; for simplicity
7493      * we use 64 bits always. The floating point
7494      * ops do require 32 bit intermediates, though.
7495      */
7496     if (!is_fp) {
7497         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
7498 
7499         for (i = 1; i < elements; i++) {
7500             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
7501 
7502             switch (opcode) {
7503             case 0x03: /* SADDLV / UADDLV */
7504             case 0x1b: /* ADDV */
7505                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
7506                 break;
7507             case 0x0a: /* SMAXV / UMAXV */
7508                 if (is_u) {
7509                     tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
7510                 } else {
7511                     tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
7512                 }
7513                 break;
7514             case 0x1a: /* SMINV / UMINV */
7515                 if (is_u) {
7516                     tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
7517                 } else {
7518                     tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
7519                 }
7520                 break;
7521             default:
7522                 g_assert_not_reached();
7523             }
7524 
7525         }
7526     } else {
7527         /* Floating point vector reduction ops which work across 32
7528          * bit (single) or 16 bit (half-precision) intermediates.
7529          * Note that correct NaN propagation requires that we do these
7530          * operations in exactly the order specified by the pseudocode.
7531          */
7532         TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7533         int fpopcode = opcode | is_min << 4 | is_u << 5;
7534         int vmap = (1 << elements) - 1;
7535         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
7536                                              (is_q ? 128 : 64), vmap, fpst);
7537         tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
7538     }
7539 
7540     /* Now truncate the result to the width required for the final output */
7541     if (opcode == 0x03) {
7542         /* SADDLV, UADDLV: result is 2*esize */
7543         size++;
7544     }
7545 
7546     switch (size) {
7547     case 0:
7548         tcg_gen_ext8u_i64(tcg_res, tcg_res);
7549         break;
7550     case 1:
7551         tcg_gen_ext16u_i64(tcg_res, tcg_res);
7552         break;
7553     case 2:
7554         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7555         break;
7556     case 3:
7557         break;
7558     default:
7559         g_assert_not_reached();
7560     }
7561 
7562     write_fp_dreg(s, rd, tcg_res);
7563 }
7564 
7565 /* DUP (Element, Vector)
7566  *
7567  *  31  30   29              21 20    16 15        10  9    5 4    0
7568  * +---+---+-------------------+--------+-------------+------+------+
7569  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7570  * +---+---+-------------------+--------+-------------+------+------+
7571  *
7572  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7573  */
7574 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7575                              int imm5)
7576 {
7577     int size = ctz32(imm5);
7578     int index;
7579 
7580     if (size > 3 || (size == 3 && !is_q)) {
7581         unallocated_encoding(s);
7582         return;
7583     }
7584 
7585     if (!fp_access_check(s)) {
7586         return;
7587     }
7588 
7589     index = imm5 >> (size + 1);
7590     tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7591                          vec_reg_offset(s, rn, index, size),
7592                          is_q ? 16 : 8, vec_full_reg_size(s));
7593 }
7594 
7595 /* DUP (element, scalar)
7596  *  31                   21 20    16 15        10  9    5 4    0
7597  * +-----------------------+--------+-------------+------+------+
7598  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7599  * +-----------------------+--------+-------------+------+------+
7600  */
7601 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7602                               int imm5)
7603 {
7604     int size = ctz32(imm5);
7605     int index;
7606     TCGv_i64 tmp;
7607 
7608     if (size > 3) {
7609         unallocated_encoding(s);
7610         return;
7611     }
7612 
7613     if (!fp_access_check(s)) {
7614         return;
7615     }
7616 
7617     index = imm5 >> (size + 1);
7618 
7619     /* This instruction just extracts the specified element and
7620      * zero-extends it into the bottom of the destination register.
7621      */
7622     tmp = tcg_temp_new_i64();
7623     read_vec_element(s, tmp, rn, index, size);
7624     write_fp_dreg(s, rd, tmp);
7625 }
7626 
7627 /* DUP (General)
7628  *
7629  *  31  30   29              21 20    16 15        10  9    5 4    0
7630  * +---+---+-------------------+--------+-------------+------+------+
7631  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
7632  * +---+---+-------------------+--------+-------------+------+------+
7633  *
7634  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7635  */
7636 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7637                              int imm5)
7638 {
7639     int size = ctz32(imm5);
7640     uint32_t dofs, oprsz, maxsz;
7641 
7642     if (size > 3 || ((size == 3) && !is_q)) {
7643         unallocated_encoding(s);
7644         return;
7645     }
7646 
7647     if (!fp_access_check(s)) {
7648         return;
7649     }
7650 
7651     dofs = vec_full_reg_offset(s, rd);
7652     oprsz = is_q ? 16 : 8;
7653     maxsz = vec_full_reg_size(s);
7654 
7655     tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7656 }
7657 
7658 /* INS (Element)
7659  *
7660  *  31                   21 20    16 15  14    11  10 9    5 4    0
7661  * +-----------------------+--------+------------+---+------+------+
7662  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
7663  * +-----------------------+--------+------------+---+------+------+
7664  *
7665  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7666  * index: encoded in imm5<4:size+1>
7667  */
7668 static void handle_simd_inse(DisasContext *s, int rd, int rn,
7669                              int imm4, int imm5)
7670 {
7671     int size = ctz32(imm5);
7672     int src_index, dst_index;
7673     TCGv_i64 tmp;
7674 
7675     if (size > 3) {
7676         unallocated_encoding(s);
7677         return;
7678     }
7679 
7680     if (!fp_access_check(s)) {
7681         return;
7682     }
7683 
7684     dst_index = extract32(imm5, 1+size, 5);
7685     src_index = extract32(imm4, size, 4);
7686 
7687     tmp = tcg_temp_new_i64();
7688 
7689     read_vec_element(s, tmp, rn, src_index, size);
7690     write_vec_element(s, tmp, rd, dst_index, size);
7691 
7692     /* INS is considered a 128-bit write for SVE. */
7693     clear_vec_high(s, true, rd);
7694 }
7695 
7696 
7697 /* INS (General)
7698  *
7699  *  31                   21 20    16 15        10  9    5 4    0
7700  * +-----------------------+--------+-------------+------+------+
7701  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
7702  * +-----------------------+--------+-------------+------+------+
7703  *
7704  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7705  * index: encoded in imm5<4:size+1>
7706  */
7707 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
7708 {
7709     int size = ctz32(imm5);
7710     int idx;
7711 
7712     if (size > 3) {
7713         unallocated_encoding(s);
7714         return;
7715     }
7716 
7717     if (!fp_access_check(s)) {
7718         return;
7719     }
7720 
7721     idx = extract32(imm5, 1 + size, 4 - size);
7722     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
7723 
7724     /* INS is considered a 128-bit write for SVE. */
7725     clear_vec_high(s, true, rd);
7726 }
7727 
7728 /*
7729  * UMOV (General)
7730  * SMOV (General)
7731  *
7732  *  31  30   29              21 20    16 15    12   10 9    5 4    0
7733  * +---+---+-------------------+--------+-------------+------+------+
7734  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
7735  * +---+---+-------------------+--------+-------------+------+------+
7736  *
7737  * U: unsigned when set
7738  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7739  */
7740 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
7741                                   int rn, int rd, int imm5)
7742 {
7743     int size = ctz32(imm5);
7744     int element;
7745     TCGv_i64 tcg_rd;
7746 
7747     /* Check for UnallocatedEncodings */
7748     if (is_signed) {
7749         if (size > 2 || (size == 2 && !is_q)) {
7750             unallocated_encoding(s);
7751             return;
7752         }
7753     } else {
7754         if (size > 3
7755             || (size < 3 && is_q)
7756             || (size == 3 && !is_q)) {
7757             unallocated_encoding(s);
7758             return;
7759         }
7760     }
7761 
7762     if (!fp_access_check(s)) {
7763         return;
7764     }
7765 
7766     element = extract32(imm5, 1+size, 4);
7767 
7768     tcg_rd = cpu_reg(s, rd);
7769     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
7770     if (is_signed && !is_q) {
7771         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7772     }
7773 }
7774 
7775 /* AdvSIMD copy
7776  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
7777  * +---+---+----+-----------------+------+---+------+---+------+------+
7778  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7779  * +---+---+----+-----------------+------+---+------+---+------+------+
7780  */
7781 static void disas_simd_copy(DisasContext *s, uint32_t insn)
7782 {
7783     int rd = extract32(insn, 0, 5);
7784     int rn = extract32(insn, 5, 5);
7785     int imm4 = extract32(insn, 11, 4);
7786     int op = extract32(insn, 29, 1);
7787     int is_q = extract32(insn, 30, 1);
7788     int imm5 = extract32(insn, 16, 5);
7789 
7790     if (op) {
7791         if (is_q) {
7792             /* INS (element) */
7793             handle_simd_inse(s, rd, rn, imm4, imm5);
7794         } else {
7795             unallocated_encoding(s);
7796         }
7797     } else {
7798         switch (imm4) {
7799         case 0:
7800             /* DUP (element - vector) */
7801             handle_simd_dupe(s, is_q, rd, rn, imm5);
7802             break;
7803         case 1:
7804             /* DUP (general) */
7805             handle_simd_dupg(s, is_q, rd, rn, imm5);
7806             break;
7807         case 3:
7808             if (is_q) {
7809                 /* INS (general) */
7810                 handle_simd_insg(s, rd, rn, imm5);
7811             } else {
7812                 unallocated_encoding(s);
7813             }
7814             break;
7815         case 5:
7816         case 7:
7817             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7818             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
7819             break;
7820         default:
7821             unallocated_encoding(s);
7822             break;
7823         }
7824     }
7825 }
7826 
7827 /* AdvSIMD modified immediate
7828  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
7829  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7830  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
7831  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7832  *
7833  * There are a number of operations that can be carried out here:
7834  *   MOVI - move (shifted) imm into register
7835  *   MVNI - move inverted (shifted) imm into register
7836  *   ORR  - bitwise OR of (shifted) imm with register
7837  *   BIC  - bitwise clear of (shifted) imm with register
7838  * With ARMv8.2 we also have:
7839  *   FMOV half-precision
7840  */
7841 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
7842 {
7843     int rd = extract32(insn, 0, 5);
7844     int cmode = extract32(insn, 12, 4);
7845     int o2 = extract32(insn, 11, 1);
7846     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
7847     bool is_neg = extract32(insn, 29, 1);
7848     bool is_q = extract32(insn, 30, 1);
7849     uint64_t imm = 0;
7850 
7851     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
7852         /* Check for FMOV (vector, immediate) - half-precision */
7853         if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
7854             unallocated_encoding(s);
7855             return;
7856         }
7857     }
7858 
7859     if (!fp_access_check(s)) {
7860         return;
7861     }
7862 
7863     if (cmode == 15 && o2 && !is_neg) {
7864         /* FMOV (vector, immediate) - half-precision */
7865         imm = vfp_expand_imm(MO_16, abcdefgh);
7866         /* now duplicate across the lanes */
7867         imm = dup_const(MO_16, imm);
7868     } else {
7869         imm = asimd_imm_const(abcdefgh, cmode, is_neg);
7870     }
7871 
7872     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
7873         /* MOVI or MVNI, with MVNI negation handled above.  */
7874         tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
7875                              vec_full_reg_size(s), imm);
7876     } else {
7877         /* ORR or BIC, with BIC negation to AND handled above.  */
7878         if (is_neg) {
7879             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
7880         } else {
7881             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
7882         }
7883     }
7884 }
7885 
7886 /* AdvSIMD scalar copy
7887  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
7888  * +-----+----+-----------------+------+---+------+---+------+------+
7889  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7890  * +-----+----+-----------------+------+---+------+---+------+------+
7891  */
7892 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
7893 {
7894     int rd = extract32(insn, 0, 5);
7895     int rn = extract32(insn, 5, 5);
7896     int imm4 = extract32(insn, 11, 4);
7897     int imm5 = extract32(insn, 16, 5);
7898     int op = extract32(insn, 29, 1);
7899 
7900     if (op != 0 || imm4 != 0) {
7901         unallocated_encoding(s);
7902         return;
7903     }
7904 
7905     /* DUP (element, scalar) */
7906     handle_simd_dupes(s, rd, rn, imm5);
7907 }
7908 
7909 /* AdvSIMD scalar pairwise
7910  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7911  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7912  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7913  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7914  */
7915 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
7916 {
7917     int u = extract32(insn, 29, 1);
7918     int size = extract32(insn, 22, 2);
7919     int opcode = extract32(insn, 12, 5);
7920     int rn = extract32(insn, 5, 5);
7921     int rd = extract32(insn, 0, 5);
7922     TCGv_ptr fpst;
7923 
7924     /* For some ops (the FP ones), size[1] is part of the encoding.
7925      * For ADDP strictly it is not but size[1] is always 1 for valid
7926      * encodings.
7927      */
7928     opcode |= (extract32(size, 1, 1) << 5);
7929 
7930     switch (opcode) {
7931     case 0x3b: /* ADDP */
7932         if (u || size != 3) {
7933             unallocated_encoding(s);
7934             return;
7935         }
7936         if (!fp_access_check(s)) {
7937             return;
7938         }
7939 
7940         fpst = NULL;
7941         break;
7942     case 0xc: /* FMAXNMP */
7943     case 0xd: /* FADDP */
7944     case 0xf: /* FMAXP */
7945     case 0x2c: /* FMINNMP */
7946     case 0x2f: /* FMINP */
7947         /* FP op, size[0] is 32 or 64 bit*/
7948         if (!u) {
7949             if (!dc_isar_feature(aa64_fp16, s)) {
7950                 unallocated_encoding(s);
7951                 return;
7952             } else {
7953                 size = MO_16;
7954             }
7955         } else {
7956             size = extract32(size, 0, 1) ? MO_64 : MO_32;
7957         }
7958 
7959         if (!fp_access_check(s)) {
7960             return;
7961         }
7962 
7963         fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7964         break;
7965     default:
7966         unallocated_encoding(s);
7967         return;
7968     }
7969 
7970     if (size == MO_64) {
7971         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7972         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7973         TCGv_i64 tcg_res = tcg_temp_new_i64();
7974 
7975         read_vec_element(s, tcg_op1, rn, 0, MO_64);
7976         read_vec_element(s, tcg_op2, rn, 1, MO_64);
7977 
7978         switch (opcode) {
7979         case 0x3b: /* ADDP */
7980             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
7981             break;
7982         case 0xc: /* FMAXNMP */
7983             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7984             break;
7985         case 0xd: /* FADDP */
7986             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7987             break;
7988         case 0xf: /* FMAXP */
7989             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7990             break;
7991         case 0x2c: /* FMINNMP */
7992             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7993             break;
7994         case 0x2f: /* FMINP */
7995             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7996             break;
7997         default:
7998             g_assert_not_reached();
7999         }
8000 
8001         write_fp_dreg(s, rd, tcg_res);
8002     } else {
8003         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8004         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8005         TCGv_i32 tcg_res = tcg_temp_new_i32();
8006 
8007         read_vec_element_i32(s, tcg_op1, rn, 0, size);
8008         read_vec_element_i32(s, tcg_op2, rn, 1, size);
8009 
8010         if (size == MO_16) {
8011             switch (opcode) {
8012             case 0xc: /* FMAXNMP */
8013                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8014                 break;
8015             case 0xd: /* FADDP */
8016                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
8017                 break;
8018             case 0xf: /* FMAXP */
8019                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
8020                 break;
8021             case 0x2c: /* FMINNMP */
8022                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8023                 break;
8024             case 0x2f: /* FMINP */
8025                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
8026                 break;
8027             default:
8028                 g_assert_not_reached();
8029             }
8030         } else {
8031             switch (opcode) {
8032             case 0xc: /* FMAXNMP */
8033                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8034                 break;
8035             case 0xd: /* FADDP */
8036                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8037                 break;
8038             case 0xf: /* FMAXP */
8039                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8040                 break;
8041             case 0x2c: /* FMINNMP */
8042                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8043                 break;
8044             case 0x2f: /* FMINP */
8045                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8046                 break;
8047             default:
8048                 g_assert_not_reached();
8049             }
8050         }
8051 
8052         write_fp_sreg(s, rd, tcg_res);
8053     }
8054 }
8055 
8056 /*
8057  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
8058  *
8059  * This code is handles the common shifting code and is used by both
8060  * the vector and scalar code.
8061  */
8062 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
8063                                     TCGv_i64 tcg_rnd, bool accumulate,
8064                                     bool is_u, int size, int shift)
8065 {
8066     bool extended_result = false;
8067     bool round = tcg_rnd != NULL;
8068     int ext_lshift = 0;
8069     TCGv_i64 tcg_src_hi;
8070 
8071     if (round && size == 3) {
8072         extended_result = true;
8073         ext_lshift = 64 - shift;
8074         tcg_src_hi = tcg_temp_new_i64();
8075     } else if (shift == 64) {
8076         if (!accumulate && is_u) {
8077             /* result is zero */
8078             tcg_gen_movi_i64(tcg_res, 0);
8079             return;
8080         }
8081     }
8082 
8083     /* Deal with the rounding step */
8084     if (round) {
8085         if (extended_result) {
8086             TCGv_i64 tcg_zero = tcg_constant_i64(0);
8087             if (!is_u) {
8088                 /* take care of sign extending tcg_res */
8089                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
8090                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8091                                  tcg_src, tcg_src_hi,
8092                                  tcg_rnd, tcg_zero);
8093             } else {
8094                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8095                                  tcg_src, tcg_zero,
8096                                  tcg_rnd, tcg_zero);
8097             }
8098         } else {
8099             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
8100         }
8101     }
8102 
8103     /* Now do the shift right */
8104     if (round && extended_result) {
8105         /* extended case, >64 bit precision required */
8106         if (ext_lshift == 0) {
8107             /* special case, only high bits matter */
8108             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
8109         } else {
8110             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8111             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
8112             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
8113         }
8114     } else {
8115         if (is_u) {
8116             if (shift == 64) {
8117                 /* essentially shifting in 64 zeros */
8118                 tcg_gen_movi_i64(tcg_src, 0);
8119             } else {
8120                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8121             }
8122         } else {
8123             if (shift == 64) {
8124                 /* effectively extending the sign-bit */
8125                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
8126             } else {
8127                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
8128             }
8129         }
8130     }
8131 
8132     if (accumulate) {
8133         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
8134     } else {
8135         tcg_gen_mov_i64(tcg_res, tcg_src);
8136     }
8137 }
8138 
8139 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
8140 static void handle_scalar_simd_shri(DisasContext *s,
8141                                     bool is_u, int immh, int immb,
8142                                     int opcode, int rn, int rd)
8143 {
8144     const int size = 3;
8145     int immhb = immh << 3 | immb;
8146     int shift = 2 * (8 << size) - immhb;
8147     bool accumulate = false;
8148     bool round = false;
8149     bool insert = false;
8150     TCGv_i64 tcg_rn;
8151     TCGv_i64 tcg_rd;
8152     TCGv_i64 tcg_round;
8153 
8154     if (!extract32(immh, 3, 1)) {
8155         unallocated_encoding(s);
8156         return;
8157     }
8158 
8159     if (!fp_access_check(s)) {
8160         return;
8161     }
8162 
8163     switch (opcode) {
8164     case 0x02: /* SSRA / USRA (accumulate) */
8165         accumulate = true;
8166         break;
8167     case 0x04: /* SRSHR / URSHR (rounding) */
8168         round = true;
8169         break;
8170     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8171         accumulate = round = true;
8172         break;
8173     case 0x08: /* SRI */
8174         insert = true;
8175         break;
8176     }
8177 
8178     if (round) {
8179         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8180     } else {
8181         tcg_round = NULL;
8182     }
8183 
8184     tcg_rn = read_fp_dreg(s, rn);
8185     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8186 
8187     if (insert) {
8188         /* shift count same as element size is valid but does nothing;
8189          * special case to avoid potential shift by 64.
8190          */
8191         int esize = 8 << size;
8192         if (shift != esize) {
8193             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
8194             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
8195         }
8196     } else {
8197         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8198                                 accumulate, is_u, size, shift);
8199     }
8200 
8201     write_fp_dreg(s, rd, tcg_rd);
8202 }
8203 
8204 /* SHL/SLI - Scalar shift left */
8205 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
8206                                     int immh, int immb, int opcode,
8207                                     int rn, int rd)
8208 {
8209     int size = 32 - clz32(immh) - 1;
8210     int immhb = immh << 3 | immb;
8211     int shift = immhb - (8 << size);
8212     TCGv_i64 tcg_rn;
8213     TCGv_i64 tcg_rd;
8214 
8215     if (!extract32(immh, 3, 1)) {
8216         unallocated_encoding(s);
8217         return;
8218     }
8219 
8220     if (!fp_access_check(s)) {
8221         return;
8222     }
8223 
8224     tcg_rn = read_fp_dreg(s, rn);
8225     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8226 
8227     if (insert) {
8228         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
8229     } else {
8230         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
8231     }
8232 
8233     write_fp_dreg(s, rd, tcg_rd);
8234 }
8235 
8236 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8237  * (signed/unsigned) narrowing */
8238 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
8239                                    bool is_u_shift, bool is_u_narrow,
8240                                    int immh, int immb, int opcode,
8241                                    int rn, int rd)
8242 {
8243     int immhb = immh << 3 | immb;
8244     int size = 32 - clz32(immh) - 1;
8245     int esize = 8 << size;
8246     int shift = (2 * esize) - immhb;
8247     int elements = is_scalar ? 1 : (64 / esize);
8248     bool round = extract32(opcode, 0, 1);
8249     MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
8250     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
8251     TCGv_i32 tcg_rd_narrowed;
8252     TCGv_i64 tcg_final;
8253 
8254     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
8255         { gen_helper_neon_narrow_sat_s8,
8256           gen_helper_neon_unarrow_sat8 },
8257         { gen_helper_neon_narrow_sat_s16,
8258           gen_helper_neon_unarrow_sat16 },
8259         { gen_helper_neon_narrow_sat_s32,
8260           gen_helper_neon_unarrow_sat32 },
8261         { NULL, NULL },
8262     };
8263     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
8264         gen_helper_neon_narrow_sat_u8,
8265         gen_helper_neon_narrow_sat_u16,
8266         gen_helper_neon_narrow_sat_u32,
8267         NULL
8268     };
8269     NeonGenNarrowEnvFn *narrowfn;
8270 
8271     int i;
8272 
8273     assert(size < 4);
8274 
8275     if (extract32(immh, 3, 1)) {
8276         unallocated_encoding(s);
8277         return;
8278     }
8279 
8280     if (!fp_access_check(s)) {
8281         return;
8282     }
8283 
8284     if (is_u_shift) {
8285         narrowfn = unsigned_narrow_fns[size];
8286     } else {
8287         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
8288     }
8289 
8290     tcg_rn = tcg_temp_new_i64();
8291     tcg_rd = tcg_temp_new_i64();
8292     tcg_rd_narrowed = tcg_temp_new_i32();
8293     tcg_final = tcg_temp_new_i64();
8294 
8295     if (round) {
8296         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8297     } else {
8298         tcg_round = NULL;
8299     }
8300 
8301     for (i = 0; i < elements; i++) {
8302         read_vec_element(s, tcg_rn, rn, i, ldop);
8303         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8304                                 false, is_u_shift, size+1, shift);
8305         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
8306         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
8307         if (i == 0) {
8308             tcg_gen_mov_i64(tcg_final, tcg_rd);
8309         } else {
8310             tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8311         }
8312     }
8313 
8314     if (!is_q) {
8315         write_vec_element(s, tcg_final, rd, 0, MO_64);
8316     } else {
8317         write_vec_element(s, tcg_final, rd, 1, MO_64);
8318     }
8319     clear_vec_high(s, is_q, rd);
8320 }
8321 
8322 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8323 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
8324                              bool src_unsigned, bool dst_unsigned,
8325                              int immh, int immb, int rn, int rd)
8326 {
8327     int immhb = immh << 3 | immb;
8328     int size = 32 - clz32(immh) - 1;
8329     int shift = immhb - (8 << size);
8330     int pass;
8331 
8332     assert(immh != 0);
8333     assert(!(scalar && is_q));
8334 
8335     if (!scalar) {
8336         if (!is_q && extract32(immh, 3, 1)) {
8337             unallocated_encoding(s);
8338             return;
8339         }
8340 
8341         /* Since we use the variable-shift helpers we must
8342          * replicate the shift count into each element of
8343          * the tcg_shift value.
8344          */
8345         switch (size) {
8346         case 0:
8347             shift |= shift << 8;
8348             /* fall through */
8349         case 1:
8350             shift |= shift << 16;
8351             break;
8352         case 2:
8353         case 3:
8354             break;
8355         default:
8356             g_assert_not_reached();
8357         }
8358     }
8359 
8360     if (!fp_access_check(s)) {
8361         return;
8362     }
8363 
8364     if (size == 3) {
8365         TCGv_i64 tcg_shift = tcg_constant_i64(shift);
8366         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
8367             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
8368             { NULL, gen_helper_neon_qshl_u64 },
8369         };
8370         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
8371         int maxpass = is_q ? 2 : 1;
8372 
8373         for (pass = 0; pass < maxpass; pass++) {
8374             TCGv_i64 tcg_op = tcg_temp_new_i64();
8375 
8376             read_vec_element(s, tcg_op, rn, pass, MO_64);
8377             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8378             write_vec_element(s, tcg_op, rd, pass, MO_64);
8379         }
8380         clear_vec_high(s, is_q, rd);
8381     } else {
8382         TCGv_i32 tcg_shift = tcg_constant_i32(shift);
8383         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
8384             {
8385                 { gen_helper_neon_qshl_s8,
8386                   gen_helper_neon_qshl_s16,
8387                   gen_helper_neon_qshl_s32 },
8388                 { gen_helper_neon_qshlu_s8,
8389                   gen_helper_neon_qshlu_s16,
8390                   gen_helper_neon_qshlu_s32 }
8391             }, {
8392                 { NULL, NULL, NULL },
8393                 { gen_helper_neon_qshl_u8,
8394                   gen_helper_neon_qshl_u16,
8395                   gen_helper_neon_qshl_u32 }
8396             }
8397         };
8398         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
8399         MemOp memop = scalar ? size : MO_32;
8400         int maxpass = scalar ? 1 : is_q ? 4 : 2;
8401 
8402         for (pass = 0; pass < maxpass; pass++) {
8403             TCGv_i32 tcg_op = tcg_temp_new_i32();
8404 
8405             read_vec_element_i32(s, tcg_op, rn, pass, memop);
8406             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8407             if (scalar) {
8408                 switch (size) {
8409                 case 0:
8410                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
8411                     break;
8412                 case 1:
8413                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
8414                     break;
8415                 case 2:
8416                     break;
8417                 default:
8418                     g_assert_not_reached();
8419                 }
8420                 write_fp_sreg(s, rd, tcg_op);
8421             } else {
8422                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8423             }
8424         }
8425 
8426         if (!scalar) {
8427             clear_vec_high(s, is_q, rd);
8428         }
8429     }
8430 }
8431 
8432 /* Common vector code for handling integer to FP conversion */
8433 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
8434                                    int elements, int is_signed,
8435                                    int fracbits, int size)
8436 {
8437     TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8438     TCGv_i32 tcg_shift = NULL;
8439 
8440     MemOp mop = size | (is_signed ? MO_SIGN : 0);
8441     int pass;
8442 
8443     if (fracbits || size == MO_64) {
8444         tcg_shift = tcg_constant_i32(fracbits);
8445     }
8446 
8447     if (size == MO_64) {
8448         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
8449         TCGv_i64 tcg_double = tcg_temp_new_i64();
8450 
8451         for (pass = 0; pass < elements; pass++) {
8452             read_vec_element(s, tcg_int64, rn, pass, mop);
8453 
8454             if (is_signed) {
8455                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
8456                                      tcg_shift, tcg_fpst);
8457             } else {
8458                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
8459                                      tcg_shift, tcg_fpst);
8460             }
8461             if (elements == 1) {
8462                 write_fp_dreg(s, rd, tcg_double);
8463             } else {
8464                 write_vec_element(s, tcg_double, rd, pass, MO_64);
8465             }
8466         }
8467     } else {
8468         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
8469         TCGv_i32 tcg_float = tcg_temp_new_i32();
8470 
8471         for (pass = 0; pass < elements; pass++) {
8472             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
8473 
8474             switch (size) {
8475             case MO_32:
8476                 if (fracbits) {
8477                     if (is_signed) {
8478                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
8479                                              tcg_shift, tcg_fpst);
8480                     } else {
8481                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
8482                                              tcg_shift, tcg_fpst);
8483                     }
8484                 } else {
8485                     if (is_signed) {
8486                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
8487                     } else {
8488                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
8489                     }
8490                 }
8491                 break;
8492             case MO_16:
8493                 if (fracbits) {
8494                     if (is_signed) {
8495                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
8496                                              tcg_shift, tcg_fpst);
8497                     } else {
8498                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
8499                                              tcg_shift, tcg_fpst);
8500                     }
8501                 } else {
8502                     if (is_signed) {
8503                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
8504                     } else {
8505                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
8506                     }
8507                 }
8508                 break;
8509             default:
8510                 g_assert_not_reached();
8511             }
8512 
8513             if (elements == 1) {
8514                 write_fp_sreg(s, rd, tcg_float);
8515             } else {
8516                 write_vec_element_i32(s, tcg_float, rd, pass, size);
8517             }
8518         }
8519     }
8520 
8521     clear_vec_high(s, elements << size == 16, rd);
8522 }
8523 
8524 /* UCVTF/SCVTF - Integer to FP conversion */
8525 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8526                                          bool is_q, bool is_u,
8527                                          int immh, int immb, int opcode,
8528                                          int rn, int rd)
8529 {
8530     int size, elements, fracbits;
8531     int immhb = immh << 3 | immb;
8532 
8533     if (immh & 8) {
8534         size = MO_64;
8535         if (!is_scalar && !is_q) {
8536             unallocated_encoding(s);
8537             return;
8538         }
8539     } else if (immh & 4) {
8540         size = MO_32;
8541     } else if (immh & 2) {
8542         size = MO_16;
8543         if (!dc_isar_feature(aa64_fp16, s)) {
8544             unallocated_encoding(s);
8545             return;
8546         }
8547     } else {
8548         /* immh == 0 would be a failure of the decode logic */
8549         g_assert(immh == 1);
8550         unallocated_encoding(s);
8551         return;
8552     }
8553 
8554     if (is_scalar) {
8555         elements = 1;
8556     } else {
8557         elements = (8 << is_q) >> size;
8558     }
8559     fracbits = (16 << size) - immhb;
8560 
8561     if (!fp_access_check(s)) {
8562         return;
8563     }
8564 
8565     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8566 }
8567 
8568 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
8569 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8570                                          bool is_q, bool is_u,
8571                                          int immh, int immb, int rn, int rd)
8572 {
8573     int immhb = immh << 3 | immb;
8574     int pass, size, fracbits;
8575     TCGv_ptr tcg_fpstatus;
8576     TCGv_i32 tcg_rmode, tcg_shift;
8577 
8578     if (immh & 0x8) {
8579         size = MO_64;
8580         if (!is_scalar && !is_q) {
8581             unallocated_encoding(s);
8582             return;
8583         }
8584     } else if (immh & 0x4) {
8585         size = MO_32;
8586     } else if (immh & 0x2) {
8587         size = MO_16;
8588         if (!dc_isar_feature(aa64_fp16, s)) {
8589             unallocated_encoding(s);
8590             return;
8591         }
8592     } else {
8593         /* Should have split out AdvSIMD modified immediate earlier.  */
8594         assert(immh == 1);
8595         unallocated_encoding(s);
8596         return;
8597     }
8598 
8599     if (!fp_access_check(s)) {
8600         return;
8601     }
8602 
8603     assert(!(is_scalar && is_q));
8604 
8605     tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8606     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
8607     fracbits = (16 << size) - immhb;
8608     tcg_shift = tcg_constant_i32(fracbits);
8609 
8610     if (size == MO_64) {
8611         int maxpass = is_scalar ? 1 : 2;
8612 
8613         for (pass = 0; pass < maxpass; pass++) {
8614             TCGv_i64 tcg_op = tcg_temp_new_i64();
8615 
8616             read_vec_element(s, tcg_op, rn, pass, MO_64);
8617             if (is_u) {
8618                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8619             } else {
8620                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8621             }
8622             write_vec_element(s, tcg_op, rd, pass, MO_64);
8623         }
8624         clear_vec_high(s, is_q, rd);
8625     } else {
8626         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
8627         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
8628 
8629         switch (size) {
8630         case MO_16:
8631             if (is_u) {
8632                 fn = gen_helper_vfp_touhh;
8633             } else {
8634                 fn = gen_helper_vfp_toshh;
8635             }
8636             break;
8637         case MO_32:
8638             if (is_u) {
8639                 fn = gen_helper_vfp_touls;
8640             } else {
8641                 fn = gen_helper_vfp_tosls;
8642             }
8643             break;
8644         default:
8645             g_assert_not_reached();
8646         }
8647 
8648         for (pass = 0; pass < maxpass; pass++) {
8649             TCGv_i32 tcg_op = tcg_temp_new_i32();
8650 
8651             read_vec_element_i32(s, tcg_op, rn, pass, size);
8652             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8653             if (is_scalar) {
8654                 write_fp_sreg(s, rd, tcg_op);
8655             } else {
8656                 write_vec_element_i32(s, tcg_op, rd, pass, size);
8657             }
8658         }
8659         if (!is_scalar) {
8660             clear_vec_high(s, is_q, rd);
8661         }
8662     }
8663 
8664     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
8665 }
8666 
8667 /* AdvSIMD scalar shift by immediate
8668  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
8669  * +-----+---+-------------+------+------+--------+---+------+------+
8670  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8671  * +-----+---+-------------+------+------+--------+---+------+------+
8672  *
8673  * This is the scalar version so it works on a fixed sized registers
8674  */
8675 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8676 {
8677     int rd = extract32(insn, 0, 5);
8678     int rn = extract32(insn, 5, 5);
8679     int opcode = extract32(insn, 11, 5);
8680     int immb = extract32(insn, 16, 3);
8681     int immh = extract32(insn, 19, 4);
8682     bool is_u = extract32(insn, 29, 1);
8683 
8684     if (immh == 0) {
8685         unallocated_encoding(s);
8686         return;
8687     }
8688 
8689     switch (opcode) {
8690     case 0x08: /* SRI */
8691         if (!is_u) {
8692             unallocated_encoding(s);
8693             return;
8694         }
8695         /* fall through */
8696     case 0x00: /* SSHR / USHR */
8697     case 0x02: /* SSRA / USRA */
8698     case 0x04: /* SRSHR / URSHR */
8699     case 0x06: /* SRSRA / URSRA */
8700         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8701         break;
8702     case 0x0a: /* SHL / SLI */
8703         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8704         break;
8705     case 0x1c: /* SCVTF, UCVTF */
8706         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8707                                      opcode, rn, rd);
8708         break;
8709     case 0x10: /* SQSHRUN, SQSHRUN2 */
8710     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8711         if (!is_u) {
8712             unallocated_encoding(s);
8713             return;
8714         }
8715         handle_vec_simd_sqshrn(s, true, false, false, true,
8716                                immh, immb, opcode, rn, rd);
8717         break;
8718     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
8719     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8720         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8721                                immh, immb, opcode, rn, rd);
8722         break;
8723     case 0xc: /* SQSHLU */
8724         if (!is_u) {
8725             unallocated_encoding(s);
8726             return;
8727         }
8728         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8729         break;
8730     case 0xe: /* SQSHL, UQSHL */
8731         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8732         break;
8733     case 0x1f: /* FCVTZS, FCVTZU */
8734         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8735         break;
8736     default:
8737         unallocated_encoding(s);
8738         break;
8739     }
8740 }
8741 
8742 /* AdvSIMD scalar three different
8743  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8744  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8745  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8746  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8747  */
8748 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
8749 {
8750     bool is_u = extract32(insn, 29, 1);
8751     int size = extract32(insn, 22, 2);
8752     int opcode = extract32(insn, 12, 4);
8753     int rm = extract32(insn, 16, 5);
8754     int rn = extract32(insn, 5, 5);
8755     int rd = extract32(insn, 0, 5);
8756 
8757     if (is_u) {
8758         unallocated_encoding(s);
8759         return;
8760     }
8761 
8762     switch (opcode) {
8763     case 0x9: /* SQDMLAL, SQDMLAL2 */
8764     case 0xb: /* SQDMLSL, SQDMLSL2 */
8765     case 0xd: /* SQDMULL, SQDMULL2 */
8766         if (size == 0 || size == 3) {
8767             unallocated_encoding(s);
8768             return;
8769         }
8770         break;
8771     default:
8772         unallocated_encoding(s);
8773         return;
8774     }
8775 
8776     if (!fp_access_check(s)) {
8777         return;
8778     }
8779 
8780     if (size == 2) {
8781         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8782         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8783         TCGv_i64 tcg_res = tcg_temp_new_i64();
8784 
8785         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
8786         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
8787 
8788         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
8789         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
8790 
8791         switch (opcode) {
8792         case 0xd: /* SQDMULL, SQDMULL2 */
8793             break;
8794         case 0xb: /* SQDMLSL, SQDMLSL2 */
8795             tcg_gen_neg_i64(tcg_res, tcg_res);
8796             /* fall through */
8797         case 0x9: /* SQDMLAL, SQDMLAL2 */
8798             read_vec_element(s, tcg_op1, rd, 0, MO_64);
8799             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
8800                                               tcg_res, tcg_op1);
8801             break;
8802         default:
8803             g_assert_not_reached();
8804         }
8805 
8806         write_fp_dreg(s, rd, tcg_res);
8807     } else {
8808         TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
8809         TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
8810         TCGv_i64 tcg_res = tcg_temp_new_i64();
8811 
8812         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
8813         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
8814 
8815         switch (opcode) {
8816         case 0xd: /* SQDMULL, SQDMULL2 */
8817             break;
8818         case 0xb: /* SQDMLSL, SQDMLSL2 */
8819             gen_helper_neon_negl_u32(tcg_res, tcg_res);
8820             /* fall through */
8821         case 0x9: /* SQDMLAL, SQDMLAL2 */
8822         {
8823             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
8824             read_vec_element(s, tcg_op3, rd, 0, MO_32);
8825             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
8826                                               tcg_res, tcg_op3);
8827             break;
8828         }
8829         default:
8830             g_assert_not_reached();
8831         }
8832 
8833         tcg_gen_ext32u_i64(tcg_res, tcg_res);
8834         write_fp_dreg(s, rd, tcg_res);
8835     }
8836 }
8837 
8838 static void handle_3same_64(DisasContext *s, int opcode, bool u,
8839                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
8840 {
8841     /* Handle 64x64->64 opcodes which are shared between the scalar
8842      * and vector 3-same groups. We cover every opcode where size == 3
8843      * is valid in either the three-reg-same (integer, not pairwise)
8844      * or scalar-three-reg-same groups.
8845      */
8846     TCGCond cond;
8847 
8848     switch (opcode) {
8849     case 0x1: /* SQADD */
8850         if (u) {
8851             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8852         } else {
8853             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8854         }
8855         break;
8856     case 0x5: /* SQSUB */
8857         if (u) {
8858             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8859         } else {
8860             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8861         }
8862         break;
8863     case 0x6: /* CMGT, CMHI */
8864         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
8865          * We implement this using setcond (test) and then negating.
8866          */
8867         cond = u ? TCG_COND_GTU : TCG_COND_GT;
8868     do_cmop:
8869         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8870         tcg_gen_neg_i64(tcg_rd, tcg_rd);
8871         break;
8872     case 0x7: /* CMGE, CMHS */
8873         cond = u ? TCG_COND_GEU : TCG_COND_GE;
8874         goto do_cmop;
8875     case 0x11: /* CMTST, CMEQ */
8876         if (u) {
8877             cond = TCG_COND_EQ;
8878             goto do_cmop;
8879         }
8880         gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8881         break;
8882     case 0x8: /* SSHL, USHL */
8883         if (u) {
8884             gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
8885         } else {
8886             gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
8887         }
8888         break;
8889     case 0x9: /* SQSHL, UQSHL */
8890         if (u) {
8891             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8892         } else {
8893             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8894         }
8895         break;
8896     case 0xa: /* SRSHL, URSHL */
8897         if (u) {
8898             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8899         } else {
8900             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8901         }
8902         break;
8903     case 0xb: /* SQRSHL, UQRSHL */
8904         if (u) {
8905             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8906         } else {
8907             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8908         }
8909         break;
8910     case 0x10: /* ADD, SUB */
8911         if (u) {
8912             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8913         } else {
8914             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8915         }
8916         break;
8917     default:
8918         g_assert_not_reached();
8919     }
8920 }
8921 
8922 /* Handle the 3-same-operands float operations; shared by the scalar
8923  * and vector encodings. The caller must filter out any encodings
8924  * not allocated for the encoding it is dealing with.
8925  */
8926 static void handle_3same_float(DisasContext *s, int size, int elements,
8927                                int fpopcode, int rd, int rn, int rm)
8928 {
8929     int pass;
8930     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
8931 
8932     for (pass = 0; pass < elements; pass++) {
8933         if (size) {
8934             /* Double */
8935             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8936             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8937             TCGv_i64 tcg_res = tcg_temp_new_i64();
8938 
8939             read_vec_element(s, tcg_op1, rn, pass, MO_64);
8940             read_vec_element(s, tcg_op2, rm, pass, MO_64);
8941 
8942             switch (fpopcode) {
8943             case 0x39: /* FMLS */
8944                 /* As usual for ARM, separate negation for fused multiply-add */
8945                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
8946                 /* fall through */
8947             case 0x19: /* FMLA */
8948                 read_vec_element(s, tcg_res, rd, pass, MO_64);
8949                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
8950                                        tcg_res, fpst);
8951                 break;
8952             case 0x18: /* FMAXNM */
8953                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8954                 break;
8955             case 0x1a: /* FADD */
8956                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8957                 break;
8958             case 0x1b: /* FMULX */
8959                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
8960                 break;
8961             case 0x1c: /* FCMEQ */
8962                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8963                 break;
8964             case 0x1e: /* FMAX */
8965                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8966                 break;
8967             case 0x1f: /* FRECPS */
8968                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8969                 break;
8970             case 0x38: /* FMINNM */
8971                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8972                 break;
8973             case 0x3a: /* FSUB */
8974                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8975                 break;
8976             case 0x3e: /* FMIN */
8977                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8978                 break;
8979             case 0x3f: /* FRSQRTS */
8980                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8981                 break;
8982             case 0x5b: /* FMUL */
8983                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
8984                 break;
8985             case 0x5c: /* FCMGE */
8986                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8987                 break;
8988             case 0x5d: /* FACGE */
8989                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8990                 break;
8991             case 0x5f: /* FDIV */
8992                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
8993                 break;
8994             case 0x7a: /* FABD */
8995                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8996                 gen_helper_vfp_absd(tcg_res, tcg_res);
8997                 break;
8998             case 0x7c: /* FCMGT */
8999                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9000                 break;
9001             case 0x7d: /* FACGT */
9002                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9003                 break;
9004             default:
9005                 g_assert_not_reached();
9006             }
9007 
9008             write_vec_element(s, tcg_res, rd, pass, MO_64);
9009         } else {
9010             /* Single */
9011             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9012             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9013             TCGv_i32 tcg_res = tcg_temp_new_i32();
9014 
9015             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9016             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9017 
9018             switch (fpopcode) {
9019             case 0x39: /* FMLS */
9020                 /* As usual for ARM, separate negation for fused multiply-add */
9021                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
9022                 /* fall through */
9023             case 0x19: /* FMLA */
9024                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9025                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
9026                                        tcg_res, fpst);
9027                 break;
9028             case 0x1a: /* FADD */
9029                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
9030                 break;
9031             case 0x1b: /* FMULX */
9032                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
9033                 break;
9034             case 0x1c: /* FCMEQ */
9035                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9036                 break;
9037             case 0x1e: /* FMAX */
9038                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
9039                 break;
9040             case 0x1f: /* FRECPS */
9041                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9042                 break;
9043             case 0x18: /* FMAXNM */
9044                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
9045                 break;
9046             case 0x38: /* FMINNM */
9047                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
9048                 break;
9049             case 0x3a: /* FSUB */
9050                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9051                 break;
9052             case 0x3e: /* FMIN */
9053                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
9054                 break;
9055             case 0x3f: /* FRSQRTS */
9056                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9057                 break;
9058             case 0x5b: /* FMUL */
9059                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
9060                 break;
9061             case 0x5c: /* FCMGE */
9062                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9063                 break;
9064             case 0x5d: /* FACGE */
9065                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9066                 break;
9067             case 0x5f: /* FDIV */
9068                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
9069                 break;
9070             case 0x7a: /* FABD */
9071                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9072                 gen_helper_vfp_abss(tcg_res, tcg_res);
9073                 break;
9074             case 0x7c: /* FCMGT */
9075                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9076                 break;
9077             case 0x7d: /* FACGT */
9078                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9079                 break;
9080             default:
9081                 g_assert_not_reached();
9082             }
9083 
9084             if (elements == 1) {
9085                 /* scalar single so clear high part */
9086                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9087 
9088                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
9089                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
9090             } else {
9091                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9092             }
9093         }
9094     }
9095 
9096     clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
9097 }
9098 
9099 /* AdvSIMD scalar three same
9100  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
9101  * +-----+---+-----------+------+---+------+--------+---+------+------+
9102  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9103  * +-----+---+-----------+------+---+------+--------+---+------+------+
9104  */
9105 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
9106 {
9107     int rd = extract32(insn, 0, 5);
9108     int rn = extract32(insn, 5, 5);
9109     int opcode = extract32(insn, 11, 5);
9110     int rm = extract32(insn, 16, 5);
9111     int size = extract32(insn, 22, 2);
9112     bool u = extract32(insn, 29, 1);
9113     TCGv_i64 tcg_rd;
9114 
9115     if (opcode >= 0x18) {
9116         /* Floating point: U, size[1] and opcode indicate operation */
9117         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
9118         switch (fpopcode) {
9119         case 0x1b: /* FMULX */
9120         case 0x1f: /* FRECPS */
9121         case 0x3f: /* FRSQRTS */
9122         case 0x5d: /* FACGE */
9123         case 0x7d: /* FACGT */
9124         case 0x1c: /* FCMEQ */
9125         case 0x5c: /* FCMGE */
9126         case 0x7c: /* FCMGT */
9127         case 0x7a: /* FABD */
9128             break;
9129         default:
9130             unallocated_encoding(s);
9131             return;
9132         }
9133 
9134         if (!fp_access_check(s)) {
9135             return;
9136         }
9137 
9138         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
9139         return;
9140     }
9141 
9142     switch (opcode) {
9143     case 0x1: /* SQADD, UQADD */
9144     case 0x5: /* SQSUB, UQSUB */
9145     case 0x9: /* SQSHL, UQSHL */
9146     case 0xb: /* SQRSHL, UQRSHL */
9147         break;
9148     case 0x8: /* SSHL, USHL */
9149     case 0xa: /* SRSHL, URSHL */
9150     case 0x6: /* CMGT, CMHI */
9151     case 0x7: /* CMGE, CMHS */
9152     case 0x11: /* CMTST, CMEQ */
9153     case 0x10: /* ADD, SUB (vector) */
9154         if (size != 3) {
9155             unallocated_encoding(s);
9156             return;
9157         }
9158         break;
9159     case 0x16: /* SQDMULH, SQRDMULH (vector) */
9160         if (size != 1 && size != 2) {
9161             unallocated_encoding(s);
9162             return;
9163         }
9164         break;
9165     default:
9166         unallocated_encoding(s);
9167         return;
9168     }
9169 
9170     if (!fp_access_check(s)) {
9171         return;
9172     }
9173 
9174     tcg_rd = tcg_temp_new_i64();
9175 
9176     if (size == 3) {
9177         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9178         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
9179 
9180         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
9181     } else {
9182         /* Do a single operation on the lowest element in the vector.
9183          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
9184          * no side effects for all these operations.
9185          * OPTME: special-purpose helpers would avoid doing some
9186          * unnecessary work in the helper for the 8 and 16 bit cases.
9187          */
9188         NeonGenTwoOpEnvFn *genenvfn;
9189         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9190         TCGv_i32 tcg_rm = tcg_temp_new_i32();
9191         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
9192 
9193         read_vec_element_i32(s, tcg_rn, rn, 0, size);
9194         read_vec_element_i32(s, tcg_rm, rm, 0, size);
9195 
9196         switch (opcode) {
9197         case 0x1: /* SQADD, UQADD */
9198         {
9199             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9200                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9201                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9202                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9203             };
9204             genenvfn = fns[size][u];
9205             break;
9206         }
9207         case 0x5: /* SQSUB, UQSUB */
9208         {
9209             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9210                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9211                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9212                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9213             };
9214             genenvfn = fns[size][u];
9215             break;
9216         }
9217         case 0x9: /* SQSHL, UQSHL */
9218         {
9219             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9220                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9221                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9222                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9223             };
9224             genenvfn = fns[size][u];
9225             break;
9226         }
9227         case 0xb: /* SQRSHL, UQRSHL */
9228         {
9229             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9230                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9231                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9232                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9233             };
9234             genenvfn = fns[size][u];
9235             break;
9236         }
9237         case 0x16: /* SQDMULH, SQRDMULH */
9238         {
9239             static NeonGenTwoOpEnvFn * const fns[2][2] = {
9240                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9241                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9242             };
9243             assert(size == 1 || size == 2);
9244             genenvfn = fns[size - 1][u];
9245             break;
9246         }
9247         default:
9248             g_assert_not_reached();
9249         }
9250 
9251         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
9252         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
9253     }
9254 
9255     write_fp_dreg(s, rd, tcg_rd);
9256 }
9257 
9258 /* AdvSIMD scalar three same FP16
9259  *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
9260  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9261  * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
9262  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9263  * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9264  * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9265  */
9266 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9267                                                   uint32_t insn)
9268 {
9269     int rd = extract32(insn, 0, 5);
9270     int rn = extract32(insn, 5, 5);
9271     int opcode = extract32(insn, 11, 3);
9272     int rm = extract32(insn, 16, 5);
9273     bool u = extract32(insn, 29, 1);
9274     bool a = extract32(insn, 23, 1);
9275     int fpopcode = opcode | (a << 3) |  (u << 4);
9276     TCGv_ptr fpst;
9277     TCGv_i32 tcg_op1;
9278     TCGv_i32 tcg_op2;
9279     TCGv_i32 tcg_res;
9280 
9281     switch (fpopcode) {
9282     case 0x03: /* FMULX */
9283     case 0x04: /* FCMEQ (reg) */
9284     case 0x07: /* FRECPS */
9285     case 0x0f: /* FRSQRTS */
9286     case 0x14: /* FCMGE (reg) */
9287     case 0x15: /* FACGE */
9288     case 0x1a: /* FABD */
9289     case 0x1c: /* FCMGT (reg) */
9290     case 0x1d: /* FACGT */
9291         break;
9292     default:
9293         unallocated_encoding(s);
9294         return;
9295     }
9296 
9297     if (!dc_isar_feature(aa64_fp16, s)) {
9298         unallocated_encoding(s);
9299     }
9300 
9301     if (!fp_access_check(s)) {
9302         return;
9303     }
9304 
9305     fpst = fpstatus_ptr(FPST_FPCR_F16);
9306 
9307     tcg_op1 = read_fp_hreg(s, rn);
9308     tcg_op2 = read_fp_hreg(s, rm);
9309     tcg_res = tcg_temp_new_i32();
9310 
9311     switch (fpopcode) {
9312     case 0x03: /* FMULX */
9313         gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9314         break;
9315     case 0x04: /* FCMEQ (reg) */
9316         gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9317         break;
9318     case 0x07: /* FRECPS */
9319         gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9320         break;
9321     case 0x0f: /* FRSQRTS */
9322         gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9323         break;
9324     case 0x14: /* FCMGE (reg) */
9325         gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9326         break;
9327     case 0x15: /* FACGE */
9328         gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9329         break;
9330     case 0x1a: /* FABD */
9331         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9332         tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9333         break;
9334     case 0x1c: /* FCMGT (reg) */
9335         gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9336         break;
9337     case 0x1d: /* FACGT */
9338         gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9339         break;
9340     default:
9341         g_assert_not_reached();
9342     }
9343 
9344     write_fp_sreg(s, rd, tcg_res);
9345 }
9346 
9347 /* AdvSIMD scalar three same extra
9348  *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
9349  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9350  * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
9351  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9352  */
9353 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9354                                                    uint32_t insn)
9355 {
9356     int rd = extract32(insn, 0, 5);
9357     int rn = extract32(insn, 5, 5);
9358     int opcode = extract32(insn, 11, 4);
9359     int rm = extract32(insn, 16, 5);
9360     int size = extract32(insn, 22, 2);
9361     bool u = extract32(insn, 29, 1);
9362     TCGv_i32 ele1, ele2, ele3;
9363     TCGv_i64 res;
9364     bool feature;
9365 
9366     switch (u * 16 + opcode) {
9367     case 0x10: /* SQRDMLAH (vector) */
9368     case 0x11: /* SQRDMLSH (vector) */
9369         if (size != 1 && size != 2) {
9370             unallocated_encoding(s);
9371             return;
9372         }
9373         feature = dc_isar_feature(aa64_rdm, s);
9374         break;
9375     default:
9376         unallocated_encoding(s);
9377         return;
9378     }
9379     if (!feature) {
9380         unallocated_encoding(s);
9381         return;
9382     }
9383     if (!fp_access_check(s)) {
9384         return;
9385     }
9386 
9387     /* Do a single operation on the lowest element in the vector.
9388      * We use the standard Neon helpers and rely on 0 OP 0 == 0
9389      * with no side effects for all these operations.
9390      * OPTME: special-purpose helpers would avoid doing some
9391      * unnecessary work in the helper for the 16 bit cases.
9392      */
9393     ele1 = tcg_temp_new_i32();
9394     ele2 = tcg_temp_new_i32();
9395     ele3 = tcg_temp_new_i32();
9396 
9397     read_vec_element_i32(s, ele1, rn, 0, size);
9398     read_vec_element_i32(s, ele2, rm, 0, size);
9399     read_vec_element_i32(s, ele3, rd, 0, size);
9400 
9401     switch (opcode) {
9402     case 0x0: /* SQRDMLAH */
9403         if (size == 1) {
9404             gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
9405         } else {
9406             gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
9407         }
9408         break;
9409     case 0x1: /* SQRDMLSH */
9410         if (size == 1) {
9411             gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
9412         } else {
9413             gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
9414         }
9415         break;
9416     default:
9417         g_assert_not_reached();
9418     }
9419 
9420     res = tcg_temp_new_i64();
9421     tcg_gen_extu_i32_i64(res, ele3);
9422     write_fp_dreg(s, rd, res);
9423 }
9424 
9425 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9426                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9427                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9428 {
9429     /* Handle 64->64 opcodes which are shared between the scalar and
9430      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9431      * is valid in either group and also the double-precision fp ops.
9432      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
9433      * requires them.
9434      */
9435     TCGCond cond;
9436 
9437     switch (opcode) {
9438     case 0x4: /* CLS, CLZ */
9439         if (u) {
9440             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9441         } else {
9442             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9443         }
9444         break;
9445     case 0x5: /* NOT */
9446         /* This opcode is shared with CNT and RBIT but we have earlier
9447          * enforced that size == 3 if and only if this is the NOT insn.
9448          */
9449         tcg_gen_not_i64(tcg_rd, tcg_rn);
9450         break;
9451     case 0x7: /* SQABS, SQNEG */
9452         if (u) {
9453             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
9454         } else {
9455             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
9456         }
9457         break;
9458     case 0xa: /* CMLT */
9459         /* 64 bit integer comparison against zero, result is
9460          * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
9461          * subtracting 1.
9462          */
9463         cond = TCG_COND_LT;
9464     do_cmop:
9465         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
9466         tcg_gen_neg_i64(tcg_rd, tcg_rd);
9467         break;
9468     case 0x8: /* CMGT, CMGE */
9469         cond = u ? TCG_COND_GE : TCG_COND_GT;
9470         goto do_cmop;
9471     case 0x9: /* CMEQ, CMLE */
9472         cond = u ? TCG_COND_LE : TCG_COND_EQ;
9473         goto do_cmop;
9474     case 0xb: /* ABS, NEG */
9475         if (u) {
9476             tcg_gen_neg_i64(tcg_rd, tcg_rn);
9477         } else {
9478             tcg_gen_abs_i64(tcg_rd, tcg_rn);
9479         }
9480         break;
9481     case 0x2f: /* FABS */
9482         gen_helper_vfp_absd(tcg_rd, tcg_rn);
9483         break;
9484     case 0x6f: /* FNEG */
9485         gen_helper_vfp_negd(tcg_rd, tcg_rn);
9486         break;
9487     case 0x7f: /* FSQRT */
9488         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
9489         break;
9490     case 0x1a: /* FCVTNS */
9491     case 0x1b: /* FCVTMS */
9492     case 0x1c: /* FCVTAS */
9493     case 0x3a: /* FCVTPS */
9494     case 0x3b: /* FCVTZS */
9495         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9496         break;
9497     case 0x5a: /* FCVTNU */
9498     case 0x5b: /* FCVTMU */
9499     case 0x5c: /* FCVTAU */
9500     case 0x7a: /* FCVTPU */
9501     case 0x7b: /* FCVTZU */
9502         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9503         break;
9504     case 0x18: /* FRINTN */
9505     case 0x19: /* FRINTM */
9506     case 0x38: /* FRINTP */
9507     case 0x39: /* FRINTZ */
9508     case 0x58: /* FRINTA */
9509     case 0x79: /* FRINTI */
9510         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
9511         break;
9512     case 0x59: /* FRINTX */
9513         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
9514         break;
9515     case 0x1e: /* FRINT32Z */
9516     case 0x5e: /* FRINT32X */
9517         gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
9518         break;
9519     case 0x1f: /* FRINT64Z */
9520     case 0x5f: /* FRINT64X */
9521         gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
9522         break;
9523     default:
9524         g_assert_not_reached();
9525     }
9526 }
9527 
9528 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
9529                                    bool is_scalar, bool is_u, bool is_q,
9530                                    int size, int rn, int rd)
9531 {
9532     bool is_double = (size == MO_64);
9533     TCGv_ptr fpst;
9534 
9535     if (!fp_access_check(s)) {
9536         return;
9537     }
9538 
9539     fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9540 
9541     if (is_double) {
9542         TCGv_i64 tcg_op = tcg_temp_new_i64();
9543         TCGv_i64 tcg_zero = tcg_constant_i64(0);
9544         TCGv_i64 tcg_res = tcg_temp_new_i64();
9545         NeonGenTwoDoubleOpFn *genfn;
9546         bool swap = false;
9547         int pass;
9548 
9549         switch (opcode) {
9550         case 0x2e: /* FCMLT (zero) */
9551             swap = true;
9552             /* fallthrough */
9553         case 0x2c: /* FCMGT (zero) */
9554             genfn = gen_helper_neon_cgt_f64;
9555             break;
9556         case 0x2d: /* FCMEQ (zero) */
9557             genfn = gen_helper_neon_ceq_f64;
9558             break;
9559         case 0x6d: /* FCMLE (zero) */
9560             swap = true;
9561             /* fall through */
9562         case 0x6c: /* FCMGE (zero) */
9563             genfn = gen_helper_neon_cge_f64;
9564             break;
9565         default:
9566             g_assert_not_reached();
9567         }
9568 
9569         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9570             read_vec_element(s, tcg_op, rn, pass, MO_64);
9571             if (swap) {
9572                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9573             } else {
9574                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9575             }
9576             write_vec_element(s, tcg_res, rd, pass, MO_64);
9577         }
9578 
9579         clear_vec_high(s, !is_scalar, rd);
9580     } else {
9581         TCGv_i32 tcg_op = tcg_temp_new_i32();
9582         TCGv_i32 tcg_zero = tcg_constant_i32(0);
9583         TCGv_i32 tcg_res = tcg_temp_new_i32();
9584         NeonGenTwoSingleOpFn *genfn;
9585         bool swap = false;
9586         int pass, maxpasses;
9587 
9588         if (size == MO_16) {
9589             switch (opcode) {
9590             case 0x2e: /* FCMLT (zero) */
9591                 swap = true;
9592                 /* fall through */
9593             case 0x2c: /* FCMGT (zero) */
9594                 genfn = gen_helper_advsimd_cgt_f16;
9595                 break;
9596             case 0x2d: /* FCMEQ (zero) */
9597                 genfn = gen_helper_advsimd_ceq_f16;
9598                 break;
9599             case 0x6d: /* FCMLE (zero) */
9600                 swap = true;
9601                 /* fall through */
9602             case 0x6c: /* FCMGE (zero) */
9603                 genfn = gen_helper_advsimd_cge_f16;
9604                 break;
9605             default:
9606                 g_assert_not_reached();
9607             }
9608         } else {
9609             switch (opcode) {
9610             case 0x2e: /* FCMLT (zero) */
9611                 swap = true;
9612                 /* fall through */
9613             case 0x2c: /* FCMGT (zero) */
9614                 genfn = gen_helper_neon_cgt_f32;
9615                 break;
9616             case 0x2d: /* FCMEQ (zero) */
9617                 genfn = gen_helper_neon_ceq_f32;
9618                 break;
9619             case 0x6d: /* FCMLE (zero) */
9620                 swap = true;
9621                 /* fall through */
9622             case 0x6c: /* FCMGE (zero) */
9623                 genfn = gen_helper_neon_cge_f32;
9624                 break;
9625             default:
9626                 g_assert_not_reached();
9627             }
9628         }
9629 
9630         if (is_scalar) {
9631             maxpasses = 1;
9632         } else {
9633             int vector_size = 8 << is_q;
9634             maxpasses = vector_size >> size;
9635         }
9636 
9637         for (pass = 0; pass < maxpasses; pass++) {
9638             read_vec_element_i32(s, tcg_op, rn, pass, size);
9639             if (swap) {
9640                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9641             } else {
9642                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9643             }
9644             if (is_scalar) {
9645                 write_fp_sreg(s, rd, tcg_res);
9646             } else {
9647                 write_vec_element_i32(s, tcg_res, rd, pass, size);
9648             }
9649         }
9650 
9651         if (!is_scalar) {
9652             clear_vec_high(s, is_q, rd);
9653         }
9654     }
9655 }
9656 
9657 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
9658                                     bool is_scalar, bool is_u, bool is_q,
9659                                     int size, int rn, int rd)
9660 {
9661     bool is_double = (size == 3);
9662     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9663 
9664     if (is_double) {
9665         TCGv_i64 tcg_op = tcg_temp_new_i64();
9666         TCGv_i64 tcg_res = tcg_temp_new_i64();
9667         int pass;
9668 
9669         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9670             read_vec_element(s, tcg_op, rn, pass, MO_64);
9671             switch (opcode) {
9672             case 0x3d: /* FRECPE */
9673                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
9674                 break;
9675             case 0x3f: /* FRECPX */
9676                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
9677                 break;
9678             case 0x7d: /* FRSQRTE */
9679                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
9680                 break;
9681             default:
9682                 g_assert_not_reached();
9683             }
9684             write_vec_element(s, tcg_res, rd, pass, MO_64);
9685         }
9686         clear_vec_high(s, !is_scalar, rd);
9687     } else {
9688         TCGv_i32 tcg_op = tcg_temp_new_i32();
9689         TCGv_i32 tcg_res = tcg_temp_new_i32();
9690         int pass, maxpasses;
9691 
9692         if (is_scalar) {
9693             maxpasses = 1;
9694         } else {
9695             maxpasses = is_q ? 4 : 2;
9696         }
9697 
9698         for (pass = 0; pass < maxpasses; pass++) {
9699             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
9700 
9701             switch (opcode) {
9702             case 0x3c: /* URECPE */
9703                 gen_helper_recpe_u32(tcg_res, tcg_op);
9704                 break;
9705             case 0x3d: /* FRECPE */
9706                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
9707                 break;
9708             case 0x3f: /* FRECPX */
9709                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
9710                 break;
9711             case 0x7d: /* FRSQRTE */
9712                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
9713                 break;
9714             default:
9715                 g_assert_not_reached();
9716             }
9717 
9718             if (is_scalar) {
9719                 write_fp_sreg(s, rd, tcg_res);
9720             } else {
9721                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9722             }
9723         }
9724         if (!is_scalar) {
9725             clear_vec_high(s, is_q, rd);
9726         }
9727     }
9728 }
9729 
9730 static void handle_2misc_narrow(DisasContext *s, bool scalar,
9731                                 int opcode, bool u, bool is_q,
9732                                 int size, int rn, int rd)
9733 {
9734     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
9735      * in the source becomes a size element in the destination).
9736      */
9737     int pass;
9738     TCGv_i32 tcg_res[2];
9739     int destelt = is_q ? 2 : 0;
9740     int passes = scalar ? 1 : 2;
9741 
9742     if (scalar) {
9743         tcg_res[1] = tcg_constant_i32(0);
9744     }
9745 
9746     for (pass = 0; pass < passes; pass++) {
9747         TCGv_i64 tcg_op = tcg_temp_new_i64();
9748         NeonGenNarrowFn *genfn = NULL;
9749         NeonGenNarrowEnvFn *genenvfn = NULL;
9750 
9751         if (scalar) {
9752             read_vec_element(s, tcg_op, rn, pass, size + 1);
9753         } else {
9754             read_vec_element(s, tcg_op, rn, pass, MO_64);
9755         }
9756         tcg_res[pass] = tcg_temp_new_i32();
9757 
9758         switch (opcode) {
9759         case 0x12: /* XTN, SQXTUN */
9760         {
9761             static NeonGenNarrowFn * const xtnfns[3] = {
9762                 gen_helper_neon_narrow_u8,
9763                 gen_helper_neon_narrow_u16,
9764                 tcg_gen_extrl_i64_i32,
9765             };
9766             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
9767                 gen_helper_neon_unarrow_sat8,
9768                 gen_helper_neon_unarrow_sat16,
9769                 gen_helper_neon_unarrow_sat32,
9770             };
9771             if (u) {
9772                 genenvfn = sqxtunfns[size];
9773             } else {
9774                 genfn = xtnfns[size];
9775             }
9776             break;
9777         }
9778         case 0x14: /* SQXTN, UQXTN */
9779         {
9780             static NeonGenNarrowEnvFn * const fns[3][2] = {
9781                 { gen_helper_neon_narrow_sat_s8,
9782                   gen_helper_neon_narrow_sat_u8 },
9783                 { gen_helper_neon_narrow_sat_s16,
9784                   gen_helper_neon_narrow_sat_u16 },
9785                 { gen_helper_neon_narrow_sat_s32,
9786                   gen_helper_neon_narrow_sat_u32 },
9787             };
9788             genenvfn = fns[size][u];
9789             break;
9790         }
9791         case 0x16: /* FCVTN, FCVTN2 */
9792             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9793             if (size == 2) {
9794                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
9795             } else {
9796                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9797                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9798                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9799                 TCGv_i32 ahp = get_ahp_flag();
9800 
9801                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
9802                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9803                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9804                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9805             }
9806             break;
9807         case 0x36: /* BFCVTN, BFCVTN2 */
9808             {
9809                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9810                 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
9811             }
9812             break;
9813         case 0x56:  /* FCVTXN, FCVTXN2 */
9814             /* 64 bit to 32 bit float conversion
9815              * with von Neumann rounding (round to odd)
9816              */
9817             assert(size == 2);
9818             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
9819             break;
9820         default:
9821             g_assert_not_reached();
9822         }
9823 
9824         if (genfn) {
9825             genfn(tcg_res[pass], tcg_op);
9826         } else if (genenvfn) {
9827             genenvfn(tcg_res[pass], cpu_env, tcg_op);
9828         }
9829     }
9830 
9831     for (pass = 0; pass < 2; pass++) {
9832         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9833     }
9834     clear_vec_high(s, is_q, rd);
9835 }
9836 
9837 /* Remaining saturating accumulating ops */
9838 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9839                                 bool is_q, int size, int rn, int rd)
9840 {
9841     bool is_double = (size == 3);
9842 
9843     if (is_double) {
9844         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9845         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9846         int pass;
9847 
9848         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9849             read_vec_element(s, tcg_rn, rn, pass, MO_64);
9850             read_vec_element(s, tcg_rd, rd, pass, MO_64);
9851 
9852             if (is_u) { /* USQADD */
9853                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9854             } else { /* SUQADD */
9855                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9856             }
9857             write_vec_element(s, tcg_rd, rd, pass, MO_64);
9858         }
9859         clear_vec_high(s, !is_scalar, rd);
9860     } else {
9861         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9862         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9863         int pass, maxpasses;
9864 
9865         if (is_scalar) {
9866             maxpasses = 1;
9867         } else {
9868             maxpasses = is_q ? 4 : 2;
9869         }
9870 
9871         for (pass = 0; pass < maxpasses; pass++) {
9872             if (is_scalar) {
9873                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
9874                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
9875             } else {
9876                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9877                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9878             }
9879 
9880             if (is_u) { /* USQADD */
9881                 switch (size) {
9882                 case 0:
9883                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9884                     break;
9885                 case 1:
9886                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9887                     break;
9888                 case 2:
9889                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9890                     break;
9891                 default:
9892                     g_assert_not_reached();
9893                 }
9894             } else { /* SUQADD */
9895                 switch (size) {
9896                 case 0:
9897                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9898                     break;
9899                 case 1:
9900                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9901                     break;
9902                 case 2:
9903                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9904                     break;
9905                 default:
9906                     g_assert_not_reached();
9907                 }
9908             }
9909 
9910             if (is_scalar) {
9911                 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64);
9912             }
9913             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9914         }
9915         clear_vec_high(s, is_q, rd);
9916     }
9917 }
9918 
9919 /* AdvSIMD scalar two reg misc
9920  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9921  * +-----+---+-----------+------+-----------+--------+-----+------+------+
9922  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9923  * +-----+---+-----------+------+-----------+--------+-----+------+------+
9924  */
9925 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
9926 {
9927     int rd = extract32(insn, 0, 5);
9928     int rn = extract32(insn, 5, 5);
9929     int opcode = extract32(insn, 12, 5);
9930     int size = extract32(insn, 22, 2);
9931     bool u = extract32(insn, 29, 1);
9932     bool is_fcvt = false;
9933     int rmode;
9934     TCGv_i32 tcg_rmode;
9935     TCGv_ptr tcg_fpstatus;
9936 
9937     switch (opcode) {
9938     case 0x3: /* USQADD / SUQADD*/
9939         if (!fp_access_check(s)) {
9940             return;
9941         }
9942         handle_2misc_satacc(s, true, u, false, size, rn, rd);
9943         return;
9944     case 0x7: /* SQABS / SQNEG */
9945         break;
9946     case 0xa: /* CMLT */
9947         if (u) {
9948             unallocated_encoding(s);
9949             return;
9950         }
9951         /* fall through */
9952     case 0x8: /* CMGT, CMGE */
9953     case 0x9: /* CMEQ, CMLE */
9954     case 0xb: /* ABS, NEG */
9955         if (size != 3) {
9956             unallocated_encoding(s);
9957             return;
9958         }
9959         break;
9960     case 0x12: /* SQXTUN */
9961         if (!u) {
9962             unallocated_encoding(s);
9963             return;
9964         }
9965         /* fall through */
9966     case 0x14: /* SQXTN, UQXTN */
9967         if (size == 3) {
9968             unallocated_encoding(s);
9969             return;
9970         }
9971         if (!fp_access_check(s)) {
9972             return;
9973         }
9974         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
9975         return;
9976     case 0xc ... 0xf:
9977     case 0x16 ... 0x1d:
9978     case 0x1f:
9979         /* Floating point: U, size[1] and opcode indicate operation;
9980          * size[0] indicates single or double precision.
9981          */
9982         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9983         size = extract32(size, 0, 1) ? 3 : 2;
9984         switch (opcode) {
9985         case 0x2c: /* FCMGT (zero) */
9986         case 0x2d: /* FCMEQ (zero) */
9987         case 0x2e: /* FCMLT (zero) */
9988         case 0x6c: /* FCMGE (zero) */
9989         case 0x6d: /* FCMLE (zero) */
9990             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
9991             return;
9992         case 0x1d: /* SCVTF */
9993         case 0x5d: /* UCVTF */
9994         {
9995             bool is_signed = (opcode == 0x1d);
9996             if (!fp_access_check(s)) {
9997                 return;
9998             }
9999             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
10000             return;
10001         }
10002         case 0x3d: /* FRECPE */
10003         case 0x3f: /* FRECPX */
10004         case 0x7d: /* FRSQRTE */
10005             if (!fp_access_check(s)) {
10006                 return;
10007             }
10008             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
10009             return;
10010         case 0x1a: /* FCVTNS */
10011         case 0x1b: /* FCVTMS */
10012         case 0x3a: /* FCVTPS */
10013         case 0x3b: /* FCVTZS */
10014         case 0x5a: /* FCVTNU */
10015         case 0x5b: /* FCVTMU */
10016         case 0x7a: /* FCVTPU */
10017         case 0x7b: /* FCVTZU */
10018             is_fcvt = true;
10019             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10020             break;
10021         case 0x1c: /* FCVTAS */
10022         case 0x5c: /* FCVTAU */
10023             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
10024             is_fcvt = true;
10025             rmode = FPROUNDING_TIEAWAY;
10026             break;
10027         case 0x56: /* FCVTXN, FCVTXN2 */
10028             if (size == 2) {
10029                 unallocated_encoding(s);
10030                 return;
10031             }
10032             if (!fp_access_check(s)) {
10033                 return;
10034             }
10035             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
10036             return;
10037         default:
10038             unallocated_encoding(s);
10039             return;
10040         }
10041         break;
10042     default:
10043         unallocated_encoding(s);
10044         return;
10045     }
10046 
10047     if (!fp_access_check(s)) {
10048         return;
10049     }
10050 
10051     if (is_fcvt) {
10052         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
10053         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
10054     } else {
10055         tcg_fpstatus = NULL;
10056         tcg_rmode = NULL;
10057     }
10058 
10059     if (size == 3) {
10060         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
10061         TCGv_i64 tcg_rd = tcg_temp_new_i64();
10062 
10063         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
10064         write_fp_dreg(s, rd, tcg_rd);
10065     } else {
10066         TCGv_i32 tcg_rn = tcg_temp_new_i32();
10067         TCGv_i32 tcg_rd = tcg_temp_new_i32();
10068 
10069         read_vec_element_i32(s, tcg_rn, rn, 0, size);
10070 
10071         switch (opcode) {
10072         case 0x7: /* SQABS, SQNEG */
10073         {
10074             NeonGenOneOpEnvFn *genfn;
10075             static NeonGenOneOpEnvFn * const fns[3][2] = {
10076                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10077                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10078                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
10079             };
10080             genfn = fns[size][u];
10081             genfn(tcg_rd, cpu_env, tcg_rn);
10082             break;
10083         }
10084         case 0x1a: /* FCVTNS */
10085         case 0x1b: /* FCVTMS */
10086         case 0x1c: /* FCVTAS */
10087         case 0x3a: /* FCVTPS */
10088         case 0x3b: /* FCVTZS */
10089             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10090                                  tcg_fpstatus);
10091             break;
10092         case 0x5a: /* FCVTNU */
10093         case 0x5b: /* FCVTMU */
10094         case 0x5c: /* FCVTAU */
10095         case 0x7a: /* FCVTPU */
10096         case 0x7b: /* FCVTZU */
10097             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10098                                  tcg_fpstatus);
10099             break;
10100         default:
10101             g_assert_not_reached();
10102         }
10103 
10104         write_fp_sreg(s, rd, tcg_rd);
10105     }
10106 
10107     if (is_fcvt) {
10108         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
10109     }
10110 }
10111 
10112 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
10113 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
10114                                  int immh, int immb, int opcode, int rn, int rd)
10115 {
10116     int size = 32 - clz32(immh) - 1;
10117     int immhb = immh << 3 | immb;
10118     int shift = 2 * (8 << size) - immhb;
10119     GVecGen2iFn *gvec_fn;
10120 
10121     if (extract32(immh, 3, 1) && !is_q) {
10122         unallocated_encoding(s);
10123         return;
10124     }
10125     tcg_debug_assert(size <= 3);
10126 
10127     if (!fp_access_check(s)) {
10128         return;
10129     }
10130 
10131     switch (opcode) {
10132     case 0x02: /* SSRA / USRA (accumulate) */
10133         gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
10134         break;
10135 
10136     case 0x08: /* SRI */
10137         gvec_fn = gen_gvec_sri;
10138         break;
10139 
10140     case 0x00: /* SSHR / USHR */
10141         if (is_u) {
10142             if (shift == 8 << size) {
10143                 /* Shift count the same size as element size produces zero.  */
10144                 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
10145                                      is_q ? 16 : 8, vec_full_reg_size(s), 0);
10146                 return;
10147             }
10148             gvec_fn = tcg_gen_gvec_shri;
10149         } else {
10150             /* Shift count the same size as element size produces all sign.  */
10151             if (shift == 8 << size) {
10152                 shift -= 1;
10153             }
10154             gvec_fn = tcg_gen_gvec_sari;
10155         }
10156         break;
10157 
10158     case 0x04: /* SRSHR / URSHR (rounding) */
10159         gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
10160         break;
10161 
10162     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10163         gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
10164         break;
10165 
10166     default:
10167         g_assert_not_reached();
10168     }
10169 
10170     gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
10171 }
10172 
10173 /* SHL/SLI - Vector shift left */
10174 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10175                                  int immh, int immb, int opcode, int rn, int rd)
10176 {
10177     int size = 32 - clz32(immh) - 1;
10178     int immhb = immh << 3 | immb;
10179     int shift = immhb - (8 << size);
10180 
10181     /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10182     assert(size >= 0 && size <= 3);
10183 
10184     if (extract32(immh, 3, 1) && !is_q) {
10185         unallocated_encoding(s);
10186         return;
10187     }
10188 
10189     if (!fp_access_check(s)) {
10190         return;
10191     }
10192 
10193     if (insert) {
10194         gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
10195     } else {
10196         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10197     }
10198 }
10199 
10200 /* USHLL/SHLL - Vector shift left with widening */
10201 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10202                                  int immh, int immb, int opcode, int rn, int rd)
10203 {
10204     int size = 32 - clz32(immh) - 1;
10205     int immhb = immh << 3 | immb;
10206     int shift = immhb - (8 << size);
10207     int dsize = 64;
10208     int esize = 8 << size;
10209     int elements = dsize/esize;
10210     TCGv_i64 tcg_rn = tcg_temp_new_i64();
10211     TCGv_i64 tcg_rd = tcg_temp_new_i64();
10212     int i;
10213 
10214     if (size >= 3) {
10215         unallocated_encoding(s);
10216         return;
10217     }
10218 
10219     if (!fp_access_check(s)) {
10220         return;
10221     }
10222 
10223     /* For the LL variants the store is larger than the load,
10224      * so if rd == rn we would overwrite parts of our input.
10225      * So load everything right now and use shifts in the main loop.
10226      */
10227     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10228 
10229     for (i = 0; i < elements; i++) {
10230         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
10231         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10232         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10233         write_vec_element(s, tcg_rd, rd, i, size + 1);
10234     }
10235 }
10236 
10237 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10238 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10239                                  int immh, int immb, int opcode, int rn, int rd)
10240 {
10241     int immhb = immh << 3 | immb;
10242     int size = 32 - clz32(immh) - 1;
10243     int dsize = 64;
10244     int esize = 8 << size;
10245     int elements = dsize/esize;
10246     int shift = (2 * esize) - immhb;
10247     bool round = extract32(opcode, 0, 1);
10248     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10249     TCGv_i64 tcg_round;
10250     int i;
10251 
10252     if (extract32(immh, 3, 1)) {
10253         unallocated_encoding(s);
10254         return;
10255     }
10256 
10257     if (!fp_access_check(s)) {
10258         return;
10259     }
10260 
10261     tcg_rn = tcg_temp_new_i64();
10262     tcg_rd = tcg_temp_new_i64();
10263     tcg_final = tcg_temp_new_i64();
10264     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10265 
10266     if (round) {
10267         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
10268     } else {
10269         tcg_round = NULL;
10270     }
10271 
10272     for (i = 0; i < elements; i++) {
10273         read_vec_element(s, tcg_rn, rn, i, size+1);
10274         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10275                                 false, true, size+1, shift);
10276 
10277         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10278     }
10279 
10280     if (!is_q) {
10281         write_vec_element(s, tcg_final, rd, 0, MO_64);
10282     } else {
10283         write_vec_element(s, tcg_final, rd, 1, MO_64);
10284     }
10285 
10286     clear_vec_high(s, is_q, rd);
10287 }
10288 
10289 
10290 /* AdvSIMD shift by immediate
10291  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10292  * +---+---+---+-------------+------+------+--------+---+------+------+
10293  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10294  * +---+---+---+-------------+------+------+--------+---+------+------+
10295  */
10296 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10297 {
10298     int rd = extract32(insn, 0, 5);
10299     int rn = extract32(insn, 5, 5);
10300     int opcode = extract32(insn, 11, 5);
10301     int immb = extract32(insn, 16, 3);
10302     int immh = extract32(insn, 19, 4);
10303     bool is_u = extract32(insn, 29, 1);
10304     bool is_q = extract32(insn, 30, 1);
10305 
10306     /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
10307     assert(immh != 0);
10308 
10309     switch (opcode) {
10310     case 0x08: /* SRI */
10311         if (!is_u) {
10312             unallocated_encoding(s);
10313             return;
10314         }
10315         /* fall through */
10316     case 0x00: /* SSHR / USHR */
10317     case 0x02: /* SSRA / USRA (accumulate) */
10318     case 0x04: /* SRSHR / URSHR (rounding) */
10319     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10320         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10321         break;
10322     case 0x0a: /* SHL / SLI */
10323         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10324         break;
10325     case 0x10: /* SHRN */
10326     case 0x11: /* RSHRN / SQRSHRUN */
10327         if (is_u) {
10328             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10329                                    opcode, rn, rd);
10330         } else {
10331             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10332         }
10333         break;
10334     case 0x12: /* SQSHRN / UQSHRN */
10335     case 0x13: /* SQRSHRN / UQRSHRN */
10336         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10337                                opcode, rn, rd);
10338         break;
10339     case 0x14: /* SSHLL / USHLL */
10340         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10341         break;
10342     case 0x1c: /* SCVTF / UCVTF */
10343         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10344                                      opcode, rn, rd);
10345         break;
10346     case 0xc: /* SQSHLU */
10347         if (!is_u) {
10348             unallocated_encoding(s);
10349             return;
10350         }
10351         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10352         break;
10353     case 0xe: /* SQSHL, UQSHL */
10354         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10355         break;
10356     case 0x1f: /* FCVTZS/ FCVTZU */
10357         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10358         return;
10359     default:
10360         unallocated_encoding(s);
10361         return;
10362     }
10363 }
10364 
10365 /* Generate code to do a "long" addition or subtraction, ie one done in
10366  * TCGv_i64 on vector lanes twice the width specified by size.
10367  */
10368 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10369                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10370 {
10371     static NeonGenTwo64OpFn * const fns[3][2] = {
10372         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10373         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10374         { tcg_gen_add_i64, tcg_gen_sub_i64 },
10375     };
10376     NeonGenTwo64OpFn *genfn;
10377     assert(size < 3);
10378 
10379     genfn = fns[size][is_sub];
10380     genfn(tcg_res, tcg_op1, tcg_op2);
10381 }
10382 
10383 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
10384                                 int opcode, int rd, int rn, int rm)
10385 {
10386     /* 3-reg-different widening insns: 64 x 64 -> 128 */
10387     TCGv_i64 tcg_res[2];
10388     int pass, accop;
10389 
10390     tcg_res[0] = tcg_temp_new_i64();
10391     tcg_res[1] = tcg_temp_new_i64();
10392 
10393     /* Does this op do an adding accumulate, a subtracting accumulate,
10394      * or no accumulate at all?
10395      */
10396     switch (opcode) {
10397     case 5:
10398     case 8:
10399     case 9:
10400         accop = 1;
10401         break;
10402     case 10:
10403     case 11:
10404         accop = -1;
10405         break;
10406     default:
10407         accop = 0;
10408         break;
10409     }
10410 
10411     if (accop != 0) {
10412         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
10413         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
10414     }
10415 
10416     /* size == 2 means two 32x32->64 operations; this is worth special
10417      * casing because we can generally handle it inline.
10418      */
10419     if (size == 2) {
10420         for (pass = 0; pass < 2; pass++) {
10421             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10422             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10423             TCGv_i64 tcg_passres;
10424             MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
10425 
10426             int elt = pass + is_q * 2;
10427 
10428             read_vec_element(s, tcg_op1, rn, elt, memop);
10429             read_vec_element(s, tcg_op2, rm, elt, memop);
10430 
10431             if (accop == 0) {
10432                 tcg_passres = tcg_res[pass];
10433             } else {
10434                 tcg_passres = tcg_temp_new_i64();
10435             }
10436 
10437             switch (opcode) {
10438             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10439                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
10440                 break;
10441             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10442                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
10443                 break;
10444             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10445             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10446             {
10447                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
10448                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
10449 
10450                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
10451                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
10452                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
10453                                     tcg_passres,
10454                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
10455                 break;
10456             }
10457             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10458             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10459             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10460                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10461                 break;
10462             case 9: /* SQDMLAL, SQDMLAL2 */
10463             case 11: /* SQDMLSL, SQDMLSL2 */
10464             case 13: /* SQDMULL, SQDMULL2 */
10465                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10466                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10467                                                   tcg_passres, tcg_passres);
10468                 break;
10469             default:
10470                 g_assert_not_reached();
10471             }
10472 
10473             if (opcode == 9 || opcode == 11) {
10474                 /* saturating accumulate ops */
10475                 if (accop < 0) {
10476                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10477                 }
10478                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10479                                                   tcg_res[pass], tcg_passres);
10480             } else if (accop > 0) {
10481                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10482             } else if (accop < 0) {
10483                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10484             }
10485         }
10486     } else {
10487         /* size 0 or 1, generally helper functions */
10488         for (pass = 0; pass < 2; pass++) {
10489             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10490             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10491             TCGv_i64 tcg_passres;
10492             int elt = pass + is_q * 2;
10493 
10494             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
10495             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
10496 
10497             if (accop == 0) {
10498                 tcg_passres = tcg_res[pass];
10499             } else {
10500                 tcg_passres = tcg_temp_new_i64();
10501             }
10502 
10503             switch (opcode) {
10504             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10505             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10506             {
10507                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
10508                 static NeonGenWidenFn * const widenfns[2][2] = {
10509                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10510                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10511                 };
10512                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
10513 
10514                 widenfn(tcg_op2_64, tcg_op2);
10515                 widenfn(tcg_passres, tcg_op1);
10516                 gen_neon_addl(size, (opcode == 2), tcg_passres,
10517                               tcg_passres, tcg_op2_64);
10518                 break;
10519             }
10520             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10521             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10522                 if (size == 0) {
10523                     if (is_u) {
10524                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
10525                     } else {
10526                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
10527                     }
10528                 } else {
10529                     if (is_u) {
10530                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
10531                     } else {
10532                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
10533                     }
10534                 }
10535                 break;
10536             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10537             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10538             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10539                 if (size == 0) {
10540                     if (is_u) {
10541                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
10542                     } else {
10543                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
10544                     }
10545                 } else {
10546                     if (is_u) {
10547                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
10548                     } else {
10549                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10550                     }
10551                 }
10552                 break;
10553             case 9: /* SQDMLAL, SQDMLAL2 */
10554             case 11: /* SQDMLSL, SQDMLSL2 */
10555             case 13: /* SQDMULL, SQDMULL2 */
10556                 assert(size == 1);
10557                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10558                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10559                                                   tcg_passres, tcg_passres);
10560                 break;
10561             default:
10562                 g_assert_not_reached();
10563             }
10564 
10565             if (accop != 0) {
10566                 if (opcode == 9 || opcode == 11) {
10567                     /* saturating accumulate ops */
10568                     if (accop < 0) {
10569                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10570                     }
10571                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10572                                                       tcg_res[pass],
10573                                                       tcg_passres);
10574                 } else {
10575                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
10576                                   tcg_res[pass], tcg_passres);
10577                 }
10578             }
10579         }
10580     }
10581 
10582     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
10583     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
10584 }
10585 
10586 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10587                             int opcode, int rd, int rn, int rm)
10588 {
10589     TCGv_i64 tcg_res[2];
10590     int part = is_q ? 2 : 0;
10591     int pass;
10592 
10593     for (pass = 0; pass < 2; pass++) {
10594         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10595         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10596         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10597         static NeonGenWidenFn * const widenfns[3][2] = {
10598             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10599             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10600             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10601         };
10602         NeonGenWidenFn *widenfn = widenfns[size][is_u];
10603 
10604         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10605         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10606         widenfn(tcg_op2_wide, tcg_op2);
10607         tcg_res[pass] = tcg_temp_new_i64();
10608         gen_neon_addl(size, (opcode == 3),
10609                       tcg_res[pass], tcg_op1, tcg_op2_wide);
10610     }
10611 
10612     for (pass = 0; pass < 2; pass++) {
10613         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10614     }
10615 }
10616 
10617 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
10618 {
10619     tcg_gen_addi_i64(in, in, 1U << 31);
10620     tcg_gen_extrh_i64_i32(res, in);
10621 }
10622 
10623 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10624                                  int opcode, int rd, int rn, int rm)
10625 {
10626     TCGv_i32 tcg_res[2];
10627     int part = is_q ? 2 : 0;
10628     int pass;
10629 
10630     for (pass = 0; pass < 2; pass++) {
10631         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10632         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10633         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10634         static NeonGenNarrowFn * const narrowfns[3][2] = {
10635             { gen_helper_neon_narrow_high_u8,
10636               gen_helper_neon_narrow_round_high_u8 },
10637             { gen_helper_neon_narrow_high_u16,
10638               gen_helper_neon_narrow_round_high_u16 },
10639             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10640         };
10641         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10642 
10643         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10644         read_vec_element(s, tcg_op2, rm, pass, MO_64);
10645 
10646         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10647 
10648         tcg_res[pass] = tcg_temp_new_i32();
10649         gennarrow(tcg_res[pass], tcg_wideres);
10650     }
10651 
10652     for (pass = 0; pass < 2; pass++) {
10653         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10654     }
10655     clear_vec_high(s, is_q, rd);
10656 }
10657 
10658 /* AdvSIMD three different
10659  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
10660  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10661  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
10662  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10663  */
10664 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10665 {
10666     /* Instructions in this group fall into three basic classes
10667      * (in each case with the operation working on each element in
10668      * the input vectors):
10669      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10670      *     128 bit input)
10671      * (2) wide 64 x 128 -> 128
10672      * (3) narrowing 128 x 128 -> 64
10673      * Here we do initial decode, catch unallocated cases and
10674      * dispatch to separate functions for each class.
10675      */
10676     int is_q = extract32(insn, 30, 1);
10677     int is_u = extract32(insn, 29, 1);
10678     int size = extract32(insn, 22, 2);
10679     int opcode = extract32(insn, 12, 4);
10680     int rm = extract32(insn, 16, 5);
10681     int rn = extract32(insn, 5, 5);
10682     int rd = extract32(insn, 0, 5);
10683 
10684     switch (opcode) {
10685     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10686     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10687         /* 64 x 128 -> 128 */
10688         if (size == 3) {
10689             unallocated_encoding(s);
10690             return;
10691         }
10692         if (!fp_access_check(s)) {
10693             return;
10694         }
10695         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10696         break;
10697     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10698     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10699         /* 128 x 128 -> 64 */
10700         if (size == 3) {
10701             unallocated_encoding(s);
10702             return;
10703         }
10704         if (!fp_access_check(s)) {
10705             return;
10706         }
10707         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10708         break;
10709     case 14: /* PMULL, PMULL2 */
10710         if (is_u) {
10711             unallocated_encoding(s);
10712             return;
10713         }
10714         switch (size) {
10715         case 0: /* PMULL.P8 */
10716             if (!fp_access_check(s)) {
10717                 return;
10718             }
10719             /* The Q field specifies lo/hi half input for this insn.  */
10720             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10721                              gen_helper_neon_pmull_h);
10722             break;
10723 
10724         case 3: /* PMULL.P64 */
10725             if (!dc_isar_feature(aa64_pmull, s)) {
10726                 unallocated_encoding(s);
10727                 return;
10728             }
10729             if (!fp_access_check(s)) {
10730                 return;
10731             }
10732             /* The Q field specifies lo/hi half input for this insn.  */
10733             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10734                              gen_helper_gvec_pmull_q);
10735             break;
10736 
10737         default:
10738             unallocated_encoding(s);
10739             break;
10740         }
10741         return;
10742     case 9: /* SQDMLAL, SQDMLAL2 */
10743     case 11: /* SQDMLSL, SQDMLSL2 */
10744     case 13: /* SQDMULL, SQDMULL2 */
10745         if (is_u || size == 0) {
10746             unallocated_encoding(s);
10747             return;
10748         }
10749         /* fall through */
10750     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10751     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10752     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10753     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10754     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10755     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10756     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10757         /* 64 x 64 -> 128 */
10758         if (size == 3) {
10759             unallocated_encoding(s);
10760             return;
10761         }
10762         if (!fp_access_check(s)) {
10763             return;
10764         }
10765 
10766         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10767         break;
10768     default:
10769         /* opcode 15 not allocated */
10770         unallocated_encoding(s);
10771         break;
10772     }
10773 }
10774 
10775 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10776 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10777 {
10778     int rd = extract32(insn, 0, 5);
10779     int rn = extract32(insn, 5, 5);
10780     int rm = extract32(insn, 16, 5);
10781     int size = extract32(insn, 22, 2);
10782     bool is_u = extract32(insn, 29, 1);
10783     bool is_q = extract32(insn, 30, 1);
10784 
10785     if (!fp_access_check(s)) {
10786         return;
10787     }
10788 
10789     switch (size + 4 * is_u) {
10790     case 0: /* AND */
10791         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10792         return;
10793     case 1: /* BIC */
10794         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10795         return;
10796     case 2: /* ORR */
10797         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10798         return;
10799     case 3: /* ORN */
10800         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10801         return;
10802     case 4: /* EOR */
10803         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10804         return;
10805 
10806     case 5: /* BSL bitwise select */
10807         gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
10808         return;
10809     case 6: /* BIT, bitwise insert if true */
10810         gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
10811         return;
10812     case 7: /* BIF, bitwise insert if false */
10813         gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
10814         return;
10815 
10816     default:
10817         g_assert_not_reached();
10818     }
10819 }
10820 
10821 /* Pairwise op subgroup of C3.6.16.
10822  *
10823  * This is called directly or via the handle_3same_float for float pairwise
10824  * operations where the opcode and size are calculated differently.
10825  */
10826 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
10827                                    int size, int rn, int rm, int rd)
10828 {
10829     TCGv_ptr fpst;
10830     int pass;
10831 
10832     /* Floating point operations need fpst */
10833     if (opcode >= 0x58) {
10834         fpst = fpstatus_ptr(FPST_FPCR);
10835     } else {
10836         fpst = NULL;
10837     }
10838 
10839     if (!fp_access_check(s)) {
10840         return;
10841     }
10842 
10843     /* These operations work on the concatenated rm:rn, with each pair of
10844      * adjacent elements being operated on to produce an element in the result.
10845      */
10846     if (size == 3) {
10847         TCGv_i64 tcg_res[2];
10848 
10849         for (pass = 0; pass < 2; pass++) {
10850             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10851             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10852             int passreg = (pass == 0) ? rn : rm;
10853 
10854             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
10855             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
10856             tcg_res[pass] = tcg_temp_new_i64();
10857 
10858             switch (opcode) {
10859             case 0x17: /* ADDP */
10860                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10861                 break;
10862             case 0x58: /* FMAXNMP */
10863                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10864                 break;
10865             case 0x5a: /* FADDP */
10866                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10867                 break;
10868             case 0x5e: /* FMAXP */
10869                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10870                 break;
10871             case 0x78: /* FMINNMP */
10872                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10873                 break;
10874             case 0x7e: /* FMINP */
10875                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10876                 break;
10877             default:
10878                 g_assert_not_reached();
10879             }
10880         }
10881 
10882         for (pass = 0; pass < 2; pass++) {
10883             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10884         }
10885     } else {
10886         int maxpass = is_q ? 4 : 2;
10887         TCGv_i32 tcg_res[4];
10888 
10889         for (pass = 0; pass < maxpass; pass++) {
10890             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10891             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10892             NeonGenTwoOpFn *genfn = NULL;
10893             int passreg = pass < (maxpass / 2) ? rn : rm;
10894             int passelt = (is_q && (pass & 1)) ? 2 : 0;
10895 
10896             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10897             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10898             tcg_res[pass] = tcg_temp_new_i32();
10899 
10900             switch (opcode) {
10901             case 0x17: /* ADDP */
10902             {
10903                 static NeonGenTwoOpFn * const fns[3] = {
10904                     gen_helper_neon_padd_u8,
10905                     gen_helper_neon_padd_u16,
10906                     tcg_gen_add_i32,
10907                 };
10908                 genfn = fns[size];
10909                 break;
10910             }
10911             case 0x14: /* SMAXP, UMAXP */
10912             {
10913                 static NeonGenTwoOpFn * const fns[3][2] = {
10914                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10915                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10916                     { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10917                 };
10918                 genfn = fns[size][u];
10919                 break;
10920             }
10921             case 0x15: /* SMINP, UMINP */
10922             {
10923                 static NeonGenTwoOpFn * const fns[3][2] = {
10924                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10925                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10926                     { tcg_gen_smin_i32, tcg_gen_umin_i32 },
10927                 };
10928                 genfn = fns[size][u];
10929                 break;
10930             }
10931             /* The FP operations are all on single floats (32 bit) */
10932             case 0x58: /* FMAXNMP */
10933                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10934                 break;
10935             case 0x5a: /* FADDP */
10936                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10937                 break;
10938             case 0x5e: /* FMAXP */
10939                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10940                 break;
10941             case 0x78: /* FMINNMP */
10942                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10943                 break;
10944             case 0x7e: /* FMINP */
10945                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10946                 break;
10947             default:
10948                 g_assert_not_reached();
10949             }
10950 
10951             /* FP ops called directly, otherwise call now */
10952             if (genfn) {
10953                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
10954             }
10955         }
10956 
10957         for (pass = 0; pass < maxpass; pass++) {
10958             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10959         }
10960         clear_vec_high(s, is_q, rd);
10961     }
10962 }
10963 
10964 /* Floating point op subgroup of C3.6.16. */
10965 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
10966 {
10967     /* For floating point ops, the U, size[1] and opcode bits
10968      * together indicate the operation. size[0] indicates single
10969      * or double.
10970      */
10971     int fpopcode = extract32(insn, 11, 5)
10972         | (extract32(insn, 23, 1) << 5)
10973         | (extract32(insn, 29, 1) << 6);
10974     int is_q = extract32(insn, 30, 1);
10975     int size = extract32(insn, 22, 1);
10976     int rm = extract32(insn, 16, 5);
10977     int rn = extract32(insn, 5, 5);
10978     int rd = extract32(insn, 0, 5);
10979 
10980     int datasize = is_q ? 128 : 64;
10981     int esize = 32 << size;
10982     int elements = datasize / esize;
10983 
10984     if (size == 1 && !is_q) {
10985         unallocated_encoding(s);
10986         return;
10987     }
10988 
10989     switch (fpopcode) {
10990     case 0x58: /* FMAXNMP */
10991     case 0x5a: /* FADDP */
10992     case 0x5e: /* FMAXP */
10993     case 0x78: /* FMINNMP */
10994     case 0x7e: /* FMINP */
10995         if (size && !is_q) {
10996             unallocated_encoding(s);
10997             return;
10998         }
10999         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
11000                                rn, rm, rd);
11001         return;
11002     case 0x1b: /* FMULX */
11003     case 0x1f: /* FRECPS */
11004     case 0x3f: /* FRSQRTS */
11005     case 0x5d: /* FACGE */
11006     case 0x7d: /* FACGT */
11007     case 0x19: /* FMLA */
11008     case 0x39: /* FMLS */
11009     case 0x18: /* FMAXNM */
11010     case 0x1a: /* FADD */
11011     case 0x1c: /* FCMEQ */
11012     case 0x1e: /* FMAX */
11013     case 0x38: /* FMINNM */
11014     case 0x3a: /* FSUB */
11015     case 0x3e: /* FMIN */
11016     case 0x5b: /* FMUL */
11017     case 0x5c: /* FCMGE */
11018     case 0x5f: /* FDIV */
11019     case 0x7a: /* FABD */
11020     case 0x7c: /* FCMGT */
11021         if (!fp_access_check(s)) {
11022             return;
11023         }
11024         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
11025         return;
11026 
11027     case 0x1d: /* FMLAL  */
11028     case 0x3d: /* FMLSL  */
11029     case 0x59: /* FMLAL2 */
11030     case 0x79: /* FMLSL2 */
11031         if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
11032             unallocated_encoding(s);
11033             return;
11034         }
11035         if (fp_access_check(s)) {
11036             int is_s = extract32(insn, 23, 1);
11037             int is_2 = extract32(insn, 29, 1);
11038             int data = (is_2 << 1) | is_s;
11039             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
11040                                vec_full_reg_offset(s, rn),
11041                                vec_full_reg_offset(s, rm), cpu_env,
11042                                is_q ? 16 : 8, vec_full_reg_size(s),
11043                                data, gen_helper_gvec_fmlal_a64);
11044         }
11045         return;
11046 
11047     default:
11048         unallocated_encoding(s);
11049         return;
11050     }
11051 }
11052 
11053 /* Integer op subgroup of C3.6.16. */
11054 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
11055 {
11056     int is_q = extract32(insn, 30, 1);
11057     int u = extract32(insn, 29, 1);
11058     int size = extract32(insn, 22, 2);
11059     int opcode = extract32(insn, 11, 5);
11060     int rm = extract32(insn, 16, 5);
11061     int rn = extract32(insn, 5, 5);
11062     int rd = extract32(insn, 0, 5);
11063     int pass;
11064     TCGCond cond;
11065 
11066     switch (opcode) {
11067     case 0x13: /* MUL, PMUL */
11068         if (u && size != 0) {
11069             unallocated_encoding(s);
11070             return;
11071         }
11072         /* fall through */
11073     case 0x0: /* SHADD, UHADD */
11074     case 0x2: /* SRHADD, URHADD */
11075     case 0x4: /* SHSUB, UHSUB */
11076     case 0xc: /* SMAX, UMAX */
11077     case 0xd: /* SMIN, UMIN */
11078     case 0xe: /* SABD, UABD */
11079     case 0xf: /* SABA, UABA */
11080     case 0x12: /* MLA, MLS */
11081         if (size == 3) {
11082             unallocated_encoding(s);
11083             return;
11084         }
11085         break;
11086     case 0x16: /* SQDMULH, SQRDMULH */
11087         if (size == 0 || size == 3) {
11088             unallocated_encoding(s);
11089             return;
11090         }
11091         break;
11092     default:
11093         if (size == 3 && !is_q) {
11094             unallocated_encoding(s);
11095             return;
11096         }
11097         break;
11098     }
11099 
11100     if (!fp_access_check(s)) {
11101         return;
11102     }
11103 
11104     switch (opcode) {
11105     case 0x01: /* SQADD, UQADD */
11106         if (u) {
11107             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
11108         } else {
11109             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
11110         }
11111         return;
11112     case 0x05: /* SQSUB, UQSUB */
11113         if (u) {
11114             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
11115         } else {
11116             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
11117         }
11118         return;
11119     case 0x08: /* SSHL, USHL */
11120         if (u) {
11121             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
11122         } else {
11123             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
11124         }
11125         return;
11126     case 0x0c: /* SMAX, UMAX */
11127         if (u) {
11128             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
11129         } else {
11130             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
11131         }
11132         return;
11133     case 0x0d: /* SMIN, UMIN */
11134         if (u) {
11135             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
11136         } else {
11137             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
11138         }
11139         return;
11140     case 0xe: /* SABD, UABD */
11141         if (u) {
11142             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
11143         } else {
11144             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
11145         }
11146         return;
11147     case 0xf: /* SABA, UABA */
11148         if (u) {
11149             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
11150         } else {
11151             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
11152         }
11153         return;
11154     case 0x10: /* ADD, SUB */
11155         if (u) {
11156             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
11157         } else {
11158             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
11159         }
11160         return;
11161     case 0x13: /* MUL, PMUL */
11162         if (!u) { /* MUL */
11163             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
11164         } else {  /* PMUL */
11165             gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
11166         }
11167         return;
11168     case 0x12: /* MLA, MLS */
11169         if (u) {
11170             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
11171         } else {
11172             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
11173         }
11174         return;
11175     case 0x16: /* SQDMULH, SQRDMULH */
11176         {
11177             static gen_helper_gvec_3_ptr * const fns[2][2] = {
11178                 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
11179                 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
11180             };
11181             gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
11182         }
11183         return;
11184     case 0x11:
11185         if (!u) { /* CMTST */
11186             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
11187             return;
11188         }
11189         /* else CMEQ */
11190         cond = TCG_COND_EQ;
11191         goto do_gvec_cmp;
11192     case 0x06: /* CMGT, CMHI */
11193         cond = u ? TCG_COND_GTU : TCG_COND_GT;
11194         goto do_gvec_cmp;
11195     case 0x07: /* CMGE, CMHS */
11196         cond = u ? TCG_COND_GEU : TCG_COND_GE;
11197     do_gvec_cmp:
11198         tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
11199                          vec_full_reg_offset(s, rn),
11200                          vec_full_reg_offset(s, rm),
11201                          is_q ? 16 : 8, vec_full_reg_size(s));
11202         return;
11203     }
11204 
11205     if (size == 3) {
11206         assert(is_q);
11207         for (pass = 0; pass < 2; pass++) {
11208             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11209             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11210             TCGv_i64 tcg_res = tcg_temp_new_i64();
11211 
11212             read_vec_element(s, tcg_op1, rn, pass, MO_64);
11213             read_vec_element(s, tcg_op2, rm, pass, MO_64);
11214 
11215             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
11216 
11217             write_vec_element(s, tcg_res, rd, pass, MO_64);
11218         }
11219     } else {
11220         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11221             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11222             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11223             TCGv_i32 tcg_res = tcg_temp_new_i32();
11224             NeonGenTwoOpFn *genfn = NULL;
11225             NeonGenTwoOpEnvFn *genenvfn = NULL;
11226 
11227             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
11228             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
11229 
11230             switch (opcode) {
11231             case 0x0: /* SHADD, UHADD */
11232             {
11233                 static NeonGenTwoOpFn * const fns[3][2] = {
11234                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
11235                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
11236                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
11237                 };
11238                 genfn = fns[size][u];
11239                 break;
11240             }
11241             case 0x2: /* SRHADD, URHADD */
11242             {
11243                 static NeonGenTwoOpFn * const fns[3][2] = {
11244                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
11245                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
11246                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
11247                 };
11248                 genfn = fns[size][u];
11249                 break;
11250             }
11251             case 0x4: /* SHSUB, UHSUB */
11252             {
11253                 static NeonGenTwoOpFn * const fns[3][2] = {
11254                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
11255                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
11256                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
11257                 };
11258                 genfn = fns[size][u];
11259                 break;
11260             }
11261             case 0x9: /* SQSHL, UQSHL */
11262             {
11263                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11264                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
11265                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
11266                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
11267                 };
11268                 genenvfn = fns[size][u];
11269                 break;
11270             }
11271             case 0xa: /* SRSHL, URSHL */
11272             {
11273                 static NeonGenTwoOpFn * const fns[3][2] = {
11274                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
11275                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
11276                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
11277                 };
11278                 genfn = fns[size][u];
11279                 break;
11280             }
11281             case 0xb: /* SQRSHL, UQRSHL */
11282             {
11283                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11284                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
11285                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
11286                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
11287                 };
11288                 genenvfn = fns[size][u];
11289                 break;
11290             }
11291             default:
11292                 g_assert_not_reached();
11293             }
11294 
11295             if (genenvfn) {
11296                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
11297             } else {
11298                 genfn(tcg_res, tcg_op1, tcg_op2);
11299             }
11300 
11301             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11302         }
11303     }
11304     clear_vec_high(s, is_q, rd);
11305 }
11306 
11307 /* AdvSIMD three same
11308  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11309  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11310  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11311  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11312  */
11313 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11314 {
11315     int opcode = extract32(insn, 11, 5);
11316 
11317     switch (opcode) {
11318     case 0x3: /* logic ops */
11319         disas_simd_3same_logic(s, insn);
11320         break;
11321     case 0x17: /* ADDP */
11322     case 0x14: /* SMAXP, UMAXP */
11323     case 0x15: /* SMINP, UMINP */
11324     {
11325         /* Pairwise operations */
11326         int is_q = extract32(insn, 30, 1);
11327         int u = extract32(insn, 29, 1);
11328         int size = extract32(insn, 22, 2);
11329         int rm = extract32(insn, 16, 5);
11330         int rn = extract32(insn, 5, 5);
11331         int rd = extract32(insn, 0, 5);
11332         if (opcode == 0x17) {
11333             if (u || (size == 3 && !is_q)) {
11334                 unallocated_encoding(s);
11335                 return;
11336             }
11337         } else {
11338             if (size == 3) {
11339                 unallocated_encoding(s);
11340                 return;
11341             }
11342         }
11343         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11344         break;
11345     }
11346     case 0x18 ... 0x31:
11347         /* floating point ops, sz[1] and U are part of opcode */
11348         disas_simd_3same_float(s, insn);
11349         break;
11350     default:
11351         disas_simd_3same_int(s, insn);
11352         break;
11353     }
11354 }
11355 
11356 /*
11357  * Advanced SIMD three same (ARMv8.2 FP16 variants)
11358  *
11359  *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11360  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11361  * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11362  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11363  *
11364  * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11365  * (register), FACGE, FABD, FCMGT (register) and FACGT.
11366  *
11367  */
11368 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11369 {
11370     int opcode = extract32(insn, 11, 3);
11371     int u = extract32(insn, 29, 1);
11372     int a = extract32(insn, 23, 1);
11373     int is_q = extract32(insn, 30, 1);
11374     int rm = extract32(insn, 16, 5);
11375     int rn = extract32(insn, 5, 5);
11376     int rd = extract32(insn, 0, 5);
11377     /*
11378      * For these floating point ops, the U, a and opcode bits
11379      * together indicate the operation.
11380      */
11381     int fpopcode = opcode | (a << 3) | (u << 4);
11382     int datasize = is_q ? 128 : 64;
11383     int elements = datasize / 16;
11384     bool pairwise;
11385     TCGv_ptr fpst;
11386     int pass;
11387 
11388     switch (fpopcode) {
11389     case 0x0: /* FMAXNM */
11390     case 0x1: /* FMLA */
11391     case 0x2: /* FADD */
11392     case 0x3: /* FMULX */
11393     case 0x4: /* FCMEQ */
11394     case 0x6: /* FMAX */
11395     case 0x7: /* FRECPS */
11396     case 0x8: /* FMINNM */
11397     case 0x9: /* FMLS */
11398     case 0xa: /* FSUB */
11399     case 0xe: /* FMIN */
11400     case 0xf: /* FRSQRTS */
11401     case 0x13: /* FMUL */
11402     case 0x14: /* FCMGE */
11403     case 0x15: /* FACGE */
11404     case 0x17: /* FDIV */
11405     case 0x1a: /* FABD */
11406     case 0x1c: /* FCMGT */
11407     case 0x1d: /* FACGT */
11408         pairwise = false;
11409         break;
11410     case 0x10: /* FMAXNMP */
11411     case 0x12: /* FADDP */
11412     case 0x16: /* FMAXP */
11413     case 0x18: /* FMINNMP */
11414     case 0x1e: /* FMINP */
11415         pairwise = true;
11416         break;
11417     default:
11418         unallocated_encoding(s);
11419         return;
11420     }
11421 
11422     if (!dc_isar_feature(aa64_fp16, s)) {
11423         unallocated_encoding(s);
11424         return;
11425     }
11426 
11427     if (!fp_access_check(s)) {
11428         return;
11429     }
11430 
11431     fpst = fpstatus_ptr(FPST_FPCR_F16);
11432 
11433     if (pairwise) {
11434         int maxpass = is_q ? 8 : 4;
11435         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11436         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11437         TCGv_i32 tcg_res[8];
11438 
11439         for (pass = 0; pass < maxpass; pass++) {
11440             int passreg = pass < (maxpass / 2) ? rn : rm;
11441             int passelt = (pass << 1) & (maxpass - 1);
11442 
11443             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11444             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11445             tcg_res[pass] = tcg_temp_new_i32();
11446 
11447             switch (fpopcode) {
11448             case 0x10: /* FMAXNMP */
11449                 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11450                                            fpst);
11451                 break;
11452             case 0x12: /* FADDP */
11453                 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11454                 break;
11455             case 0x16: /* FMAXP */
11456                 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11457                 break;
11458             case 0x18: /* FMINNMP */
11459                 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11460                                            fpst);
11461                 break;
11462             case 0x1e: /* FMINP */
11463                 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11464                 break;
11465             default:
11466                 g_assert_not_reached();
11467             }
11468         }
11469 
11470         for (pass = 0; pass < maxpass; pass++) {
11471             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11472         }
11473     } else {
11474         for (pass = 0; pass < elements; pass++) {
11475             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11476             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11477             TCGv_i32 tcg_res = tcg_temp_new_i32();
11478 
11479             read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11480             read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11481 
11482             switch (fpopcode) {
11483             case 0x0: /* FMAXNM */
11484                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11485                 break;
11486             case 0x1: /* FMLA */
11487                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11488                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11489                                            fpst);
11490                 break;
11491             case 0x2: /* FADD */
11492                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11493                 break;
11494             case 0x3: /* FMULX */
11495                 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11496                 break;
11497             case 0x4: /* FCMEQ */
11498                 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11499                 break;
11500             case 0x6: /* FMAX */
11501                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11502                 break;
11503             case 0x7: /* FRECPS */
11504                 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11505                 break;
11506             case 0x8: /* FMINNM */
11507                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11508                 break;
11509             case 0x9: /* FMLS */
11510                 /* As usual for ARM, separate negation for fused multiply-add */
11511                 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11512                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11513                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11514                                            fpst);
11515                 break;
11516             case 0xa: /* FSUB */
11517                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11518                 break;
11519             case 0xe: /* FMIN */
11520                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11521                 break;
11522             case 0xf: /* FRSQRTS */
11523                 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11524                 break;
11525             case 0x13: /* FMUL */
11526                 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11527                 break;
11528             case 0x14: /* FCMGE */
11529                 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11530                 break;
11531             case 0x15: /* FACGE */
11532                 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11533                 break;
11534             case 0x17: /* FDIV */
11535                 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11536                 break;
11537             case 0x1a: /* FABD */
11538                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11539                 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
11540                 break;
11541             case 0x1c: /* FCMGT */
11542                 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11543                 break;
11544             case 0x1d: /* FACGT */
11545                 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11546                 break;
11547             default:
11548                 g_assert_not_reached();
11549             }
11550 
11551             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11552         }
11553     }
11554 
11555     clear_vec_high(s, is_q, rd);
11556 }
11557 
11558 /* AdvSIMD three same extra
11559  *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
11560  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11561  * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
11562  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11563  */
11564 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
11565 {
11566     int rd = extract32(insn, 0, 5);
11567     int rn = extract32(insn, 5, 5);
11568     int opcode = extract32(insn, 11, 4);
11569     int rm = extract32(insn, 16, 5);
11570     int size = extract32(insn, 22, 2);
11571     bool u = extract32(insn, 29, 1);
11572     bool is_q = extract32(insn, 30, 1);
11573     bool feature;
11574     int rot;
11575 
11576     switch (u * 16 + opcode) {
11577     case 0x10: /* SQRDMLAH (vector) */
11578     case 0x11: /* SQRDMLSH (vector) */
11579         if (size != 1 && size != 2) {
11580             unallocated_encoding(s);
11581             return;
11582         }
11583         feature = dc_isar_feature(aa64_rdm, s);
11584         break;
11585     case 0x02: /* SDOT (vector) */
11586     case 0x12: /* UDOT (vector) */
11587         if (size != MO_32) {
11588             unallocated_encoding(s);
11589             return;
11590         }
11591         feature = dc_isar_feature(aa64_dp, s);
11592         break;
11593     case 0x03: /* USDOT */
11594         if (size != MO_32) {
11595             unallocated_encoding(s);
11596             return;
11597         }
11598         feature = dc_isar_feature(aa64_i8mm, s);
11599         break;
11600     case 0x04: /* SMMLA */
11601     case 0x14: /* UMMLA */
11602     case 0x05: /* USMMLA */
11603         if (!is_q || size != MO_32) {
11604             unallocated_encoding(s);
11605             return;
11606         }
11607         feature = dc_isar_feature(aa64_i8mm, s);
11608         break;
11609     case 0x18: /* FCMLA, #0 */
11610     case 0x19: /* FCMLA, #90 */
11611     case 0x1a: /* FCMLA, #180 */
11612     case 0x1b: /* FCMLA, #270 */
11613     case 0x1c: /* FCADD, #90 */
11614     case 0x1e: /* FCADD, #270 */
11615         if (size == 0
11616             || (size == 1 && !dc_isar_feature(aa64_fp16, s))
11617             || (size == 3 && !is_q)) {
11618             unallocated_encoding(s);
11619             return;
11620         }
11621         feature = dc_isar_feature(aa64_fcma, s);
11622         break;
11623     case 0x1d: /* BFMMLA */
11624         if (size != MO_16 || !is_q) {
11625             unallocated_encoding(s);
11626             return;
11627         }
11628         feature = dc_isar_feature(aa64_bf16, s);
11629         break;
11630     case 0x1f:
11631         switch (size) {
11632         case 1: /* BFDOT */
11633         case 3: /* BFMLAL{B,T} */
11634             feature = dc_isar_feature(aa64_bf16, s);
11635             break;
11636         default:
11637             unallocated_encoding(s);
11638             return;
11639         }
11640         break;
11641     default:
11642         unallocated_encoding(s);
11643         return;
11644     }
11645     if (!feature) {
11646         unallocated_encoding(s);
11647         return;
11648     }
11649     if (!fp_access_check(s)) {
11650         return;
11651     }
11652 
11653     switch (opcode) {
11654     case 0x0: /* SQRDMLAH (vector) */
11655         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
11656         return;
11657 
11658     case 0x1: /* SQRDMLSH (vector) */
11659         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
11660         return;
11661 
11662     case 0x2: /* SDOT / UDOT */
11663         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
11664                          u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
11665         return;
11666 
11667     case 0x3: /* USDOT */
11668         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
11669         return;
11670 
11671     case 0x04: /* SMMLA, UMMLA */
11672         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
11673                          u ? gen_helper_gvec_ummla_b
11674                          : gen_helper_gvec_smmla_b);
11675         return;
11676     case 0x05: /* USMMLA */
11677         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
11678         return;
11679 
11680     case 0x8: /* FCMLA, #0 */
11681     case 0x9: /* FCMLA, #90 */
11682     case 0xa: /* FCMLA, #180 */
11683     case 0xb: /* FCMLA, #270 */
11684         rot = extract32(opcode, 0, 2);
11685         switch (size) {
11686         case 1:
11687             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
11688                               gen_helper_gvec_fcmlah);
11689             break;
11690         case 2:
11691             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11692                               gen_helper_gvec_fcmlas);
11693             break;
11694         case 3:
11695             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11696                               gen_helper_gvec_fcmlad);
11697             break;
11698         default:
11699             g_assert_not_reached();
11700         }
11701         return;
11702 
11703     case 0xc: /* FCADD, #90 */
11704     case 0xe: /* FCADD, #270 */
11705         rot = extract32(opcode, 1, 1);
11706         switch (size) {
11707         case 1:
11708             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11709                               gen_helper_gvec_fcaddh);
11710             break;
11711         case 2:
11712             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11713                               gen_helper_gvec_fcadds);
11714             break;
11715         case 3:
11716             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11717                               gen_helper_gvec_fcaddd);
11718             break;
11719         default:
11720             g_assert_not_reached();
11721         }
11722         return;
11723 
11724     case 0xd: /* BFMMLA */
11725         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
11726         return;
11727     case 0xf:
11728         switch (size) {
11729         case 1: /* BFDOT */
11730             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
11731             break;
11732         case 3: /* BFMLAL{B,T} */
11733             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
11734                               gen_helper_gvec_bfmlal);
11735             break;
11736         default:
11737             g_assert_not_reached();
11738         }
11739         return;
11740 
11741     default:
11742         g_assert_not_reached();
11743     }
11744 }
11745 
11746 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11747                                   int size, int rn, int rd)
11748 {
11749     /* Handle 2-reg-misc ops which are widening (so each size element
11750      * in the source becomes a 2*size element in the destination.
11751      * The only instruction like this is FCVTL.
11752      */
11753     int pass;
11754 
11755     if (size == 3) {
11756         /* 32 -> 64 bit fp conversion */
11757         TCGv_i64 tcg_res[2];
11758         int srcelt = is_q ? 2 : 0;
11759 
11760         for (pass = 0; pass < 2; pass++) {
11761             TCGv_i32 tcg_op = tcg_temp_new_i32();
11762             tcg_res[pass] = tcg_temp_new_i64();
11763 
11764             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11765             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
11766         }
11767         for (pass = 0; pass < 2; pass++) {
11768             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11769         }
11770     } else {
11771         /* 16 -> 32 bit fp conversion */
11772         int srcelt = is_q ? 4 : 0;
11773         TCGv_i32 tcg_res[4];
11774         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
11775         TCGv_i32 ahp = get_ahp_flag();
11776 
11777         for (pass = 0; pass < 4; pass++) {
11778             tcg_res[pass] = tcg_temp_new_i32();
11779 
11780             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11781             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11782                                            fpst, ahp);
11783         }
11784         for (pass = 0; pass < 4; pass++) {
11785             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11786         }
11787     }
11788 }
11789 
11790 static void handle_rev(DisasContext *s, int opcode, bool u,
11791                        bool is_q, int size, int rn, int rd)
11792 {
11793     int op = (opcode << 1) | u;
11794     int opsz = op + size;
11795     int grp_size = 3 - opsz;
11796     int dsize = is_q ? 128 : 64;
11797     int i;
11798 
11799     if (opsz >= 3) {
11800         unallocated_encoding(s);
11801         return;
11802     }
11803 
11804     if (!fp_access_check(s)) {
11805         return;
11806     }
11807 
11808     if (size == 0) {
11809         /* Special case bytes, use bswap op on each group of elements */
11810         int groups = dsize / (8 << grp_size);
11811 
11812         for (i = 0; i < groups; i++) {
11813             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11814 
11815             read_vec_element(s, tcg_tmp, rn, i, grp_size);
11816             switch (grp_size) {
11817             case MO_16:
11818                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11819                 break;
11820             case MO_32:
11821                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11822                 break;
11823             case MO_64:
11824                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11825                 break;
11826             default:
11827                 g_assert_not_reached();
11828             }
11829             write_vec_element(s, tcg_tmp, rd, i, grp_size);
11830         }
11831         clear_vec_high(s, is_q, rd);
11832     } else {
11833         int revmask = (1 << grp_size) - 1;
11834         int esize = 8 << size;
11835         int elements = dsize / esize;
11836         TCGv_i64 tcg_rn = tcg_temp_new_i64();
11837         TCGv_i64 tcg_rd[2];
11838 
11839         for (i = 0; i < 2; i++) {
11840             tcg_rd[i] = tcg_temp_new_i64();
11841             tcg_gen_movi_i64(tcg_rd[i], 0);
11842         }
11843 
11844         for (i = 0; i < elements; i++) {
11845             int e_rev = (i & 0xf) ^ revmask;
11846             int w = (e_rev * esize) / 64;
11847             int o = (e_rev * esize) % 64;
11848 
11849             read_vec_element(s, tcg_rn, rn, i, size);
11850             tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
11851         }
11852 
11853         for (i = 0; i < 2; i++) {
11854             write_vec_element(s, tcg_rd[i], rd, i, MO_64);
11855         }
11856         clear_vec_high(s, true, rd);
11857     }
11858 }
11859 
11860 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11861                                   bool is_q, int size, int rn, int rd)
11862 {
11863     /* Implement the pairwise operations from 2-misc:
11864      * SADDLP, UADDLP, SADALP, UADALP.
11865      * These all add pairs of elements in the input to produce a
11866      * double-width result element in the output (possibly accumulating).
11867      */
11868     bool accum = (opcode == 0x6);
11869     int maxpass = is_q ? 2 : 1;
11870     int pass;
11871     TCGv_i64 tcg_res[2];
11872 
11873     if (size == 2) {
11874         /* 32 + 32 -> 64 op */
11875         MemOp memop = size + (u ? 0 : MO_SIGN);
11876 
11877         for (pass = 0; pass < maxpass; pass++) {
11878             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11879             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11880 
11881             tcg_res[pass] = tcg_temp_new_i64();
11882 
11883             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11884             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11885             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11886             if (accum) {
11887                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
11888                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11889             }
11890         }
11891     } else {
11892         for (pass = 0; pass < maxpass; pass++) {
11893             TCGv_i64 tcg_op = tcg_temp_new_i64();
11894             NeonGenOne64OpFn *genfn;
11895             static NeonGenOne64OpFn * const fns[2][2] = {
11896                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
11897                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
11898             };
11899 
11900             genfn = fns[size][u];
11901 
11902             tcg_res[pass] = tcg_temp_new_i64();
11903 
11904             read_vec_element(s, tcg_op, rn, pass, MO_64);
11905             genfn(tcg_res[pass], tcg_op);
11906 
11907             if (accum) {
11908                 read_vec_element(s, tcg_op, rd, pass, MO_64);
11909                 if (size == 0) {
11910                     gen_helper_neon_addl_u16(tcg_res[pass],
11911                                              tcg_res[pass], tcg_op);
11912                 } else {
11913                     gen_helper_neon_addl_u32(tcg_res[pass],
11914                                              tcg_res[pass], tcg_op);
11915                 }
11916             }
11917         }
11918     }
11919     if (!is_q) {
11920         tcg_res[1] = tcg_constant_i64(0);
11921     }
11922     for (pass = 0; pass < 2; pass++) {
11923         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11924     }
11925 }
11926 
11927 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11928 {
11929     /* Implement SHLL and SHLL2 */
11930     int pass;
11931     int part = is_q ? 2 : 0;
11932     TCGv_i64 tcg_res[2];
11933 
11934     for (pass = 0; pass < 2; pass++) {
11935         static NeonGenWidenFn * const widenfns[3] = {
11936             gen_helper_neon_widen_u8,
11937             gen_helper_neon_widen_u16,
11938             tcg_gen_extu_i32_i64,
11939         };
11940         NeonGenWidenFn *widenfn = widenfns[size];
11941         TCGv_i32 tcg_op = tcg_temp_new_i32();
11942 
11943         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11944         tcg_res[pass] = tcg_temp_new_i64();
11945         widenfn(tcg_res[pass], tcg_op);
11946         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11947     }
11948 
11949     for (pass = 0; pass < 2; pass++) {
11950         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11951     }
11952 }
11953 
11954 /* AdvSIMD two reg misc
11955  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
11956  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11957  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11958  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11959  */
11960 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11961 {
11962     int size = extract32(insn, 22, 2);
11963     int opcode = extract32(insn, 12, 5);
11964     bool u = extract32(insn, 29, 1);
11965     bool is_q = extract32(insn, 30, 1);
11966     int rn = extract32(insn, 5, 5);
11967     int rd = extract32(insn, 0, 5);
11968     bool need_fpstatus = false;
11969     int rmode = -1;
11970     TCGv_i32 tcg_rmode;
11971     TCGv_ptr tcg_fpstatus;
11972 
11973     switch (opcode) {
11974     case 0x0: /* REV64, REV32 */
11975     case 0x1: /* REV16 */
11976         handle_rev(s, opcode, u, is_q, size, rn, rd);
11977         return;
11978     case 0x5: /* CNT, NOT, RBIT */
11979         if (u && size == 0) {
11980             /* NOT */
11981             break;
11982         } else if (u && size == 1) {
11983             /* RBIT */
11984             break;
11985         } else if (!u && size == 0) {
11986             /* CNT */
11987             break;
11988         }
11989         unallocated_encoding(s);
11990         return;
11991     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11992     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11993         if (size == 3) {
11994             unallocated_encoding(s);
11995             return;
11996         }
11997         if (!fp_access_check(s)) {
11998             return;
11999         }
12000 
12001         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
12002         return;
12003     case 0x4: /* CLS, CLZ */
12004         if (size == 3) {
12005             unallocated_encoding(s);
12006             return;
12007         }
12008         break;
12009     case 0x2: /* SADDLP, UADDLP */
12010     case 0x6: /* SADALP, UADALP */
12011         if (size == 3) {
12012             unallocated_encoding(s);
12013             return;
12014         }
12015         if (!fp_access_check(s)) {
12016             return;
12017         }
12018         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
12019         return;
12020     case 0x13: /* SHLL, SHLL2 */
12021         if (u == 0 || size == 3) {
12022             unallocated_encoding(s);
12023             return;
12024         }
12025         if (!fp_access_check(s)) {
12026             return;
12027         }
12028         handle_shll(s, is_q, size, rn, rd);
12029         return;
12030     case 0xa: /* CMLT */
12031         if (u == 1) {
12032             unallocated_encoding(s);
12033             return;
12034         }
12035         /* fall through */
12036     case 0x8: /* CMGT, CMGE */
12037     case 0x9: /* CMEQ, CMLE */
12038     case 0xb: /* ABS, NEG */
12039         if (size == 3 && !is_q) {
12040             unallocated_encoding(s);
12041             return;
12042         }
12043         break;
12044     case 0x3: /* SUQADD, USQADD */
12045         if (size == 3 && !is_q) {
12046             unallocated_encoding(s);
12047             return;
12048         }
12049         if (!fp_access_check(s)) {
12050             return;
12051         }
12052         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
12053         return;
12054     case 0x7: /* SQABS, SQNEG */
12055         if (size == 3 && !is_q) {
12056             unallocated_encoding(s);
12057             return;
12058         }
12059         break;
12060     case 0xc ... 0xf:
12061     case 0x16 ... 0x1f:
12062     {
12063         /* Floating point: U, size[1] and opcode indicate operation;
12064          * size[0] indicates single or double precision.
12065          */
12066         int is_double = extract32(size, 0, 1);
12067         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
12068         size = is_double ? 3 : 2;
12069         switch (opcode) {
12070         case 0x2f: /* FABS */
12071         case 0x6f: /* FNEG */
12072             if (size == 3 && !is_q) {
12073                 unallocated_encoding(s);
12074                 return;
12075             }
12076             break;
12077         case 0x1d: /* SCVTF */
12078         case 0x5d: /* UCVTF */
12079         {
12080             bool is_signed = (opcode == 0x1d) ? true : false;
12081             int elements = is_double ? 2 : is_q ? 4 : 2;
12082             if (is_double && !is_q) {
12083                 unallocated_encoding(s);
12084                 return;
12085             }
12086             if (!fp_access_check(s)) {
12087                 return;
12088             }
12089             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12090             return;
12091         }
12092         case 0x2c: /* FCMGT (zero) */
12093         case 0x2d: /* FCMEQ (zero) */
12094         case 0x2e: /* FCMLT (zero) */
12095         case 0x6c: /* FCMGE (zero) */
12096         case 0x6d: /* FCMLE (zero) */
12097             if (size == 3 && !is_q) {
12098                 unallocated_encoding(s);
12099                 return;
12100             }
12101             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12102             return;
12103         case 0x7f: /* FSQRT */
12104             if (size == 3 && !is_q) {
12105                 unallocated_encoding(s);
12106                 return;
12107             }
12108             break;
12109         case 0x1a: /* FCVTNS */
12110         case 0x1b: /* FCVTMS */
12111         case 0x3a: /* FCVTPS */
12112         case 0x3b: /* FCVTZS */
12113         case 0x5a: /* FCVTNU */
12114         case 0x5b: /* FCVTMU */
12115         case 0x7a: /* FCVTPU */
12116         case 0x7b: /* FCVTZU */
12117             need_fpstatus = true;
12118             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12119             if (size == 3 && !is_q) {
12120                 unallocated_encoding(s);
12121                 return;
12122             }
12123             break;
12124         case 0x5c: /* FCVTAU */
12125         case 0x1c: /* FCVTAS */
12126             need_fpstatus = true;
12127             rmode = FPROUNDING_TIEAWAY;
12128             if (size == 3 && !is_q) {
12129                 unallocated_encoding(s);
12130                 return;
12131             }
12132             break;
12133         case 0x3c: /* URECPE */
12134             if (size == 3) {
12135                 unallocated_encoding(s);
12136                 return;
12137             }
12138             /* fall through */
12139         case 0x3d: /* FRECPE */
12140         case 0x7d: /* FRSQRTE */
12141             if (size == 3 && !is_q) {
12142                 unallocated_encoding(s);
12143                 return;
12144             }
12145             if (!fp_access_check(s)) {
12146                 return;
12147             }
12148             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12149             return;
12150         case 0x56: /* FCVTXN, FCVTXN2 */
12151             if (size == 2) {
12152                 unallocated_encoding(s);
12153                 return;
12154             }
12155             /* fall through */
12156         case 0x16: /* FCVTN, FCVTN2 */
12157             /* handle_2misc_narrow does a 2*size -> size operation, but these
12158              * instructions encode the source size rather than dest size.
12159              */
12160             if (!fp_access_check(s)) {
12161                 return;
12162             }
12163             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12164             return;
12165         case 0x36: /* BFCVTN, BFCVTN2 */
12166             if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
12167                 unallocated_encoding(s);
12168                 return;
12169             }
12170             if (!fp_access_check(s)) {
12171                 return;
12172             }
12173             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12174             return;
12175         case 0x17: /* FCVTL, FCVTL2 */
12176             if (!fp_access_check(s)) {
12177                 return;
12178             }
12179             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12180             return;
12181         case 0x18: /* FRINTN */
12182         case 0x19: /* FRINTM */
12183         case 0x38: /* FRINTP */
12184         case 0x39: /* FRINTZ */
12185             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12186             /* fall through */
12187         case 0x59: /* FRINTX */
12188         case 0x79: /* FRINTI */
12189             need_fpstatus = true;
12190             if (size == 3 && !is_q) {
12191                 unallocated_encoding(s);
12192                 return;
12193             }
12194             break;
12195         case 0x58: /* FRINTA */
12196             rmode = FPROUNDING_TIEAWAY;
12197             need_fpstatus = true;
12198             if (size == 3 && !is_q) {
12199                 unallocated_encoding(s);
12200                 return;
12201             }
12202             break;
12203         case 0x7c: /* URSQRTE */
12204             if (size == 3) {
12205                 unallocated_encoding(s);
12206                 return;
12207             }
12208             break;
12209         case 0x1e: /* FRINT32Z */
12210         case 0x1f: /* FRINT64Z */
12211             rmode = FPROUNDING_ZERO;
12212             /* fall through */
12213         case 0x5e: /* FRINT32X */
12214         case 0x5f: /* FRINT64X */
12215             need_fpstatus = true;
12216             if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12217                 unallocated_encoding(s);
12218                 return;
12219             }
12220             break;
12221         default:
12222             unallocated_encoding(s);
12223             return;
12224         }
12225         break;
12226     }
12227     default:
12228         unallocated_encoding(s);
12229         return;
12230     }
12231 
12232     if (!fp_access_check(s)) {
12233         return;
12234     }
12235 
12236     if (need_fpstatus || rmode >= 0) {
12237         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
12238     } else {
12239         tcg_fpstatus = NULL;
12240     }
12241     if (rmode >= 0) {
12242         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12243     } else {
12244         tcg_rmode = NULL;
12245     }
12246 
12247     switch (opcode) {
12248     case 0x5:
12249         if (u && size == 0) { /* NOT */
12250             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12251             return;
12252         }
12253         break;
12254     case 0x8: /* CMGT, CMGE */
12255         if (u) {
12256             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
12257         } else {
12258             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
12259         }
12260         return;
12261     case 0x9: /* CMEQ, CMLE */
12262         if (u) {
12263             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
12264         } else {
12265             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
12266         }
12267         return;
12268     case 0xa: /* CMLT */
12269         gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
12270         return;
12271     case 0xb:
12272         if (u) { /* ABS, NEG */
12273             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12274         } else {
12275             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12276         }
12277         return;
12278     }
12279 
12280     if (size == 3) {
12281         /* All 64-bit element operations can be shared with scalar 2misc */
12282         int pass;
12283 
12284         /* Coverity claims (size == 3 && !is_q) has been eliminated
12285          * from all paths leading to here.
12286          */
12287         tcg_debug_assert(is_q);
12288         for (pass = 0; pass < 2; pass++) {
12289             TCGv_i64 tcg_op = tcg_temp_new_i64();
12290             TCGv_i64 tcg_res = tcg_temp_new_i64();
12291 
12292             read_vec_element(s, tcg_op, rn, pass, MO_64);
12293 
12294             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12295                             tcg_rmode, tcg_fpstatus);
12296 
12297             write_vec_element(s, tcg_res, rd, pass, MO_64);
12298         }
12299     } else {
12300         int pass;
12301 
12302         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12303             TCGv_i32 tcg_op = tcg_temp_new_i32();
12304             TCGv_i32 tcg_res = tcg_temp_new_i32();
12305 
12306             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12307 
12308             if (size == 2) {
12309                 /* Special cases for 32 bit elements */
12310                 switch (opcode) {
12311                 case 0x4: /* CLS */
12312                     if (u) {
12313                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12314                     } else {
12315                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
12316                     }
12317                     break;
12318                 case 0x7: /* SQABS, SQNEG */
12319                     if (u) {
12320                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12321                     } else {
12322                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12323                     }
12324                     break;
12325                 case 0x2f: /* FABS */
12326                     gen_helper_vfp_abss(tcg_res, tcg_op);
12327                     break;
12328                 case 0x6f: /* FNEG */
12329                     gen_helper_vfp_negs(tcg_res, tcg_op);
12330                     break;
12331                 case 0x7f: /* FSQRT */
12332                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12333                     break;
12334                 case 0x1a: /* FCVTNS */
12335                 case 0x1b: /* FCVTMS */
12336                 case 0x1c: /* FCVTAS */
12337                 case 0x3a: /* FCVTPS */
12338                 case 0x3b: /* FCVTZS */
12339                     gen_helper_vfp_tosls(tcg_res, tcg_op,
12340                                          tcg_constant_i32(0), tcg_fpstatus);
12341                     break;
12342                 case 0x5a: /* FCVTNU */
12343                 case 0x5b: /* FCVTMU */
12344                 case 0x5c: /* FCVTAU */
12345                 case 0x7a: /* FCVTPU */
12346                 case 0x7b: /* FCVTZU */
12347                     gen_helper_vfp_touls(tcg_res, tcg_op,
12348                                          tcg_constant_i32(0), tcg_fpstatus);
12349                     break;
12350                 case 0x18: /* FRINTN */
12351                 case 0x19: /* FRINTM */
12352                 case 0x38: /* FRINTP */
12353                 case 0x39: /* FRINTZ */
12354                 case 0x58: /* FRINTA */
12355                 case 0x79: /* FRINTI */
12356                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12357                     break;
12358                 case 0x59: /* FRINTX */
12359                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12360                     break;
12361                 case 0x7c: /* URSQRTE */
12362                     gen_helper_rsqrte_u32(tcg_res, tcg_op);
12363                     break;
12364                 case 0x1e: /* FRINT32Z */
12365                 case 0x5e: /* FRINT32X */
12366                     gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12367                     break;
12368                 case 0x1f: /* FRINT64Z */
12369                 case 0x5f: /* FRINT64X */
12370                     gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12371                     break;
12372                 default:
12373                     g_assert_not_reached();
12374                 }
12375             } else {
12376                 /* Use helpers for 8 and 16 bit elements */
12377                 switch (opcode) {
12378                 case 0x5: /* CNT, RBIT */
12379                     /* For these two insns size is part of the opcode specifier
12380                      * (handled earlier); they always operate on byte elements.
12381                      */
12382                     if (u) {
12383                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12384                     } else {
12385                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12386                     }
12387                     break;
12388                 case 0x7: /* SQABS, SQNEG */
12389                 {
12390                     NeonGenOneOpEnvFn *genfn;
12391                     static NeonGenOneOpEnvFn * const fns[2][2] = {
12392                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12393                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12394                     };
12395                     genfn = fns[size][u];
12396                     genfn(tcg_res, cpu_env, tcg_op);
12397                     break;
12398                 }
12399                 case 0x4: /* CLS, CLZ */
12400                     if (u) {
12401                         if (size == 0) {
12402                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
12403                         } else {
12404                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
12405                         }
12406                     } else {
12407                         if (size == 0) {
12408                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
12409                         } else {
12410                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
12411                         }
12412                     }
12413                     break;
12414                 default:
12415                     g_assert_not_reached();
12416                 }
12417             }
12418 
12419             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12420         }
12421     }
12422     clear_vec_high(s, is_q, rd);
12423 
12424     if (tcg_rmode) {
12425         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12426     }
12427 }
12428 
12429 /* AdvSIMD [scalar] two register miscellaneous (FP16)
12430  *
12431  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
12432  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12433  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12434  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12435  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12436  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12437  *
12438  * This actually covers two groups where scalar access is governed by
12439  * bit 28. A bunch of the instructions (float to integral) only exist
12440  * in the vector form and are un-allocated for the scalar decode. Also
12441  * in the scalar decode Q is always 1.
12442  */
12443 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12444 {
12445     int fpop, opcode, a, u;
12446     int rn, rd;
12447     bool is_q;
12448     bool is_scalar;
12449     bool only_in_vector = false;
12450 
12451     int pass;
12452     TCGv_i32 tcg_rmode = NULL;
12453     TCGv_ptr tcg_fpstatus = NULL;
12454     bool need_fpst = true;
12455     int rmode = -1;
12456 
12457     if (!dc_isar_feature(aa64_fp16, s)) {
12458         unallocated_encoding(s);
12459         return;
12460     }
12461 
12462     rd = extract32(insn, 0, 5);
12463     rn = extract32(insn, 5, 5);
12464 
12465     a = extract32(insn, 23, 1);
12466     u = extract32(insn, 29, 1);
12467     is_scalar = extract32(insn, 28, 1);
12468     is_q = extract32(insn, 30, 1);
12469 
12470     opcode = extract32(insn, 12, 5);
12471     fpop = deposit32(opcode, 5, 1, a);
12472     fpop = deposit32(fpop, 6, 1, u);
12473 
12474     switch (fpop) {
12475     case 0x1d: /* SCVTF */
12476     case 0x5d: /* UCVTF */
12477     {
12478         int elements;
12479 
12480         if (is_scalar) {
12481             elements = 1;
12482         } else {
12483             elements = (is_q ? 8 : 4);
12484         }
12485 
12486         if (!fp_access_check(s)) {
12487             return;
12488         }
12489         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12490         return;
12491     }
12492     break;
12493     case 0x2c: /* FCMGT (zero) */
12494     case 0x2d: /* FCMEQ (zero) */
12495     case 0x2e: /* FCMLT (zero) */
12496     case 0x6c: /* FCMGE (zero) */
12497     case 0x6d: /* FCMLE (zero) */
12498         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12499         return;
12500     case 0x3d: /* FRECPE */
12501     case 0x3f: /* FRECPX */
12502         break;
12503     case 0x18: /* FRINTN */
12504         only_in_vector = true;
12505         rmode = FPROUNDING_TIEEVEN;
12506         break;
12507     case 0x19: /* FRINTM */
12508         only_in_vector = true;
12509         rmode = FPROUNDING_NEGINF;
12510         break;
12511     case 0x38: /* FRINTP */
12512         only_in_vector = true;
12513         rmode = FPROUNDING_POSINF;
12514         break;
12515     case 0x39: /* FRINTZ */
12516         only_in_vector = true;
12517         rmode = FPROUNDING_ZERO;
12518         break;
12519     case 0x58: /* FRINTA */
12520         only_in_vector = true;
12521         rmode = FPROUNDING_TIEAWAY;
12522         break;
12523     case 0x59: /* FRINTX */
12524     case 0x79: /* FRINTI */
12525         only_in_vector = true;
12526         /* current rounding mode */
12527         break;
12528     case 0x1a: /* FCVTNS */
12529         rmode = FPROUNDING_TIEEVEN;
12530         break;
12531     case 0x1b: /* FCVTMS */
12532         rmode = FPROUNDING_NEGINF;
12533         break;
12534     case 0x1c: /* FCVTAS */
12535         rmode = FPROUNDING_TIEAWAY;
12536         break;
12537     case 0x3a: /* FCVTPS */
12538         rmode = FPROUNDING_POSINF;
12539         break;
12540     case 0x3b: /* FCVTZS */
12541         rmode = FPROUNDING_ZERO;
12542         break;
12543     case 0x5a: /* FCVTNU */
12544         rmode = FPROUNDING_TIEEVEN;
12545         break;
12546     case 0x5b: /* FCVTMU */
12547         rmode = FPROUNDING_NEGINF;
12548         break;
12549     case 0x5c: /* FCVTAU */
12550         rmode = FPROUNDING_TIEAWAY;
12551         break;
12552     case 0x7a: /* FCVTPU */
12553         rmode = FPROUNDING_POSINF;
12554         break;
12555     case 0x7b: /* FCVTZU */
12556         rmode = FPROUNDING_ZERO;
12557         break;
12558     case 0x2f: /* FABS */
12559     case 0x6f: /* FNEG */
12560         need_fpst = false;
12561         break;
12562     case 0x7d: /* FRSQRTE */
12563     case 0x7f: /* FSQRT (vector) */
12564         break;
12565     default:
12566         unallocated_encoding(s);
12567         return;
12568     }
12569 
12570 
12571     /* Check additional constraints for the scalar encoding */
12572     if (is_scalar) {
12573         if (!is_q) {
12574             unallocated_encoding(s);
12575             return;
12576         }
12577         /* FRINTxx is only in the vector form */
12578         if (only_in_vector) {
12579             unallocated_encoding(s);
12580             return;
12581         }
12582     }
12583 
12584     if (!fp_access_check(s)) {
12585         return;
12586     }
12587 
12588     if (rmode >= 0 || need_fpst) {
12589         tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
12590     }
12591 
12592     if (rmode >= 0) {
12593         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12594     }
12595 
12596     if (is_scalar) {
12597         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12598         TCGv_i32 tcg_res = tcg_temp_new_i32();
12599 
12600         switch (fpop) {
12601         case 0x1a: /* FCVTNS */
12602         case 0x1b: /* FCVTMS */
12603         case 0x1c: /* FCVTAS */
12604         case 0x3a: /* FCVTPS */
12605         case 0x3b: /* FCVTZS */
12606             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12607             break;
12608         case 0x3d: /* FRECPE */
12609             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12610             break;
12611         case 0x3f: /* FRECPX */
12612             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12613             break;
12614         case 0x5a: /* FCVTNU */
12615         case 0x5b: /* FCVTMU */
12616         case 0x5c: /* FCVTAU */
12617         case 0x7a: /* FCVTPU */
12618         case 0x7b: /* FCVTZU */
12619             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12620             break;
12621         case 0x6f: /* FNEG */
12622             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12623             break;
12624         case 0x7d: /* FRSQRTE */
12625             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12626             break;
12627         default:
12628             g_assert_not_reached();
12629         }
12630 
12631         /* limit any sign extension going on */
12632         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12633         write_fp_sreg(s, rd, tcg_res);
12634     } else {
12635         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12636             TCGv_i32 tcg_op = tcg_temp_new_i32();
12637             TCGv_i32 tcg_res = tcg_temp_new_i32();
12638 
12639             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12640 
12641             switch (fpop) {
12642             case 0x1a: /* FCVTNS */
12643             case 0x1b: /* FCVTMS */
12644             case 0x1c: /* FCVTAS */
12645             case 0x3a: /* FCVTPS */
12646             case 0x3b: /* FCVTZS */
12647                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12648                 break;
12649             case 0x3d: /* FRECPE */
12650                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12651                 break;
12652             case 0x5a: /* FCVTNU */
12653             case 0x5b: /* FCVTMU */
12654             case 0x5c: /* FCVTAU */
12655             case 0x7a: /* FCVTPU */
12656             case 0x7b: /* FCVTZU */
12657                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12658                 break;
12659             case 0x18: /* FRINTN */
12660             case 0x19: /* FRINTM */
12661             case 0x38: /* FRINTP */
12662             case 0x39: /* FRINTZ */
12663             case 0x58: /* FRINTA */
12664             case 0x79: /* FRINTI */
12665                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12666                 break;
12667             case 0x59: /* FRINTX */
12668                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12669                 break;
12670             case 0x2f: /* FABS */
12671                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12672                 break;
12673             case 0x6f: /* FNEG */
12674                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12675                 break;
12676             case 0x7d: /* FRSQRTE */
12677                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12678                 break;
12679             case 0x7f: /* FSQRT */
12680                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12681                 break;
12682             default:
12683                 g_assert_not_reached();
12684             }
12685 
12686             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12687         }
12688 
12689         clear_vec_high(s, is_q, rd);
12690     }
12691 
12692     if (tcg_rmode) {
12693         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12694     }
12695 }
12696 
12697 /* AdvSIMD scalar x indexed element
12698  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12699  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12700  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12701  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12702  * AdvSIMD vector x indexed element
12703  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12704  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12705  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12706  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12707  */
12708 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12709 {
12710     /* This encoding has two kinds of instruction:
12711      *  normal, where we perform elt x idxelt => elt for each
12712      *     element in the vector
12713      *  long, where we perform elt x idxelt and generate a result of
12714      *     double the width of the input element
12715      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
12716      */
12717     bool is_scalar = extract32(insn, 28, 1);
12718     bool is_q = extract32(insn, 30, 1);
12719     bool u = extract32(insn, 29, 1);
12720     int size = extract32(insn, 22, 2);
12721     int l = extract32(insn, 21, 1);
12722     int m = extract32(insn, 20, 1);
12723     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12724     int rm = extract32(insn, 16, 4);
12725     int opcode = extract32(insn, 12, 4);
12726     int h = extract32(insn, 11, 1);
12727     int rn = extract32(insn, 5, 5);
12728     int rd = extract32(insn, 0, 5);
12729     bool is_long = false;
12730     int is_fp = 0;
12731     bool is_fp16 = false;
12732     int index;
12733     TCGv_ptr fpst;
12734 
12735     switch (16 * u + opcode) {
12736     case 0x08: /* MUL */
12737     case 0x10: /* MLA */
12738     case 0x14: /* MLS */
12739         if (is_scalar) {
12740             unallocated_encoding(s);
12741             return;
12742         }
12743         break;
12744     case 0x02: /* SMLAL, SMLAL2 */
12745     case 0x12: /* UMLAL, UMLAL2 */
12746     case 0x06: /* SMLSL, SMLSL2 */
12747     case 0x16: /* UMLSL, UMLSL2 */
12748     case 0x0a: /* SMULL, SMULL2 */
12749     case 0x1a: /* UMULL, UMULL2 */
12750         if (is_scalar) {
12751             unallocated_encoding(s);
12752             return;
12753         }
12754         is_long = true;
12755         break;
12756     case 0x03: /* SQDMLAL, SQDMLAL2 */
12757     case 0x07: /* SQDMLSL, SQDMLSL2 */
12758     case 0x0b: /* SQDMULL, SQDMULL2 */
12759         is_long = true;
12760         break;
12761     case 0x0c: /* SQDMULH */
12762     case 0x0d: /* SQRDMULH */
12763         break;
12764     case 0x01: /* FMLA */
12765     case 0x05: /* FMLS */
12766     case 0x09: /* FMUL */
12767     case 0x19: /* FMULX */
12768         is_fp = 1;
12769         break;
12770     case 0x1d: /* SQRDMLAH */
12771     case 0x1f: /* SQRDMLSH */
12772         if (!dc_isar_feature(aa64_rdm, s)) {
12773             unallocated_encoding(s);
12774             return;
12775         }
12776         break;
12777     case 0x0e: /* SDOT */
12778     case 0x1e: /* UDOT */
12779         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12780             unallocated_encoding(s);
12781             return;
12782         }
12783         break;
12784     case 0x0f:
12785         switch (size) {
12786         case 0: /* SUDOT */
12787         case 2: /* USDOT */
12788             if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
12789                 unallocated_encoding(s);
12790                 return;
12791             }
12792             size = MO_32;
12793             break;
12794         case 1: /* BFDOT */
12795             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12796                 unallocated_encoding(s);
12797                 return;
12798             }
12799             size = MO_32;
12800             break;
12801         case 3: /* BFMLAL{B,T} */
12802             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12803                 unallocated_encoding(s);
12804                 return;
12805             }
12806             /* can't set is_fp without other incorrect size checks */
12807             size = MO_16;
12808             break;
12809         default:
12810             unallocated_encoding(s);
12811             return;
12812         }
12813         break;
12814     case 0x11: /* FCMLA #0 */
12815     case 0x13: /* FCMLA #90 */
12816     case 0x15: /* FCMLA #180 */
12817     case 0x17: /* FCMLA #270 */
12818         if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
12819             unallocated_encoding(s);
12820             return;
12821         }
12822         is_fp = 2;
12823         break;
12824     case 0x00: /* FMLAL */
12825     case 0x04: /* FMLSL */
12826     case 0x18: /* FMLAL2 */
12827     case 0x1c: /* FMLSL2 */
12828         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
12829             unallocated_encoding(s);
12830             return;
12831         }
12832         size = MO_16;
12833         /* is_fp, but we pass cpu_env not fp_status.  */
12834         break;
12835     default:
12836         unallocated_encoding(s);
12837         return;
12838     }
12839 
12840     switch (is_fp) {
12841     case 1: /* normal fp */
12842         /* convert insn encoded size to MemOp size */
12843         switch (size) {
12844         case 0: /* half-precision */
12845             size = MO_16;
12846             is_fp16 = true;
12847             break;
12848         case MO_32: /* single precision */
12849         case MO_64: /* double precision */
12850             break;
12851         default:
12852             unallocated_encoding(s);
12853             return;
12854         }
12855         break;
12856 
12857     case 2: /* complex fp */
12858         /* Each indexable element is a complex pair.  */
12859         size += 1;
12860         switch (size) {
12861         case MO_32:
12862             if (h && !is_q) {
12863                 unallocated_encoding(s);
12864                 return;
12865             }
12866             is_fp16 = true;
12867             break;
12868         case MO_64:
12869             break;
12870         default:
12871             unallocated_encoding(s);
12872             return;
12873         }
12874         break;
12875 
12876     default: /* integer */
12877         switch (size) {
12878         case MO_8:
12879         case MO_64:
12880             unallocated_encoding(s);
12881             return;
12882         }
12883         break;
12884     }
12885     if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
12886         unallocated_encoding(s);
12887         return;
12888     }
12889 
12890     /* Given MemOp size, adjust register and indexing.  */
12891     switch (size) {
12892     case MO_16:
12893         index = h << 2 | l << 1 | m;
12894         break;
12895     case MO_32:
12896         index = h << 1 | l;
12897         rm |= m << 4;
12898         break;
12899     case MO_64:
12900         if (l || !is_q) {
12901             unallocated_encoding(s);
12902             return;
12903         }
12904         index = h;
12905         rm |= m << 4;
12906         break;
12907     default:
12908         g_assert_not_reached();
12909     }
12910 
12911     if (!fp_access_check(s)) {
12912         return;
12913     }
12914 
12915     if (is_fp) {
12916         fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
12917     } else {
12918         fpst = NULL;
12919     }
12920 
12921     switch (16 * u + opcode) {
12922     case 0x0e: /* SDOT */
12923     case 0x1e: /* UDOT */
12924         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12925                          u ? gen_helper_gvec_udot_idx_b
12926                          : gen_helper_gvec_sdot_idx_b);
12927         return;
12928     case 0x0f:
12929         switch (extract32(insn, 22, 2)) {
12930         case 0: /* SUDOT */
12931             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12932                              gen_helper_gvec_sudot_idx_b);
12933             return;
12934         case 1: /* BFDOT */
12935             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12936                              gen_helper_gvec_bfdot_idx);
12937             return;
12938         case 2: /* USDOT */
12939             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12940                              gen_helper_gvec_usdot_idx_b);
12941             return;
12942         case 3: /* BFMLAL{B,T} */
12943             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
12944                               gen_helper_gvec_bfmlal_idx);
12945             return;
12946         }
12947         g_assert_not_reached();
12948     case 0x11: /* FCMLA #0 */
12949     case 0x13: /* FCMLA #90 */
12950     case 0x15: /* FCMLA #180 */
12951     case 0x17: /* FCMLA #270 */
12952         {
12953             int rot = extract32(insn, 13, 2);
12954             int data = (index << 2) | rot;
12955             tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
12956                                vec_full_reg_offset(s, rn),
12957                                vec_full_reg_offset(s, rm),
12958                                vec_full_reg_offset(s, rd), fpst,
12959                                is_q ? 16 : 8, vec_full_reg_size(s), data,
12960                                size == MO_64
12961                                ? gen_helper_gvec_fcmlas_idx
12962                                : gen_helper_gvec_fcmlah_idx);
12963         }
12964         return;
12965 
12966     case 0x00: /* FMLAL */
12967     case 0x04: /* FMLSL */
12968     case 0x18: /* FMLAL2 */
12969     case 0x1c: /* FMLSL2 */
12970         {
12971             int is_s = extract32(opcode, 2, 1);
12972             int is_2 = u;
12973             int data = (index << 2) | (is_2 << 1) | is_s;
12974             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12975                                vec_full_reg_offset(s, rn),
12976                                vec_full_reg_offset(s, rm), cpu_env,
12977                                is_q ? 16 : 8, vec_full_reg_size(s),
12978                                data, gen_helper_gvec_fmlal_idx_a64);
12979         }
12980         return;
12981 
12982     case 0x08: /* MUL */
12983         if (!is_long && !is_scalar) {
12984             static gen_helper_gvec_3 * const fns[3] = {
12985                 gen_helper_gvec_mul_idx_h,
12986                 gen_helper_gvec_mul_idx_s,
12987                 gen_helper_gvec_mul_idx_d,
12988             };
12989             tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
12990                                vec_full_reg_offset(s, rn),
12991                                vec_full_reg_offset(s, rm),
12992                                is_q ? 16 : 8, vec_full_reg_size(s),
12993                                index, fns[size - 1]);
12994             return;
12995         }
12996         break;
12997 
12998     case 0x10: /* MLA */
12999         if (!is_long && !is_scalar) {
13000             static gen_helper_gvec_4 * const fns[3] = {
13001                 gen_helper_gvec_mla_idx_h,
13002                 gen_helper_gvec_mla_idx_s,
13003                 gen_helper_gvec_mla_idx_d,
13004             };
13005             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13006                                vec_full_reg_offset(s, rn),
13007                                vec_full_reg_offset(s, rm),
13008                                vec_full_reg_offset(s, rd),
13009                                is_q ? 16 : 8, vec_full_reg_size(s),
13010                                index, fns[size - 1]);
13011             return;
13012         }
13013         break;
13014 
13015     case 0x14: /* MLS */
13016         if (!is_long && !is_scalar) {
13017             static gen_helper_gvec_4 * const fns[3] = {
13018                 gen_helper_gvec_mls_idx_h,
13019                 gen_helper_gvec_mls_idx_s,
13020                 gen_helper_gvec_mls_idx_d,
13021             };
13022             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13023                                vec_full_reg_offset(s, rn),
13024                                vec_full_reg_offset(s, rm),
13025                                vec_full_reg_offset(s, rd),
13026                                is_q ? 16 : 8, vec_full_reg_size(s),
13027                                index, fns[size - 1]);
13028             return;
13029         }
13030         break;
13031     }
13032 
13033     if (size == 3) {
13034         TCGv_i64 tcg_idx = tcg_temp_new_i64();
13035         int pass;
13036 
13037         assert(is_fp && is_q && !is_long);
13038 
13039         read_vec_element(s, tcg_idx, rm, index, MO_64);
13040 
13041         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13042             TCGv_i64 tcg_op = tcg_temp_new_i64();
13043             TCGv_i64 tcg_res = tcg_temp_new_i64();
13044 
13045             read_vec_element(s, tcg_op, rn, pass, MO_64);
13046 
13047             switch (16 * u + opcode) {
13048             case 0x05: /* FMLS */
13049                 /* As usual for ARM, separate negation for fused multiply-add */
13050                 gen_helper_vfp_negd(tcg_op, tcg_op);
13051                 /* fall through */
13052             case 0x01: /* FMLA */
13053                 read_vec_element(s, tcg_res, rd, pass, MO_64);
13054                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
13055                 break;
13056             case 0x09: /* FMUL */
13057                 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
13058                 break;
13059             case 0x19: /* FMULX */
13060                 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
13061                 break;
13062             default:
13063                 g_assert_not_reached();
13064             }
13065 
13066             write_vec_element(s, tcg_res, rd, pass, MO_64);
13067         }
13068 
13069         clear_vec_high(s, !is_scalar, rd);
13070     } else if (!is_long) {
13071         /* 32 bit floating point, or 16 or 32 bit integer.
13072          * For the 16 bit scalar case we use the usual Neon helpers and
13073          * rely on the fact that 0 op 0 == 0 with no side effects.
13074          */
13075         TCGv_i32 tcg_idx = tcg_temp_new_i32();
13076         int pass, maxpasses;
13077 
13078         if (is_scalar) {
13079             maxpasses = 1;
13080         } else {
13081             maxpasses = is_q ? 4 : 2;
13082         }
13083 
13084         read_vec_element_i32(s, tcg_idx, rm, index, size);
13085 
13086         if (size == 1 && !is_scalar) {
13087             /* The simplest way to handle the 16x16 indexed ops is to duplicate
13088              * the index into both halves of the 32 bit tcg_idx and then use
13089              * the usual Neon helpers.
13090              */
13091             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13092         }
13093 
13094         for (pass = 0; pass < maxpasses; pass++) {
13095             TCGv_i32 tcg_op = tcg_temp_new_i32();
13096             TCGv_i32 tcg_res = tcg_temp_new_i32();
13097 
13098             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13099 
13100             switch (16 * u + opcode) {
13101             case 0x08: /* MUL */
13102             case 0x10: /* MLA */
13103             case 0x14: /* MLS */
13104             {
13105                 static NeonGenTwoOpFn * const fns[2][2] = {
13106                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13107                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
13108                 };
13109                 NeonGenTwoOpFn *genfn;
13110                 bool is_sub = opcode == 0x4;
13111 
13112                 if (size == 1) {
13113                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13114                 } else {
13115                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13116                 }
13117                 if (opcode == 0x8) {
13118                     break;
13119                 }
13120                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13121                 genfn = fns[size - 1][is_sub];
13122                 genfn(tcg_res, tcg_op, tcg_res);
13123                 break;
13124             }
13125             case 0x05: /* FMLS */
13126             case 0x01: /* FMLA */
13127                 read_vec_element_i32(s, tcg_res, rd, pass,
13128                                      is_scalar ? size : MO_32);
13129                 switch (size) {
13130                 case 1:
13131                     if (opcode == 0x5) {
13132                         /* As usual for ARM, separate negation for fused
13133                          * multiply-add */
13134                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13135                     }
13136                     if (is_scalar) {
13137                         gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13138                                                    tcg_res, fpst);
13139                     } else {
13140                         gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13141                                                     tcg_res, fpst);
13142                     }
13143                     break;
13144                 case 2:
13145                     if (opcode == 0x5) {
13146                         /* As usual for ARM, separate negation for
13147                          * fused multiply-add */
13148                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13149                     }
13150                     gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13151                                            tcg_res, fpst);
13152                     break;
13153                 default:
13154                     g_assert_not_reached();
13155                 }
13156                 break;
13157             case 0x09: /* FMUL */
13158                 switch (size) {
13159                 case 1:
13160                     if (is_scalar) {
13161                         gen_helper_advsimd_mulh(tcg_res, tcg_op,
13162                                                 tcg_idx, fpst);
13163                     } else {
13164                         gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13165                                                  tcg_idx, fpst);
13166                     }
13167                     break;
13168                 case 2:
13169                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13170                     break;
13171                 default:
13172                     g_assert_not_reached();
13173                 }
13174                 break;
13175             case 0x19: /* FMULX */
13176                 switch (size) {
13177                 case 1:
13178                     if (is_scalar) {
13179                         gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13180                                                  tcg_idx, fpst);
13181                     } else {
13182                         gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13183                                                   tcg_idx, fpst);
13184                     }
13185                     break;
13186                 case 2:
13187                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13188                     break;
13189                 default:
13190                     g_assert_not_reached();
13191                 }
13192                 break;
13193             case 0x0c: /* SQDMULH */
13194                 if (size == 1) {
13195                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
13196                                                tcg_op, tcg_idx);
13197                 } else {
13198                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
13199                                                tcg_op, tcg_idx);
13200                 }
13201                 break;
13202             case 0x0d: /* SQRDMULH */
13203                 if (size == 1) {
13204                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
13205                                                 tcg_op, tcg_idx);
13206                 } else {
13207                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
13208                                                 tcg_op, tcg_idx);
13209                 }
13210                 break;
13211             case 0x1d: /* SQRDMLAH */
13212                 read_vec_element_i32(s, tcg_res, rd, pass,
13213                                      is_scalar ? size : MO_32);
13214                 if (size == 1) {
13215                     gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
13216                                                 tcg_op, tcg_idx, tcg_res);
13217                 } else {
13218                     gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
13219                                                 tcg_op, tcg_idx, tcg_res);
13220                 }
13221                 break;
13222             case 0x1f: /* SQRDMLSH */
13223                 read_vec_element_i32(s, tcg_res, rd, pass,
13224                                      is_scalar ? size : MO_32);
13225                 if (size == 1) {
13226                     gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
13227                                                 tcg_op, tcg_idx, tcg_res);
13228                 } else {
13229                     gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
13230                                                 tcg_op, tcg_idx, tcg_res);
13231                 }
13232                 break;
13233             default:
13234                 g_assert_not_reached();
13235             }
13236 
13237             if (is_scalar) {
13238                 write_fp_sreg(s, rd, tcg_res);
13239             } else {
13240                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13241             }
13242         }
13243 
13244         clear_vec_high(s, is_q, rd);
13245     } else {
13246         /* long ops: 16x16->32 or 32x32->64 */
13247         TCGv_i64 tcg_res[2];
13248         int pass;
13249         bool satop = extract32(opcode, 0, 1);
13250         MemOp memop = MO_32;
13251 
13252         if (satop || !u) {
13253             memop |= MO_SIGN;
13254         }
13255 
13256         if (size == 2) {
13257             TCGv_i64 tcg_idx = tcg_temp_new_i64();
13258 
13259             read_vec_element(s, tcg_idx, rm, index, memop);
13260 
13261             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13262                 TCGv_i64 tcg_op = tcg_temp_new_i64();
13263                 TCGv_i64 tcg_passres;
13264                 int passelt;
13265 
13266                 if (is_scalar) {
13267                     passelt = 0;
13268                 } else {
13269                     passelt = pass + (is_q * 2);
13270                 }
13271 
13272                 read_vec_element(s, tcg_op, rn, passelt, memop);
13273 
13274                 tcg_res[pass] = tcg_temp_new_i64();
13275 
13276                 if (opcode == 0xa || opcode == 0xb) {
13277                     /* Non-accumulating ops */
13278                     tcg_passres = tcg_res[pass];
13279                 } else {
13280                     tcg_passres = tcg_temp_new_i64();
13281                 }
13282 
13283                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13284 
13285                 if (satop) {
13286                     /* saturating, doubling */
13287                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
13288                                                       tcg_passres, tcg_passres);
13289                 }
13290 
13291                 if (opcode == 0xa || opcode == 0xb) {
13292                     continue;
13293                 }
13294 
13295                 /* Accumulating op: handle accumulate step */
13296                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13297 
13298                 switch (opcode) {
13299                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13300                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13301                     break;
13302                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13303                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13304                     break;
13305                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13306                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
13307                     /* fall through */
13308                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13309                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
13310                                                       tcg_res[pass],
13311                                                       tcg_passres);
13312                     break;
13313                 default:
13314                     g_assert_not_reached();
13315                 }
13316             }
13317 
13318             clear_vec_high(s, !is_scalar, rd);
13319         } else {
13320             TCGv_i32 tcg_idx = tcg_temp_new_i32();
13321 
13322             assert(size == 1);
13323             read_vec_element_i32(s, tcg_idx, rm, index, size);
13324 
13325             if (!is_scalar) {
13326                 /* The simplest way to handle the 16x16 indexed ops is to
13327                  * duplicate the index into both halves of the 32 bit tcg_idx
13328                  * and then use the usual Neon helpers.
13329                  */
13330                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13331             }
13332 
13333             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13334                 TCGv_i32 tcg_op = tcg_temp_new_i32();
13335                 TCGv_i64 tcg_passres;
13336 
13337                 if (is_scalar) {
13338                     read_vec_element_i32(s, tcg_op, rn, pass, size);
13339                 } else {
13340                     read_vec_element_i32(s, tcg_op, rn,
13341                                          pass + (is_q * 2), MO_32);
13342                 }
13343 
13344                 tcg_res[pass] = tcg_temp_new_i64();
13345 
13346                 if (opcode == 0xa || opcode == 0xb) {
13347                     /* Non-accumulating ops */
13348                     tcg_passres = tcg_res[pass];
13349                 } else {
13350                     tcg_passres = tcg_temp_new_i64();
13351                 }
13352 
13353                 if (memop & MO_SIGN) {
13354                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13355                 } else {
13356                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13357                 }
13358                 if (satop) {
13359                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13360                                                       tcg_passres, tcg_passres);
13361                 }
13362 
13363                 if (opcode == 0xa || opcode == 0xb) {
13364                     continue;
13365                 }
13366 
13367                 /* Accumulating op: handle accumulate step */
13368                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13369 
13370                 switch (opcode) {
13371                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13372                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13373                                              tcg_passres);
13374                     break;
13375                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13376                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13377                                              tcg_passres);
13378                     break;
13379                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13380                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13381                     /* fall through */
13382                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13383                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13384                                                       tcg_res[pass],
13385                                                       tcg_passres);
13386                     break;
13387                 default:
13388                     g_assert_not_reached();
13389                 }
13390             }
13391 
13392             if (is_scalar) {
13393                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13394             }
13395         }
13396 
13397         if (is_scalar) {
13398             tcg_res[1] = tcg_constant_i64(0);
13399         }
13400 
13401         for (pass = 0; pass < 2; pass++) {
13402             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13403         }
13404     }
13405 }
13406 
13407 /* Crypto AES
13408  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13409  * +-----------------+------+-----------+--------+-----+------+------+
13410  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13411  * +-----------------+------+-----------+--------+-----+------+------+
13412  */
13413 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13414 {
13415     int size = extract32(insn, 22, 2);
13416     int opcode = extract32(insn, 12, 5);
13417     int rn = extract32(insn, 5, 5);
13418     int rd = extract32(insn, 0, 5);
13419     int decrypt;
13420     gen_helper_gvec_2 *genfn2 = NULL;
13421     gen_helper_gvec_3 *genfn3 = NULL;
13422 
13423     if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13424         unallocated_encoding(s);
13425         return;
13426     }
13427 
13428     switch (opcode) {
13429     case 0x4: /* AESE */
13430         decrypt = 0;
13431         genfn3 = gen_helper_crypto_aese;
13432         break;
13433     case 0x6: /* AESMC */
13434         decrypt = 0;
13435         genfn2 = gen_helper_crypto_aesmc;
13436         break;
13437     case 0x5: /* AESD */
13438         decrypt = 1;
13439         genfn3 = gen_helper_crypto_aese;
13440         break;
13441     case 0x7: /* AESIMC */
13442         decrypt = 1;
13443         genfn2 = gen_helper_crypto_aesmc;
13444         break;
13445     default:
13446         unallocated_encoding(s);
13447         return;
13448     }
13449 
13450     if (!fp_access_check(s)) {
13451         return;
13452     }
13453     if (genfn2) {
13454         gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2);
13455     } else {
13456         gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3);
13457     }
13458 }
13459 
13460 /* Crypto three-reg SHA
13461  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
13462  * +-----------------+------+---+------+---+--------+-----+------+------+
13463  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
13464  * +-----------------+------+---+------+---+--------+-----+------+------+
13465  */
13466 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13467 {
13468     int size = extract32(insn, 22, 2);
13469     int opcode = extract32(insn, 12, 3);
13470     int rm = extract32(insn, 16, 5);
13471     int rn = extract32(insn, 5, 5);
13472     int rd = extract32(insn, 0, 5);
13473     gen_helper_gvec_3 *genfn;
13474     bool feature;
13475 
13476     if (size != 0) {
13477         unallocated_encoding(s);
13478         return;
13479     }
13480 
13481     switch (opcode) {
13482     case 0: /* SHA1C */
13483         genfn = gen_helper_crypto_sha1c;
13484         feature = dc_isar_feature(aa64_sha1, s);
13485         break;
13486     case 1: /* SHA1P */
13487         genfn = gen_helper_crypto_sha1p;
13488         feature = dc_isar_feature(aa64_sha1, s);
13489         break;
13490     case 2: /* SHA1M */
13491         genfn = gen_helper_crypto_sha1m;
13492         feature = dc_isar_feature(aa64_sha1, s);
13493         break;
13494     case 3: /* SHA1SU0 */
13495         genfn = gen_helper_crypto_sha1su0;
13496         feature = dc_isar_feature(aa64_sha1, s);
13497         break;
13498     case 4: /* SHA256H */
13499         genfn = gen_helper_crypto_sha256h;
13500         feature = dc_isar_feature(aa64_sha256, s);
13501         break;
13502     case 5: /* SHA256H2 */
13503         genfn = gen_helper_crypto_sha256h2;
13504         feature = dc_isar_feature(aa64_sha256, s);
13505         break;
13506     case 6: /* SHA256SU1 */
13507         genfn = gen_helper_crypto_sha256su1;
13508         feature = dc_isar_feature(aa64_sha256, s);
13509         break;
13510     default:
13511         unallocated_encoding(s);
13512         return;
13513     }
13514 
13515     if (!feature) {
13516         unallocated_encoding(s);
13517         return;
13518     }
13519 
13520     if (!fp_access_check(s)) {
13521         return;
13522     }
13523     gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
13524 }
13525 
13526 /* Crypto two-reg SHA
13527  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13528  * +-----------------+------+-----------+--------+-----+------+------+
13529  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13530  * +-----------------+------+-----------+--------+-----+------+------+
13531  */
13532 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13533 {
13534     int size = extract32(insn, 22, 2);
13535     int opcode = extract32(insn, 12, 5);
13536     int rn = extract32(insn, 5, 5);
13537     int rd = extract32(insn, 0, 5);
13538     gen_helper_gvec_2 *genfn;
13539     bool feature;
13540 
13541     if (size != 0) {
13542         unallocated_encoding(s);
13543         return;
13544     }
13545 
13546     switch (opcode) {
13547     case 0: /* SHA1H */
13548         feature = dc_isar_feature(aa64_sha1, s);
13549         genfn = gen_helper_crypto_sha1h;
13550         break;
13551     case 1: /* SHA1SU1 */
13552         feature = dc_isar_feature(aa64_sha1, s);
13553         genfn = gen_helper_crypto_sha1su1;
13554         break;
13555     case 2: /* SHA256SU0 */
13556         feature = dc_isar_feature(aa64_sha256, s);
13557         genfn = gen_helper_crypto_sha256su0;
13558         break;
13559     default:
13560         unallocated_encoding(s);
13561         return;
13562     }
13563 
13564     if (!feature) {
13565         unallocated_encoding(s);
13566         return;
13567     }
13568 
13569     if (!fp_access_check(s)) {
13570         return;
13571     }
13572     gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
13573 }
13574 
13575 static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
13576 {
13577     tcg_gen_rotli_i64(d, m, 1);
13578     tcg_gen_xor_i64(d, d, n);
13579 }
13580 
13581 static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
13582 {
13583     tcg_gen_rotli_vec(vece, d, m, 1);
13584     tcg_gen_xor_vec(vece, d, d, n);
13585 }
13586 
13587 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
13588                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
13589 {
13590     static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
13591     static const GVecGen3 op = {
13592         .fni8 = gen_rax1_i64,
13593         .fniv = gen_rax1_vec,
13594         .opt_opc = vecop_list,
13595         .fno = gen_helper_crypto_rax1,
13596         .vece = MO_64,
13597     };
13598     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
13599 }
13600 
13601 /* Crypto three-reg SHA512
13602  *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
13603  * +-----------------------+------+---+---+-----+--------+------+------+
13604  * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
13605  * +-----------------------+------+---+---+-----+--------+------+------+
13606  */
13607 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13608 {
13609     int opcode = extract32(insn, 10, 2);
13610     int o =  extract32(insn, 14, 1);
13611     int rm = extract32(insn, 16, 5);
13612     int rn = extract32(insn, 5, 5);
13613     int rd = extract32(insn, 0, 5);
13614     bool feature;
13615     gen_helper_gvec_3 *oolfn = NULL;
13616     GVecGen3Fn *gvecfn = NULL;
13617 
13618     if (o == 0) {
13619         switch (opcode) {
13620         case 0: /* SHA512H */
13621             feature = dc_isar_feature(aa64_sha512, s);
13622             oolfn = gen_helper_crypto_sha512h;
13623             break;
13624         case 1: /* SHA512H2 */
13625             feature = dc_isar_feature(aa64_sha512, s);
13626             oolfn = gen_helper_crypto_sha512h2;
13627             break;
13628         case 2: /* SHA512SU1 */
13629             feature = dc_isar_feature(aa64_sha512, s);
13630             oolfn = gen_helper_crypto_sha512su1;
13631             break;
13632         case 3: /* RAX1 */
13633             feature = dc_isar_feature(aa64_sha3, s);
13634             gvecfn = gen_gvec_rax1;
13635             break;
13636         default:
13637             g_assert_not_reached();
13638         }
13639     } else {
13640         switch (opcode) {
13641         case 0: /* SM3PARTW1 */
13642             feature = dc_isar_feature(aa64_sm3, s);
13643             oolfn = gen_helper_crypto_sm3partw1;
13644             break;
13645         case 1: /* SM3PARTW2 */
13646             feature = dc_isar_feature(aa64_sm3, s);
13647             oolfn = gen_helper_crypto_sm3partw2;
13648             break;
13649         case 2: /* SM4EKEY */
13650             feature = dc_isar_feature(aa64_sm4, s);
13651             oolfn = gen_helper_crypto_sm4ekey;
13652             break;
13653         default:
13654             unallocated_encoding(s);
13655             return;
13656         }
13657     }
13658 
13659     if (!feature) {
13660         unallocated_encoding(s);
13661         return;
13662     }
13663 
13664     if (!fp_access_check(s)) {
13665         return;
13666     }
13667 
13668     if (oolfn) {
13669         gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
13670     } else {
13671         gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
13672     }
13673 }
13674 
13675 /* Crypto two-reg SHA512
13676  *  31                                     12  11  10  9    5 4    0
13677  * +-----------------------------------------+--------+------+------+
13678  * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
13679  * +-----------------------------------------+--------+------+------+
13680  */
13681 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13682 {
13683     int opcode = extract32(insn, 10, 2);
13684     int rn = extract32(insn, 5, 5);
13685     int rd = extract32(insn, 0, 5);
13686     bool feature;
13687 
13688     switch (opcode) {
13689     case 0: /* SHA512SU0 */
13690         feature = dc_isar_feature(aa64_sha512, s);
13691         break;
13692     case 1: /* SM4E */
13693         feature = dc_isar_feature(aa64_sm4, s);
13694         break;
13695     default:
13696         unallocated_encoding(s);
13697         return;
13698     }
13699 
13700     if (!feature) {
13701         unallocated_encoding(s);
13702         return;
13703     }
13704 
13705     if (!fp_access_check(s)) {
13706         return;
13707     }
13708 
13709     switch (opcode) {
13710     case 0: /* SHA512SU0 */
13711         gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
13712         break;
13713     case 1: /* SM4E */
13714         gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
13715         break;
13716     default:
13717         g_assert_not_reached();
13718     }
13719 }
13720 
13721 /* Crypto four-register
13722  *  31               23 22 21 20  16 15  14  10 9    5 4    0
13723  * +-------------------+-----+------+---+------+------+------+
13724  * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
13725  * +-------------------+-----+------+---+------+------+------+
13726  */
13727 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13728 {
13729     int op0 = extract32(insn, 21, 2);
13730     int rm = extract32(insn, 16, 5);
13731     int ra = extract32(insn, 10, 5);
13732     int rn = extract32(insn, 5, 5);
13733     int rd = extract32(insn, 0, 5);
13734     bool feature;
13735 
13736     switch (op0) {
13737     case 0: /* EOR3 */
13738     case 1: /* BCAX */
13739         feature = dc_isar_feature(aa64_sha3, s);
13740         break;
13741     case 2: /* SM3SS1 */
13742         feature = dc_isar_feature(aa64_sm3, s);
13743         break;
13744     default:
13745         unallocated_encoding(s);
13746         return;
13747     }
13748 
13749     if (!feature) {
13750         unallocated_encoding(s);
13751         return;
13752     }
13753 
13754     if (!fp_access_check(s)) {
13755         return;
13756     }
13757 
13758     if (op0 < 2) {
13759         TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13760         int pass;
13761 
13762         tcg_op1 = tcg_temp_new_i64();
13763         tcg_op2 = tcg_temp_new_i64();
13764         tcg_op3 = tcg_temp_new_i64();
13765         tcg_res[0] = tcg_temp_new_i64();
13766         tcg_res[1] = tcg_temp_new_i64();
13767 
13768         for (pass = 0; pass < 2; pass++) {
13769             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13770             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13771             read_vec_element(s, tcg_op3, ra, pass, MO_64);
13772 
13773             if (op0 == 0) {
13774                 /* EOR3 */
13775                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13776             } else {
13777                 /* BCAX */
13778                 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13779             }
13780             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13781         }
13782         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13783         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13784     } else {
13785         TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13786 
13787         tcg_op1 = tcg_temp_new_i32();
13788         tcg_op2 = tcg_temp_new_i32();
13789         tcg_op3 = tcg_temp_new_i32();
13790         tcg_res = tcg_temp_new_i32();
13791         tcg_zero = tcg_constant_i32(0);
13792 
13793         read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13794         read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13795         read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13796 
13797         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13798         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13799         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13800         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13801 
13802         write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13803         write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13804         write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13805         write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13806     }
13807 }
13808 
13809 /* Crypto XAR
13810  *  31                   21 20  16 15    10 9    5 4    0
13811  * +-----------------------+------+--------+------+------+
13812  * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13813  * +-----------------------+------+--------+------+------+
13814  */
13815 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13816 {
13817     int rm = extract32(insn, 16, 5);
13818     int imm6 = extract32(insn, 10, 6);
13819     int rn = extract32(insn, 5, 5);
13820     int rd = extract32(insn, 0, 5);
13821 
13822     if (!dc_isar_feature(aa64_sha3, s)) {
13823         unallocated_encoding(s);
13824         return;
13825     }
13826 
13827     if (!fp_access_check(s)) {
13828         return;
13829     }
13830 
13831     gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
13832                  vec_full_reg_offset(s, rn),
13833                  vec_full_reg_offset(s, rm), imm6, 16,
13834                  vec_full_reg_size(s));
13835 }
13836 
13837 /* Crypto three-reg imm2
13838  *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
13839  * +-----------------------+------+-----+------+--------+------+------+
13840  * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
13841  * +-----------------------+------+-----+------+--------+------+------+
13842  */
13843 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13844 {
13845     static gen_helper_gvec_3 * const fns[4] = {
13846         gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
13847         gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
13848     };
13849     int opcode = extract32(insn, 10, 2);
13850     int imm2 = extract32(insn, 12, 2);
13851     int rm = extract32(insn, 16, 5);
13852     int rn = extract32(insn, 5, 5);
13853     int rd = extract32(insn, 0, 5);
13854 
13855     if (!dc_isar_feature(aa64_sm3, s)) {
13856         unallocated_encoding(s);
13857         return;
13858     }
13859 
13860     if (!fp_access_check(s)) {
13861         return;
13862     }
13863 
13864     gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
13865 }
13866 
13867 /* C3.6 Data processing - SIMD, inc Crypto
13868  *
13869  * As the decode gets a little complex we are using a table based
13870  * approach for this part of the decode.
 *
 * Each row is { pattern, mask, fn }.  The table is handed to
 * lookup_disas_fn() by disas_data_proc_simd(), which calls the returned
 * handler or reports an unallocated encoding when none is returned.
 *
 * NOTE(review): presumably a row matches when (insn & mask) == pattern
 * and the first matching row wins -- the ordering remark on
 * simd_mod_imm below relies on that; confirm against lookup_disas_fn().
13871  */
13872 static const AArch64DecodeTable data_proc_simd[] = {
13873     /* pattern  ,  mask     ,  fn                        */
          /* Vector handlers (disas_simd_*) */
13874     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
13875     { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
13876     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
13877     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
13878     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
13879     { 0x0e000400, 0x9fe08400, disas_simd_copy },
13880     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
13881     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
13882     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
13883     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
13884     { 0x0e000000, 0xbf208c00, disas_simd_tb },
13885     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
13886     { 0x2e000000, 0xbf208400, disas_simd_ext },
          /* Scalar handlers (disas_simd_scalar_*); indexed ops share one fn */
13887     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
13888     { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
13889     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
13890     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
13891     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
13892     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
13893     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
13894     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
          /* Crypto handlers (disas_crypto_*) */
13895     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
13896     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
13897     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
13898     { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
13899     { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
13900     { 0xce000000, 0xff808000, disas_crypto_four_reg },
13901     { 0xce800000, 0xffe00000, disas_crypto_xar },
13902     { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
          /* Half-precision handlers (*_fp16) */
13903     { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
13904     { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
13905     { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
          /* All-zero row with a NULL fn; presumably the end-of-table marker -- confirm */
13906     { 0x00000000, 0x00000000, NULL }
13907 };
13908 
13909 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13910 {
13911     /* Note that this is called with all non-FP cases from
13912      * table C3-6 so it must UNDEF for entries not specifically
13913      * allocated to instructions in that table.
13914      */
13915     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13916     if (fn) {
13917         fn(s, insn);
13918     } else {
13919         unallocated_encoding(s);
13920     }
13921 }
13922 
13923 /* C3.6 Data processing - SIMD and floating point */
13924 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13925 {
13926     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13927         disas_data_proc_fp(s, insn);
13928     } else {
13929         /* SIMD, including crypto */
13930         disas_data_proc_simd(s, insn);
13931     }
13932 }
13933 
13934 static bool trans_OK(DisasContext *s, arg_OK *a)
13935 {
13936     return true;
13937 }
13938 
13939 static bool trans_FAIL(DisasContext *s, arg_OK *a)
13940 {
13941     s->is_nonstreaming = true;
13942     return true;
13943 }
13944 
13945 /**
13946  * is_guarded_page:
13947  * @env: The cpu environment
13948  * @s: The DisasContext
13949  *
13950  * Return true if the page is guarded.
13951  */
13952 static bool is_guarded_page(CPUARMState *env, DisasContext *s)
13953 {
13954     uint64_t addr = s->base.pc_first;
13955 #ifdef CONFIG_USER_ONLY
13956     return page_get_flags(addr) & PAGE_BTI;
13957 #else
13958     CPUTLBEntryFull *full;
13959     void *host;
13960     int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
13961     int flags;
13962 
13963     /*
13964      * We test this immediately after reading an insn, which means
13965      * that the TLB entry must be present and valid, and thus this
13966      * access will never raise an exception.
13967      */
13968     flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
13969                               false, &host, &full, 0);
13970     assert(!(flags & TLB_INVALID_MASK));
13971 
13972     return full->guarded;
13973 #endif
13974 }
13975 
/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, and is non-zero
 *
 * Only a handful of insns may sit at a branch target on a guarded
 * page:
 *   - branch target identifiers,
 *   - paciasp, pacibsp,
 *   - BRK insn
 *   - HLT insn
 * Any other insn results in a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    bool in_hint_space = (insn & 0xfffff01fu) == 0xd503201fu;

    if (!in_hint_space) {
        /* BRK and HLT: give priority to the breakpoint exception. */
        uint32_t opc = insn & 0xffe0001fu;

        return opc == 0xd4200000u || opc == 0xd4400000u;
    }

    /* HINT space */
    switch (extract32(insn, 5, 7)) {
    case 0b011001: /* PACIASP */
    case 0b011011: /* PACIBSP */
        /*
         * With SCTLR_ELx.BT set, PACI*SP reject btype == 3;
         * with it clear, every btype is accepted.
         */
        return !bt || btype != 3;
    case 0b100010: /* BTI c */
        /* Rejects btype == 3 only. */
        return btype != 3;
    case 0b100100: /* BTI j */
        /* Rejects btype == 2 only. */
        return btype != 2;
    case 0b100110: /* BTI jc */
        /* Accepts every btype. */
        return true;
    case 0b100000: /* BTI: accepts no btype */
    default:       /* any other hint is not a valid target */
        return false;
    }
}
14027 
14028 /* C3.1 A64 instruction index by encoding */
14029 static void disas_a64_legacy(DisasContext *s, uint32_t insn)
14030 {
14031     switch (extract32(insn, 25, 4)) {
14032     case 0xa: case 0xb: /* Branch, exception generation and system insns */
14033         disas_b_exc_sys(s, insn);
14034         break;
14035     case 0x4:
14036     case 0x6:
14037     case 0xc:
14038     case 0xe:      /* Loads and stores */
14039         disas_ldst(s, insn);
14040         break;
14041     case 0x5:
14042     case 0xd:      /* Data processing - register */
14043         disas_data_proc_reg(s, insn);
14044         break;
14045     case 0x7:
14046     case 0xf:      /* Data processing - SIMD and floating point */
14047         disas_data_proc_simd_fp(s, insn);
14048         break;
14049     default:
14050         unallocated_encoding(s);
14051         break;
14052     }
14053 }
14054 
14055 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
14056                                           CPUState *cpu)
14057 {
14058     DisasContext *dc = container_of(dcbase, DisasContext, base);
14059     CPUARMState *env = cpu->env_ptr;
14060     ARMCPU *arm_cpu = env_archcpu(env);
14061     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
14062     int bound, core_mmu_idx;
14063 
14064     dc->isar = &arm_cpu->isar;
14065     dc->condjmp = 0;
14066     dc->pc_save = dc->base.pc_first;
14067     dc->aarch64 = true;
14068     dc->thumb = false;
14069     dc->sctlr_b = 0;
14070     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
14071     dc->condexec_mask = 0;
14072     dc->condexec_cond = 0;
14073     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
14074     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
14075     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
14076     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
14077     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
14078     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
14079 #if !defined(CONFIG_USER_ONLY)
14080     dc->user = (dc->current_el == 0);
14081 #endif
14082     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
14083     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
14084     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
14085     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
14086     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
14087     dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET);
14088     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
14089     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
14090     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
14091     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
14092     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
14093     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
14094     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
14095     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
14096     dc->ata = EX_TBFLAG_A64(tb_flags, ATA);
14097     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
14098     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
14099     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
14100     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
14101     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
14102     dc->vec_len = 0;
14103     dc->vec_stride = 0;
14104     dc->cp_regs = arm_cpu->cp_regs;
14105     dc->features = env->features;
14106     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
14107 
14108 #ifdef CONFIG_USER_ONLY
14109     /* In sve_probe_page, we assume TBI is enabled. */
14110     tcg_debug_assert(dc->tbid & 1);
14111 #endif
14112 
14113     /* Single step state. The code-generation logic here is:
14114      *  SS_ACTIVE == 0:
14115      *   generate code with no special handling for single-stepping (except
14116      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
14117      *   this happens anyway because those changes are all system register or
14118      *   PSTATE writes).
14119      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
14120      *   emit code for one insn
14121      *   emit code to clear PSTATE.SS
14122      *   emit code to generate software step exception for completed step
14123      *   end TB (as usual for having generated an exception)
14124      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
14125      *   emit code to generate a software step exception
14126      *   end the TB
14127      */
14128     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
14129     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
14130     dc->is_ldex = false;
14131 
14132     /* Bound the number of insns to execute to those left on the page.  */
14133     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
14134 
14135     /* If architectural single step active, limit to 1.  */
14136     if (dc->ss_active) {
14137         bound = 1;
14138     }
14139     dc->base.max_insns = MIN(dc->base.max_insns, bound);
14140 }
14141 
14142 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
14143 {
14144 }
14145 
14146 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
14147 {
14148     DisasContext *dc = container_of(dcbase, DisasContext, base);
14149     target_ulong pc_arg = dc->base.pc_next;
14150 
14151     if (tb_cflags(dcbase->tb) & CF_PCREL) {
14152         pc_arg &= ~TARGET_PAGE_MASK;
14153     }
14154     tcg_gen_insn_start(pc_arg, 0, 0);
14155     dc->insn_start = tcg_last_op();
14156 }
14157 
14158 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
14159 {
14160     DisasContext *s = container_of(dcbase, DisasContext, base);
14161     CPUARMState *env = cpu->env_ptr;
14162     uint64_t pc = s->base.pc_next;
14163     uint32_t insn;
14164 
14165     /* Singlestep exceptions have the highest priority. */
14166     if (s->ss_active && !s->pstate_ss) {
14167         /* Singlestep state is Active-pending.
14168          * If we're in this state at the start of a TB then either
14169          *  a) we just took an exception to an EL which is being debugged
14170          *     and this is the first insn in the exception handler
14171          *  b) debug exceptions were masked and we just unmasked them
14172          *     without changing EL (eg by clearing PSTATE.D)
14173          * In either case we're going to take a swstep exception in the
14174          * "did not step an insn" case, and so the syndrome ISV and EX
14175          * bits should be zero.
14176          */
14177         assert(s->base.num_insns == 1);
14178         gen_swstep_exception(s, 0, 0);
14179         s->base.is_jmp = DISAS_NORETURN;
14180         s->base.pc_next = pc + 4;
14181         return;
14182     }
14183 
14184     if (pc & 3) {
14185         /*
14186          * PC alignment fault.  This has priority over the instruction abort
14187          * that we would receive from a translation fault via arm_ldl_code.
14188          * This should only be possible after an indirect branch, at the
14189          * start of the TB.
14190          */
14191         assert(s->base.num_insns == 1);
14192         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
14193         s->base.is_jmp = DISAS_NORETURN;
14194         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
14195         return;
14196     }
14197 
14198     s->pc_curr = pc;
14199     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
14200     s->insn = insn;
14201     s->base.pc_next = pc + 4;
14202 
14203     s->fp_access_checked = false;
14204     s->sve_access_checked = false;
14205 
14206     if (s->pstate_il) {
14207         /*
14208          * Illegal execution state. This has priority over BTI
14209          * exceptions, but comes after instruction abort exceptions.
14210          */
14211         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
14212         return;
14213     }
14214 
14215     if (dc_isar_feature(aa64_bti, s)) {
14216         if (s->base.num_insns == 1) {
14217             /*
14218              * At the first insn of the TB, compute s->guarded_page.
14219              * We delayed computing this until successfully reading
14220              * the first insn of the TB, above.  This (mostly) ensures
14221              * that the softmmu tlb entry has been populated, and the
14222              * page table GP bit is available.
14223              *
14224              * Note that we need to compute this even if btype == 0,
14225              * because this value is used for BR instructions later
14226              * where ENV is not available.
14227              */
14228             s->guarded_page = is_guarded_page(env, s);
14229 
14230             /* First insn can have btype set to non-zero.  */
14231             tcg_debug_assert(s->btype >= 0);
14232 
14233             /*
14234              * Note that the Branch Target Exception has fairly high
14235              * priority -- below debugging exceptions but above most
14236              * everything else.  This allows us to handle this now
14237              * instead of waiting until the insn is otherwise decoded.
14238              */
14239             if (s->btype != 0
14240                 && s->guarded_page
14241                 && !btype_destination_ok(insn, s->bt, s->btype)) {
14242                 gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
14243                 return;
14244             }
14245         } else {
14246             /* Not the first insn: btype must be 0.  */
14247             tcg_debug_assert(s->btype == 0);
14248         }
14249     }
14250 
14251     s->is_nonstreaming = false;
14252     if (s->sme_trap_nonstreaming) {
14253         disas_sme_fa64(s, insn);
14254     }
14255 
14256     if (!disas_a64(s, insn) &&
14257         !disas_sme(s, insn) &&
14258         !disas_sve(s, insn)) {
14259         disas_a64_legacy(s, insn);
14260     }
14261 
14262     /*
14263      * After execution of most insns, btype is reset to 0.
14264      * Note that we set btype == -1 when the insn sets btype.
14265      */
14266     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
14267         reset_btype(s);
14268     }
14269 }
14270 
14271 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
14272 {
14273     DisasContext *dc = container_of(dcbase, DisasContext, base);
14274 
14275     if (unlikely(dc->ss_active)) {
14276         /* Note that this means single stepping WFI doesn't halt the CPU.
14277          * For conditional branch insns this is harmless unreachable code as
14278          * gen_goto_tb() has already handled emitting the debug exception
14279          * (and thus a tb-jump is not possible when singlestepping).
14280          */
14281         switch (dc->base.is_jmp) {
14282         default:
14283             gen_a64_update_pc(dc, 4);
14284             /* fall through */
14285         case DISAS_EXIT:
14286         case DISAS_JUMP:
14287             gen_step_complete_exception(dc);
14288             break;
14289         case DISAS_NORETURN:
14290             break;
14291         }
14292     } else {
14293         switch (dc->base.is_jmp) {
14294         case DISAS_NEXT:
14295         case DISAS_TOO_MANY:
14296             gen_goto_tb(dc, 1, 4);
14297             break;
14298         default:
14299         case DISAS_UPDATE_EXIT:
14300             gen_a64_update_pc(dc, 4);
14301             /* fall through */
14302         case DISAS_EXIT:
14303             tcg_gen_exit_tb(NULL, 0);
14304             break;
14305         case DISAS_UPDATE_NOCHAIN:
14306             gen_a64_update_pc(dc, 4);
14307             /* fall through */
14308         case DISAS_JUMP:
14309             tcg_gen_lookup_and_goto_ptr();
14310             break;
14311         case DISAS_NORETURN:
14312         case DISAS_SWI:
14313             break;
14314         case DISAS_WFE:
14315             gen_a64_update_pc(dc, 4);
14316             gen_helper_wfe(cpu_env);
14317             break;
14318         case DISAS_YIELD:
14319             gen_a64_update_pc(dc, 4);
14320             gen_helper_yield(cpu_env);
14321             break;
14322         case DISAS_WFI:
14323             /*
14324              * This is a special case because we don't want to just halt
14325              * the CPU if trying to debug across a WFI.
14326              */
14327             gen_a64_update_pc(dc, 4);
14328             gen_helper_wfi(cpu_env, tcg_constant_i32(4));
14329             /*
14330              * The helper doesn't necessarily throw an exception, but we
14331              * must go back to the main loop to check for interrupts anyway.
14332              */
14333             tcg_gen_exit_tb(NULL, 0);
14334             break;
14335         }
14336     }
14337 }
14338 
14339 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
14340                                  CPUState *cpu, FILE *logfile)
14341 {
14342     DisasContext *dc = container_of(dcbase, DisasContext, base);
14343 
14344     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
14345     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
14346 }
14347 
14348 const TranslatorOps aarch64_translator_ops = {
14349     .init_disas_context = aarch64_tr_init_disas_context,
14350     .tb_start           = aarch64_tr_tb_start,
14351     .insn_start         = aarch64_tr_insn_start,
14352     .translate_insn     = aarch64_tr_translate_insn,
14353     .tb_stop            = aarch64_tr_tb_stop,
14354     .disas_log          = aarch64_tr_disas_log,
14355 };
14356