/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "qemu/host-utils.h"
#include "semihosting/semihost.h"
#include "exec/gen-icount.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "cpregs.h"
#include "translate-a64.h"
#include "qemu/atomic128.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (e.g. SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
 */
static int get_a64_user_mem_index(DisasContext *s)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}
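
/*
 * Illustrative note (not part of the original source): for an LDTR or
 * STTR executed at EL1 under the EL1&0 regime (ARMMMUIdx_E10_1), the
 * switch above selects ARMMMUIdx_E10_0, i.e. the access is performed
 * with the EL0 translation regime, as AccType_UNPRIV requires.
 */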

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), cpu_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}
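
/*
 * Worked example (illustrative, not part of the original source): with
 * tbi == 2 (TBI1 set, TBI0 clear), the address 0x00ff800000000000 has
 * bit 55 == 1, so the sextract above yields 0xffff800000000000 and the
 * OR keeps that all-ones tag byte; for an address with bit 55 == 0 the
 * OR with src restores the original tag byte, so only "negative"
 * addresses use the extension. With tbi == 1 the AND is the mirror
 * image: only "positive" addresses (bit 55 == 0) have their tag byte
 * replaced by the (zero) extension.
 */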

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register.  But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(cpu_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access.  This probes a single
 * address, the exact one specified.  The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      int log2_size, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int log2_size)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int size)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}
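
/*
 * Illustrative sketch (not from the original source): for a tag-checked
 * 16-byte store with MTE active, the descriptor built above carries
 * MIDX = get_mem_index(s), the TBI/TCMA state of the translation
 * regime, WRITE = 1 and SIZEM1 = 15; gen_helper_mte_check consumes it
 * together with the still-tagged address and returns the address to
 * actually use for the access.
 */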

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(cpu_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments.  For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path.  A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
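
/*
 * Illustrative example (not part of the original source): in
 * "ADD x0, sp, #16" register number 31 in the Rn field means SP, so
 * the decoder uses cpu_reg_sp(s, 31) == cpu_X[31]; in the
 * shifted-register form "ADD x0, x1, xzr" the same number in the Rm
 * field means XZR, so cpu_reg(s, 31) hands back a fresh zero temporary
 * that can be written to and discarded safely.
 */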

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (i.e.
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions are fresh temporaries.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}
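
/*
 * Illustrative note (not part of the original source): with is_q false
 * and a 256-bit SVE vector length, the gvec move above is a no-op on
 * bytes [0, 8) of the register and, because maxsz (32) exceeds
 * oprsz (8), zeroes bytes [8, 32) - exactly the required "clear the
 * high bits" semantics without moving any data.
 */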

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, cpu_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper.  */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + qc + operation using an out-of-line helper.  */
static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
                            int rm, gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), qc_ptr,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
}

/* Expand a 4-operand operation using an out-of-line helper.  */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
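
/*
 * Worked example (illustrative, not part of the original source): for
 * result == 0x8000000000000000, the extr above gives cpu_ZF = 0 and
 * cpu_NF = 0x80000000; OR-ing the halves then makes cpu_ZF nonzero,
 * so Z reads as clear while N (the sign bit of cpu_NF) is set, as
 * expected for a negative, nonzero 64-bit result.
 */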

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
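
/*
 * Illustrative note (not part of the original source): the add2 trick
 * above widens each operand to a double-width value with a zero high
 * half, so after the addition the high output word is exactly the
 * carry out of bit 63 (or bit 31 in the 32-bit path, where it lands
 * directly in cpu_CF).
 */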

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
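
/*
 * Illustrative note (not part of the original source): for subtraction
 * the ARM carry flag means "no borrow", i.e. C = (t0 >= t1) unsigned,
 * which is exactly what the TCG_COND_GEU setcond above computes.
 */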

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    MemOp mop;

    tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));

    if (size < 4) {
        mop = finalize_memop(s, size);
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
        TCGv_i64 tmphi = tcg_temp_new_i64();

        tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));

        mop = s->be_data | MO_UQ;
        tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr,
                            get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;
    MemOp mop;

    if (size < 4) {
        mop = finalize_memop(s, size);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        mop = s->be_data | MO_UQ;
        tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr,
                            get_mem_index(s), mop);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (e.g. for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = true;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        assert(dc_isar_feature(aa64_sme, s));
        if (!sme_sm_enabled_check(s)) {
            goto fail_exit;
        }
    } else if (s->sve_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        goto fail_exit;
    }
    s->sve_access_checked = true;
    return fp_access_check(s);

 fail_exit:
    /* Assert that we only raise one exception per instruction. */
    assert(!s->sve_access_checked);
    s->sve_access_checked = true;
    return false;
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority.  This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        s->fp_access_checked = true;
        return sme_access_check(s);
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}
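
/*
 * Illustrative example (not part of the original source): for
 * "LDR x0, [x1, w2, UXTW #3]" the option field is 0b010 (UXTW) and
 * shift is 3, so this routine zero-extends the low 32 bits of the
 * index register and then shifts the result left by 3.
 */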

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is
 * intended to be used when the relevant bits for decode are too
 * awkwardly placed and switch/if based logic would be confusing and
 * deeply nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}
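
/*
 * Illustrative sketch (hypothetical table, not from the original
 * source):
 *
 *   static const AArch64DecodeTable example_table[] = {
 *       { 0x0e200400, 0x9f20fc00, disas_example },   hypothetical entry
 *       { 0x00000000, 0x00000000, NULL }             terminator
 *   };
 *
 * lookup_disas_fn(example_table, insn) returns disas_example for any
 * insn with (insn & 0x9f20fc00) == 0x0e200400, and NULL otherwise.
 */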

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

/* Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    int64_t diff = sextract32(insn, 0, 26) * 4;

    if (insn & (1U << 31)) {
        /* BL Branch with link */
        gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    }

    /* B Branch / BL Branch with link */
    reset_btype(s);
    gen_goto_tb(s, 0, diff);
}
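
/*
 * Worked example (illustrative, not part of the original source): the
 * encoding 0x97fffffe has bit 31 set and imm26 == 0x3fffffe, so
 * diff == -8 and the insn is "BL <pc - 8>": X30 is first loaded with
 * the address of the following insn, then gen_goto_tb transfers
 * control.
 */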

/* Compare and branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    int64_t diff;
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    diff = sextract32(insn, 5, 19) * 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, diff);
}

/* Test and branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    int64_t diff;
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    diff = sextract32(insn, 5, 14) * 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, diff);
}

/* Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    int64_t diff;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    diff = sextract32(insn, 5, 19) * 4;
    cond = extract32(insn, 0, 4);

    reset_btype(s);
    if (cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, diff);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, diff);
    }
}

/* HINT instruction group, including various allocated HINTs */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0b00000: /* NOP */
        break;
    case 0b00011: /* WFI */
        s->base.is_jmp = DISAS_WFI;
        break;
    case 0b00001: /* YIELD */
        /* When running in MTTCG we don't generate jumps to the yield and
         * WFE helpers as it won't affect the scheduling of other vCPUs.
         * If we wanted to more completely model WFE/SEV so we don't busy
         * spin unnecessarily we would need to do something more involved.
         */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_YIELD;
        }
        break;
    case 0b00010: /* WFE */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_WFE;
        }
        break;
    case 0b00100: /* SEV */
    case 0b00101: /* SEVL */
    case 0b00110: /* DGH */
        /* we treat all as NOP at least for now */
        break;
    case 0b00111: /* XPACLRI */
        if (s->pauth_active) {
            gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
        }
        break;
    case 0b01000: /* PACIA1716 */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01010: /* PACIB1716 */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01100: /* AUTIA1716 */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01110: /* AUTIB1716 */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b10000: /* ESB */
        /* Without RAS, we must implement this as NOP. */
        if (dc_isar_feature(aa64_ras, s)) {
            /*
             * QEMU does not have a source of physical SErrors,
             * so we are only concerned with virtual SErrors.
             * The pseudocode in the ARM for this case is
             *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
             *      AArch64.vESBOperation();
             * Most of the condition can be evaluated at translation time.
             * Test for EL2 present, and defer test for SEL2 to runtime.
             */
            if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
                gen_helper_vesb(cpu_env);
            }
        }
        break;
    case 0b11000: /* PACIAZ */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11001: /* PACIASP */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11010: /* PACIBZ */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11011: /* PACIBSP */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11100: /* AUTIAZ */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11101: /* AUTIASP */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11110: /* AUTIBZ */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11111: /* AUTIBSP */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    default:
        /* default specified as NOP equivalent */
        break;
    }
}

static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}

/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGBar bar;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
        switch (crm & 3) {
        case 1: /* MBReqTypes_Reads */
            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
            break;
        case 2: /* MBReqTypes_Writes */
            bar = TCG_BAR_SC | TCG_MO_ST_ST;
            break;
        default: /* MBReqTypes_All */
            bar = TCG_BAR_SC | TCG_MO_ALL;
            break;
        }
        tcg_gen_mb(bar);
        return;
    case 6: /* ISB */
1580         /* We need to break the TB after this insn to execute
1581          * self-modifying code correctly and also to take
1582          * any pending interrupts immediately.
1583          */
1584         reset_btype(s);
1585         gen_goto_tb(s, 0, 4);
1586         return;
1587 
1588     case 7: /* SB */
1589         if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
1590             goto do_unallocated;
1591         }
1592         /*
1593          * TODO: There is no speculation barrier opcode for TCG;
1594          * MB and end the TB instead.
1595          */
1596         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1597         gen_goto_tb(s, 0, 4);
1598         return;
1599 
1600     default:
1601     do_unallocated:
1602         unallocated_encoding(s);
1603         return;
1604     }
1605 }
1606 
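/*
 * XAFlag (FEAT_FlagM2): convert the NZCV flags from the alternative
 * floating-point condition format back to the Arm format. QEMU keeps
 * N, Z, C and V in separate variables, so each output flag is computed
 * directly, as the per-step comments below show.
 */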
1607 static void gen_xaflag(void)
1608 {
1609     TCGv_i32 z = tcg_temp_new_i32();
1610 
1611     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1612 
1613     /*
1614      * (!C & !Z) << 31
1615      * (!(C | Z)) << 31
1616      * ~((C | Z) << 31)
1617      * ~-(C | Z)
1618      * (C | Z) - 1
1619      */
1620     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1621     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1622 
1623     /* !(Z & C) */
1624     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1625     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1626 
1627     /* (!C & Z) << 31 -> -(Z & ~C) */
1628     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1629     tcg_gen_neg_i32(cpu_VF, cpu_VF);
1630 
1631     /* C | Z */
1632     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1633 }
1634 
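/*
 * AXFlag (FEAT_FlagM2): the inverse conversion, from the Arm format to
 * the alternative floating-point condition format, in which only the
 * Z and C flags carry information.
 */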
1635 static void gen_axflag(void)
1636 {
1637     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
1638     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
1639 
1640     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1641     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1642 
1643     tcg_gen_movi_i32(cpu_NF, 0);
1644     tcg_gen_movi_i32(cpu_VF, 0);
1645 }
1646 
1647 /* MSR (immediate) - move immediate to processor state field */
1648 static void handle_msr_i(DisasContext *s, uint32_t insn,
1649                          unsigned int op1, unsigned int op2, unsigned int crm)
1650 {
1651     int op = op1 << 3 | op2;
1652 
1653     /* End the TB by default; chaining is OK.  */
1654     s->base.is_jmp = DISAS_TOO_MANY;
1655 
1656     switch (op) {
1657     case 0x00: /* CFINV */
1658         if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
1659             goto do_unallocated;
1660         }
1661         tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1662         s->base.is_jmp = DISAS_NEXT;
1663         break;
1664 
1665     case 0x01: /* XAFlag */
1666         if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1667             goto do_unallocated;
1668         }
1669         gen_xaflag();
1670         s->base.is_jmp = DISAS_NEXT;
1671         break;
1672 
1673     case 0x02: /* AXFlag */
1674         if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1675             goto do_unallocated;
1676         }
1677         gen_axflag();
1678         s->base.is_jmp = DISAS_NEXT;
1679         break;
1680 
1681     case 0x03: /* UAO */
1682         if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
1683             goto do_unallocated;
1684         }
1685         if (crm & 1) {
1686             set_pstate_bits(PSTATE_UAO);
1687         } else {
1688             clear_pstate_bits(PSTATE_UAO);
1689         }
1690         gen_rebuild_hflags(s);
1691         break;
1692 
1693     case 0x04: /* PAN */
1694         if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
1695             goto do_unallocated;
1696         }
1697         if (crm & 1) {
1698             set_pstate_bits(PSTATE_PAN);
1699         } else {
1700             clear_pstate_bits(PSTATE_PAN);
1701         }
1702         gen_rebuild_hflags(s);
1703         break;
1704 
1705     case 0x05: /* SPSel */
1706         if (s->current_el == 0) {
1707             goto do_unallocated;
1708         }
1709         gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(crm & PSTATE_SP));
1710         break;
1711 
1712     case 0x19: /* SSBS */
1713         if (!dc_isar_feature(aa64_ssbs, s)) {
1714             goto do_unallocated;
1715         }
1716         if (crm & 1) {
1717             set_pstate_bits(PSTATE_SSBS);
1718         } else {
1719             clear_pstate_bits(PSTATE_SSBS);
1720         }
1721         /* Don't need to rebuild hflags since SSBS is a nop */
1722         break;
1723 
1724     case 0x1a: /* DIT */
1725         if (!dc_isar_feature(aa64_dit, s)) {
1726             goto do_unallocated;
1727         }
1728         if (crm & 1) {
1729             set_pstate_bits(PSTATE_DIT);
1730         } else {
1731             clear_pstate_bits(PSTATE_DIT);
1732         }
1733         /* There's no need to rebuild hflags because DIT is a nop */
1734         break;
1735 
1736     case 0x1e: /* DAIFSet */
1737         gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(crm));
1738         break;
1739 
1740     case 0x1f: /* DAIFClear */
1741         gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(crm));
1742         /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
1743         s->base.is_jmp = DISAS_UPDATE_EXIT;
1744         break;
1745 
1746     case 0x1c: /* TCO */
1747         if (dc_isar_feature(aa64_mte, s)) {
1748             /* Full MTE is enabled -- set the TCO bit as directed. */
1749             if (crm & 1) {
1750                 set_pstate_bits(PSTATE_TCO);
1751             } else {
1752                 clear_pstate_bits(PSTATE_TCO);
1753             }
1754             gen_rebuild_hflags(s);
1755             /* Many factors, including TCO, go into MTE_ACTIVE. */
1756             s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
1757         } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
1758             /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
1759             s->base.is_jmp = DISAS_NEXT;
1760         } else {
1761             goto do_unallocated;
1762         }
1763         break;
1764 
1765     case 0x1b: /* SVCR* */
1766         if (!dc_isar_feature(aa64_sme, s) || crm < 2 || crm > 7) {
1767             goto do_unallocated;
1768         }
1769         if (sme_access_check(s)) {
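            /*
             * MSR SVCR{SM,ZA,SMZA} (the SMSTART/SMSTOP aliases):
             * CRm<2:1> selects which of PSTATE.{SM,ZA} to write (the
             * mask) and CRm<0> is the value to write to those bits.
             */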
1770             int old = s->pstate_sm | (s->pstate_za << 1);
1771             int new = (crm & 1) * 3;
1772             int msk = (crm >> 1) & 3;
1773 
1774             if ((old ^ new) & msk) {
1775                 /* At least one bit changes. */
1776                 gen_helper_set_svcr(cpu_env, tcg_constant_i32(new),
1777                                     tcg_constant_i32(msk));
1778             } else {
1779                 s->base.is_jmp = DISAS_NEXT;
1780             }
1781         }
1782         break;
1783 
1784     default:
1785     do_unallocated:
1786         unallocated_encoding(s);
1787         return;
1788     }
1789 }
1790 
1791 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1792 {
1793     TCGv_i32 tmp = tcg_temp_new_i32();
1794     TCGv_i32 nzcv = tcg_temp_new_i32();
1795 
1796     /* build bit 31, N */
1797     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1798     /* build bit 30, Z */
1799     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1800     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1801     /* build bit 29, C */
1802     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1803     /* build bit 28, V */
1804     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1805     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1806     /* generate result */
1807     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1808 }
1809 
1810 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1811 {
1812     TCGv_i32 nzcv = tcg_temp_new_i32();
1813 
1814     /* take NZCV from R[t] */
1815     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1816 
1817     /* bit 31, N */
1818     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1819     /* bit 30, Z */
1820     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1821     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1822     /* bit 29, C */
1823     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1824     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1825     /* bit 28, V */
1826     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1827     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1828 }
1829 
1830 static void gen_sysreg_undef(DisasContext *s, bool isread,
1831                              uint8_t op0, uint8_t op1, uint8_t op2,
1832                              uint8_t crn, uint8_t crm, uint8_t rt)
1833 {
1834     /*
1835      * Generate code to emit an UNDEF with correct syndrome
1836      * information for a failed system register access.
1837      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
1838      * but if FEAT_IDST is implemented then read accesses to registers
1839      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
1840      * syndrome.
1841      */
1842     uint32_t syndrome;
1843 
1844     if (isread && dc_isar_feature(aa64_ids, s) &&
1845         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
1846         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1847     } else {
1848         syndrome = syn_uncategorized();
1849     }
1850     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
1851 }
1852 
1853 /* MRS - move from system register
1854  * MSR (register) - move to system register
1855  * SYS
1856  * SYSL
1857  * These are all essentially the same insn in 'read' and 'write'
1858  * versions, with varying op0 fields.
1859  */
1860 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1861                        unsigned int op0, unsigned int op1, unsigned int op2,
1862                        unsigned int crn, unsigned int crm, unsigned int rt)
1863 {
1864     uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1865                                       crn, crm, op0, op1, op2);
1866     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
1867     TCGv_ptr tcg_ri = NULL;
1868     TCGv_i64 tcg_rt;
1869 
1870     if (!ri) {
1871         /* Unknown register; this might be a guest error or a QEMU
1872          * unimplemented feature.
1873          */
1874         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1875                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1876                       isread ? "read" : "write", op0, op1, crn, crm, op2);
1877         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
1878         return;
1879     }
1880 
1881     /* Check access permissions */
1882     if (!cp_access_ok(s->current_el, ri, isread)) {
1883         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
1884         return;
1885     }
1886 
1887     if (ri->accessfn || (ri->fgt && s->fgt_active)) {
1888         /* Emit code to perform further access permissions checks at
1889          * runtime; this may result in an exception.
1890          */
1891         uint32_t syndrome;
1892 
1893         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1894         gen_a64_update_pc(s, 0);
1895         tcg_ri = tcg_temp_new_ptr();
1896         gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
1897                                        tcg_constant_i32(key),
1898                                        tcg_constant_i32(syndrome),
1899                                        tcg_constant_i32(isread));
1900     } else if (ri->type & ARM_CP_RAISES_EXC) {
1901         /*
1902          * The readfn or writefn might raise an exception;
1903          * synchronize the CPU state in case it does.
1904          */
1905         gen_a64_update_pc(s, 0);
1906     }
1907 
1908     /* Handle special cases first */
1909     switch (ri->type & ARM_CP_SPECIAL_MASK) {
1910     case 0:
1911         break;
1912     case ARM_CP_NOP:
1913         return;
1914     case ARM_CP_NZCV:
1915         tcg_rt = cpu_reg(s, rt);
1916         if (isread) {
1917             gen_get_nzcv(tcg_rt);
1918         } else {
1919             gen_set_nzcv(tcg_rt);
1920         }
1921         return;
1922     case ARM_CP_CURRENTEL:
1923         /* Reads as the current EL value from pstate, which is
1924          * guaranteed to be constant by the tb flags.
1925          */
1926         tcg_rt = cpu_reg(s, rt);
1927         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1928         return;
1929     case ARM_CP_DC_ZVA:
1930         /* Writes clear the aligned block of memory which rt points into. */
1931         if (s->mte_active[0]) {
1932             int desc = 0;
1933 
1934             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
1935             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
1936             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
1937 
1938             tcg_rt = tcg_temp_new_i64();
1939             gen_helper_mte_check_zva(tcg_rt, cpu_env,
1940                                      tcg_constant_i32(desc), cpu_reg(s, rt));
1941         } else {
1942             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
1943         }
1944         gen_helper_dc_zva(cpu_env, tcg_rt);
1945         return;
1946     case ARM_CP_DC_GVA:
1947         {
1948             TCGv_i64 clean_addr, tag;
1949 
1950             /*
1951              * DC_GVA, like DC_ZVA, requires that we supply the original
1952              * pointer for an invalid page.  Probe that address first.
1953              */
1954             tcg_rt = cpu_reg(s, rt);
1955             clean_addr = clean_data_tbi(s, tcg_rt);
1956             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
1957 
1958             if (s->ata) {
1959                 /* Extract the tag from the register to match STZGM.  */
1960                 tag = tcg_temp_new_i64();
1961                 tcg_gen_shri_i64(tag, tcg_rt, 56);
1962                 gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
1963             }
1964         }
1965         return;
1966     case ARM_CP_DC_GZVA:
1967         {
1968             TCGv_i64 clean_addr, tag;
1969 
1970             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
1971             tcg_rt = cpu_reg(s, rt);
1972             clean_addr = clean_data_tbi(s, tcg_rt);
1973             gen_helper_dc_zva(cpu_env, clean_addr);
1974 
1975             if (s->ata) {
1976                 /* Extract the tag from the register to match STZGM.  */
1977                 tag = tcg_temp_new_i64();
1978                 tcg_gen_shri_i64(tag, tcg_rt, 56);
1979                 gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
1980             }
1981         }
1982         return;
1983     default:
1984         g_assert_not_reached();
1985     }
1986     if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
1987         return;
1988     } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
1989         return;
1990     } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
1991         return;
1992     }
1993 
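    /*
     * In icount mode, an access to an ARM_CP_IO register counts as an
     * I/O operation, so open an I/O window before performing it.
     */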
1994     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1995         gen_io_start();
1996     }
1997 
1998     tcg_rt = cpu_reg(s, rt);
1999 
2000     if (isread) {
2001         if (ri->type & ARM_CP_CONST) {
2002             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2003         } else if (ri->readfn) {
2004             if (!tcg_ri) {
2005                 tcg_ri = gen_lookup_cp_reg(key);
2006             }
2007             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tcg_ri);
2008         } else {
2009             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
2010         }
2011     } else {
2012         if (ri->type & ARM_CP_CONST) {
2013             /* If not forbidden by access permissions, treat as WI */
2014             return;
2015         } else if (ri->writefn) {
2016             if (!tcg_ri) {
2017                 tcg_ri = gen_lookup_cp_reg(key);
2018             }
2019             gen_helper_set_cp_reg64(cpu_env, tcg_ri, tcg_rt);
2020         } else {
2021             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
2022         }
2023     }
2024 
2025     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
2026         /* I/O operations must end the TB here (whether read or write) */
2027         s->base.is_jmp = DISAS_UPDATE_EXIT;
2028     }
2029     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2030         /*
2031          * A write to any coprocessor register that ends a TB
2032          * must rebuild the hflags for the next TB.
2033          */
2034         gen_rebuild_hflags(s);
2035         /*
2036          * We default to ending the TB on a coprocessor register write,
2037          * but allow this to be suppressed by the register definition
2038          * (usually only necessary to work around guest bugs).
2039          */
2040         s->base.is_jmp = DISAS_UPDATE_EXIT;
2041     }
2042 }
2043 
2044 /* System
2045  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
2046  * +---------------------+---+-----+-----+-------+-------+-----+------+
2047  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
2048  * +---------------------+---+-----+-----+-------+-------+-----+------+
2049  */
2050 static void disas_system(DisasContext *s, uint32_t insn)
2051 {
2052     unsigned int l, op0, op1, crn, crm, op2, rt;
2053     l = extract32(insn, 21, 1);
2054     op0 = extract32(insn, 19, 2);
2055     op1 = extract32(insn, 16, 3);
2056     crn = extract32(insn, 12, 4);
2057     crm = extract32(insn, 8, 4);
2058     op2 = extract32(insn, 5, 3);
2059     rt = extract32(insn, 0, 5);
2060 
2061     if (op0 == 0) {
2062         if (l || rt != 31) {
2063             unallocated_encoding(s);
2064             return;
2065         }
2066         switch (crn) {
2067         case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
2068             handle_hint(s, insn, op1, op2, crm);
2069             break;
2070         case 3: /* CLREX, DSB, DMB, ISB */
2071             handle_sync(s, insn, op1, op2, crm);
2072             break;
2073         case 4: /* MSR (immediate) */
2074             handle_msr_i(s, insn, op1, op2, crm);
2075             break;
2076         default:
2077             unallocated_encoding(s);
2078             break;
2079         }
2080         return;
2081     }
2082     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
2083 }
2084 
2085 /* Exception generation
2086  *
2087  *  31             24 23 21 20                     5 4   2 1  0
2088  * +-----------------+-----+------------------------+-----+----+
2089  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
2090  * +-----------------+-----+------------------------+-----+----+
2091  */
2092 static void disas_exc(DisasContext *s, uint32_t insn)
2093 {
2094     int opc = extract32(insn, 21, 3);
2095     int op2_ll = extract32(insn, 0, 5);
2096     int imm16 = extract32(insn, 5, 16);
2097     uint32_t syndrome;
2098 
2099     switch (opc) {
2100     case 0:
2101         /* For SVC, HVC and SMC we advance the single-step state
2102          * machine before taking the exception. This is architecturally
2103          * mandated, to ensure that single-stepping a system call
2104          * instruction works properly.
2105          */
2106         switch (op2_ll) {
2107         case 1:                                                     /* SVC */
2108             syndrome = syn_aa64_svc(imm16);
2109             if (s->fgt_svc) {
2110                 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2111                 break;
2112             }
2113             gen_ss_advance(s);
2114             gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2115             break;
2116         case 2:                                                     /* HVC */
2117             if (s->current_el == 0) {
2118                 unallocated_encoding(s);
2119                 break;
2120             }
2121             /* The pre-HVC helper handles cases where HVC is trapped
2122              * as an undefined insn by runtime configuration.
2123              */
2124             gen_a64_update_pc(s, 0);
2125             gen_helper_pre_hvc(cpu_env);
2126             gen_ss_advance(s);
2127             gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(imm16), 2);
2128             break;
2129         case 3:                                                     /* SMC */
2130             if (s->current_el == 0) {
2131                 unallocated_encoding(s);
2132                 break;
2133             }
2134             gen_a64_update_pc(s, 0);
2135             gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(imm16)));
2136             gen_ss_advance(s);
2137             gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(imm16), 3);
2138             break;
2139         default:
2140             unallocated_encoding(s);
2141             break;
2142         }
2143         break;
2144     case 1:
2145         if (op2_ll != 0) {
2146             unallocated_encoding(s);
2147             break;
2148         }
2149         /* BRK */
2150         gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
2151         break;
2152     case 2:
2153         if (op2_ll != 0) {
2154             unallocated_encoding(s);
2155             break;
2156         }
2157         /* HLT. This has two purposes.
2158          * Architecturally, it is an external halting debug instruction.
2159          * Since QEMU doesn't implement external debug, we treat this as
2160          * the architecture requires when halting debug is disabled: it UNDEFs.
2161          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2162          */
2163         if (semihosting_enabled(s->current_el == 0) && imm16 == 0xf000) {
2164             gen_exception_internal_insn(s, EXCP_SEMIHOST);
2165         } else {
2166             unallocated_encoding(s);
2167         }
2168         break;
2169     case 5:
2170         if (op2_ll < 1 || op2_ll > 3) {
2171             unallocated_encoding(s);
2172             break;
2173         }
2174         /* DCPS1, DCPS2, DCPS3: QEMU doesn't implement external debug, so these UNDEF. */
2175         unallocated_encoding(s);
2176         break;
2177     default:
2178         unallocated_encoding(s);
2179         break;
2180     }
2181 }
2182 
2183 /* Unconditional branch (register)
2184  *  31           25 24   21 20   16 15   10 9    5 4     0
2185  * +---------------+-------+-------+-------+------+-------+
2186  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
2187  * +---------------+-------+-------+-------+------+-------+
2188  */
2189 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
2190 {
2191     unsigned int opc, op2, op3, rn, op4;
2192     unsigned btype_mod = 2;   /* 0: BR, 1: BLR, 2: other */
2193     TCGv_i64 dst;
2194     TCGv_i64 modifier;
2195 
2196     opc = extract32(insn, 21, 4);
2197     op2 = extract32(insn, 16, 5);
2198     op3 = extract32(insn, 10, 6);
2199     rn = extract32(insn, 5, 5);
2200     op4 = extract32(insn, 0, 5);
2201 
2202     if (op2 != 0x1f) {
2203         goto do_unallocated;
2204     }
2205 
2206     switch (opc) {
2207     case 0: /* BR */
2208     case 1: /* BLR */
2209     case 2: /* RET */
2210         btype_mod = opc;
2211         switch (op3) {
2212         case 0:
2213             /* BR, BLR, RET */
2214             if (op4 != 0) {
2215                 goto do_unallocated;
2216             }
2217             dst = cpu_reg(s, rn);
2218             break;
2219 
2220         case 2:
2221         case 3:
2222             if (!dc_isar_feature(aa64_pauth, s)) {
2223                 goto do_unallocated;
2224             }
2225             if (opc == 2) {
2226                 /* RETAA, RETAB */
2227                 if (rn != 0x1f || op4 != 0x1f) {
2228                     goto do_unallocated;
2229                 }
2230                 rn = 30;
2231                 modifier = cpu_X[31];
2232             } else {
2233                 /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */
2234                 if (op4 != 0x1f) {
2235                     goto do_unallocated;
2236                 }
2237                 modifier = tcg_constant_i64(0);
2238             }
2239             if (s->pauth_active) {
2240                 dst = tcg_temp_new_i64();
2241                 if (op3 == 2) {
2242                     gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2243                 } else {
2244                     gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2245                 }
2246             } else {
2247                 dst = cpu_reg(s, rn);
2248             }
2249             break;
2250 
2251         default:
2252             goto do_unallocated;
2253         }
2254         /* BLR also needs to load return address */
2255         if (opc == 1) {
2256             TCGv_i64 lr = cpu_reg(s, 30);
2257             if (dst == lr) {
2258                 TCGv_i64 tmp = tcg_temp_new_i64();
2259                 tcg_gen_mov_i64(tmp, dst);
2260                 dst = tmp;
2261             }
2262             gen_pc_plus_diff(s, lr, curr_insn_len(s));
2263         }
2264         gen_a64_set_pc(s, dst);
2265         break;
2266 
2267     case 8: /* BRAA */
2268     case 9: /* BLRAA */
2269         if (!dc_isar_feature(aa64_pauth, s)) {
2270             goto do_unallocated;
2271         }
2272         if ((op3 & ~1) != 2) {
2273             goto do_unallocated;
2274         }
2275         btype_mod = opc & 1;
2276         if (s->pauth_active) {
2277             dst = tcg_temp_new_i64();
2278             modifier = cpu_reg_sp(s, op4);
2279             if (op3 == 2) {
2280                 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2281             } else {
2282                 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2283             }
2284         } else {
2285             dst = cpu_reg(s, rn);
2286         }
2287         /* BLRAA also needs to load return address */
2288         if (opc == 9) {
2289             TCGv_i64 lr = cpu_reg(s, 30);
2290             if (dst == lr) {
2291                 TCGv_i64 tmp = tcg_temp_new_i64();
2292                 tcg_gen_mov_i64(tmp, dst);
2293                 dst = tmp;
2294             }
2295             gen_pc_plus_diff(s, lr, curr_insn_len(s));
2296         }
2297         gen_a64_set_pc(s, dst);
2298         break;
2299 
2300     case 4: /* ERET */
2301         if (s->current_el == 0) {
2302             goto do_unallocated;
2303         }
2304         switch (op3) {
2305         case 0: /* ERET */
2306             if (op4 != 0) {
2307                 goto do_unallocated;
2308             }
2309             if (s->fgt_eret) {
2310                 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(op3), 2);
2311                 return;
2312             }
2313             dst = tcg_temp_new_i64();
2314             tcg_gen_ld_i64(dst, cpu_env,
2315                            offsetof(CPUARMState, elr_el[s->current_el]));
2316             break;
2317 
2318         case 2: /* ERETAA */
2319         case 3: /* ERETAB */
2320             if (!dc_isar_feature(aa64_pauth, s)) {
2321                 goto do_unallocated;
2322             }
2323             if (rn != 0x1f || op4 != 0x1f) {
2324                 goto do_unallocated;
2325             }
2326             /* The FGT trap takes precedence over an auth trap. */
2327             if (s->fgt_eret) {
2328                 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(op3), 2);
2329                 return;
2330             }
2331             dst = tcg_temp_new_i64();
2332             tcg_gen_ld_i64(dst, cpu_env,
2333                            offsetof(CPUARMState, elr_el[s->current_el]));
2334             if (s->pauth_active) {
2335                 modifier = cpu_X[31];
2336                 if (op3 == 2) {
2337                     gen_helper_autia(dst, cpu_env, dst, modifier);
2338                 } else {
2339                     gen_helper_autib(dst, cpu_env, dst, modifier);
2340                 }
2341             }
2342             break;
2343 
2344         default:
2345             goto do_unallocated;
2346         }
2347         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2348             gen_io_start();
2349         }
2350 
2351         gen_helper_exception_return(cpu_env, dst);
2352         /* Must exit loop to check un-masked IRQs */
2353         s->base.is_jmp = DISAS_EXIT;
2354         return;
2355 
2356     case 5: /* DRPS */
2357         if (op3 != 0 || op4 != 0 || rn != 0x1f) {
2358             goto do_unallocated;
2359         } else {
2360             unallocated_encoding(s);
2361         }
2362         return;
2363 
2364     default:
2365     do_unallocated:
2366         unallocated_encoding(s);
2367         return;
2368     }
2369 
2370     switch (btype_mod) {
2371     case 0: /* BR */
2372         if (dc_isar_feature(aa64_bti, s)) {
2373             /* BR to {x16,x17} or !guard -> 1, else 3.  */
2374             set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
2375         }
2376         break;
2377 
2378     case 1: /* BLR */
2379         if (dc_isar_feature(aa64_bti, s)) {
2380             /* BLR sets BTYPE to 2, regardless of source guarded page.  */
2381             set_btype(s, 2);
2382         }
2383         break;
2384 
2385     default: /* RET or none of the above.  */
2386         /* BTYPE will be set to 0 by normal end-of-insn processing.  */
2387         break;
2388     }
2389 
2390     s->base.is_jmp = DISAS_JUMP;
2391 }
2392 
2393 /* Branches, exception generating and system instructions */
2394 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2395 {
2396     switch (extract32(insn, 25, 7)) {
2397     case 0x0a: case 0x0b:
2398     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
2399         disas_uncond_b_imm(s, insn);
2400         break;
2401     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
2402         disas_comp_b_imm(s, insn);
2403         break;
2404     case 0x1b: case 0x5b: /* Test & branch (immediate) */
2405         disas_test_b_imm(s, insn);
2406         break;
2407     case 0x2a: /* Conditional branch (immediate) */
2408         disas_cond_b_imm(s, insn);
2409         break;
2410     case 0x6a: /* Exception generation / System */
2411         if (insn & (1 << 24)) {
2412             if (extract32(insn, 22, 2) == 0) {
2413                 disas_system(s, insn);
2414             } else {
2415                 unallocated_encoding(s);
2416             }
2417         } else {
2418             disas_exc(s, insn);
2419         }
2420         break;
2421     case 0x6b: /* Unconditional branch (register) */
2422         disas_uncond_b_reg(s, insn);
2423         break;
2424     default:
2425         unallocated_encoding(s);
2426         break;
2427     }
2428 }
2429 
2430 /*
2431  * Load/Store exclusive instructions are implemented by remembering
2432  * the value/address loaded, and seeing if these are the same
2433  * when the store is performed. This is not actually the architecturally
2434  * mandated semantics, but it works for typical guest code sequences
2435  * and avoids having to monitor regular stores.
2436  *
2437  * The store exclusive uses the atomic cmpxchg primitives to avoid
2438  * races in multi-threaded linux-user and when MTTCG softmmu is
2439  * enabled.
2440  */
2441 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
2442                                TCGv_i64 addr, int size, bool is_pair)
2443 {
2444     int idx = get_mem_index(s);
2445     MemOp memop = s->be_data;
2446 
2447     g_assert(size <= 3);
2448     if (is_pair) {
2449         g_assert(size >= 2);
2450         if (size == 2) {
2451             /* The pair must be single-copy atomic for the doubleword.  */
2452             memop |= MO_64 | MO_ALIGN;
2453             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2454             if (s->be_data == MO_LE) {
2455                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2456                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2457             } else {
2458                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2459                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2460             }
2461         } else {
2462             /* The pair must be single-copy atomic for *each* doubleword,
2463                not the entire quadword; however, it must be quadword aligned.  */
2464             memop |= MO_64;
2465             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
2466                                 memop | MO_ALIGN_16);
2467 
2468             TCGv_i64 addr2 = tcg_temp_new_i64();
2469             tcg_gen_addi_i64(addr2, addr, 8);
2470             tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
2471 
2472             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2473             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2474         }
2475     } else {
2476         memop |= size | MO_ALIGN;
2477         tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2478         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2479     }
2480     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
2481 }
2482 
2483 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2484                                 TCGv_i64 addr, int size, int is_pair)
2485 {
2486     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2487      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2488      *     [addr] = {Rt};
2489      *     if (is_pair) {
2490      *         [addr + datasize] = {Rt2};
2491      *     }
2492      *     {Rd} = 0;
2493      * } else {
2494      *     {Rd} = 1;
2495      * }
2496      * env->exclusive_addr = -1;
2497      */
2498     TCGLabel *fail_label = gen_new_label();
2499     TCGLabel *done_label = gen_new_label();
2500     TCGv_i64 tmp;
2501 
2502     tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
2503 
2504     tmp = tcg_temp_new_i64();
2505     if (is_pair) {
2506         if (size == 2) {
2507             if (s->be_data == MO_LE) {
2508                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2509             } else {
2510                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2511             }
2512             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2513                                        cpu_exclusive_val, tmp,
2514                                        get_mem_index(s),
2515                                        MO_64 | MO_ALIGN | s->be_data);
2516             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2517         } else {
2518             TCGv_i128 t16 = tcg_temp_new_i128();
2519             TCGv_i128 c16 = tcg_temp_new_i128();
2520             TCGv_i64 a, b;
2521 
2522             if (s->be_data == MO_LE) {
2523                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2524                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2525                                         cpu_exclusive_high);
2526             } else {
2527                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2528                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2529                                         cpu_exclusive_val);
2530             }
2531 
2532             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
2533                                         get_mem_index(s),
2534                                         MO_128 | MO_ALIGN | s->be_data);
2535 
2536             a = tcg_temp_new_i64();
2537             b = tcg_temp_new_i64();
2538             if (s->be_data == MO_LE) {
2539                 tcg_gen_extr_i128_i64(a, b, t16);
2540             } else {
2541                 tcg_gen_extr_i128_i64(b, a, t16);
2542             }
2543 
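            /*
             * Fold the returned old value against the expected pair:
             * tmp is zero only if both halves matched, and the setcond
             * below turns that into the 0/1 status value for Rd.
             */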
2544             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
2545             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
2546             tcg_gen_or_i64(tmp, a, b);
2547 
2548             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
2549         }
2550     } else {
2551         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2552                                    cpu_reg(s, rt), get_mem_index(s),
2553                                    size | MO_ALIGN | s->be_data);
2554         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2555     }
2556     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2557     tcg_gen_br(done_label);
2558 
2559     gen_set_label(fail_label);
2560     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2561     gen_set_label(done_label);
2562     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2563 }
2564 
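/*
 * CAS: compare and swap a single register. Rs supplies the value to
 * compare against memory and receives the old memory value; Rt
 * supplies the value to store on a match.
 */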
2565 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2566                                  int rn, int size)
2567 {
2568     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2569     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2570     int memidx = get_mem_index(s);
2571     TCGv_i64 clean_addr;
2572 
2573     if (rn == 31) {
2574         gen_check_sp_alignment(s);
2575     }
2576     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size);
2577     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
2578                                size | MO_ALIGN | s->be_data);
2579 }
2580 
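/*
 * CASP: compare and swap a register pair, performed as one
 * single-copy-atomic access of twice the element size. Rs:Rs+1
 * receive the old memory value; Rt:Rt+1 supply the new value.
 */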
2581 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2582                                       int rn, int size)
2583 {
2584     TCGv_i64 s1 = cpu_reg(s, rs);
2585     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2586     TCGv_i64 t1 = cpu_reg(s, rt);
2587     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2588     TCGv_i64 clean_addr;
2589     int memidx = get_mem_index(s);
2590 
2591     if (rn == 31) {
2592         gen_check_sp_alignment(s);
2593     }
2594 
2595     /* This is a single atomic access, despite the "pair". */
2596     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1);
2597 
2598     if (size == 2) {
2599         TCGv_i64 cmp = tcg_temp_new_i64();
2600         TCGv_i64 val = tcg_temp_new_i64();
2601 
2602         if (s->be_data == MO_LE) {
2603             tcg_gen_concat32_i64(val, t1, t2);
2604             tcg_gen_concat32_i64(cmp, s1, s2);
2605         } else {
2606             tcg_gen_concat32_i64(val, t2, t1);
2607             tcg_gen_concat32_i64(cmp, s2, s1);
2608         }
2609 
2610         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
2611                                    MO_64 | MO_ALIGN | s->be_data);
2612 
2613         if (s->be_data == MO_LE) {
2614             tcg_gen_extr32_i64(s1, s2, cmp);
2615         } else {
2616             tcg_gen_extr32_i64(s2, s1, cmp);
2617         }
2618     } else {
2619         TCGv_i128 cmp = tcg_temp_new_i128();
2620         TCGv_i128 val = tcg_temp_new_i128();
2621 
2622         if (s->be_data == MO_LE) {
2623             tcg_gen_concat_i64_i128(val, t1, t2);
2624             tcg_gen_concat_i64_i128(cmp, s1, s2);
2625         } else {
2626             tcg_gen_concat_i64_i128(val, t2, t1);
2627             tcg_gen_concat_i64_i128(cmp, s2, s1);
2628         }
2629 
2630         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx,
2631                                     MO_128 | MO_ALIGN | s->be_data);
2632 
2633         if (s->be_data == MO_LE) {
2634             tcg_gen_extr_i128_i64(s1, s2, cmp);
2635         } else {
2636             tcg_gen_extr_i128_i64(s2, s1, cmp);
2637         }
2638     }
2639 }
2640 
2641 /* Compute the Sixty-Four bit (SF) register size. This logic is derived
2642  * from the ARMv8 specs for LDR (shared decode for all encodings).
2643  */
2644 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
2645 {
2646     int opc0 = extract32(opc, 0, 1);
2647     int regsize;
2648 
2649     if (is_signed) {
2650         regsize = opc0 ? 32 : 64;
2651     } else {
2652         regsize = size == 3 ? 64 : 32;
2653     }
2654     return regsize == 64;
2655 }
2656 
2657 /* Load/store exclusive
2658  *
2659  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2660  * +-----+-------------+----+---+----+------+----+-------+------+------+
2661  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2662  * +-----+-------------+----+---+----+------+----+-------+------+------+
2663  *
2664  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2665  *   L: 0 -> store, 1 -> load
2666  *  o2: 0 -> exclusive, 1 -> not
2667  *  o1: 0 -> single register, 1 -> register pair
2668  *  o0: 1 -> load-acquire/store-release, 0 -> not
2669  */
2670 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2671 {
2672     int rt = extract32(insn, 0, 5);
2673     int rn = extract32(insn, 5, 5);
2674     int rt2 = extract32(insn, 10, 5);
2675     int rs = extract32(insn, 16, 5);
2676     int is_lasr = extract32(insn, 15, 1);
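    /* Pack the o2:L:o1:o0 fields into a single value for the switch below. */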
2677     int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2678     int size = extract32(insn, 30, 2);
2679     TCGv_i64 clean_addr;
2680 
2681     switch (o2_L_o1_o0) {
2682     case 0x0: /* STXR */
2683     case 0x1: /* STLXR */
2684         if (rn == 31) {
2685             gen_check_sp_alignment(s);
2686         }
2687         if (is_lasr) {
2688             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2689         }
2690         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2691                                     true, rn != 31, size);
2692         gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
2693         return;
2694 
2695     case 0x4: /* LDXR */
2696     case 0x5: /* LDAXR */
2697         if (rn == 31) {
2698             gen_check_sp_alignment(s);
2699         }
2700         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2701                                     false, rn != 31, size);
2702         s->is_ldex = true;
2703         gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
2704         if (is_lasr) {
2705             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2706         }
2707         return;
2708 
2709     case 0x8: /* STLLR */
2710         if (!dc_isar_feature(aa64_lor, s)) {
2711             break;
2712         }
2713         /* StoreLORelease is the same as Store-Release for QEMU.  */
2714         /* fall through */
2715     case 0x9: /* STLR */
2716         /* Generate ISS for non-exclusive accesses including LASR.  */
2717         if (rn == 31) {
2718             gen_check_sp_alignment(s);
2719         }
2720         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2721         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2722                                     true, rn != 31, size);
2723         /* TODO: ARMv8.4-LSE SCTLR.nAA */
2724         do_gpr_st(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, true, rt,
2725                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2726         return;
2727 
2728     case 0xc: /* LDLAR */
2729         if (!dc_isar_feature(aa64_lor, s)) {
2730             break;
2731         }
2732         /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2733         /* fall through */
2734     case 0xd: /* LDAR */
2735         /* Generate ISS for non-exclusive accesses including LASR.  */
2736         if (rn == 31) {
2737             gen_check_sp_alignment(s);
2738         }
2739         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2740                                     false, rn != 31, size);
2741         /* TODO: ARMv8.4-LSE SCTLR.nAA */
2742         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, false, true,
2743                   rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2744         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2745         return;
2746 
2747     case 0x2: case 0x3: /* CASP / STXP */
2748         if (size & 2) { /* STXP / STLXP */
2749             if (rn == 31) {
2750                 gen_check_sp_alignment(s);
2751             }
2752             if (is_lasr) {
2753                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2754             }
2755             clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2756                                         true, rn != 31, size);
2757             gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
2758             return;
2759         }
2760         if (rt2 == 31
2761             && ((rt | rs) & 1) == 0
2762             && dc_isar_feature(aa64_atomics, s)) {
2763             /* CASP / CASPL */
2764             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2765             return;
2766         }
2767         break;
2768 
2769     case 0x6: case 0x7: /* CASPA / LDXP */
2770         if (size & 2) { /* LDXP / LDAXP */
2771             if (rn == 31) {
2772                 gen_check_sp_alignment(s);
2773             }
2774             clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2775                                         false, rn != 31, size);
2776             s->is_ldex = true;
2777             gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
2778             if (is_lasr) {
2779                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2780             }
2781             return;
2782         }
2783         if (rt2 == 31
2784             && ((rt | rs) & 1) == 0
2785             && dc_isar_feature(aa64_atomics, s)) {
2786             /* CASPA / CASPAL */
2787             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2788             return;
2789         }
2790         break;
2791 
2792     case 0xa: /* CAS */
2793     case 0xb: /* CASL */
2794     case 0xe: /* CASA */
2795     case 0xf: /* CASAL */
2796         if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
2797             gen_compare_and_swap(s, rs, rt, rn, size);
2798             return;
2799         }
2800         break;
2801     }
2802     unallocated_encoding(s);
2803 }
2804 
2805 /*
2806  * Load register (literal)
2807  *
2808  *  31 30 29   27  26 25 24 23                5 4     0
2809  * +-----+-------+---+-----+-------------------+-------+
2810  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2811  * +-----+-------+---+-----+-------------------+-------+
2812  *
2813  * V: 1 -> vector (simd/fp)
2814  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2815  *                   10-> 32 bit signed, 11 -> prefetch
2816  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2817  */
2818 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2819 {
2820     int rt = extract32(insn, 0, 5);
2821     int64_t imm = sextract32(insn, 5, 19) << 2;
2822     bool is_vector = extract32(insn, 26, 1);
2823     int opc = extract32(insn, 30, 2);
2824     bool is_signed = false;
2825     int size = 2;
2826     TCGv_i64 tcg_rt, clean_addr;
2827 
2828     if (is_vector) {
2829         if (opc == 3) {
2830             unallocated_encoding(s);
2831             return;
2832         }
2833         size = 2 + opc;
2834         if (!fp_access_check(s)) {
2835             return;
2836         }
2837     } else {
2838         if (opc == 3) {
2839             /* PRFM (literal) : prefetch */
2840             return;
2841         }
2842         size = 2 + extract32(opc, 0, 1);
2843         is_signed = extract32(opc, 1, 1);
2844     }
2845 
2846     tcg_rt = cpu_reg(s, rt);
2847 
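    /* The access is PC-relative: address = this insn's PC + imm19 * 4. */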
2848     clean_addr = tcg_temp_new_i64();
2849     gen_pc_plus_diff(s, clean_addr, imm);
2850     if (is_vector) {
2851         do_fp_ld(s, rt, clean_addr, size);
2852     } else {
2853         /* Only unsigned 32-bit loads target 32-bit registers.  */
2854         bool iss_sf = opc != 0;
2855 
2856         do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
2857                   false, true, rt, iss_sf, false);
2858     }
2859 }
2860 
2861 /*
2862  * LDNP (Load Pair - non-temporal hint)
2863  * LDP (Load Pair - non vector)
2864  * LDPSW (Load Pair Signed Word - non vector)
2865  * STNP (Store Pair - non-temporal hint)
2866  * STP (Store Pair - non vector)
2867  * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2868  * LDP (Load Pair of SIMD&FP)
2869  * STNP (Store Pair of SIMD&FP - non-temporal hint)
2870  * STP (Store Pair of SIMD&FP)
2871  *
2872  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2873  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2874  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2875  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2876  *
2877  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2878  *      LDPSW/STGP               01
2879  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2880  *   V: 0 -> GPR, 1 -> Vector
2881  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2882  *      10 -> signed offset, 11 -> pre-index
2883  *   L: 0 -> Store 1 -> Load
2884  *
2885  * Rt, Rt2 = GPR or SIMD registers to be stored
2886  * Rn = general purpose register containing address
2887  * imm7 = signed offset (multiple of 4 or 8 depending on size)
2888  */
2889 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2890 {
2891     int rt = extract32(insn, 0, 5);
2892     int rn = extract32(insn, 5, 5);
2893     int rt2 = extract32(insn, 10, 5);
2894     uint64_t offset = sextract64(insn, 15, 7);
2895     int index = extract32(insn, 23, 2);
2896     bool is_vector = extract32(insn, 26, 1);
2897     bool is_load = extract32(insn, 22, 1);
2898     int opc = extract32(insn, 30, 2);
2899 
2900     bool is_signed = false;
2901     bool postindex = false;
2902     bool wback = false;
2903     bool set_tag = false;
2904 
2905     TCGv_i64 clean_addr, dirty_addr;
2906 
2907     int size;
2908 
2909     if (opc == 3) {
2910         unallocated_encoding(s);
2911         return;
2912     }
2913 
2914     if (is_vector) {
2915         size = 2 + opc;
2916     } else if (opc == 1 && !is_load) {
2917         /* STGP */
2918         if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) {
2919             unallocated_encoding(s);
2920             return;
2921         }
2922         size = 3;
2923         set_tag = true;
2924     } else {
2925         size = 2 + extract32(opc, 1, 1);
2926         is_signed = extract32(opc, 0, 1);
2927         if (!is_load && is_signed) {
2928             unallocated_encoding(s);
2929             return;
2930         }
2931     }
2932 
2933     switch (index) {
2934     case 1: /* post-index */
2935         postindex = true;
2936         wback = true;
2937         break;
2938     case 0:
2939         /* signed offset with "non-temporal" hint. Since we don't emulate
2940          * caches we don't care about hints to the cache system about
2941          * data access patterns, and handle this identically to plain
2942          * signed offset.
2943          */
2944         if (is_signed) {
2945             /* There is no non-temporal-hint version of LDPSW */
2946             unallocated_encoding(s);
2947             return;
2948         }
2949         postindex = false;
2950         break;
2951     case 2: /* signed offset, rn not updated */
2952         postindex = false;
2953         break;
2954     case 3: /* pre-index */
2955         postindex = false;
2956         wback = true;
2957         break;
2958     }
2959 
2960     if (is_vector && !fp_access_check(s)) {
2961         return;
2962     }
2963 
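    /* imm7 is scaled by the access size, or by the tag granule for STGP. */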
2964     offset <<= (set_tag ? LOG2_TAG_GRANULE : size);
2965 
2966     if (rn == 31) {
2967         gen_check_sp_alignment(s);
2968     }
2969 
2970     dirty_addr = read_cpu_reg_sp(s, rn, 1);
2971     if (!postindex) {
2972         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2973     }
2974 
2975     if (set_tag) {
2976         if (!s->ata) {
2977             /*
2978              * TODO: We could rely on the stores below, at least for
2979              * system mode, if we arrange to add MO_ALIGN_16.
2980              */
2981             gen_helper_stg_stub(cpu_env, dirty_addr);
2982         } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2983             gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
2984         } else {
2985             gen_helper_stg(cpu_env, dirty_addr, dirty_addr);
2986         }
2987     }
2988 
2989     clean_addr = gen_mte_checkN(s, dirty_addr, !is_load,
2990                                 (wback || rn != 31) && !set_tag, 2 << size);
2991 
2992     if (is_vector) {
2993         if (is_load) {
2994             do_fp_ld(s, rt, clean_addr, size);
2995         } else {
2996             do_fp_st(s, rt, clean_addr, size);
2997         }
2998         tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2999         if (is_load) {
3000             do_fp_ld(s, rt2, clean_addr, size);
3001         } else {
3002             do_fp_st(s, rt2, clean_addr, size);
3003         }
3004     } else {
3005         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3006         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
3007 
3008         if (is_load) {
3009             TCGv_i64 tmp = tcg_temp_new_i64();
3010 
3011             /* Do not modify tcg_rt before recognizing any exception
3012              * from the second load.
3013              */
3014             do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN,
3015                       false, false, 0, false, false);
3016             tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
3017             do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN,
3018                       false, false, 0, false, false);
3019 
3020             tcg_gen_mov_i64(tcg_rt, tmp);
3021         } else {
3022             do_gpr_st(s, tcg_rt, clean_addr, size,
3023                       false, 0, false, false);
3024             tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
3025             do_gpr_st(s, tcg_rt2, clean_addr, size,
3026                       false, 0, false, false);
3027         }
3028     }
3029 
3030     if (wback) {
3031         if (postindex) {
3032             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3033         }
3034         tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3035     }
3036 }
3037 
3038 /*
3039  * Load/store (immediate post-indexed)
3040  * Load/store (immediate pre-indexed)
3041  * Load/store (unscaled immediate)
3042  *
3043  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
3044  * +----+-------+---+-----+-----+---+--------+-----+------+------+
3045  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
3046  * +----+-------+---+-----+-----+---+--------+-----+------+------+
3047  *
3048  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
3049  *       10 -> unprivileged
3050  * V = 0 -> non-vector
3051  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
3052  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3053  */
3054 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
3055                                 int opc,
3056                                 int size,
3057                                 int rt,
3058                                 bool is_vector)
3059 {
3060     int rn = extract32(insn, 5, 5);
3061     int imm9 = sextract32(insn, 12, 9);
3062     int idx = extract32(insn, 10, 2);
3063     bool is_signed = false;
3064     bool is_store = false;
3065     bool is_extended = false;
3066     bool is_unpriv = (idx == 2);
3067     bool iss_valid;
3068     bool post_index;
3069     bool writeback;
3070     int memidx;
3071 
3072     TCGv_i64 clean_addr, dirty_addr;
3073 
3074     if (is_vector) {
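        /* For vector accesses the size is opc<1>:size<1:0>, so 100 -> 128 bit. */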
3075         size |= (opc & 2) << 1;
3076         if (size > 4 || is_unpriv) {
3077             unallocated_encoding(s);
3078             return;
3079         }
3080         is_store = ((opc & 1) == 0);
3081         if (!fp_access_check(s)) {
3082             return;
3083         }
3084     } else {
3085         if (size == 3 && opc == 2) {
3086             /* PRFM - prefetch */
3087             if (idx != 0) {
3088                 unallocated_encoding(s);
3089                 return;
3090             }
3091             return;
3092         }
3093         if (opc == 3 && size > 1) {
3094             unallocated_encoding(s);
3095             return;
3096         }
3097         is_store = (opc == 0);
3098         is_signed = extract32(opc, 1, 1);
3099         is_extended = (size < 3) && extract32(opc, 0, 1);
3100     }
3101 
3102     switch (idx) {
3103     case 0:
3104     case 2:
3105         post_index = false;
3106         writeback = false;
3107         break;
3108     case 1:
3109         post_index = true;
3110         writeback = true;
3111         break;
3112     case 3:
3113         post_index = false;
3114         writeback = true;
3115         break;
3116     default:
3117         g_assert_not_reached();
3118     }
3119 
3120     iss_valid = !is_vector && !writeback;
3121 
3122     if (rn == 31) {
3123         gen_check_sp_alignment(s);
3124     }
3125 
3126     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3127     if (!post_index) {
3128         tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3129     }
3130 
3131     memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
3132     clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store,
3133                                        writeback || rn != 31,
3134                                        size, is_unpriv, memidx);
3135 
3136     if (is_vector) {
3137         if (is_store) {
3138             do_fp_st(s, rt, clean_addr, size);
3139         } else {
3140             do_fp_ld(s, rt, clean_addr, size);
3141         }
3142     } else {
3143         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3144         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3145 
3146         if (is_store) {
3147             do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
3148                              iss_valid, rt, iss_sf, false);
3149         } else {
3150             do_gpr_ld_memidx(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
3151                              is_extended, memidx,
3152                              iss_valid, rt, iss_sf, false);
3153         }
3154     }
3155 
3156     if (writeback) {
3157         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3158         if (post_index) {
3159             tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3160         }
3161         tcg_gen_mov_i64(tcg_rn, dirty_addr);
3162     }
3163 }
3164 
3165 /*
3166  * Load/store (register offset)
3167  *
3168  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
3169  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3170  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
3171  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3172  *
3173  * For non-vector:
3174  *   size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
3175  *   opc: 00 -> store, 01 -> load unsigned, 10/11 -> load signed (64/32 bit)
3176  * For vector:
3177  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3178  *   opc<0>: 0 -> store, 1 -> load
3179  * V: 1 -> vector/simd
3180  * opt: extend encoding (see DecodeRegExtend)
3181  * S: if S=1 then scale (essentially index by sizeof(size))
3182  * Rt: register to transfer into/out of
3183  * Rn: address register or SP for base
3184  * Rm: offset register or ZR for offset
3185  */
3186 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
3187                                    int opc,
3188                                    int size,
3189                                    int rt,
3190                                    bool is_vector)
3191 {
3192     int rn = extract32(insn, 5, 5);
3193     int shift = extract32(insn, 12, 1);
3194     int rm = extract32(insn, 16, 5);
3195     int opt = extract32(insn, 13, 3);
3196     bool is_signed = false;
3197     bool is_store = false;
3198     bool is_extended = false;
3199 
3200     TCGv_i64 tcg_rm, clean_addr, dirty_addr;
3201 
3202     if (extract32(opt, 1, 1) == 0) {
3203         unallocated_encoding(s);
3204         return;
3205     }
3206 
3207     if (is_vector) {
3208         size |= (opc & 2) << 1;
3209         if (size > 4) {
3210             unallocated_encoding(s);
3211             return;
3212         }
3213         is_store = !extract32(opc, 0, 1);
3214         if (!fp_access_check(s)) {
3215             return;
3216         }
3217     } else {
3218         if (size == 3 && opc == 2) {
3219             /* PRFM - prefetch */
3220             return;
3221         }
3222         if (opc == 3 && size > 1) {
3223             unallocated_encoding(s);
3224             return;
3225         }
3226         is_store = (opc == 0);
3227         is_signed = extract32(opc, 1, 1);
3228         is_extended = (size < 3) && extract32(opc, 0, 1);
3229     }
3230 
3231     if (rn == 31) {
3232         gen_check_sp_alignment(s);
3233     }
3234     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3235 
3236     tcg_rm = read_cpu_reg(s, rm, 1);
3237     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
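         /*
          * e.g. LDR x0, [x1, w2, SXTW #3]: option=110 sign-extends w2 and,
          * since S=1, shifts it left by size (here 3) before the add below.
          */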
3238 
3239     tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
3240     clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size);
3241 
3242     if (is_vector) {
3243         if (is_store) {
3244             do_fp_st(s, rt, clean_addr, size);
3245         } else {
3246             do_fp_ld(s, rt, clean_addr, size);
3247         }
3248     } else {
3249         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3250         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3251         if (is_store) {
3252             do_gpr_st(s, tcg_rt, clean_addr, size,
3253                       true, rt, iss_sf, false);
3254         } else {
3255             do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
3256                       is_extended, true, rt, iss_sf, false);
3257         }
3258     }
3259 }
3260 
3261 /*
3262  * Load/store (unsigned immediate)
3263  *
3264  * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
3265  * +----+-------+---+-----+-----+------------+-------+------+
3266  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
3267  * +----+-------+---+-----+-----+------------+-------+------+
3268  *
3269  * For non-vector:
3270  *   size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
3271  *   opc: 00 -> store, 01 -> load unsigned, 10/11 -> load signed (64/32 bit)
3272  * For vector:
3273  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3274  *   opc<0>: 0 -> store, 1 -> load
3275  * Rn: base address register (inc SP)
3276  * Rt: target register
3277  */
3278 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
3279                                         int opc,
3280                                         int size,
3281                                         int rt,
3282                                         bool is_vector)
3283 {
3284     int rn = extract32(insn, 5, 5);
3285     unsigned int imm12 = extract32(insn, 10, 12);
3286     unsigned int offset;
3287 
3288     TCGv_i64 clean_addr, dirty_addr;
3289 
3290     bool is_store;
3291     bool is_signed = false;
3292     bool is_extended = false;
3293 
3294     if (is_vector) {
3295         size |= (opc & 2) << 1;
3296         if (size > 4) {
3297             unallocated_encoding(s);
3298             return;
3299         }
3300         is_store = !extract32(opc, 0, 1);
3301         if (!fp_access_check(s)) {
3302             return;
3303         }
3304     } else {
3305         if (size == 3 && opc == 2) {
3306             /* PRFM - prefetch */
3307             return;
3308         }
3309         if (opc == 3 && size > 1) {
3310             unallocated_encoding(s);
3311             return;
3312         }
3313         is_store = (opc == 0);
3314         is_signed = extract32(opc, 1, 1);
3315         is_extended = (size < 3) && extract32(opc, 0, 1);
3316     }
3317 
3318     if (rn == 31) {
3319         gen_check_sp_alignment(s);
3320     }
3321     dirty_addr = read_cpu_reg_sp(s, rn, 1);
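         /*
          * The immediate is scaled by the access size: e.g. LDR x0, [x1, #32]
          * encodes imm12=4 with size=3, so offset = 4 << 3.
          */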
3322     offset = imm12 << size;
3323     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3324     clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size);
3325 
3326     if (is_vector) {
3327         if (is_store) {
3328             do_fp_st(s, rt, clean_addr, size);
3329         } else {
3330             do_fp_ld(s, rt, clean_addr, size);
3331         }
3332     } else {
3333         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3334         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3335         if (is_store) {
3336             do_gpr_st(s, tcg_rt, clean_addr, size,
3337                       true, rt, iss_sf, false);
3338         } else {
3339             do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
3340                       is_extended, true, rt, iss_sf, false);
3341         }
3342     }
3343 }
3344 
3345 /* Atomic memory operations
3346  *
3347  *  31  30      27  26    24    22  21   16   15    12    10    5     0
3348  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3349  * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn |  Rt |
3350  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3351  *
3352  * Rt: the result register
3353  * Rn: base address or SP
3354  * Rs: the source register for the operation
3355  * V: vector flag (always 0 as of v8.3)
3356  * A: acquire flag
3357  * R: release flag
3358  */
3359 static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
3360                               int size, int rt, bool is_vector)
3361 {
3362     int rs = extract32(insn, 16, 5);
3363     int rn = extract32(insn, 5, 5);
3364     int o3_opc = extract32(insn, 12, 4);
3365     bool r = extract32(insn, 22, 1);
3366     bool a = extract32(insn, 23, 1);
3367     TCGv_i64 tcg_rs, tcg_rt, clean_addr;
3368     AtomicThreeOpFn *fn = NULL;
3369     MemOp mop = s->be_data | size | MO_ALIGN;
3370 
3371     if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
3372         unallocated_encoding(s);
3373         return;
3374     }
3375     switch (o3_opc) {
3376     case 000: /* LDADD */
3377         fn = tcg_gen_atomic_fetch_add_i64;
3378         break;
3379     case 001: /* LDCLR */
3380         fn = tcg_gen_atomic_fetch_and_i64;
3381         break;
3382     case 002: /* LDEOR */
3383         fn = tcg_gen_atomic_fetch_xor_i64;
3384         break;
3385     case 003: /* LDSET */
3386         fn = tcg_gen_atomic_fetch_or_i64;
3387         break;
3388     case 004: /* LDSMAX */
3389         fn = tcg_gen_atomic_fetch_smax_i64;
3390         mop |= MO_SIGN;
3391         break;
3392     case 005: /* LDSMIN */
3393         fn = tcg_gen_atomic_fetch_smin_i64;
3394         mop |= MO_SIGN;
3395         break;
3396     case 006: /* LDUMAX */
3397         fn = tcg_gen_atomic_fetch_umax_i64;
3398         break;
3399     case 007: /* LDUMIN */
3400         fn = tcg_gen_atomic_fetch_umin_i64;
3401         break;
3402     case 010: /* SWP */
3403         fn = tcg_gen_atomic_xchg_i64;
3404         break;
3405     case 014: /* LDAPR, LDAPRH, LDAPRB */
3406         if (!dc_isar_feature(aa64_rcpc_8_3, s) ||
3407             rs != 31 || a != 1 || r != 0) {
3408             unallocated_encoding(s);
3409             return;
3410         }
3411         break;
3412     default:
3413         unallocated_encoding(s);
3414         return;
3415     }
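         /*
          * Note the o3_opc case labels above are octal, mirroring the o3:opc
          * field split: e.g. 010 (decimal 8) is SWP with o3=1, opc=000.
          */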
3416 
3417     if (rn == 31) {
3418         gen_check_sp_alignment(s);
3419     }
3420     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size);
3421 
3422     if (o3_opc == 014) {
3423         /*
3424          * LDAPR* are a special case because they are a simple load, not a
3425          * fetch-and-do-something op.
3426          * The architectural consistency requirements here are weaker than
3427          * full load-acquire (we only need "load-acquire processor consistent"),
3428          * but we choose to implement them as full LDAQ.
3429          */
3430         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false,
3431                   true, rt, disas_ldst_compute_iss_sf(size, false, 0), true);
3432         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3433         return;
3434     }
3435 
3436     tcg_rs = read_cpu_reg(s, rs, true);
3437     tcg_rt = cpu_reg(s, rt);
3438 
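         /*
          * LDCLR computes mem = mem AND NOT(Rs), so invert Rs here and
          * reuse the plain fetch-and primitive selected above.
          */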
3439     if (o3_opc == 1) { /* LDCLR */
3440         tcg_gen_not_i64(tcg_rs, tcg_rs);
3441     }
3442 
3443     /* The tcg atomic primitives are all full barriers.  Therefore we
3444      * can ignore the Acquire and Release bits of this instruction.
3445      */
3446     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3447 
3448     if ((mop & MO_SIGN) && size != MO_64) {
3449         tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3450     }
3451 }
3452 
3453 /*
3454  * PAC memory operations
3455  *
3456  *  31  30      27  26    24    22  21       12  11  10    5     0
3457  * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3458  * | size | 1 1 1 | V | 0 0 | M S | 1 |  imm9  | W | 1 | Rn |  Rt |
3459  * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3460  *
3461  * Rt: the result register
3462  * Rn: base address or SP
3463  * V: vector flag (always 0 as of v8.3)
3464  * M: clear for key DA, set for key DB
3465  * W: pre-indexing flag
3466  * S: sign for imm9.
3467  */
3468 static void disas_ldst_pac(DisasContext *s, uint32_t insn,
3469                            int size, int rt, bool is_vector)
3470 {
3471     int rn = extract32(insn, 5, 5);
3472     bool is_wback = extract32(insn, 11, 1);
3473     bool use_key_a = !extract32(insn, 23, 1);
3474     int offset;
3475     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3476 
3477     if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
3478         unallocated_encoding(s);
3479         return;
3480     }
3481 
3482     if (rn == 31) {
3483         gen_check_sp_alignment(s);
3484     }
3485     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3486 
3487     if (s->pauth_active) {
3488         if (use_key_a) {
3489             gen_helper_autda(dirty_addr, cpu_env, dirty_addr,
3490                              tcg_constant_i64(0));
3491         } else {
3492             gen_helper_autdb(dirty_addr, cpu_env, dirty_addr,
3493                              tcg_constant_i64(0));
3494         }
3495     }
3496 
3497     /* Form the 10-bit signed, scaled offset.  */
3498     offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
3499     offset = sextract32(offset << size, 0, 10 + size);
3500     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3501 
3502     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3503     clean_addr = gen_mte_check1(s, dirty_addr, false,
3504                                 is_wback || rn != 31, size);
3505 
3506     tcg_rt = cpu_reg(s, rt);
3507     do_gpr_ld(s, tcg_rt, clean_addr, size,
3508               /* extend */ false, /* iss_valid */ !is_wback,
3509               /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
3510 
3511     if (is_wback) {
3512         tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3513     }
3514 }
3515 
3516 /*
3517  * LDAPR/STLR (unscaled immediate)
3518  *
3519  *  31  30            24    22  21       12    10    5     0
3520  * +------+-------------+-----+---+--------+-----+----+-----+
3521  * | size | 0 1 1 0 0 1 | opc | 0 |  imm9  | 0 0 | Rn |  Rt |
3522  * +------+-------------+-----+---+--------+-----+----+-----+
3523  *
3524  * Rt: source or destination register
3525  * Rn: base register
3526  * imm9: unscaled immediate offset
3527  * opc: 00: STLUR*, 01/10/11: various LDAPUR*
3528  * size: size of load/store
3529  */
3530 static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn)
3531 {
3532     int rt = extract32(insn, 0, 5);
3533     int rn = extract32(insn, 5, 5);
3534     int offset = sextract32(insn, 12, 9);
3535     int opc = extract32(insn, 22, 2);
3536     int size = extract32(insn, 30, 2);
3537     TCGv_i64 clean_addr, dirty_addr;
3538     bool is_store = false;
3539     bool extend = false;
3540     bool iss_sf;
3541     MemOp mop;
3542 
3543     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3544         unallocated_encoding(s);
3545         return;
3546     }
3547 
3548     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3549     mop = size | MO_ALIGN;
3550 
3551     switch (opc) {
3552     case 0: /* STLURB */
3553         is_store = true;
3554         break;
3555     case 1: /* LDAPUR* */
3556         break;
3557     case 2: /* LDAPURS* 64-bit variant */
3558         if (size == 3) {
3559             unallocated_encoding(s);
3560             return;
3561         }
3562         mop |= MO_SIGN;
3563         break;
3564     case 3: /* LDAPURS* 32-bit variant */
3565         if (size > 1) {
3566             unallocated_encoding(s);
3567             return;
3568         }
3569         mop |= MO_SIGN;
3570         extend = true; /* zero-extend 32->64 after signed load */
3571         break;
3572     default:
3573         g_assert_not_reached();
3574     }
3575 
3576     iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc);
3577 
3578     if (rn == 31) {
3579         gen_check_sp_alignment(s);
3580     }
3581 
3582     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3583     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3584     clean_addr = clean_data_tbi(s, dirty_addr);
3585 
3586     if (is_store) {
3587         /* Store-Release semantics */
3588         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3589         do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true);
3590     } else {
3591         /*
3592          * Load-AcquirePC semantics; we implement as the slightly more
3593          * restrictive Load-Acquire.
3594          */
3595         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop,
3596                   extend, true, rt, iss_sf, true);
3597         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3598     }
3599 }
3600 
3601 /* Load/store register (all forms) */
3602 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
3603 {
3604     int rt = extract32(insn, 0, 5);
3605     int opc = extract32(insn, 22, 2);
3606     bool is_vector = extract32(insn, 26, 1);
3607     int size = extract32(insn, 30, 2);
3608 
3609     switch (extract32(insn, 24, 2)) {
3610     case 0:
3611         if (extract32(insn, 21, 1) == 0) {
3612             /* Load/store register (unscaled immediate)
3613              * Load/store immediate pre/post-indexed
3614              * Load/store register unprivileged
3615              */
3616             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
3617             return;
3618         }
3619         switch (extract32(insn, 10, 2)) {
3620         case 0:
3621             disas_ldst_atomic(s, insn, size, rt, is_vector);
3622             return;
3623         case 2:
3624             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
3625             return;
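              /* cases 1 and 3 (bit 10 set) are the PAC loads LDRAA/LDRAB */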
3626         default:
3627             disas_ldst_pac(s, insn, size, rt, is_vector);
3628             return;
3629         }
3630         break;
3631     case 1:
3632         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
3633         return;
3634     }
3635     unallocated_encoding(s);
3636 }
3637 
3638 /* AdvSIMD load/store multiple structures
3639  *
3640  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
3641  * +---+---+---------------+---+-------------+--------+------+------+------+
3642  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
3643  * +---+---+---------------+---+-------------+--------+------+------+------+
3644  *
3645  * AdvSIMD load/store multiple structures (post-indexed)
3646  *
3647  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
3648  * +---+---+---------------+---+---+---------+--------+------+------+------+
3649  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
3650  * +---+---+---------------+---+---+---------+--------+------+------+------+
3651  *
3652  * Rt: first (or only) SIMD&FP register to be transferred
3653  * Rn: base address or SP
3654  * Rm (post-index only): post-index register (when !31) or size dependent #imm
3655  */
3656 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
3657 {
3658     int rt = extract32(insn, 0, 5);
3659     int rn = extract32(insn, 5, 5);
3660     int rm = extract32(insn, 16, 5);
3661     int size = extract32(insn, 10, 2);
3662     int opcode = extract32(insn, 12, 4);
3663     bool is_store = !extract32(insn, 22, 1);
3664     bool is_postidx = extract32(insn, 23, 1);
3665     bool is_q = extract32(insn, 30, 1);
3666     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3667     MemOp endian, align, mop;
3668 
3669     int total;    /* total bytes */
3670     int elements; /* elements per vector */
3671     int rpt;    /* num iterations */
3672     int selem;  /* structure elements */
3673     int r;
3674 
3675     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
3676         unallocated_encoding(s);
3677         return;
3678     }
3679 
3680     if (!is_postidx && rm != 0) {
3681         unallocated_encoding(s);
3682         return;
3683     }
3684 
3685     /* From the shared decode logic */
3686     switch (opcode) {
3687     case 0x0:
3688         rpt = 1;
3689         selem = 4;
3690         break;
3691     case 0x2:
3692         rpt = 4;
3693         selem = 1;
3694         break;
3695     case 0x4:
3696         rpt = 1;
3697         selem = 3;
3698         break;
3699     case 0x6:
3700         rpt = 3;
3701         selem = 1;
3702         break;
3703     case 0x7:
3704         rpt = 1;
3705         selem = 1;
3706         break;
3707     case 0x8:
3708         rpt = 1;
3709         selem = 2;
3710         break;
3711     case 0xa:
3712         rpt = 2;
3713         selem = 1;
3714         break;
3715     default:
3716         unallocated_encoding(s);
3717         return;
3718     }
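         /*
          * The opcodes above are LD/ST4 (0x0), LD/ST3 (0x4), LD/ST2 (0x8),
          * and LD/ST1 with 4, 3, 2 or 1 registers (0x2, 0x6, 0xa, 0x7).
          */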
3719 
3720     if (size == 3 && !is_q && selem != 1) {
3721         /* reserved */
3722         unallocated_encoding(s);
3723         return;
3724     }
3725 
3726     if (!fp_access_check(s)) {
3727         return;
3728     }
3729 
3730     if (rn == 31) {
3731         gen_check_sp_alignment(s);
3732     }
3733 
3734     /* For our purposes, bytes are always little-endian.  */
3735     endian = s->be_data;
3736     if (size == 0) {
3737         endian = MO_LE;
3738     }
3739 
3740     total = rpt * selem * (is_q ? 16 : 8);
3741     tcg_rn = cpu_reg_sp(s, rn);
3742 
3743     /*
3744      * Issue the MTE check vs the logical repeat count, before we
3745      * promote consecutive little-endian elements below.
3746      */
3747     clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31,
3748                                 total);
3749 
3750     /*
3751      * Consecutive little-endian elements from a single register
3752      * can be promoted to a larger little-endian operation.
3753      */
3754     align = MO_ALIGN;
3755     if (selem == 1 && endian == MO_LE) {
3756         align = pow2_align(size);
3757         size = 3;
3758     }
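         /*
          * e.g. LD1 {v0.16b}, [x0] (selem=1, size=0) is promoted here to
          * two little-endian 8-byte accesses.
          */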
3759     if (!s->align_mem) {
3760         align = 0;
3761     }
3762     mop = endian | size | align;
3763 
3764     elements = (is_q ? 16 : 8) >> size;
3765     tcg_ebytes = tcg_constant_i64(1 << size);
3766     for (r = 0; r < rpt; r++) {
3767         int e;
3768         for (e = 0; e < elements; e++) {
3769             int xs;
3770             for (xs = 0; xs < selem; xs++) {
3771                 int tt = (rt + r + xs) % 32;
3772                 if (is_store) {
3773                     do_vec_st(s, tt, e, clean_addr, mop);
3774                 } else {
3775                     do_vec_ld(s, tt, e, clean_addr, mop);
3776                 }
3777                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3778             }
3779         }
3780     }
3781 
3782     if (!is_store) {
3783         /* For non-quad operations, setting a slice of the low
3784          * 64 bits of the register clears the high 64 bits (in
3785          * the ARM ARM pseudocode this is implicit in the fact
3786          * that 'rval' is a 64 bit wide variable).
3787          * For quad operations, we might still need to zero the
3788          * high bits of SVE.
3789          */
3790         for (r = 0; r < rpt * selem; r++) {
3791             int tt = (rt + r) % 32;
3792             clear_vec_high(s, is_q, tt);
3793         }
3794     }
3795 
3796     if (is_postidx) {
3797         if (rm == 31) {
3798             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3799         } else {
3800             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3801         }
3802     }
3803 }
3804 
3805 /* AdvSIMD load/store single structure
3806  *
3807  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3808  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3809  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
3810  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3811  *
3812  * AdvSIMD load/store single structure (post-indexed)
3813  *
3814  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3815  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3816  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
3817  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3818  *
3819  * Rt: first (or only) SIMD&FP register to be transferred
3820  * Rn: base address or SP
3821  * Rm (post-index only): post-index register (when !31) or size dependent #imm
3822  * index = encoded in Q:S:size dependent on size
3823  *
3824  * lane_size = encoded in R, opc
3825  * transfer width = encoded in opc, S, size
3826  */
3827 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
3828 {
3829     int rt = extract32(insn, 0, 5);
3830     int rn = extract32(insn, 5, 5);
3831     int rm = extract32(insn, 16, 5);
3832     int size = extract32(insn, 10, 2);
3833     int S = extract32(insn, 12, 1);
3834     int opc = extract32(insn, 13, 3);
3835     int R = extract32(insn, 21, 1);
3836     int is_load = extract32(insn, 22, 1);
3837     int is_postidx = extract32(insn, 23, 1);
3838     int is_q = extract32(insn, 30, 1);
3839 
3840     int scale = extract32(opc, 1, 2);
3841     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
3842     bool replicate = false;
3843     int index = is_q << 3 | S << 2 | size;
3844     int xs, total;
3845     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3846     MemOp mop;
3847 
3848     if (extract32(insn, 31, 1)) {
3849         unallocated_encoding(s);
3850         return;
3851     }
3852     if (!is_postidx && rm != 0) {
3853         unallocated_encoding(s);
3854         return;
3855     }
3856 
3857     switch (scale) {
3858     case 3:
3859         if (!is_load || S) {
3860             unallocated_encoding(s);
3861             return;
3862         }
3863         scale = size;
3864         replicate = true;
3865         break;
3866     case 0:
3867         break;
3868     case 1:
3869         if (extract32(size, 0, 1)) {
3870             unallocated_encoding(s);
3871             return;
3872         }
3873         index >>= 1;
3874         break;
3875     case 2:
3876         if (extract32(size, 1, 1)) {
3877             unallocated_encoding(s);
3878             return;
3879         }
3880         if (!extract32(size, 0, 1)) {
3881             index >>= 2;
3882         } else {
3883             if (S) {
3884                 unallocated_encoding(s);
3885                 return;
3886             }
3887             index >>= 3;
3888             scale = 3;
3889         }
3890         break;
3891     default:
3892         g_assert_not_reached();
3893     }
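         /*
          * e.g. LD1 {v1.s}[3]: opc gives scale=2, size<0>=0 selects a
          * 32-bit lane, and index = Q:S = 0b11 selects lane 3.
          */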
3894 
3895     if (!fp_access_check(s)) {
3896         return;
3897     }
3898 
3899     if (rn == 31) {
3900         gen_check_sp_alignment(s);
3901     }
3902 
3903     total = selem << scale;
3904     tcg_rn = cpu_reg_sp(s, rn);
3905 
3906     clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31,
3907                                 total);
3908     mop = finalize_memop(s, scale);
3909 
3910     tcg_ebytes = tcg_constant_i64(1 << scale);
3911     for (xs = 0; xs < selem; xs++) {
3912         if (replicate) {
3913             /* Load and replicate to all elements */
3914             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3915 
3916             tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
3917             tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
3918                                  (is_q + 1) * 8, vec_full_reg_size(s),
3919                                  tcg_tmp);
3920         } else {
3921             /* Load/store one element per register */
3922             if (is_load) {
3923                 do_vec_ld(s, rt, index, clean_addr, mop);
3924             } else {
3925                 do_vec_st(s, rt, index, clean_addr, mop);
3926             }
3927         }
3928         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3929         rt = (rt + 1) % 32;
3930     }
3931 
3932     if (is_postidx) {
3933         if (rm == 31) {
3934             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3935         } else {
3936             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3937         }
3938     }
3939 }
3940 
3941 /*
3942  * Load/Store memory tags
3943  *
3944  *  31 30 29         24     22  21     12    10      5      0
3945  * +-----+-------------+-----+---+------+-----+------+------+
3946  * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 |  Rn  |  Rt  |
3947  * +-----+-------------+-----+---+------+-----+------+------+
3948  */
3949 static void disas_ldst_tag(DisasContext *s, uint32_t insn)
3950 {
3951     int rt = extract32(insn, 0, 5);
3952     int rn = extract32(insn, 5, 5);
3953     uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
3954     int op2 = extract32(insn, 10, 2);
3955     int op1 = extract32(insn, 22, 2);
3956     bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
3957     int index = 0;
3958     TCGv_i64 addr, clean_addr, tcg_rt;
3959 
3960     /* We checked insn bits [29:24,21] in the caller.  */
3961     if (extract32(insn, 30, 2) != 3) {
3962         goto do_unallocated;
3963     }
3964 
3965     /*
3966      * @index is a tri-state variable which has 3 states:
3967      * < 0 : post-index, writeback
3968      * = 0 : signed offset
3969      * > 0 : pre-index, writeback
3970      */
3971     switch (op1) {
3972     case 0:
3973         if (op2 != 0) {
3974             /* STG */
3975             index = op2 - 2;
3976         } else {
3977             /* STZGM */
3978             if (s->current_el == 0 || offset != 0) {
3979                 goto do_unallocated;
3980             }
3981             is_mult = is_zero = true;
3982         }
3983         break;
3984     case 1:
3985         if (op2 != 0) {
3986             /* STZG */
3987             is_zero = true;
3988             index = op2 - 2;
3989         } else {
3990             /* LDG */
3991             is_load = true;
3992         }
3993         break;
3994     case 2:
3995         if (op2 != 0) {
3996             /* ST2G */
3997             is_pair = true;
3998             index = op2 - 2;
3999         } else {
4000             /* STGM */
4001             if (s->current_el == 0 || offset != 0) {
4002                 goto do_unallocated;
4003             }
4004             is_mult = true;
4005         }
4006         break;
4007     case 3:
4008         if (op2 != 0) {
4009             /* STZ2G */
4010             is_pair = is_zero = true;
4011             index = op2 - 2;
4012         } else {
4013             /* LDGM */
4014             if (s->current_el == 0 || offset != 0) {
4015                 goto do_unallocated;
4016             }
4017             is_mult = is_load = true;
4018         }
4019         break;
4020 
4021     default:
4022     do_unallocated:
4023         unallocated_encoding(s);
4024         return;
4025     }
4026 
4027     if (is_mult
4028         ? !dc_isar_feature(aa64_mte, s)
4029         : !dc_isar_feature(aa64_mte_insn_reg, s)) {
4030         goto do_unallocated;
4031     }
4032 
4033     if (rn == 31) {
4034         gen_check_sp_alignment(s);
4035     }
4036 
4037     addr = read_cpu_reg_sp(s, rn, true);
4038     if (index >= 0) {
4039         /* pre-index or signed offset */
4040         tcg_gen_addi_i64(addr, addr, offset);
4041     }
4042 
4043     if (is_mult) {
4044         tcg_rt = cpu_reg(s, rt);
4045 
4046         if (is_zero) {
4047             int size = 4 << s->dcz_blocksize;
4048 
4049             if (s->ata) {
4050                 gen_helper_stzgm_tags(cpu_env, addr, tcg_rt);
4051             }
4052             /*
4053              * The non-tags portion of STZGM is mostly like DC_ZVA,
4054              * except the alignment happens before the access.
4055              */
4056             clean_addr = clean_data_tbi(s, addr);
4057             tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4058             gen_helper_dc_zva(cpu_env, clean_addr);
4059         } else if (s->ata) {
4060             if (is_load) {
4061                 gen_helper_ldgm(tcg_rt, cpu_env, addr);
4062             } else {
4063                 gen_helper_stgm(cpu_env, addr, tcg_rt);
4064             }
4065         } else {
4066             MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
4067             int size = 4 << GMID_EL1_BS;
4068 
4069             clean_addr = clean_data_tbi(s, addr);
4070             tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4071             gen_probe_access(s, clean_addr, acc, size);
4072 
4073             if (is_load) {
4074                 /* The result tags are zeros.  */
4075                 tcg_gen_movi_i64(tcg_rt, 0);
4076             }
4077         }
4078         return;
4079     }
4080 
4081     if (is_load) {
4082         tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4083         tcg_rt = cpu_reg(s, rt);
4084         if (s->ata) {
4085             gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt);
4086         } else {
4087             clean_addr = clean_data_tbi(s, addr);
4088             gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4089             gen_address_with_allocation_tag0(tcg_rt, addr);
4090         }
4091     } else {
4092         tcg_rt = cpu_reg_sp(s, rt);
4093         if (!s->ata) {
4094             /*
4095              * For STG and ST2G, we need to check alignment and probe memory.
4096              * TODO: For STZG and STZ2G, we could rely on the stores below,
4097              * at least for system mode; user-only won't enforce alignment.
4098              */
4099             if (is_pair) {
4100                 gen_helper_st2g_stub(cpu_env, addr);
4101             } else {
4102                 gen_helper_stg_stub(cpu_env, addr);
4103             }
4104         } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4105             if (is_pair) {
4106                 gen_helper_st2g_parallel(cpu_env, addr, tcg_rt);
4107             } else {
4108                 gen_helper_stg_parallel(cpu_env, addr, tcg_rt);
4109             }
4110         } else {
4111             if (is_pair) {
4112                 gen_helper_st2g(cpu_env, addr, tcg_rt);
4113             } else {
4114                 gen_helper_stg(cpu_env, addr, tcg_rt);
4115             }
4116         }
4117     }
4118 
4119     if (is_zero) {
4120         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4121         TCGv_i64 tcg_zero = tcg_constant_i64(0);
4122         int mem_index = get_mem_index(s);
4123         int i, n = (1 + is_pair) << LOG2_TAG_GRANULE;
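             /*
              * Zero (1 + is_pair) 16-byte tag granules of data, i.e. 16 or
              * 32 bytes, using 8-byte stores.
              */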
4124 
4125         tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index,
4126                             MO_UQ | MO_ALIGN_16);
4127         for (i = 8; i < n; i += 8) {
4128             tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4129             tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_UQ);
4130         }
4131     }
4132 
4133     if (index != 0) {
4134         /* pre-index or post-index */
4135         if (index < 0) {
4136             /* post-index */
4137             tcg_gen_addi_i64(addr, addr, offset);
4138         }
4139         tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr);
4140     }
4141 }
4142 
4143 /* Loads and stores */
4144 static void disas_ldst(DisasContext *s, uint32_t insn)
4145 {
4146     switch (extract32(insn, 24, 6)) {
4147     case 0x08: /* Load/store exclusive */
4148         disas_ldst_excl(s, insn);
4149         break;
4150     case 0x18: case 0x1c: /* Load register (literal) */
4151         disas_ld_lit(s, insn);
4152         break;
4153     case 0x28: case 0x29:
4154     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
4155         disas_ldst_pair(s, insn);
4156         break;
4157     case 0x38: case 0x39:
4158     case 0x3c: case 0x3d: /* Load/store register (all forms) */
4159         disas_ldst_reg(s, insn);
4160         break;
4161     case 0x0c: /* AdvSIMD load/store multiple structures */
4162         disas_ldst_multiple_struct(s, insn);
4163         break;
4164     case 0x0d: /* AdvSIMD load/store single structure */
4165         disas_ldst_single_struct(s, insn);
4166         break;
4167     case 0x19:
4168         if (extract32(insn, 21, 1) != 0) {
4169             disas_ldst_tag(s, insn);
4170         } else if (extract32(insn, 10, 2) == 0) {
4171             disas_ldst_ldapr_stlr(s, insn);
4172         } else {
4173             unallocated_encoding(s);
4174         }
4175         break;
4176     default:
4177         unallocated_encoding(s);
4178         break;
4179     }
4180 }
4181 
4182 /*
4183  * PC-rel. addressing
4184  */
4185 
4186 static bool trans_ADR(DisasContext *s, arg_ri *a)
4187 {
4188     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4189     return true;
4190 }
4191 
4192 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4193 {
4194     int64_t offset = (int64_t)a->imm << 12;
4195 
4196     /* The page offset is ok for CF_PCREL. */
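         /* ADRP yields (PC & ~0xfff) + (imm << 12), so cancel the low
          * 12 bits of pc_curr from the diff.  */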
4197     offset -= s->pc_curr & 0xfff;
4198     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4199     return true;
4200 }
4201 
4202 /*
4203  * Add/subtract (immediate)
4204  *
4205  *  31 30 29 28         23 22 21         10 9   5 4   0
4206  * +--+--+--+-------------+--+-------------+-----+-----+
4207  * |sf|op| S| 1 0 0 0 1 0 |sh|    imm12    |  Rn | Rd  |
4208  * +--+--+--+-------------+--+-------------+-----+-----+
4209  *
4210  *    sf: 0 -> 32bit, 1 -> 64bit
4211  *    op: 0 -> add  , 1 -> sub
4212  *     S: 1 -> set flags
4213  *    sh: 1 -> LSL imm by 12
4214  */
4215 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
4216 {
4217     int rd = extract32(insn, 0, 5);
4218     int rn = extract32(insn, 5, 5);
4219     uint64_t imm = extract32(insn, 10, 12);
4220     bool shift = extract32(insn, 22, 1);
4221     bool setflags = extract32(insn, 29, 1);
4222     bool sub_op = extract32(insn, 30, 1);
4223     bool is_64bit = extract32(insn, 31, 1);
4224 
4225     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
4226     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
4227     TCGv_i64 tcg_result;
4228 
4229     if (shift) {
4230         imm <<= 12;
4231     }
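         /* e.g. ADD x0, x1, #1, LSL #12 encodes sh=1 and adds 4096 */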
4232 
4233     tcg_result = tcg_temp_new_i64();
4234     if (!setflags) {
4235         if (sub_op) {
4236             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
4237         } else {
4238             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
4239         }
4240     } else {
4241         TCGv_i64 tcg_imm = tcg_constant_i64(imm);
4242         if (sub_op) {
4243             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
4244         } else {
4245             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
4246         }
4247     }
4248 
4249     if (is_64bit) {
4250         tcg_gen_mov_i64(tcg_rd, tcg_result);
4251     } else {
4252         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4253     }
4254 }
4255 
4256 /*
4257  * Add/subtract (immediate, with tags)
4258  *
4259  *  31 30 29 28         23 22 21     16 14      10 9   5 4   0
4260  * +--+--+--+-------------+--+---------+--+-------+-----+-----+
4261  * |sf|op| S| 1 0 0 0 1 1 |o2|  uimm6  |o3| uimm4 |  Rn | Rd  |
4262  * +--+--+--+-------------+--+---------+--+-------+-----+-----+
4263  *
4264  *    op: 0 -> add, 1 -> sub
4265  */
4266 static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn)
4267 {
4268     int rd = extract32(insn, 0, 5);
4269     int rn = extract32(insn, 5, 5);
4270     int uimm4 = extract32(insn, 10, 4);
4271     int uimm6 = extract32(insn, 16, 6);
4272     bool sub_op = extract32(insn, 30, 1);
4273     TCGv_i64 tcg_rn, tcg_rd;
4274     int imm;
4275 
4276     /* Test all of sf=1, S=0, o2=0, o3=0.  */
4277     if ((insn & 0xa040c000u) != 0x80000000u ||
4278         !dc_isar_feature(aa64_mte_insn_reg, s)) {
4279         unallocated_encoding(s);
4280         return;
4281     }
4282 
4283     imm = uimm6 << LOG2_TAG_GRANULE;
4284     if (sub_op) {
4285         imm = -imm;
4286     }
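         /*
          * uimm6 is scaled by the 16-byte tag granule, giving an address
          * offset of 0..1008; uimm4 supplies the tag adjustment.
          */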
4287 
4288     tcg_rn = cpu_reg_sp(s, rn);
4289     tcg_rd = cpu_reg_sp(s, rd);
4290 
4291     if (s->ata) {
4292         gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn,
4293                            tcg_constant_i32(imm),
4294                            tcg_constant_i32(uimm4));
4295     } else {
4296         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4297         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4298     }
4299 }
4300 
4301 /* The input should be a value in the bottom e bits (with higher
4302  * bits zero); returns that value replicated into every element
4303  * of size e in a 64 bit integer.
4304  */
4305 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4306 {
4307     assert(e != 0);
4308     while (e < 64) {
4309         mask |= mask << e;
4310         e *= 2;
4311     }
4312     return mask;
4313 }
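     /* e.g. bitfield_replicate(0x3, 4) returns 0x3333333333333333 */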
4314 
4315 /* Return a value with the bottom len bits set (where 0 < len <= 64) */
4316 static inline uint64_t bitmask64(unsigned int length)
4317 {
4318     assert(length > 0 && length <= 64);
4319     return ~0ULL >> (64 - length);
4320 }
4321 
4322 /* Simplified variant of pseudocode DecodeBitMasks() for the case where we
4323  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4324  * value (ie should cause a guest UNDEF exception), and true if they are
4325  * valid, in which case the decoded bit pattern is written to result.
4326  */
4327 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4328                             unsigned int imms, unsigned int immr)
4329 {
4330     uint64_t mask;
4331     unsigned e, levels, s, r;
4332     int len;
4333 
4334     assert(immn < 2 && imms < 64 && immr < 64);
4335 
4336     /* The bit patterns we create here are 64 bit patterns which
4337      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4338      * 64 bits each. Each element contains the same value: a run
4339      * of between 1 and e-1 non-zero bits, rotated within the
4340      * element by between 0 and e-1 bits.
4341      *
4342      * The element size and run length are encoded into immn (1 bit)
4343      * and imms (6 bits) as follows:
4344      * 64 bit elements: immn = 1, imms = <length of run - 1>
4345      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4346      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4347      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4348      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4349      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4350      * Notice that immn = 0, imms = 11111x is the only combination
4351      * not covered by one of the above options; this is reserved.
4352      * Further, <length of run - 1> all-ones is a reserved pattern.
4353      *
4354      * In all cases the rotation is by immr % e (and immr is 6 bits).
4355      */
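         /*
          * Worked example: immn=0, imms=0b111100, immr=0 gives e=2 with a
          * one-bit run, so the result is 0x5555555555555555.
          */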
4356 
4357     /* First determine the element size */
4358     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4359     if (len < 1) {
4360         /* This is the immn == 0, imms == 11111x case */
4361         return false;
4362     }
4363     e = 1 << len;
4364 
4365     levels = e - 1;
4366     s = imms & levels;
4367     r = immr & levels;
4368 
4369     if (s == levels) {
4370         /* <length of run - 1> mustn't be all-ones. */
4371         return false;
4372     }
4373 
4374     /* Create the value of one element: s+1 set bits rotated
4375      * by r within the element (which is e bits wide)...
4376      */
4377     mask = bitmask64(s + 1);
4378     if (r) {
4379         mask = (mask >> r) | (mask << (e - r));
4380         mask &= bitmask64(e);
4381     }
4382     /* ...then replicate the element over the whole 64 bit value */
4383     mask = bitfield_replicate(mask, e);
4384     *result = mask;
4385     return true;
4386 }
4387 
4388 /* Logical (immediate)
4389  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
4390  * +----+-----+-------------+---+------+------+------+------+
4391  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
4392  * +----+-----+-------------+---+------+------+------+------+
4393  */
4394 static void disas_logic_imm(DisasContext *s, uint32_t insn)
4395 {
4396     unsigned int sf, opc, is_n, immr, imms, rn, rd;
4397     TCGv_i64 tcg_rd, tcg_rn;
4398     uint64_t wmask;
4399     bool is_and = false;
4400 
4401     sf = extract32(insn, 31, 1);
4402     opc = extract32(insn, 29, 2);
4403     is_n = extract32(insn, 22, 1);
4404     immr = extract32(insn, 16, 6);
4405     imms = extract32(insn, 10, 6);
4406     rn = extract32(insn, 5, 5);
4407     rd = extract32(insn, 0, 5);
4408 
4409     if (!sf && is_n) {
4410         unallocated_encoding(s);
4411         return;
4412     }
4413 
4414     if (opc == 0x3) { /* ANDS */
4415         tcg_rd = cpu_reg(s, rd);
4416     } else {
4417         tcg_rd = cpu_reg_sp(s, rd);
4418     }
4419     tcg_rn = cpu_reg(s, rn);
4420 
4421     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
4422         /* some immediate field values are reserved */
4423         unallocated_encoding(s);
4424         return;
4425     }
4426 
4427     if (!sf) {
4428         wmask &= 0xffffffff;
4429     }
4430 
4431     switch (opc) {
4432     case 0x3: /* ANDS */
4433     case 0x0: /* AND */
4434         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
4435         is_and = true;
4436         break;
4437     case 0x1: /* ORR */
4438         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
4439         break;
4440     case 0x2: /* EOR */
4441         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
4442         break;
4443     default:
4444         g_assert_not_reached(); /* must handle all above */
4445         break;
4446     }
4447 
4448     if (!sf && !is_and) {
4449         /* zero extend final result; we know we can skip this for AND
4450          * since the immediate had the high 32 bits clear.
4451          */
4452         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4453     }
4454 
4455     if (opc == 3) { /* ANDS */
4456         gen_logic_CC(sf, tcg_rd);
4457     }
4458 }
4459 
4460 /*
4461  * Move wide (immediate)
4462  *
4463  *  31 30 29 28         23 22 21 20             5 4    0
4464  * +--+-----+-------------+-----+----------------+------+
4465  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
4466  * +--+-----+-------------+-----+----------------+------+
4467  *
4468  * sf: 0 -> 32 bit, 1 -> 64 bit
4469  * opc: 00 -> MOVN, 10 -> MOVZ, 11 -> MOVK (01 is unallocated)
4470  * hw: shift/16 (0 or 16; 32 and 48 for sf=1 only)
4471  */
4472 static void disas_movw_imm(DisasContext *s, uint32_t insn)
4473 {
4474     int rd = extract32(insn, 0, 5);
4475     uint64_t imm = extract32(insn, 5, 16);
4476     int sf = extract32(insn, 31, 1);
4477     int opc = extract32(insn, 29, 2);
4478     int pos = extract32(insn, 21, 2) << 4;
4479     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4480 
4481     if (!sf && (pos >= 32)) {
4482         unallocated_encoding(s);
4483         return;
4484     }
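         /* e.g. MOVZ x0, #0x1234, LSL #16 (hw=1) writes 0x12340000, and a
          * following MOVK can merge 16 more bits at another position.  */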
4485 
4486     switch (opc) {
4487     case 0: /* MOVN */
4488     case 2: /* MOVZ */
4489         imm <<= pos;
4490         if (opc == 0) {
4491             imm = ~imm;
4492         }
4493         if (!sf) {
4494             imm &= 0xffffffffu;
4495         }
4496         tcg_gen_movi_i64(tcg_rd, imm);
4497         break;
4498     case 3: /* MOVK */
4499         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_constant_i64(imm), pos, 16);
4500         if (!sf) {
4501             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4502         }
4503         break;
4504     default:
4505         unallocated_encoding(s);
4506         break;
4507     }
4508 }
4509 
4510 /* Bitfield
4511  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
4512  * +----+-----+-------------+---+------+------+------+------+
4513  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
4514  * +----+-----+-------------+---+------+------+------+------+
4515  */
4516 static void disas_bitfield(DisasContext *s, uint32_t insn)
4517 {
4518     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
4519     TCGv_i64 tcg_rd, tcg_tmp;
4520 
4521     sf = extract32(insn, 31, 1);
4522     opc = extract32(insn, 29, 2);
4523     n = extract32(insn, 22, 1);
4524     ri = extract32(insn, 16, 6);
4525     si = extract32(insn, 10, 6);
4526     rn = extract32(insn, 5, 5);
4527     rd = extract32(insn, 0, 5);
4528     bitsize = sf ? 64 : 32;
4529 
4530     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
4531         unallocated_encoding(s);
4532         return;
4533     }
4534 
4535     tcg_rd = cpu_reg(s, rd);
4536 
4537     /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
4538        to be smaller than bitsize, we'll never reference data outside the
4539        low 32-bits anyway.  */
4540     tcg_tmp = read_cpu_reg(s, rn, 1);
4541 
4542     /* Recognize simple(r) extractions.  */
4543     if (si >= ri) {
4544         /* Wd<s-r:0> = Wn<s:r> */
4545         len = (si - ri) + 1;
4546         if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
4547             tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4548             goto done;
4549         } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
4550             tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4551             return;
4552         }
4553         /* opc == 1, BFXIL fall through to deposit */
4554         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4555         pos = 0;
4556     } else {
4557         /* Handle the ri > si case with a deposit
4558          * Wd<32+s-r,32-r> = Wn<s:0>
4559          */
4560         len = si + 1;
4561         pos = (bitsize - ri) & (bitsize - 1);
4562     }
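         /*
          * e.g. BFI w0, w1, #8, #4 is BFM with immr=24, imms=3, reaching
          * here with pos = (32 - 24) & 31 = 8 and len = 4.
          */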
4563 
4564     if (opc == 0 && len < ri) {
4565         /* SBFM: sign extend the destination field from len to fill
4566            the balance of the word.  Let the deposit below insert all
4567            of those sign bits.  */
4568         tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4569         len = ri;
4570     }
4571 
4572     if (opc == 1) { /* BFM, BFXIL */
4573         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4574     } else {
4575         /* SBFM or UBFM: We start with zero, and we haven't modified
4576            any bits outside bitsize, therefore the zero-extension
4577            below is unneeded.  */
4578         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4579         return;
4580     }
4581 
4582  done:
4583     if (!sf) { /* zero extend final result */
4584         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4585     }
4586 }
4587 
4588 /* Extract
4589  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
4590  * +----+------+-------------+---+----+------+--------+------+------+
4591  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
4592  * +----+------+-------------+---+----+------+--------+------+------+
4593  */
4594 static void disas_extract(DisasContext *s, uint32_t insn)
4595 {
4596     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
4597 
4598     sf = extract32(insn, 31, 1);
4599     n = extract32(insn, 22, 1);
4600     rm = extract32(insn, 16, 5);
4601     imm = extract32(insn, 10, 6);
4602     rn = extract32(insn, 5, 5);
4603     rd = extract32(insn, 0, 5);
4604     op21 = extract32(insn, 29, 2);
4605     op0 = extract32(insn, 21, 1);
4606     bitsize = sf ? 64 : 32;
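     /* ROR Rd, Rs, #imm is the alias EXTR Rd, Rs, Rs, #imm (rm == rn). */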
4607 
4608     if (sf != n || op21 || op0 || imm >= bitsize) {
4609         unallocated_encoding(s);
4610     } else {
4611         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4612 
4613         tcg_rd = cpu_reg(s, rd);
4614 
4615         if (unlikely(imm == 0)) {
4616             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4617              * so an extract from bit 0 is a special case.
4618              */
4619             if (sf) {
4620                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
4621             } else {
4622                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
4623             }
4624         } else {
4625             tcg_rm = cpu_reg(s, rm);
4626             tcg_rn = cpu_reg(s, rn);
4627 
4628             if (sf) {
4629                 /* Specialization to ROR happens in EXTRACT2.  */
4630                 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm);
4631             } else {
4632                 TCGv_i32 t0 = tcg_temp_new_i32();
4633 
4634                 tcg_gen_extrl_i64_i32(t0, tcg_rm);
4635                 if (rm == rn) {
4636                     tcg_gen_rotri_i32(t0, t0, imm);
4637                 } else {
4638                     TCGv_i32 t1 = tcg_temp_new_i32();
4639                     tcg_gen_extrl_i64_i32(t1, tcg_rn);
4640                     tcg_gen_extract2_i32(t0, t0, t1, imm);
4641                 }
4642                 tcg_gen_extu_i32_i64(tcg_rd, t0);
4643             }
4644         }
4645     }
4646 }
4647 
4648 /* Data processing - immediate */
4649 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
4650 {
4651     switch (extract32(insn, 23, 6)) {
4652     case 0x22: /* Add/subtract (immediate) */
4653         disas_add_sub_imm(s, insn);
4654         break;
4655     case 0x23: /* Add/subtract (immediate, with tags) */
4656         disas_add_sub_imm_with_tags(s, insn);
4657         break;
4658     case 0x24: /* Logical (immediate) */
4659         disas_logic_imm(s, insn);
4660         break;
4661     case 0x25: /* Move wide (immediate) */
4662         disas_movw_imm(s, insn);
4663         break;
4664     case 0x26: /* Bitfield */
4665         disas_bitfield(s, insn);
4666         break;
4667     case 0x27: /* Extract */
4668         disas_extract(s, insn);
4669         break;
4670     default:
4671         unallocated_encoding(s);
4672         break;
4673     }
4674 }
4675 
4676 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
4677  * Note that it is the caller's responsibility to ensure that the
4678  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4679  * mandated semantics for out of range shifts.
4680  */
4681 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4682                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4683 {
4684     switch (shift_type) {
4685     case A64_SHIFT_TYPE_LSL:
4686         tcg_gen_shl_i64(dst, src, shift_amount);
4687         break;
4688     case A64_SHIFT_TYPE_LSR:
4689         tcg_gen_shr_i64(dst, src, shift_amount);
4690         break;
4691     case A64_SHIFT_TYPE_ASR:
4692         if (!sf) {
4693             tcg_gen_ext32s_i64(dst, src);
4694         }
4695         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4696         break;
4697     case A64_SHIFT_TYPE_ROR:
4698         if (sf) {
4699             tcg_gen_rotr_i64(dst, src, shift_amount);
4700         } else {
4701             TCGv_i32 t0, t1;
4702             t0 = tcg_temp_new_i32();
4703             t1 = tcg_temp_new_i32();
4704             tcg_gen_extrl_i64_i32(t0, src);
4705             tcg_gen_extrl_i64_i32(t1, shift_amount);
4706             tcg_gen_rotr_i32(t0, t0, t1);
4707             tcg_gen_extu_i32_i64(dst, t0);
4708         }
4709         break;
4710     default:
4711         g_assert_not_reached(); /* all shift types should be handled */
4712         break;
4713     }
4714 
4715     if (!sf) { /* zero extend final result */
4716         tcg_gen_ext32u_i64(dst, dst);
4717     }
4718 }
4719 
4720 /* Shift a TCGv src by immediate, put result in dst.
4721  * The shift amount must be in range (this should always be true as the
4722  * relevant instructions will UNDEF on bad shift immediates).
4723  */
4724 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4725                           enum a64_shift_type shift_type, unsigned int shift_i)
4726 {
4727     assert(shift_i < (sf ? 64 : 32));
4728 
4729     if (shift_i == 0) {
4730         tcg_gen_mov_i64(dst, src);
4731     } else {
4732         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
4733     }
4734 }
4735 
4736 /* Logical (shifted register)
4737  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4738  * +----+-----+-----------+-------+---+------+--------+------+------+
4739  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4740  * +----+-----+-----------+-------+---+------+--------+------+------+
4741  */
4742 static void disas_logic_reg(DisasContext *s, uint32_t insn)
4743 {
4744     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4745     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4746 
4747     sf = extract32(insn, 31, 1);
4748     opc = extract32(insn, 29, 2);
4749     shift_type = extract32(insn, 22, 2);
4750     invert = extract32(insn, 21, 1);
4751     rm = extract32(insn, 16, 5);
4752     shift_amount = extract32(insn, 10, 6);
4753     rn = extract32(insn, 5, 5);
4754     rd = extract32(insn, 0, 5);
4755 
4756     if (!sf && (shift_amount & (1 << 5))) {
4757         unallocated_encoding(s);
4758         return;
4759     }
4760 
4761     tcg_rd = cpu_reg(s, rd);
4762 
4763     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4764         /* Unshifted ORR and ORN with WZR/XZR are the standard encodings for
4765          * register-register MOV and MVN, so they are worth special casing.
4766          */
4767         tcg_rm = cpu_reg(s, rm);
4768         if (invert) {
4769             tcg_gen_not_i64(tcg_rd, tcg_rm);
4770             if (!sf) {
4771                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4772             }
4773         } else {
4774             if (sf) {
4775                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
4776             } else {
4777                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4778             }
4779         }
4780         return;
4781     }
4782 
4783     tcg_rm = read_cpu_reg(s, rm, sf);
4784 
4785     if (shift_amount) {
4786         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4787     }
4788 
4789     tcg_rn = cpu_reg(s, rn);
4790 
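         /*
          * The operation is selected by N:opc (switch index invert:opc);
          * the inverted forms map directly onto TCG's andc/orc/eqv ops.
          */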
4791     switch (opc | (invert << 2)) {
4792     case 0: /* AND */
4793     case 3: /* ANDS */
4794         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4795         break;
4796     case 1: /* ORR */
4797         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4798         break;
4799     case 2: /* EOR */
4800         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4801         break;
4802     case 4: /* BIC */
4803     case 7: /* BICS */
4804         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4805         break;
4806     case 5: /* ORN */
4807         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4808         break;
4809     case 6: /* EON */
4810         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4811         break;
4812     default:
4813         g_assert_not_reached();
4814         break;
4815     }
4816 
4817     if (!sf) {
4818         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4819     }
4820 
4821     if (opc == 3) {
4822         gen_logic_CC(sf, tcg_rd);
4823     }
4824 }
4825 
4826 /*
4827  * Add/subtract (extended register)
4828  *
4829  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4830  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4831  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4832  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4833  *
4834  *  sf: 0 -> 32bit, 1 -> 64bit
4835  *  op: 0 -> add  , 1 -> sub
4836  *   S: 1 -> set flags
4837  * opt: 00
4838  * option: extension type (see DecodeRegExtend)
4839  * imm3: optional shift to Rm
4840  *
4841  * Rd = Rn + LSL(extend(Rm), amount)
4842  */
4843 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4844 {
4845     int rd = extract32(insn, 0, 5);
4846     int rn = extract32(insn, 5, 5);
4847     int imm3 = extract32(insn, 10, 3);
4848     int option = extract32(insn, 13, 3);
4849     int rm = extract32(insn, 16, 5);
4850     int opt = extract32(insn, 22, 2);
4851     bool setflags = extract32(insn, 29, 1);
4852     bool sub_op = extract32(insn, 30, 1);
4853     bool sf = extract32(insn, 31, 1);
4854 
4855     TCGv_i64 tcg_rm, tcg_rn; /* temps */
4856     TCGv_i64 tcg_rd;
4857     TCGv_i64 tcg_result;
4858 
4859     if (imm3 > 4 || opt != 0) {
4860         unallocated_encoding(s);
4861         return;
4862     }
4863 
4864     /* non-flag setting ops may use SP */
4865     if (!setflags) {
4866         tcg_rd = cpu_reg_sp(s, rd);
4867     } else {
4868         tcg_rd = cpu_reg(s, rd);
4869     }
4870     tcg_rn = read_cpu_reg_sp(s, rn, sf);
4871 
4872     tcg_rm = read_cpu_reg(s, rm, sf);
4873     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4874 
4875     tcg_result = tcg_temp_new_i64();
4876 
4877     if (!setflags) {
4878         if (sub_op) {
4879             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4880         } else {
4881             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4882         }
4883     } else {
4884         if (sub_op) {
4885             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4886         } else {
4887             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4888         }
4889     }
4890 
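         /* A 32-bit (sf == 0) result is written back zero-extended to 64 bits. */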
4891     if (sf) {
4892         tcg_gen_mov_i64(tcg_rd, tcg_result);
4893     } else {
4894         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4895     }
4896 }
4897 
4898 /*
4899  * Add/subtract (shifted register)
4900  *
4901  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4902  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4903  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4904  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4905  *
4906  *    sf: 0 -> 32bit, 1 -> 64bit
4907  *    op: 0 -> add  , 1 -> sub
4908  *     S: 1 -> set flags
4909  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4910  *  imm6: Shift amount to apply to Rm before the add/sub
4911  */
4912 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4913 {
4914     int rd = extract32(insn, 0, 5);
4915     int rn = extract32(insn, 5, 5);
4916     int imm6 = extract32(insn, 10, 6);
4917     int rm = extract32(insn, 16, 5);
4918     int shift_type = extract32(insn, 22, 2);
4919     bool setflags = extract32(insn, 29, 1);
4920     bool sub_op = extract32(insn, 30, 1);
4921     bool sf = extract32(insn, 31, 1);
4922 
4923     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4924     TCGv_i64 tcg_rn, tcg_rm;
4925     TCGv_i64 tcg_result;
4926 
4927     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4928         unallocated_encoding(s);
4929         return;
4930     }
4931 
4932     tcg_rn = read_cpu_reg(s, rn, sf);
4933     tcg_rm = read_cpu_reg(s, rm, sf);
4934 
4935     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4936 
4937     tcg_result = tcg_temp_new_i64();
4938 
4939     if (!setflags) {
4940         if (sub_op) {
4941             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4942         } else {
4943             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4944         }
4945     } else {
4946         if (sub_op) {
4947             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4948         } else {
4949             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4950         }
4951     }
4952 
4953     if (sf) {
4954         tcg_gen_mov_i64(tcg_rd, tcg_result);
4955     } else {
4956         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4957     }
4958 }
4959 
4960 /* Data-processing (3 source)
4961  *
4962  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
4963  *  +--+------+-----------+------+------+----+------+------+------+
4964  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4965  *  +--+------+-----------+------+------+----+------+------+------+
4966  */
4967 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4968 {
4969     int rd = extract32(insn, 0, 5);
4970     int rn = extract32(insn, 5, 5);
4971     int ra = extract32(insn, 10, 5);
4972     int rm = extract32(insn, 16, 5);
4973     int op_id = (extract32(insn, 29, 3) << 4) |
4974         (extract32(insn, 21, 3) << 1) |
4975         extract32(insn, 15, 1);
4976     bool sf = extract32(insn, 31, 1);
4977     bool is_sub = extract32(op_id, 0, 1);
4978     bool is_high = extract32(op_id, 2, 1);
4979     bool is_signed = false;
4980     TCGv_i64 tcg_op1;
4981     TCGv_i64 tcg_op2;
4982     TCGv_i64 tcg_tmp;
4983 
4984     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4985     switch (op_id) {
4986     case 0x42: /* SMADDL */
4987     case 0x43: /* SMSUBL */
4988     case 0x44: /* SMULH */
4989         is_signed = true;
4990         break;
4991     case 0x0: /* MADD (32bit) */
4992     case 0x1: /* MSUB (32bit) */
4993     case 0x40: /* MADD (64bit) */
4994     case 0x41: /* MSUB (64bit) */
4995     case 0x4a: /* UMADDL */
4996     case 0x4b: /* UMSUBL */
4997     case 0x4c: /* UMULH */
4998         break;
4999     default:
5000         unallocated_encoding(s);
5001         return;
5002     }
5003 
5004     if (is_high) {
5005         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
5006         TCGv_i64 tcg_rd = cpu_reg(s, rd);
5007         TCGv_i64 tcg_rn = cpu_reg(s, rn);
5008         TCGv_i64 tcg_rm = cpu_reg(s, rm);
5009 
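             /*
              * muls2/mulu2 compute the full 128-bit product; SMULH and
              * UMULH keep only the high 64 bits and ignore Ra.
              */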
5010         if (is_signed) {
5011             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
5012         } else {
5013             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
5014         }
5015         return;
5016     }
5017 
5018     tcg_op1 = tcg_temp_new_i64();
5019     tcg_op2 = tcg_temp_new_i64();
5020     tcg_tmp = tcg_temp_new_i64();
5021 
5022     if (op_id < 0x42) {
5023         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
5024         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
5025     } else {
5026         if (is_signed) {
5027             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
5028             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
5029         } else {
5030             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
5031             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
5032         }
5033     }
5034 
5035     if (ra == 31 && !is_sub) {
5036         /* Special-case MADD with Ra == XZR; it is the standard MUL alias */
5037         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
5038     } else {
5039         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
5040         if (is_sub) {
5041             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
5042         } else {
5043             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
5044         }
5045     }
5046 
5047     if (!sf) {
5048         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
5049     }
5050 }
5051 
5052 /* Add/subtract (with carry)
5053  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
5054  * +--+--+--+------------------------+------+-------------+------+-----+
5055  * |sf|op| S| 1  1  0  1  0  0  0  0 |  Rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
5056  * +--+--+--+------------------------+------+-------------+------+-----+
5057  */
5058 
5059 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
5060 {
5061     unsigned int sf, op, setflags, rm, rn, rd;
5062     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
5063 
5064     sf = extract32(insn, 31, 1);
5065     op = extract32(insn, 30, 1);
5066     setflags = extract32(insn, 29, 1);
5067     rm = extract32(insn, 16, 5);
5068     rn = extract32(insn, 5, 5);
5069     rd = extract32(insn, 0, 5);
5070 
5071     tcg_rd = cpu_reg(s, rd);
5072     tcg_rn = cpu_reg(s, rn);
5073 
5074     if (op) {
5075         tcg_y = tcg_temp_new_i64();
5076         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
5077     } else {
5078         tcg_y = cpu_reg(s, rm);
5079     }
5080 
5081     if (setflags) {
5082         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
5083     } else {
5084         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
5085     }
5086 }
5087 
5088 /*
5089  * Rotate right into flags
5090  *  31 30 29                21       15          10      5  4      0
5091  * +--+--+--+-----------------+--------+-----------+------+--+------+
5092  * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
5093  * +--+--+--+-----------------+--------+-----------+------+--+------+
5094  */
5095 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
5096 {
5097     int mask = extract32(insn, 0, 4);
5098     int o2 = extract32(insn, 4, 1);
5099     int rn = extract32(insn, 5, 5);
5100     int imm6 = extract32(insn, 15, 6);
5101     int sf_op_s = extract32(insn, 29, 3);
5102     TCGv_i64 tcg_rn;
5103     TCGv_i32 nzcv;
5104 
5105     if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
5106         unallocated_encoding(s);
5107         return;
5108     }
5109 
5110     tcg_rn = read_cpu_reg(s, rn, 1);
5111     tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
5112 
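         /*
          * Bits 3..0 of the rotated value supply N, Z, C and V respectively;
          * only the flags selected by the mask are written.
          */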
5113     nzcv = tcg_temp_new_i32();
5114     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
5115 
5116     if (mask & 8) { /* N */
5117         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
5118     }
5119     if (mask & 4) { /* Z */
5120         tcg_gen_not_i32(cpu_ZF, nzcv);
5121         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
5122     }
5123     if (mask & 2) { /* C */
5124         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
5125     }
5126     if (mask & 1) { /* V */
5127         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
5128     }
5129 }
5130 
5131 /*
5132  * Evaluate into flags
5133  *  31 30 29                21        15   14        10      5  4      0
5134  * +--+--+--+-----------------+---------+----+---------+------+--+------+
5135  * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
5136  * +--+--+--+-----------------+---------+----+---------+------+--+------+
5137  */
5138 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
5139 {
5140     int o3_mask = extract32(insn, 0, 5);
5141     int rn = extract32(insn, 5, 5);
5142     int o2 = extract32(insn, 15, 6);
5143     int sz = extract32(insn, 14, 1);
5144     int sf_op_s = extract32(insn, 29, 3);
5145     TCGv_i32 tmp;
5146     int shift;
5147 
5148     if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
5149         !dc_isar_feature(aa64_condm_4, s)) {
5150         unallocated_encoding(s);
5151         return;
5152     }
5153     shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
5154 
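         /*
          * N is the sign bit of the 8/16-bit value, shifted up into bit 31.
          * ZF holds the same shifted copy, which is zero exactly when the
          * low 8/16 bits are all zero.  V is bit <width> XOR bit <width-1>,
          * computed by shifting one place less and XORing with NF.
          */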
5155     tmp = tcg_temp_new_i32();
5156     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
5157     tcg_gen_shli_i32(cpu_NF, tmp, shift);
5158     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
5159     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
5160     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
5161 }
5162 
5163 /* Conditional compare (immediate / register)
5164  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
5165  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
5166  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
5167  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
5168  *        [1]                             y                [0]       [0]
5169  */
5170 static void disas_cc(DisasContext *s, uint32_t insn)
5171 {
5172     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
5173     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
5174     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
5175     DisasCompare c;
5176 
5177     if (!extract32(insn, 29, 1)) {
5178         unallocated_encoding(s);
5179         return;
5180     }
5181     if (insn & (1 << 10 | 1 << 4)) {
5182         unallocated_encoding(s);
5183         return;
5184     }
5185     sf = extract32(insn, 31, 1);
5186     op = extract32(insn, 30, 1);
5187     is_imm = extract32(insn, 11, 1);
5188     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
5189     cond = extract32(insn, 12, 4);
5190     rn = extract32(insn, 5, 5);
5191     nzcv = extract32(insn, 0, 4);
5192 
5193     /* Set T0 = !COND.  */
5194     tcg_t0 = tcg_temp_new_i32();
5195     arm_test_cc(&c, cond);
5196     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
5197 
5198     /* Load the arguments for the new comparison.  */
5199     if (is_imm) {
5200         tcg_y = tcg_temp_new_i64();
5201         tcg_gen_movi_i64(tcg_y, y);
5202     } else {
5203         tcg_y = cpu_reg(s, y);
5204     }
5205     tcg_rn = cpu_reg(s, rn);
5206 
5207     /* Set the flags for the new comparison.  */
5208     tcg_tmp = tcg_temp_new_i64();
5209     if (op) {
5210         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5211     } else {
5212         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5213     }
5214 
5215     /* If COND was false, force the flags to #nzcv.  Compute two masks
5216      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
5217      * For tcg hosts that support ANDC, we can make do with just T1.
5218      * In either case, allow the tcg optimizer to delete any unused mask.
5219      */
5220     tcg_t1 = tcg_temp_new_i32();
5221     tcg_t2 = tcg_temp_new_i32();
5222     tcg_gen_neg_i32(tcg_t1, tcg_t0);
5223     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
5224 
5225     if (nzcv & 8) { /* N */
5226         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
5227     } else {
5228         if (TCG_TARGET_HAS_andc_i32) {
5229             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
5230         } else {
5231             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
5232         }
5233     }
5234     if (nzcv & 4) { /* Z */
5235         if (TCG_TARGET_HAS_andc_i32) {
5236             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
5237         } else {
5238             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
5239         }
5240     } else {
5241         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
5242     }
5243     if (nzcv & 2) { /* C */
5244         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
5245     } else {
5246         if (TCG_TARGET_HAS_andc_i32) {
5247             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
5248         } else {
5249             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
5250         }
5251     }
5252     if (nzcv & 1) { /* V */
5253         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
5254     } else {
5255         if (TCG_TARGET_HAS_andc_i32) {
5256             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
5257         } else {
5258             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
5259         }
5260     }
5261 }
5262 
5263 /* Conditional select
5264  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
5265  * +----+----+---+-----------------+------+------+-----+------+------+
5266  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
5267  * +----+----+---+-----------------+------+------+-----+------+------+
5268  */
5269 static void disas_cond_select(DisasContext *s, uint32_t insn)
5270 {
5271     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
5272     TCGv_i64 tcg_rd, zero;
5273     DisasCompare64 c;
5274 
5275     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
5276         /* S == 1 or op2<1> == 1 */
5277         unallocated_encoding(s);
5278         return;
5279     }
5280     sf = extract32(insn, 31, 1);
5281     else_inv = extract32(insn, 30, 1);
5282     rm = extract32(insn, 16, 5);
5283     cond = extract32(insn, 12, 4);
5284     else_inc = extract32(insn, 10, 1);
5285     rn = extract32(insn, 5, 5);
5286     rd = extract32(insn, 0, 5);
5287 
5288     tcg_rd = cpu_reg(s, rd);
5289 
5290     a64_test_cc(&c, cond);
5291     zero = tcg_constant_i64(0);
5292 
5293     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
5294         /* CSET & CSETM.  */
5295         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
5296         if (else_inv) {
5297             tcg_gen_neg_i64(tcg_rd, tcg_rd);
5298         }
5299     } else {
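             /*
              * This path also implements the CINC, CINV and CNEG aliases
              * (CSINC/CSINV/CSNEG with Rn == Rm and the condition inverted).
              */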
5300         TCGv_i64 t_true = cpu_reg(s, rn);
5301         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
5302         if (else_inv && else_inc) {
5303             tcg_gen_neg_i64(t_false, t_false);
5304         } else if (else_inv) {
5305             tcg_gen_not_i64(t_false, t_false);
5306         } else if (else_inc) {
5307             tcg_gen_addi_i64(t_false, t_false, 1);
5308         }
5309         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
5310     }
5311 
5312     if (!sf) {
5313         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5314     }
5315 }
5316 
5317 static void handle_clz(DisasContext *s, unsigned int sf,
5318                        unsigned int rn, unsigned int rd)
5319 {
5320     TCGv_i64 tcg_rd, tcg_rn;
5321     tcg_rd = cpu_reg(s, rd);
5322     tcg_rn = cpu_reg(s, rn);
5323 
5324     if (sf) {
5325         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
5326     } else {
5327         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5328         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5329         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
5330         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5331     }
5332 }
5333 
5334 static void handle_cls(DisasContext *s, unsigned int sf,
5335                        unsigned int rn, unsigned int rd)
5336 {
5337     TCGv_i64 tcg_rd, tcg_rn;
5338     tcg_rd = cpu_reg(s, rd);
5339     tcg_rn = cpu_reg(s, rn);
5340 
5341     if (sf) {
5342         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
5343     } else {
5344         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5345         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5346         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
5347         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5348     }
5349 }
5350 
5351 static void handle_rbit(DisasContext *s, unsigned int sf,
5352                         unsigned int rn, unsigned int rd)
5353 {
5354     TCGv_i64 tcg_rd, tcg_rn;
5355     tcg_rd = cpu_reg(s, rd);
5356     tcg_rn = cpu_reg(s, rn);
5357 
5358     if (sf) {
5359         gen_helper_rbit64(tcg_rd, tcg_rn);
5360     } else {
5361         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5362         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5363         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
5364         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5365     }
5366 }
5367 
5368 /* REV with sf==1, opcode==3 ("REV64") */
5369 static void handle_rev64(DisasContext *s, unsigned int sf,
5370                          unsigned int rn, unsigned int rd)
5371 {
5372     if (!sf) {
5373         unallocated_encoding(s);
5374         return;
5375     }
5376     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
5377 }
5378 
5379 /* REV (sf==0, opcode==2)
5380  * REV32 (sf==1, opcode==2)
5381  */
5382 static void handle_rev32(DisasContext *s, unsigned int sf,
5383                          unsigned int rn, unsigned int rd)
5384 {
5385     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5386     TCGv_i64 tcg_rn = cpu_reg(s, rn);
5387 
5388     if (sf) {
5389         tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
5390         tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
5391     } else {
5392         tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
5393     }
5394 }
5395 
5396 /* REV16 (opcode==1) */
5397 static void handle_rev16(DisasContext *s, unsigned int sf,
5398                          unsigned int rn, unsigned int rd)
5399 {
5400     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5401     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5402     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5403     TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
5404 
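         /*
          * Swap the bytes within each 16-bit halfword,
          * e.g. (sf == 0) 0xAABBCCDD -> 0xBBAADDCC.
          */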
5405     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
5406     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
5407     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
5408     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
5409     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
5410 }
5411 
5412 /* Data-processing (1 source)
5413  *   31  30  29  28             21 20     16 15    10 9    5 4    0
5414  * +----+---+---+-----------------+---------+--------+------+------+
5415  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
5416  * +----+---+---+-----------------+---------+--------+------+------+
5417  */
5418 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
5419 {
5420     unsigned int sf, opcode, opcode2, rn, rd;
5421     TCGv_i64 tcg_rd;
5422 
5423     if (extract32(insn, 29, 1)) {
5424         unallocated_encoding(s);
5425         return;
5426     }
5427 
5428     sf = extract32(insn, 31, 1);
5429     opcode = extract32(insn, 10, 6);
5430     opcode2 = extract32(insn, 16, 5);
5431     rn = extract32(insn, 5, 5);
5432     rd = extract32(insn, 0, 5);
5433 
5434 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
5435 
5436     switch (MAP(sf, opcode2, opcode)) {
5437     case MAP(0, 0x00, 0x00): /* RBIT */
5438     case MAP(1, 0x00, 0x00):
5439         handle_rbit(s, sf, rn, rd);
5440         break;
5441     case MAP(0, 0x00, 0x01): /* REV16 */
5442     case MAP(1, 0x00, 0x01):
5443         handle_rev16(s, sf, rn, rd);
5444         break;
5445     case MAP(0, 0x00, 0x02): /* REV/REV32 */
5446     case MAP(1, 0x00, 0x02):
5447         handle_rev32(s, sf, rn, rd);
5448         break;
5449     case MAP(1, 0x00, 0x03): /* REV64 */
5450         handle_rev64(s, sf, rn, rd);
5451         break;
5452     case MAP(0, 0x00, 0x04): /* CLZ */
5453     case MAP(1, 0x00, 0x04):
5454         handle_clz(s, sf, rn, rd);
5455         break;
5456     case MAP(0, 0x00, 0x05): /* CLS */
5457     case MAP(1, 0x00, 0x05):
5458         handle_cls(s, sf, rn, rd);
5459         break;
5460     case MAP(1, 0x01, 0x00): /* PACIA */
5461         if (s->pauth_active) {
5462             tcg_rd = cpu_reg(s, rd);
5463             gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5464         } else if (!dc_isar_feature(aa64_pauth, s)) {
5465             goto do_unallocated;
5466         }
5467         break;
5468     case MAP(1, 0x01, 0x01): /* PACIB */
5469         if (s->pauth_active) {
5470             tcg_rd = cpu_reg(s, rd);
5471             gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5472         } else if (!dc_isar_feature(aa64_pauth, s)) {
5473             goto do_unallocated;
5474         }
5475         break;
5476     case MAP(1, 0x01, 0x02): /* PACDA */
5477         if (s->pauth_active) {
5478             tcg_rd = cpu_reg(s, rd);
5479             gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5480         } else if (!dc_isar_feature(aa64_pauth, s)) {
5481             goto do_unallocated;
5482         }
5483         break;
5484     case MAP(1, 0x01, 0x03): /* PACDB */
5485         if (s->pauth_active) {
5486             tcg_rd = cpu_reg(s, rd);
5487             gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5488         } else if (!dc_isar_feature(aa64_pauth, s)) {
5489             goto do_unallocated;
5490         }
5491         break;
5492     case MAP(1, 0x01, 0x04): /* AUTIA */
5493         if (s->pauth_active) {
5494             tcg_rd = cpu_reg(s, rd);
5495             gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5496         } else if (!dc_isar_feature(aa64_pauth, s)) {
5497             goto do_unallocated;
5498         }
5499         break;
5500     case MAP(1, 0x01, 0x05): /* AUTIB */
5501         if (s->pauth_active) {
5502             tcg_rd = cpu_reg(s, rd);
5503             gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5504         } else if (!dc_isar_feature(aa64_pauth, s)) {
5505             goto do_unallocated;
5506         }
5507         break;
5508     case MAP(1, 0x01, 0x06): /* AUTDA */
5509         if (s->pauth_active) {
5510             tcg_rd = cpu_reg(s, rd);
5511             gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5512         } else if (!dc_isar_feature(aa64_pauth, s)) {
5513             goto do_unallocated;
5514         }
5515         break;
5516     case MAP(1, 0x01, 0x07): /* AUTDB */
5517         if (s->pauth_active) {
5518             tcg_rd = cpu_reg(s, rd);
5519             gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5520         } else if (!dc_isar_feature(aa64_pauth, s)) {
5521             goto do_unallocated;
5522         }
5523         break;
5524     case MAP(1, 0x01, 0x08): /* PACIZA */
5525         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5526             goto do_unallocated;
5527         } else if (s->pauth_active) {
5528             tcg_rd = cpu_reg(s, rd);
5529             gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5530         }
5531         break;
5532     case MAP(1, 0x01, 0x09): /* PACIZB */
5533         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5534             goto do_unallocated;
5535         } else if (s->pauth_active) {
5536             tcg_rd = cpu_reg(s, rd);
5537             gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5538         }
5539         break;
5540     case MAP(1, 0x01, 0x0a): /* PACDZA */
5541         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5542             goto do_unallocated;
5543         } else if (s->pauth_active) {
5544             tcg_rd = cpu_reg(s, rd);
5545             gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5546         }
5547         break;
5548     case MAP(1, 0x01, 0x0b): /* PACDZB */
5549         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5550             goto do_unallocated;
5551         } else if (s->pauth_active) {
5552             tcg_rd = cpu_reg(s, rd);
5553             gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5554         }
5555         break;
5556     case MAP(1, 0x01, 0x0c): /* AUTIZA */
5557         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5558             goto do_unallocated;
5559         } else if (s->pauth_active) {
5560             tcg_rd = cpu_reg(s, rd);
5561             gen_helper_autia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5562         }
5563         break;
5564     case MAP(1, 0x01, 0x0d): /* AUTIZB */
5565         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5566             goto do_unallocated;
5567         } else if (s->pauth_active) {
5568             tcg_rd = cpu_reg(s, rd);
5569             gen_helper_autib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5570         }
5571         break;
5572     case MAP(1, 0x01, 0x0e): /* AUTDZA */
5573         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5574             goto do_unallocated;
5575         } else if (s->pauth_active) {
5576             tcg_rd = cpu_reg(s, rd);
5577             gen_helper_autda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5578         }
5579         break;
5580     case MAP(1, 0x01, 0x0f): /* AUTDZB */
5581         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5582             goto do_unallocated;
5583         } else if (s->pauth_active) {
5584             tcg_rd = cpu_reg(s, rd);
5585             gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5586         }
5587         break;
5588     case MAP(1, 0x01, 0x10): /* XPACI */
5589         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5590             goto do_unallocated;
5591         } else if (s->pauth_active) {
5592             tcg_rd = cpu_reg(s, rd);
5593             gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
5594         }
5595         break;
5596     case MAP(1, 0x01, 0x11): /* XPACD */
5597         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5598             goto do_unallocated;
5599         } else if (s->pauth_active) {
5600             tcg_rd = cpu_reg(s, rd);
5601             gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
5602         }
5603         break;
5604     default:
5605     do_unallocated:
5606         unallocated_encoding(s);
5607         break;
5608     }
5609 
5610 #undef MAP
5611 }
5612 
5613 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5614                        unsigned int rm, unsigned int rn, unsigned int rd)
5615 {
5616     TCGv_i64 tcg_n, tcg_m, tcg_rd;
5617     tcg_rd = cpu_reg(s, rd);
5618 
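         /*
          * For a 32-bit signed divide, sign-extend both operands so that
          * the 64-bit helper yields the 32-bit result; the INT32_MIN / -1
          * case then truncates back to INT32_MIN as required.
          */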
5619     if (!sf && is_signed) {
5620         tcg_n = tcg_temp_new_i64();
5621         tcg_m = tcg_temp_new_i64();
5622         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5623         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5624     } else {
5625         tcg_n = read_cpu_reg(s, rn, sf);
5626         tcg_m = read_cpu_reg(s, rm, sf);
5627     }
5628 
5629     if (is_signed) {
5630         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5631     } else {
5632         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5633     }
5634 
5635     if (!sf) { /* zero extend final result */
5636         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5637     }
5638 }
5639 
5640 /* LSLV, LSRV, ASRV, RORV */
5641 static void handle_shift_reg(DisasContext *s,
5642                              enum a64_shift_type shift_type, unsigned int sf,
5643                              unsigned int rm, unsigned int rn, unsigned int rd)
5644 {
5645     TCGv_i64 tcg_shift = tcg_temp_new_i64();
5646     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5647     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5648 
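         /* The architecture takes the shift amount from Rm modulo the
          * register width, which also satisfies shift_reg()'s requirement
          * that the amount be in range.
          */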
5649     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5650     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5651 }
5652 
5653 /* CRC32[BHWX], CRC32C[BHWX] */
5654 static void handle_crc32(DisasContext *s,
5655                          unsigned int sf, unsigned int sz, bool crc32c,
5656                          unsigned int rm, unsigned int rn, unsigned int rd)
5657 {
5658     TCGv_i64 tcg_acc, tcg_val;
5659     TCGv_i32 tcg_bytes;
5660 
5661     if (!dc_isar_feature(aa64_crc32, s)
5662         || (sf == 1 && sz != 3)
5663         || (sf == 0 && sz == 3)) {
5664         unallocated_encoding(s);
5665         return;
5666     }
5667 
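         /* Narrower operands are masked down because the helper always
          * takes a 64-bit value plus an explicit byte count.
          */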
5668     if (sz == 3) {
5669         tcg_val = cpu_reg(s, rm);
5670     } else {
5671         uint64_t mask;
5672         switch (sz) {
5673         case 0:
5674             mask = 0xFF;
5675             break;
5676         case 1:
5677             mask = 0xFFFF;
5678             break;
5679         case 2:
5680             mask = 0xFFFFFFFF;
5681             break;
5682         default:
5683             g_assert_not_reached();
5684         }
5685         tcg_val = tcg_temp_new_i64();
5686         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5687     }
5688 
5689     tcg_acc = cpu_reg(s, rn);
5690     tcg_bytes = tcg_constant_i32(1 << sz);
5691 
5692     if (crc32c) {
5693         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5694     } else {
5695         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5696     }
5697 }
5698 
5699 /* Data-processing (2 source)
5700  *   31   30  29 28             21 20  16 15    10 9    5 4    0
5701  * +----+---+---+-----------------+------+--------+------+------+
5702  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
5703  * +----+---+---+-----------------+------+--------+------+------+
5704  */
5705 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5706 {
5707     unsigned int sf, rm, opcode, rn, rd, setflag;
5708     sf = extract32(insn, 31, 1);
5709     setflag = extract32(insn, 29, 1);
5710     rm = extract32(insn, 16, 5);
5711     opcode = extract32(insn, 10, 6);
5712     rn = extract32(insn, 5, 5);
5713     rd = extract32(insn, 0, 5);
5714 
5715     if (setflag && opcode != 0) {
5716         unallocated_encoding(s);
5717         return;
5718     }
5719 
5720     switch (opcode) {
5721     case 0: /* SUBP(S) */
5722         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5723             goto do_unallocated;
5724         } else {
5725             TCGv_i64 tcg_n, tcg_m, tcg_d;
5726 
5727             tcg_n = read_cpu_reg_sp(s, rn, true);
5728             tcg_m = read_cpu_reg_sp(s, rm, true);
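                 /* SUBP operates on the 56-bit address parts, sign-extended
                  * from bit 55; the top byte holds the tag.
                  */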
5729             tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
5730             tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
5731             tcg_d = cpu_reg(s, rd);
5732 
5733             if (setflag) {
5734                 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
5735             } else {
5736                 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
5737             }
5738         }
5739         break;
5740     case 2: /* UDIV */
5741         handle_div(s, false, sf, rm, rn, rd);
5742         break;
5743     case 3: /* SDIV */
5744         handle_div(s, true, sf, rm, rn, rd);
5745         break;
5746     case 4: /* IRG */
5747         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5748             goto do_unallocated;
5749         }
5750         if (s->ata) {
5751             gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
5752                            cpu_reg_sp(s, rn), cpu_reg(s, rm));
5753         } else {
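                 /* Tags disabled: IRG writes the address with allocation tag 0. */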
5754             gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
5755                                              cpu_reg_sp(s, rn));
5756         }
5757         break;
5758     case 5: /* GMI */
5759         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5760             goto do_unallocated;
5761         } else {
5762             TCGv_i64 t = tcg_temp_new_i64();
5763 
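                 /*
                  * GMI: set the bit corresponding to Xn's allocation tag
                  * (bits 59..56) in the exclusion mask from Xm.
                  */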
5764             tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
5765             tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
5766             tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
5767         }
5768         break;
5769     case 8: /* LSLV */
5770         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5771         break;
5772     case 9: /* LSRV */
5773         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5774         break;
5775     case 10: /* ASRV */
5776         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5777         break;
5778     case 11: /* RORV */
5779         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5780         break;
5781     case 12: /* PACGA */
5782         if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5783             goto do_unallocated;
5784         }
5785         gen_helper_pacga(cpu_reg(s, rd), cpu_env,
5786                          cpu_reg(s, rn), cpu_reg_sp(s, rm));
5787         break;
5788     case 16:
5789     case 17:
5790     case 18:
5791     case 19:
5792     case 20:
5793     case 21:
5794     case 22:
5795     case 23: /* CRC32 */
5796     {
5797         int sz = extract32(opcode, 0, 2);
5798         bool crc32c = extract32(opcode, 2, 1);
5799         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5800         break;
5801     }
5802     default:
5803     do_unallocated:
5804         unallocated_encoding(s);
5805         break;
5806     }
5807 }
5808 
5809 /*
5810  * Data processing - register
5811  *  31  30 29  28      25    21  20  16      10         0
5812  * +--+---+--+---+-------+-----+-------+-------+---------+
5813  * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
5814  * +--+---+--+---+-------+-----+-------+-------+---------+
5815  */
5816 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5817 {
5818     int op0 = extract32(insn, 30, 1);
5819     int op1 = extract32(insn, 28, 1);
5820     int op2 = extract32(insn, 21, 4);
5821     int op3 = extract32(insn, 10, 6);
5822 
5823     if (!op1) {
5824         if (op2 & 8) {
5825             if (op2 & 1) {
5826                 /* Add/sub (extended register) */
5827                 disas_add_sub_ext_reg(s, insn);
5828             } else {
5829                 /* Add/sub (shifted register) */
5830                 disas_add_sub_reg(s, insn);
5831             }
5832         } else {
5833             /* Logical (shifted register) */
5834             disas_logic_reg(s, insn);
5835         }
5836         return;
5837     }
5838 
5839     switch (op2) {
5840     case 0x0:
5841         switch (op3) {
5842         case 0x00: /* Add/subtract (with carry) */
5843             disas_adc_sbc(s, insn);
5844             break;
5845 
5846         case 0x01: /* Rotate right into flags */
5847         case 0x21:
5848             disas_rotate_right_into_flags(s, insn);
5849             break;
5850 
5851         case 0x02: /* Evaluate into flags */
5852         case 0x12:
5853         case 0x22:
5854         case 0x32:
5855             disas_evaluate_into_flags(s, insn);
5856             break;
5857 
5858         default:
5859             goto do_unallocated;
5860         }
5861         break;
5862 
5863     case 0x2: /* Conditional compare */
5864         disas_cc(s, insn); /* both imm and reg forms */
5865         break;
5866 
5867     case 0x4: /* Conditional select */
5868         disas_cond_select(s, insn);
5869         break;
5870 
5871     case 0x6: /* Data-processing */
5872         if (op0) {    /* (1 source) */
5873             disas_data_proc_1src(s, insn);
5874         } else {      /* (2 source) */
5875             disas_data_proc_2src(s, insn);
5876         }
5877         break;
5878     case 0x8 ... 0xf: /* (3 source) */
5879         disas_data_proc_3src(s, insn);
5880         break;
5881 
5882     default:
5883     do_unallocated:
5884         unallocated_encoding(s);
5885         break;
5886     }
5887 }
5888 
5889 static void handle_fp_compare(DisasContext *s, int size,
5890                               unsigned int rn, unsigned int rm,
5891                               bool cmp_with_zero, bool signal_all_nans)
5892 {
5893     TCGv_i64 tcg_flags = tcg_temp_new_i64();
5894     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
5895 
5896     if (size == MO_64) {
5897         TCGv_i64 tcg_vn, tcg_vm;
5898 
5899         tcg_vn = read_fp_dreg(s, rn);
5900         if (cmp_with_zero) {
5901             tcg_vm = tcg_constant_i64(0);
5902         } else {
5903             tcg_vm = read_fp_dreg(s, rm);
5904         }
5905         if (signal_all_nans) {
5906             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5907         } else {
5908             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5909         }
5910     } else {
5911         TCGv_i32 tcg_vn = tcg_temp_new_i32();
5912         TCGv_i32 tcg_vm = tcg_temp_new_i32();
5913 
5914         read_vec_element_i32(s, tcg_vn, rn, 0, size);
5915         if (cmp_with_zero) {
5916             tcg_gen_movi_i32(tcg_vm, 0);
5917         } else {
5918             read_vec_element_i32(s, tcg_vm, rm, 0, size);
5919         }
5920 
5921         switch (size) {
5922         case MO_32:
5923             if (signal_all_nans) {
5924                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5925             } else {
5926                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5927             }
5928             break;
5929         case MO_16:
5930             if (signal_all_nans) {
5931                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5932             } else {
5933                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5934             }
5935             break;
5936         default:
5937             g_assert_not_reached();
5938         }
5939     }
5940 
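         /* The compare helpers return the NZCV result packed into bits 31..28. */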
5941     gen_set_nzcv(tcg_flags);
5942 }
5943 
5944 /* Floating point compare
5945  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
5946  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5947  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
5948  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5949  */
5950 static void disas_fp_compare(DisasContext *s, uint32_t insn)
5951 {
5952     unsigned int mos, type, rm, op, rn, opc, op2r;
5953     int size;
5954 
5955     mos = extract32(insn, 29, 3);
5956     type = extract32(insn, 22, 2);
5957     rm = extract32(insn, 16, 5);
5958     op = extract32(insn, 14, 2);
5959     rn = extract32(insn, 5, 5);
5960     opc = extract32(insn, 3, 2);
5961     op2r = extract32(insn, 0, 3);
5962 
5963     if (mos || op || op2r) {
5964         unallocated_encoding(s);
5965         return;
5966     }
5967 
5968     switch (type) {
5969     case 0:
5970         size = MO_32;
5971         break;
5972     case 1:
5973         size = MO_64;
5974         break;
5975     case 3:
5976         size = MO_16;
5977         if (dc_isar_feature(aa64_fp16, s)) {
5978             break;
5979         }
5980         /* fallthru */
5981     default:
5982         unallocated_encoding(s);
5983         return;
5984     }
5985 
5986     if (!fp_access_check(s)) {
5987         return;
5988     }
5989 
5990     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
5991 }
5992 
5993 /* Floating point conditional compare
5994  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
5995  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5996  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
5997  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5998  */
5999 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
6000 {
6001     unsigned int mos, type, rm, cond, rn, op, nzcv;
6002     TCGLabel *label_continue = NULL;
6003     int size;
6004 
6005     mos = extract32(insn, 29, 3);
6006     type = extract32(insn, 22, 2);
6007     rm = extract32(insn, 16, 5);
6008     cond = extract32(insn, 12, 4);
6009     rn = extract32(insn, 5, 5);
6010     op = extract32(insn, 4, 1);
6011     nzcv = extract32(insn, 0, 4);
6012 
6013     if (mos) {
6014         unallocated_encoding(s);
6015         return;
6016     }
6017 
6018     switch (type) {
6019     case 0:
6020         size = MO_32;
6021         break;
6022     case 1:
6023         size = MO_64;
6024         break;
6025     case 3:
6026         size = MO_16;
6027         if (dc_isar_feature(aa64_fp16, s)) {
6028             break;
6029         }
6030         /* fallthru */
6031     default:
6032         unallocated_encoding(s);
6033         return;
6034     }
6035 
6036     if (!fp_access_check(s)) {
6037         return;
6038     }
6039 
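         /*
          * If the condition fails, set the flags directly to #nzcv and
          * skip the compare; otherwise do the real compare below.
          */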
6040     if (cond < 0x0e) { /* not always */
6041         TCGLabel *label_match = gen_new_label();
6042         label_continue = gen_new_label();
6043         arm_gen_test_cc(cond, label_match);
6044         /* nomatch: */
6045         gen_set_nzcv(tcg_constant_i64(nzcv << 28));
6046         tcg_gen_br(label_continue);
6047         gen_set_label(label_match);
6048     }
6049 
6050     handle_fp_compare(s, size, rn, rm, false, op);
6051 
6052     if (cond < 0x0e) {
6053         gen_set_label(label_continue);
6054     }
6055 }
6056 
6057 /* Floating point conditional select
6058  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
6059  * +---+---+---+-----------+------+---+------+------+-----+------+------+
6060  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
6061  * +---+---+---+-----------+------+---+------+------+-----+------+------+
6062  */
6063 static void disas_fp_csel(DisasContext *s, uint32_t insn)
6064 {
6065     unsigned int mos, type, rm, cond, rn, rd;
6066     TCGv_i64 t_true, t_false;
6067     DisasCompare64 c;
6068     MemOp sz;
6069 
6070     mos = extract32(insn, 29, 3);
6071     type = extract32(insn, 22, 2);
6072     rm = extract32(insn, 16, 5);
6073     cond = extract32(insn, 12, 4);
6074     rn = extract32(insn, 5, 5);
6075     rd = extract32(insn, 0, 5);
6076 
6077     if (mos) {
6078         unallocated_encoding(s);
6079         return;
6080     }
6081 
6082     switch (type) {
6083     case 0:
6084         sz = MO_32;
6085         break;
6086     case 1:
6087         sz = MO_64;
6088         break;
6089     case 3:
6090         sz = MO_16;
6091         if (dc_isar_feature(aa64_fp16, s)) {
6092             break;
6093         }
6094         /* fallthru */
6095     default:
6096         unallocated_encoding(s);
6097         return;
6098     }
6099 
6100     if (!fp_access_check(s)) {
6101         return;
6102     }
6103 
6104     /* Zero extend sreg & hreg inputs to 64 bits now.  */
6105     t_true = tcg_temp_new_i64();
6106     t_false = tcg_temp_new_i64();
6107     read_vec_element(s, t_true, rn, 0, sz);
6108     read_vec_element(s, t_false, rm, 0, sz);
6109 
6110     a64_test_cc(&c, cond);
6111     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
6112                         t_true, t_false);
6113 
6114     /* Note that sregs & hregs write back zeros to the high bits,
6115        and we've already done the zero-extension.  */
6116     write_fp_dreg(s, rd, t_true);
6117 }
6118 
6119 /* Floating-point data-processing (1 source) - half precision */
6120 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
6121 {
6122     TCGv_ptr fpst = NULL;
6123     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
6124     TCGv_i32 tcg_res = tcg_temp_new_i32();
6125 
6126     switch (opcode) {
6127     case 0x0: /* FMOV */
6128         tcg_gen_mov_i32(tcg_res, tcg_op);
6129         break;
6130     case 0x1: /* FABS */
6131         tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
6132         break;
6133     case 0x2: /* FNEG */
6134         tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
6135         break;
6136     case 0x3: /* FSQRT */
6137         fpst = fpstatus_ptr(FPST_FPCR_F16);
6138         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
6139         break;
6140     case 0x8: /* FRINTN */
6141     case 0x9: /* FRINTP */
6142     case 0xa: /* FRINTM */
6143     case 0xb: /* FRINTZ */
6144     case 0xc: /* FRINTA */
6145     {
6146         TCGv_i32 tcg_rmode;
6147 
6148         fpst = fpstatus_ptr(FPST_FPCR_F16);
6149         tcg_rmode = gen_set_rmode(opcode & 7, fpst);
6150         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
6151         gen_restore_rmode(tcg_rmode, fpst);
6152         break;
6153     }
6154     case 0xe: /* FRINTX */
6155         fpst = fpstatus_ptr(FPST_FPCR_F16);
6156         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
6157         break;
6158     case 0xf: /* FRINTI */
6159         fpst = fpstatus_ptr(FPST_FPCR_F16);
6160         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
6161         break;
6162     default:
6163         g_assert_not_reached();
6164     }
6165 
6166     write_fp_sreg(s, rd, tcg_res);
6167 }
6168 
6169 /* Floating-point data-processing (1 source) - single precision */
6170 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
6171 {
6172     void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
6173     TCGv_i32 tcg_op, tcg_res;
6174     TCGv_ptr fpst;
6175     int rmode = -1;
6176 
6177     tcg_op = read_fp_sreg(s, rn);
6178     tcg_res = tcg_temp_new_i32();
6179 
6180     switch (opcode) {
6181     case 0x0: /* FMOV */
6182         tcg_gen_mov_i32(tcg_res, tcg_op);
6183         goto done;
6184     case 0x1: /* FABS */
6185         gen_helper_vfp_abss(tcg_res, tcg_op);
6186         goto done;
6187     case 0x2: /* FNEG */
6188         gen_helper_vfp_negs(tcg_res, tcg_op);
6189         goto done;
6190     case 0x3: /* FSQRT */
6191         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
6192         goto done;
6193     case 0x6: /* BFCVT */
6194         gen_fpst = gen_helper_bfcvt;
6195         break;
6196     case 0x8: /* FRINTN */
6197     case 0x9: /* FRINTP */
6198     case 0xa: /* FRINTM */
6199     case 0xb: /* FRINTZ */
6200     case 0xc: /* FRINTA */
6201         rmode = opcode & 7;
6202         gen_fpst = gen_helper_rints;
6203         break;
6204     case 0xe: /* FRINTX */
6205         gen_fpst = gen_helper_rints_exact;
6206         break;
6207     case 0xf: /* FRINTI */
6208         gen_fpst = gen_helper_rints;
6209         break;
6210     case 0x10: /* FRINT32Z */
6211         rmode = FPROUNDING_ZERO;
6212         gen_fpst = gen_helper_frint32_s;
6213         break;
6214     case 0x11: /* FRINT32X */
6215         gen_fpst = gen_helper_frint32_s;
6216         break;
6217     case 0x12: /* FRINT64Z */
6218         rmode = FPROUNDING_ZERO;
6219         gen_fpst = gen_helper_frint64_s;
6220         break;
6221     case 0x13: /* FRINT64X */
6222         gen_fpst = gen_helper_frint64_s;
6223         break;
6224     default:
6225         g_assert_not_reached();
6226     }
6227 
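         /*
          * For the directed-rounding variants, install the required
          * rounding mode around the operation and then restore it.
          */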
6228     fpst = fpstatus_ptr(FPST_FPCR);
6229     if (rmode >= 0) {
6230         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6231         gen_fpst(tcg_res, tcg_op, fpst);
6232         gen_restore_rmode(tcg_rmode, fpst);
6233     } else {
6234         gen_fpst(tcg_res, tcg_op, fpst);
6235     }
6236 
6237  done:
6238     write_fp_sreg(s, rd, tcg_res);
6239 }
6240 
6241 /* Floating-point data-processing (1 source) - double precision */
6242 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
6243 {
6244     void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
6245     TCGv_i64 tcg_op, tcg_res;
6246     TCGv_ptr fpst;
6247     int rmode = -1;
6248 
6249     switch (opcode) {
6250     case 0x0: /* FMOV */
6251         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
6252         return;
6253     }
6254 
6255     tcg_op = read_fp_dreg(s, rn);
6256     tcg_res = tcg_temp_new_i64();
6257 
6258     switch (opcode) {
6259     case 0x1: /* FABS */
6260         gen_helper_vfp_absd(tcg_res, tcg_op);
6261         goto done;
6262     case 0x2: /* FNEG */
6263         gen_helper_vfp_negd(tcg_res, tcg_op);
6264         goto done;
6265     case 0x3: /* FSQRT */
6266         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
6267         goto done;
6268     case 0x8: /* FRINTN */
6269     case 0x9: /* FRINTP */
6270     case 0xa: /* FRINTM */
6271     case 0xb: /* FRINTZ */
6272     case 0xc: /* FRINTA */
6273         rmode = opcode & 7;
6274         gen_fpst = gen_helper_rintd;
6275         break;
6276     case 0xe: /* FRINTX */
6277         gen_fpst = gen_helper_rintd_exact;
6278         break;
6279     case 0xf: /* FRINTI */
6280         gen_fpst = gen_helper_rintd;
6281         break;
6282     case 0x10: /* FRINT32Z */
6283         rmode = FPROUNDING_ZERO;
6284         gen_fpst = gen_helper_frint32_d;
6285         break;
6286     case 0x11: /* FRINT32X */
6287         gen_fpst = gen_helper_frint32_d;
6288         break;
6289     case 0x12: /* FRINT64Z */
6290         rmode = FPROUNDING_ZERO;
6291         gen_fpst = gen_helper_frint64_d;
6292         break;
6293     case 0x13: /* FRINT64X */
6294         gen_fpst = gen_helper_frint64_d;
6295         break;
6296     default:
6297         g_assert_not_reached();
6298     }
6299 
6300     fpst = fpstatus_ptr(FPST_FPCR);
6301     if (rmode >= 0) {
6302         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6303         gen_fpst(tcg_res, tcg_op, fpst);
6304         gen_restore_rmode(tcg_rmode, fpst);
6305     } else {
6306         gen_fpst(tcg_res, tcg_op, fpst);
6307     }
6308 
6309  done:
6310     write_fp_dreg(s, rd, tcg_res);
6311 }
6312 
6313 static void handle_fp_fcvt(DisasContext *s, int opcode,
6314                            int rd, int rn, int dtype, int ntype)
6315 {
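         /*
          * ntype/dtype encode the source and destination precisions:
          * 0 = single, 1 = double, 3 = half (2 is unallocated here).
          */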
6316     switch (ntype) {
6317     case 0x0:
6318     {
6319         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6320         if (dtype == 1) {
6321             /* Single to double */
6322             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6323             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
6324             write_fp_dreg(s, rd, tcg_rd);
6325         } else {
6326             /* Single to half */
6327             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6328             TCGv_i32 ahp = get_ahp_flag();
6329             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6330 
6331             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6332             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6333             write_fp_sreg(s, rd, tcg_rd);
6334         }
6335         break;
6336     }
6337     case 0x1:
6338     {
6339         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
6340         TCGv_i32 tcg_rd = tcg_temp_new_i32();
6341         if (dtype == 0) {
6342             /* Double to single */
6343             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
6344         } else {
6345             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6346             TCGv_i32 ahp = get_ahp_flag();
6347             /* Double to half */
6348             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6349             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6350         }
6351         write_fp_sreg(s, rd, tcg_rd);
6352         break;
6353     }
6354     case 0x3:
6355     {
6356         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6357         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
6358         TCGv_i32 tcg_ahp = get_ahp_flag();
6359         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
6360         if (dtype == 0) {
6361             /* Half to single */
6362             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6363             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6364             write_fp_sreg(s, rd, tcg_rd);
6365         } else {
6366             /* Half to double */
6367             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6368             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6369             write_fp_dreg(s, rd, tcg_rd);
6370         }
6371         break;
6372     }
6373     default:
6374         g_assert_not_reached();
6375     }
6376 }
6377 
6378 /* Floating point data-processing (1 source)
6379  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
6380  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6381  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
6382  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6383  */
6384 static void disas_fp_1src(DisasContext *s, uint32_t insn)
6385 {
6386     int mos = extract32(insn, 29, 3);
6387     int type = extract32(insn, 22, 2);
6388     int opcode = extract32(insn, 15, 6);
6389     int rn = extract32(insn, 5, 5);
6390     int rd = extract32(insn, 0, 5);
6391 
6392     if (mos) {
6393         goto do_unallocated;
6394     }
6395 
6396     switch (opcode) {
6397     case 0x4: case 0x5: case 0x7:
6398     {
6399         /* FCVT between half, single and double precision */
6400         int dtype = extract32(opcode, 0, 2);
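             /* Opcodes 0x4, 0x5 and 0x7 give dtype 0, 1 and 3, ie a
              * single, double or half precision destination.
              */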
6401         if (type == 2 || dtype == type) {
6402             goto do_unallocated;
6403         }
6404         if (!fp_access_check(s)) {
6405             return;
6406         }
6407 
6408         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
6409         break;
6410     }
6411 
6412     case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
6413         if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
6414             goto do_unallocated;
6415         }
6416         /* fall through */
6417     case 0x0 ... 0x3:
6418     case 0x8 ... 0xc:
6419     case 0xe ... 0xf:
6420         /* 32-to-32 and 64-to-64 ops */
6421         switch (type) {
6422         case 0:
6423             if (!fp_access_check(s)) {
6424                 return;
6425             }
6426             handle_fp_1src_single(s, opcode, rd, rn);
6427             break;
6428         case 1:
6429             if (!fp_access_check(s)) {
6430                 return;
6431             }
6432             handle_fp_1src_double(s, opcode, rd, rn);
6433             break;
6434         case 3:
6435             if (!dc_isar_feature(aa64_fp16, s)) {
6436                 goto do_unallocated;
6437             }
6438 
6439             if (!fp_access_check(s)) {
6440                 return;
6441             }
6442             handle_fp_1src_half(s, opcode, rd, rn);
6443             break;
6444         default:
6445             goto do_unallocated;
6446         }
6447         break;
6448 
6449     case 0x6:
6450         switch (type) {
6451         case 1: /* BFCVT */
6452             if (!dc_isar_feature(aa64_bf16, s)) {
6453                 goto do_unallocated;
6454             }
6455             if (!fp_access_check(s)) {
6456                 return;
6457             }
6458             handle_fp_1src_single(s, opcode, rd, rn);
6459             break;
6460         default:
6461             goto do_unallocated;
6462         }
6463         break;
6464 
6465     default:
6466     do_unallocated:
6467         unallocated_encoding(s);
6468         break;
6469     }
6470 }
6471 
6472 /* Floating-point data-processing (2 source) - single precision */
6473 static void handle_fp_2src_single(DisasContext *s, int opcode,
6474                                   int rd, int rn, int rm)
6475 {
6476     TCGv_i32 tcg_op1;
6477     TCGv_i32 tcg_op2;
6478     TCGv_i32 tcg_res;
6479     TCGv_ptr fpst;
6480 
6481     tcg_res = tcg_temp_new_i32();
6482     fpst = fpstatus_ptr(FPST_FPCR);
6483     tcg_op1 = read_fp_sreg(s, rn);
6484     tcg_op2 = read_fp_sreg(s, rm);
6485 
6486     switch (opcode) {
6487     case 0x0: /* FMUL */
6488         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6489         break;
6490     case 0x1: /* FDIV */
6491         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
6492         break;
6493     case 0x2: /* FADD */
6494         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6495         break;
6496     case 0x3: /* FSUB */
6497         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
6498         break;
6499     case 0x4: /* FMAX */
6500         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6501         break;
6502     case 0x5: /* FMIN */
6503         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6504         break;
6505     case 0x6: /* FMAXNM */
6506         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6507         break;
6508     case 0x7: /* FMINNM */
6509         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6510         break;
6511     case 0x8: /* FNMUL */
6512         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6513         gen_helper_vfp_negs(tcg_res, tcg_res);
6514         break;
         default:
             g_assert_not_reached();
6515     }
6516 
6517     write_fp_sreg(s, rd, tcg_res);
6518 }
6519 
6520 /* Floating-point data-processing (2 source) - double precision */
6521 static void handle_fp_2src_double(DisasContext *s, int opcode,
6522                                   int rd, int rn, int rm)
6523 {
6524     TCGv_i64 tcg_op1;
6525     TCGv_i64 tcg_op2;
6526     TCGv_i64 tcg_res;
6527     TCGv_ptr fpst;
6528 
6529     tcg_res = tcg_temp_new_i64();
6530     fpst = fpstatus_ptr(FPST_FPCR);
6531     tcg_op1 = read_fp_dreg(s, rn);
6532     tcg_op2 = read_fp_dreg(s, rm);
6533 
6534     switch (opcode) {
6535     case 0x0: /* FMUL */
6536         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6537         break;
6538     case 0x1: /* FDIV */
6539         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
6540         break;
6541     case 0x2: /* FADD */
6542         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6543         break;
6544     case 0x3: /* FSUB */
6545         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
6546         break;
6547     case 0x4: /* FMAX */
6548         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6549         break;
6550     case 0x5: /* FMIN */
6551         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6552         break;
6553     case 0x6: /* FMAXNM */
6554         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6555         break;
6556     case 0x7: /* FMINNM */
6557         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6558         break;
6559     case 0x8: /* FNMUL */
6560         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6561         gen_helper_vfp_negd(tcg_res, tcg_res);
6562         break;
6563     }
6564 
6565     write_fp_dreg(s, rd, tcg_res);
6566 }
6567 
6568 /* Floating-point data-processing (2 source) - half precision */
6569 static void handle_fp_2src_half(DisasContext *s, int opcode,
6570                                 int rd, int rn, int rm)
6571 {
6572     TCGv_i32 tcg_op1;
6573     TCGv_i32 tcg_op2;
6574     TCGv_i32 tcg_res;
6575     TCGv_ptr fpst;
6576 
6577     tcg_res = tcg_temp_new_i32();
6578     fpst = fpstatus_ptr(FPST_FPCR_F16);
6579     tcg_op1 = read_fp_hreg(s, rn);
6580     tcg_op2 = read_fp_hreg(s, rm);
6581 
6582     switch (opcode) {
6583     case 0x0: /* FMUL */
6584         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6585         break;
6586     case 0x1: /* FDIV */
6587         gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
6588         break;
6589     case 0x2: /* FADD */
6590         gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6591         break;
6592     case 0x3: /* FSUB */
6593         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6594         break;
6595     case 0x4: /* FMAX */
6596         gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6597         break;
6598     case 0x5: /* FMIN */
6599         gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6600         break;
6601     case 0x6: /* FMAXNM */
6602         gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6603         break;
6604     case 0x7: /* FMINNM */
6605         gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6606         break;
6607     case 0x8: /* FNMUL */
6608         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6609         tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6610         break;
6611     default:
6612         g_assert_not_reached();
6613     }
6614 
6615     write_fp_sreg(s, rd, tcg_res);
6616 }
6617 
6618 /* Floating point data-processing (2 source)
6619  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6620  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6621  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
6622  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6623  */
6624 static void disas_fp_2src(DisasContext *s, uint32_t insn)
6625 {
6626     int mos = extract32(insn, 29, 3);
6627     int type = extract32(insn, 22, 2);
6628     int rd = extract32(insn, 0, 5);
6629     int rn = extract32(insn, 5, 5);
6630     int rm = extract32(insn, 16, 5);
6631     int opcode = extract32(insn, 12, 4);
6632 
6633     if (opcode > 8 || mos) {
6634         unallocated_encoding(s);
6635         return;
6636     }
6637 
6638     switch (type) {
6639     case 0:
6640         if (!fp_access_check(s)) {
6641             return;
6642         }
6643         handle_fp_2src_single(s, opcode, rd, rn, rm);
6644         break;
6645     case 1:
6646         if (!fp_access_check(s)) {
6647             return;
6648         }
6649         handle_fp_2src_double(s, opcode, rd, rn, rm);
6650         break;
6651     case 3:
6652         if (!dc_isar_feature(aa64_fp16, s)) {
6653             unallocated_encoding(s);
6654             return;
6655         }
6656         if (!fp_access_check(s)) {
6657             return;
6658         }
6659         handle_fp_2src_half(s, opcode, rd, rn, rm);
6660         break;
6661     default:
6662         unallocated_encoding(s);
6663     }
6664 }
6665 
6666 /* Floating-point data-processing (3 source) - single precision */
6667 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6668                                   int rd, int rn, int rm, int ra)
6669 {
6670     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6671     TCGv_i32 tcg_res = tcg_temp_new_i32();
6672     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6673 
6674     tcg_op1 = read_fp_sreg(s, rn);
6675     tcg_op2 = read_fp_sreg(s, rm);
6676     tcg_op3 = read_fp_sreg(s, ra);
6677 
6678     /* These are fused multiply-add, and must be done as one
6679      * floating point operation with no rounding between the
6680      * multiplication and addition steps.
6681      * NB that doing the negations here as separate steps is
6682      * correct: an input NaN should come out with its sign bit
6683      * flipped if it is a negated input.
6684      */
6685     if (o1) {
6686         gen_helper_vfp_negs(tcg_op3, tcg_op3);
6687     }
6688 
6689     if (o0 != o1) {
6690         gen_helper_vfp_negs(tcg_op1, tcg_op1);
6691     }
6692 
6693     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6694 
6695     write_fp_sreg(s, rd, tcg_res);
6696 }
6697 
6698 /* Floating-point data-processing (3 source) - double precision */
6699 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6700                                   int rd, int rn, int rm, int ra)
6701 {
6702     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6703     TCGv_i64 tcg_res = tcg_temp_new_i64();
6704     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6705 
6706     tcg_op1 = read_fp_dreg(s, rn);
6707     tcg_op2 = read_fp_dreg(s, rm);
6708     tcg_op3 = read_fp_dreg(s, ra);
6709 
6710     /* These are fused multiply-add, and must be done as one
6711      * floating point operation with no rounding between the
6712      * multiplication and addition steps.
6713      * NB that doing the negations here as separate steps is
6714      * correct: an input NaN should come out with its sign bit
6715      * flipped if it is a negated input.
6716      */
6717     if (o1) {
6718         gen_helper_vfp_negd(tcg_op3, tcg_op3);
6719     }
6720 
6721     if (o0 != o1) {
6722         gen_helper_vfp_negd(tcg_op1, tcg_op1);
6723     }
6724 
6725     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6726 
6727     write_fp_dreg(s, rd, tcg_res);
6728 }
6729 
6730 /* Floating-point data-processing (3 source) - half precision */
6731 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6732                                 int rd, int rn, int rm, int ra)
6733 {
6734     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6735     TCGv_i32 tcg_res = tcg_temp_new_i32();
6736     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
6737 
6738     tcg_op1 = read_fp_hreg(s, rn);
6739     tcg_op2 = read_fp_hreg(s, rm);
6740     tcg_op3 = read_fp_hreg(s, ra);
6741 
6742     /* These are fused multiply-add, and must be done as one
6743      * floating point operation with no rounding between the
6744      * multiplication and addition steps.
6745      * NB that doing the negations here as separate steps is
6746      * correct: an input NaN should come out with its sign bit
6747      * flipped if it is a negated input.
6748      */
6749     if (o1) {
6750         tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6751     }
6752 
6753     if (o0 != o1) {
6754         tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6755     }
6756 
6757     gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6758 
6759     write_fp_sreg(s, rd, tcg_res);
6760 }
6761 
6762 /* Floating point data-processing (3 source)
6763  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
6764  * +---+---+---+-----------+------+----+------+----+------+------+------+
6765  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6766  * +---+---+---+-----------+------+----+------+----+------+------+------+
6767  */
6768 static void disas_fp_3src(DisasContext *s, uint32_t insn)
6769 {
6770     int mos = extract32(insn, 29, 3);
6771     int type = extract32(insn, 22, 2);
6772     int rd = extract32(insn, 0, 5);
6773     int rn = extract32(insn, 5, 5);
6774     int ra = extract32(insn, 10, 5);
6775     int rm = extract32(insn, 16, 5);
6776     bool o0 = extract32(insn, 15, 1);
6777     bool o1 = extract32(insn, 21, 1);
6778 
6779     if (mos) {
6780         unallocated_encoding(s);
6781         return;
6782     }
6783 
6784     switch (type) {
6785     case 0:
6786         if (!fp_access_check(s)) {
6787             return;
6788         }
6789         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6790         break;
6791     case 1:
6792         if (!fp_access_check(s)) {
6793             return;
6794         }
6795         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6796         break;
6797     case 3:
6798         if (!dc_isar_feature(aa64_fp16, s)) {
6799             unallocated_encoding(s);
6800             return;
6801         }
6802         if (!fp_access_check(s)) {
6803             return;
6804         }
6805         handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6806         break;
6807     default:
6808         unallocated_encoding(s);
6809     }
6810 }
6811 
6812 /* Floating point immediate
6813  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
6814  * +---+---+---+-----------+------+---+------------+-------+------+------+
6815  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
6816  * +---+---+---+-----------+------+---+------------+-------+------+------+
6817  */
6818 static void disas_fp_imm(DisasContext *s, uint32_t insn)
6819 {
6820     int rd = extract32(insn, 0, 5);
6821     int imm5 = extract32(insn, 5, 5);
6822     int imm8 = extract32(insn, 13, 8);
6823     int type = extract32(insn, 22, 2);
6824     int mos = extract32(insn, 29, 3);
6825     uint64_t imm;
6826     MemOp sz;
6827 
6828     if (mos || imm5) {
6829         unallocated_encoding(s);
6830         return;
6831     }
6832 
6833     switch (type) {
6834     case 0:
6835         sz = MO_32;
6836         break;
6837     case 1:
6838         sz = MO_64;
6839         break;
6840     case 3:
6841         sz = MO_16;
6842         if (dc_isar_feature(aa64_fp16, s)) {
6843             break;
6844         }
6845         /* fallthru */
6846     default:
6847         unallocated_encoding(s);
6848         return;
6849     }
6850 
6851     if (!fp_access_check(s)) {
6852         return;
6853     }
6854 
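     /*
      * vfp_expand_imm() expands the a:b:c:defgh pattern in imm8 to a
      * full FP constant of width sz; eg imm8 == 0x70 expands to 1.0
      * (0x3f800000 for a single).
      */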
6855     imm = vfp_expand_imm(sz, imm8);
6856     write_fp_dreg(s, rd, tcg_constant_i64(imm));
6857 }
6858 
6859 /* Handle floating point <=> fixed point conversions. Note that we can
6860  * also deal with fp <=> integer conversions as a special case (scale == 64).
6861  * OPTME: consider handling that special case separately, or at least skipping
6862  * the call to scalbn in the helpers for zero shifts.
6863  */
6864 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
6865                            bool itof, int rmode, int scale, int sf, int type)
6866 {
6867     bool is_signed = !(opcode & 1);
6868     TCGv_ptr tcg_fpstatus;
6869     TCGv_i32 tcg_shift, tcg_single;
6870     TCGv_i64 tcg_double;
6871 
6872     tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
6873 
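     /*
      * The conversion helpers take a count of fractional bits, ie
      * 64 - scale; this is zero for the plain fp <-> integer
      * conversions (scale == 64).
      */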
6874     tcg_shift = tcg_constant_i32(64 - scale);
6875 
6876     if (itof) {
6877         TCGv_i64 tcg_int = cpu_reg(s, rn);
6878         if (!sf) {
6879             TCGv_i64 tcg_extend = tcg_temp_new_i64();
6880 
6881             if (is_signed) {
6882                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
6883             } else {
6884                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
6885             }
6886 
6887             tcg_int = tcg_extend;
6888         }
6889 
6890         switch (type) {
6891         case 1: /* float64 */
6892             tcg_double = tcg_temp_new_i64();
6893             if (is_signed) {
6894                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6895                                      tcg_shift, tcg_fpstatus);
6896             } else {
6897                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6898                                      tcg_shift, tcg_fpstatus);
6899             }
6900             write_fp_dreg(s, rd, tcg_double);
6901             break;
6902 
6903         case 0: /* float32 */
6904             tcg_single = tcg_temp_new_i32();
6905             if (is_signed) {
6906                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6907                                      tcg_shift, tcg_fpstatus);
6908             } else {
6909                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6910                                      tcg_shift, tcg_fpstatus);
6911             }
6912             write_fp_sreg(s, rd, tcg_single);
6913             break;
6914 
6915         case 3: /* float16 */
6916             tcg_single = tcg_temp_new_i32();
6917             if (is_signed) {
6918                 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
6919                                      tcg_shift, tcg_fpstatus);
6920             } else {
6921                 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
6922                                      tcg_shift, tcg_fpstatus);
6923             }
6924             write_fp_sreg(s, rd, tcg_single);
6925             break;
6926 
6927         default:
6928             g_assert_not_reached();
6929         }
6930     } else {
6931         TCGv_i64 tcg_int = cpu_reg(s, rd);
6932         TCGv_i32 tcg_rmode;
6933 
6934         if (extract32(opcode, 2, 1)) {
6935             /* There are too many rounding modes to all fit into rmode,
6936              * so FCVTA[US] is a special case.
6937              */
6938             rmode = FPROUNDING_TIEAWAY;
6939         }
6940 
6941         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
6942 
6943         switch (type) {
6944         case 1: /* float64 */
6945             tcg_double = read_fp_dreg(s, rn);
6946             if (is_signed) {
6947                 if (!sf) {
6948                     gen_helper_vfp_tosld(tcg_int, tcg_double,
6949                                          tcg_shift, tcg_fpstatus);
6950                 } else {
6951                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
6952                                          tcg_shift, tcg_fpstatus);
6953                 }
6954             } else {
6955                 if (!sf) {
6956                     gen_helper_vfp_tould(tcg_int, tcg_double,
6957                                          tcg_shift, tcg_fpstatus);
6958                 } else {
6959                     gen_helper_vfp_touqd(tcg_int, tcg_double,
6960                                          tcg_shift, tcg_fpstatus);
6961                 }
6962             }
6963             if (!sf) {
6964                 tcg_gen_ext32u_i64(tcg_int, tcg_int);
6965             }
6966             break;
6967 
6968         case 0: /* float32 */
6969             tcg_single = read_fp_sreg(s, rn);
6970             if (sf) {
6971                 if (is_signed) {
6972                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
6973                                          tcg_shift, tcg_fpstatus);
6974                 } else {
6975                     gen_helper_vfp_touqs(tcg_int, tcg_single,
6976                                          tcg_shift, tcg_fpstatus);
6977                 }
6978             } else {
6979                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6980                 if (is_signed) {
6981                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
6982                                          tcg_shift, tcg_fpstatus);
6983                 } else {
6984                     gen_helper_vfp_touls(tcg_dest, tcg_single,
6985                                          tcg_shift, tcg_fpstatus);
6986                 }
6987                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6988             }
6989             break;
6990 
6991         case 3: /* float16 */
6992             tcg_single = read_fp_sreg(s, rn);
6993             if (sf) {
6994                 if (is_signed) {
6995                     gen_helper_vfp_tosqh(tcg_int, tcg_single,
6996                                          tcg_shift, tcg_fpstatus);
6997                 } else {
6998                     gen_helper_vfp_touqh(tcg_int, tcg_single,
6999                                          tcg_shift, tcg_fpstatus);
7000                 }
7001             } else {
7002                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
7003                 if (is_signed) {
7004                     gen_helper_vfp_toslh(tcg_dest, tcg_single,
7005                                          tcg_shift, tcg_fpstatus);
7006                 } else {
7007                     gen_helper_vfp_toulh(tcg_dest, tcg_single,
7008                                          tcg_shift, tcg_fpstatus);
7009                 }
7010                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
7011             }
7012             break;
7013 
7014         default:
7015             g_assert_not_reached();
7016         }
7017 
7018         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
7019     }
7020 }
7021 
7022 /* Floating point <-> fixed point conversions
7023  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
7024  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
7025  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
7026  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
7027  */
7028 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
7029 {
7030     int rd = extract32(insn, 0, 5);
7031     int rn = extract32(insn, 5, 5);
7032     int scale = extract32(insn, 10, 6);
7033     int opcode = extract32(insn, 16, 3);
7034     int rmode = extract32(insn, 19, 2);
7035     int type = extract32(insn, 22, 2);
7036     bool sbit = extract32(insn, 29, 1);
7037     bool sf = extract32(insn, 31, 1);
7038     bool itof;
7039 
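     /*
      * The fractional-bit count is 64 - scale; a 32-bit GPR can hold
      * at most 32 fractional bits, so scale < 32 is unallocated when
      * sf == 0.
      */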
7040     if (sbit || (!sf && scale < 32)) {
7041         unallocated_encoding(s);
7042         return;
7043     }
7044 
7045     switch (type) {
7046     case 0: /* float32 */
7047     case 1: /* float64 */
7048         break;
7049     case 3: /* float16 */
7050         if (dc_isar_feature(aa64_fp16, s)) {
7051             break;
7052         }
7053         /* fallthru */
7054     default:
7055         unallocated_encoding(s);
7056         return;
7057     }
7058 
7059     switch ((rmode << 3) | opcode) {
7060     case 0x2: /* SCVTF */
7061     case 0x3: /* UCVTF */
7062         itof = true;
7063         break;
7064     case 0x18: /* FCVTZS */
7065     case 0x19: /* FCVTZU */
7066         itof = false;
7067         break;
7068     default:
7069         unallocated_encoding(s);
7070         return;
7071     }
7072 
7073     if (!fp_access_check(s)) {
7074         return;
7075     }
7076 
7077     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
7078 }
7079 
7080 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
7081 {
7082     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
7083      * without conversion.
7084      */
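     /* type 0/1/3 use the usual FP "type" encoding (single, double,
      * half); type 2 selects the upper 64 bits of the 128-bit Vd.
      */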
7085 
7086     if (itof) {
7087         TCGv_i64 tcg_rn = cpu_reg(s, rn);
7088         TCGv_i64 tmp;
7089 
7090         switch (type) {
7091         case 0:
7092             /* 32 bit */
7093             tmp = tcg_temp_new_i64();
7094             tcg_gen_ext32u_i64(tmp, tcg_rn);
7095             write_fp_dreg(s, rd, tmp);
7096             break;
7097         case 1:
7098             /* 64 bit */
7099             write_fp_dreg(s, rd, tcg_rn);
7100             break;
7101         case 2:
7102             /* 64 bit to top half. */
7103             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
7104             clear_vec_high(s, true, rd);
7105             break;
7106         case 3:
7107             /* 16 bit */
7108             tmp = tcg_temp_new_i64();
7109             tcg_gen_ext16u_i64(tmp, tcg_rn);
7110             write_fp_dreg(s, rd, tmp);
7111             break;
7112         default:
7113             g_assert_not_reached();
7114         }
7115     } else {
7116         TCGv_i64 tcg_rd = cpu_reg(s, rd);
7117 
7118         switch (type) {
7119         case 0:
7120             /* 32 bit */
7121             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
7122             break;
7123         case 1:
7124             /* 64 bit */
7125             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
7126             break;
7127         case 2:
7128             /* 64 bits from top half */
7129             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
7130             break;
7131         case 3:
7132             /* 16 bit */
7133             tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
7134             break;
7135         default:
7136             g_assert_not_reached();
7137         }
7138     }
7139 }
7140 
7141 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
7142 {
7143     TCGv_i64 t = read_fp_dreg(s, rn);
7144     TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
7145 
7146     gen_helper_fjcvtzs(t, t, fpstatus);
7147 
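     /*
      * The helper packs the 32-bit result into the low half of t and
      * the value for cpu_ZF into the high half: architecturally
      * FJCVTZS sets NZCV to 0Z00, with Z set (cpu_ZF == 0) only if
      * the conversion was exact.
      */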
7148     tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
7149     tcg_gen_extrh_i64_i32(cpu_ZF, t);
7150     tcg_gen_movi_i32(cpu_CF, 0);
7151     tcg_gen_movi_i32(cpu_NF, 0);
7152     tcg_gen_movi_i32(cpu_VF, 0);
7153 }
7154 
7155 /* Floating point <-> integer conversions
7156  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
7157  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
7158  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
7159  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
7160  */
7161 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
7162 {
7163     int rd = extract32(insn, 0, 5);
7164     int rn = extract32(insn, 5, 5);
7165     int opcode = extract32(insn, 16, 3);
7166     int rmode = extract32(insn, 19, 2);
7167     int type = extract32(insn, 22, 2);
7168     bool sbit = extract32(insn, 29, 1);
7169     bool sf = extract32(insn, 31, 1);
7170     bool itof = false;
7171 
7172     if (sbit) {
7173         goto do_unallocated;
7174     }
7175 
7176     switch (opcode) {
7177     case 2: /* SCVTF */
7178     case 3: /* UCVTF */
7179         itof = true;
7180         /* fallthru */
7181     case 4: /* FCVTAS */
7182     case 5: /* FCVTAU */
7183         if (rmode != 0) {
7184             goto do_unallocated;
7185         }
7186         /* fallthru */
7187     case 0: /* FCVT[NPMZ]S */
7188     case 1: /* FCVT[NPMZ]U */
7189         switch (type) {
7190         case 0: /* float32 */
7191         case 1: /* float64 */
7192             break;
7193         case 3: /* float16 */
7194             if (!dc_isar_feature(aa64_fp16, s)) {
7195                 goto do_unallocated;
7196             }
7197             break;
7198         default:
7199             goto do_unallocated;
7200         }
7201         if (!fp_access_check(s)) {
7202             return;
7203         }
7204         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
7205         break;
7206 
7207     default:
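         /* Dispatch on the packed sf:type:rmode:opcode value; eg
          * 0b00000110 is sf == 0, type == 0, rmode == 0 and
          * opcode == 6, ie FMOV Wd, Sn.
          */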
7208         switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
7209         case 0b01100110: /* FMOV half <-> 32-bit int */
7210         case 0b01100111:
7211         case 0b11100110: /* FMOV half <-> 64-bit int */
7212         case 0b11100111:
7213             if (!dc_isar_feature(aa64_fp16, s)) {
7214                 goto do_unallocated;
7215             }
7216             /* fallthru */
7217         case 0b00000110: /* FMOV 32-bit */
7218         case 0b00000111:
7219         case 0b10100110: /* FMOV 64-bit */
7220         case 0b10100111:
7221         case 0b11001110: /* FMOV top half of 128-bit */
7222         case 0b11001111:
7223             if (!fp_access_check(s)) {
7224                 return;
7225             }
7226             itof = opcode & 1;
7227             handle_fmov(s, rd, rn, type, itof);
7228             break;
7229 
7230         case 0b00111110: /* FJCVTZS */
7231             if (!dc_isar_feature(aa64_jscvt, s)) {
7232                 goto do_unallocated;
7233             } else if (fp_access_check(s)) {
7234                 handle_fjcvtzs(s, rd, rn);
7235             }
7236             break;
7237 
7238         default:
7239         do_unallocated:
7240             unallocated_encoding(s);
7241             return;
7242         }
7243         break;
7244     }
7245 }
7246 
7247 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
7248  *   31  30  29 28     25 24                          0
7249  * +---+---+---+---------+-----------------------------+
7250  * |   | 0 |   | 1 1 1 1 |                             |
7251  * +---+---+---+---------+-----------------------------+
7252  */
7253 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
7254 {
7255     if (extract32(insn, 24, 1)) {
7256         /* Floating point data-processing (3 source) */
7257         disas_fp_3src(s, insn);
7258     } else if (extract32(insn, 21, 1) == 0) {
7259         /* Floating point to fixed point conversions */
7260         disas_fp_fixed_conv(s, insn);
7261     } else {
7262         switch (extract32(insn, 10, 2)) {
7263         case 1:
7264             /* Floating point conditional compare */
7265             disas_fp_ccomp(s, insn);
7266             break;
7267         case 2:
7268             /* Floating point data-processing (2 source) */
7269             disas_fp_2src(s, insn);
7270             break;
7271         case 3:
7272             /* Floating point conditional select */
7273             disas_fp_csel(s, insn);
7274             break;
7275         case 0:
7276             switch (ctz32(extract32(insn, 12, 4))) {
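             /* Note that ctz32(0) == 32, so insn[15:12] == 0000
              * falls through to the default case below.
              */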
7277             case 0: /* [15:12] == xxx1 */
7278                 /* Floating point immediate */
7279                 disas_fp_imm(s, insn);
7280                 break;
7281             case 1: /* [15:12] == xx10 */
7282                 /* Floating point compare */
7283                 disas_fp_compare(s, insn);
7284                 break;
7285             case 2: /* [15:12] == x100 */
7286                 /* Floating point data-processing (1 source) */
7287                 disas_fp_1src(s, insn);
7288                 break;
7289             case 3: /* [15:12] == 1000 */
7290                 unallocated_encoding(s);
7291                 break;
7292             default: /* [15:12] == 0000 */
7293                 /* Floating point <-> integer conversions */
7294                 disas_fp_int_conv(s, insn);
7295                 break;
7296             }
7297             break;
7298         }
7299     }
7300 }
7301 
7302 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
7303                      int pos)
7304 {
7305     /* Extract 64 bits from the middle of two concatenated 64 bit
7306      * vector register slices left:right. The extracted bits start
7307      * at 'pos' bits into the right (least significant) side.
7308      * We return the result in tcg_right, and guarantee not to
7309      * trash tcg_left.
7310      */
7311     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7312     assert(pos > 0 && pos < 64);
7313 
7314     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
7315     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
7316     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
7317 }
7318 
7319 /* EXT
7320  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
7321  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7322  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
7323  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7324  */
7325 static void disas_simd_ext(DisasContext *s, uint32_t insn)
7326 {
7327     int is_q = extract32(insn, 30, 1);
7328     int op2 = extract32(insn, 22, 2);
7329     int imm4 = extract32(insn, 11, 4);
7330     int rm = extract32(insn, 16, 5);
7331     int rn = extract32(insn, 5, 5);
7332     int rd = extract32(insn, 0, 5);
7333     int pos = imm4 << 3;
7334     TCGv_i64 tcg_resl, tcg_resh;
7335 
7336     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
7337         unallocated_encoding(s);
7338         return;
7339     }
7340 
7341     if (!fp_access_check(s)) {
7342         return;
7343     }
7344 
7345     tcg_resh = tcg_temp_new_i64();
7346     tcg_resl = tcg_temp_new_i64();
7347 
7348     /* Vd gets bits starting at pos bits into Vm:Vn. This is
7349      * either extracting 128 bits from a 128:128 concatenation, or
7350      * extracting 64 bits from a 64:64 concatenation.
7351      */
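     /*
      * For example, with is_q and imm4 == 9 (pos == 72) the low
      * element of Vn is skipped entirely: after the pos -= 64
      * adjustment below we combine Vn[1], Vm[0] and Vm[1] using
      * 8-bit shifts.
      */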
7352     if (!is_q) {
7353         read_vec_element(s, tcg_resl, rn, 0, MO_64);
7354         if (pos != 0) {
7355             read_vec_element(s, tcg_resh, rm, 0, MO_64);
7356             do_ext64(s, tcg_resh, tcg_resl, pos);
7357         }
7358     } else {
7359         TCGv_i64 tcg_hh;
7360         typedef struct {
7361             int reg;
7362             int elt;
7363         } EltPosns;
7364         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
7365         EltPosns *elt = eltposns;
7366 
7367         if (pos >= 64) {
7368             elt++;
7369             pos -= 64;
7370         }
7371 
7372         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
7373         elt++;
7374         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
7375         elt++;
7376         if (pos != 0) {
7377             do_ext64(s, tcg_resh, tcg_resl, pos);
7378             tcg_hh = tcg_temp_new_i64();
7379             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
7380             do_ext64(s, tcg_hh, tcg_resh, pos);
7381         }
7382     }
7383 
7384     write_vec_element(s, tcg_resl, rd, 0, MO_64);
7385     if (is_q) {
7386         write_vec_element(s, tcg_resh, rd, 1, MO_64);
7387     }
7388     clear_vec_high(s, is_q, rd);
7389 }
7390 
7391 /* TBL/TBX
7392  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
7393  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7394  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
7395  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7396  */
7397 static void disas_simd_tb(DisasContext *s, uint32_t insn)
7398 {
7399     int op2 = extract32(insn, 22, 2);
7400     int is_q = extract32(insn, 30, 1);
7401     int rm = extract32(insn, 16, 5);
7402     int rn = extract32(insn, 5, 5);
7403     int rd = extract32(insn, 0, 5);
7404     int is_tbx = extract32(insn, 12, 1);
7405     int len = (extract32(insn, 13, 2) + 1) * 16;
7406 
7407     if (op2 != 0) {
7408         unallocated_encoding(s);
7409         return;
7410     }
7411 
7412     if (!fp_access_check(s)) {
7413         return;
7414     }
7415 
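     /* The helper's simd_data argument packs the table length in
      * bytes (16, 32, 48 or 64, ie one to four consecutive registers
      * starting at Rn), the TBX flag and Rn itself.
      */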
7416     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
7417                        vec_full_reg_offset(s, rm), cpu_env,
7418                        is_q ? 16 : 8, vec_full_reg_size(s),
7419                        (len << 6) | (is_tbx << 5) | rn,
7420                        gen_helper_simd_tblx);
7421 }
7422 
7423 /* ZIP/UZP/TRN
7424  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
7425  * +---+---+-------------+------+---+------+---+-----+-----+------+------+
7426  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
7427  * +---+---+-------------+------+---+------+---+-----+-----+------+------+
7428  */
7429 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
7430 {
7431     int rd = extract32(insn, 0, 5);
7432     int rn = extract32(insn, 5, 5);
7433     int rm = extract32(insn, 16, 5);
7434     int size = extract32(insn, 22, 2);
7435     /* opc field bits [1:0] (insn bits [13:12]) indicate ZIP/UZP/TRN;
7436      * opc bit 2 (insn bit 14) indicates the 1 vs 2 variant of the insn.
7437      */
7438     int opcode = extract32(insn, 12, 2);
7439     bool part = extract32(insn, 14, 1);
7440     bool is_q = extract32(insn, 30, 1);
7441     int esize = 8 << size;
7442     int i;
7443     int datasize = is_q ? 128 : 64;
7444     int elements = datasize / esize;
7445     TCGv_i64 tcg_res[2], tcg_ele;
7446 
7447     if (opcode == 0 || (size == 3 && !is_q)) {
7448         unallocated_encoding(s);
7449         return;
7450     }
7451 
7452     if (!fp_access_check(s)) {
7453         return;
7454     }
7455 
7456     tcg_res[0] = tcg_temp_new_i64();
7457     tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
7458     tcg_ele = tcg_temp_new_i64();
7459 
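     /*
      * Build the result one element at a time. For example, with four
      * elements and part == 0, ZIP1 produces [n0, m0, n1, m1] while
      * UZP1 gathers the even-numbered elements of the Rm:Rn
      * concatenation, [n0, n2, m0, m2].
      */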
7460     for (i = 0; i < elements; i++) {
7461         int o, w;
7462 
7463         switch (opcode) {
7464         case 1: /* UZP1/2 */
7465         {
7466             int midpoint = elements / 2;
7467             if (i < midpoint) {
7468                 read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
7469             } else {
7470                 read_vec_element(s, tcg_ele, rm,
7471                                  2 * (i - midpoint) + part, size);
7472             }
7473             break;
7474         }
7475         case 2: /* TRN1/2 */
7476             if (i & 1) {
7477                 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
7478             } else {
7479                 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
7480             }
7481             break;
7482         case 3: /* ZIP1/2 */
7483         {
7484             int base = part * elements / 2;
7485             if (i & 1) {
7486                 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
7487             } else {
7488                 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
7489             }
7490             break;
7491         }
7492         default:
7493             g_assert_not_reached();
7494         }
7495 
7496         w = (i * esize) / 64;
7497         o = (i * esize) % 64;
7498         if (o == 0) {
7499             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
7500         } else {
7501             tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
7502             tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
7503         }
7504     }
7505 
7506     for (i = 0; i <= is_q; ++i) {
7507         write_vec_element(s, tcg_res[i], rd, i, MO_64);
7508     }
7509     clear_vec_high(s, is_q, rd);
7510 }
7511 
7512 /*
7513  * do_reduction_op helper
7514  *
7515  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7516  * important for correct NaN propagation that we do these
7517  * operations in exactly the order specified by the pseudocode.
7518  *
7519  * This is a recursive function; TCG temps should be freed by the
7520  * calling function once it is done with the values.
7521  */
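     /*
      * For example, a four-lane single-precision FMAXV starts with
      * vmap == 0b1111, which splits into vmap_lo == 0b0011 and
      * vmap_hi == 0b1100, so the result is evaluated as
      * op(op(elt0, elt1), op(elt2, elt3)), ie the pairwise tree that
      * Reduce() specifies.
      */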
7522 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
7523                                 int esize, int size, int vmap, TCGv_ptr fpst)
7524 {
7525     if (esize == size) {
7526         int element;
7527         MemOp msize = esize == 16 ? MO_16 : MO_32;
7528         TCGv_i32 tcg_elem;
7529 
7530         /* We should have one register left here */
7531         assert(ctpop8(vmap) == 1);
7532         element = ctz32(vmap);
7533         assert(element < 8);
7534 
7535         tcg_elem = tcg_temp_new_i32();
7536         read_vec_element_i32(s, tcg_elem, rn, element, msize);
7537         return tcg_elem;
7538     } else {
7539         int bits = size / 2;
7540         int shift = ctpop8(vmap) / 2;
7541         int vmap_lo = (vmap >> shift) & vmap;
7542         int vmap_hi = (vmap & ~vmap_lo);
7543         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7544 
7545         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
7546         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
7547         tcg_res = tcg_temp_new_i32();
7548 
7549         switch (fpopcode) {
7550         case 0x0c: /* fmaxnmv half-precision */
7551             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7552             break;
7553         case 0x0f: /* fmaxv half-precision */
7554             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
7555             break;
7556         case 0x1c: /* fminnmv half-precision */
7557             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7558             break;
7559         case 0x1f: /* fminv half-precision */
7560             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
7561             break;
7562         case 0x2c: /* fmaxnmv */
7563             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
7564             break;
7565         case 0x2f: /* fmaxv */
7566             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
7567             break;
7568         case 0x3c: /* fminnmv */
7569             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
7570             break;
7571         case 0x3f: /* fminv */
7572             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
7573             break;
7574         default:
7575             g_assert_not_reached();
7576         }
7577         return tcg_res;
7578     }
7579 }
7580 
7581 /* AdvSIMD across lanes
7582  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7583  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7584  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7585  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7586  */
7587 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
7588 {
7589     int rd = extract32(insn, 0, 5);
7590     int rn = extract32(insn, 5, 5);
7591     int size = extract32(insn, 22, 2);
7592     int opcode = extract32(insn, 12, 5);
7593     bool is_q = extract32(insn, 30, 1);
7594     bool is_u = extract32(insn, 29, 1);
7595     bool is_fp = false;
7596     bool is_min = false;
7597     int esize;
7598     int elements;
7599     int i;
7600     TCGv_i64 tcg_res, tcg_elt;
7601 
7602     switch (opcode) {
7603     case 0x1b: /* ADDV */
7604         if (is_u) {
7605             unallocated_encoding(s);
7606             return;
7607         }
7608         /* fall through */
7609     case 0x3: /* SADDLV, UADDLV */
7610     case 0xa: /* SMAXV, UMAXV */
7611     case 0x1a: /* SMINV, UMINV */
7612         if (size == 3 || (size == 2 && !is_q)) {
7613             unallocated_encoding(s);
7614             return;
7615         }
7616         break;
7617     case 0xc: /* FMAXNMV, FMINNMV */
7618     case 0xf: /* FMAXV, FMINV */
7619         /* Bit 1 of the size field encodes min vs max, and the actual size
7620          * depends on the encoding of the U bit. If U is not set (and FP16
7621          * is enabled) then we use half-precision float instead of single
7622          * precision.
7623          */
7624         is_min = extract32(size, 1, 1);
7625         is_fp = true;
7626         if (!is_u && dc_isar_feature(aa64_fp16, s)) {
7627             size = 1;
7628         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
7629             unallocated_encoding(s);
7630             return;
7631         } else {
7632             size = 2;
7633         }
7634         break;
7635     default:
7636         unallocated_encoding(s);
7637         return;
7638     }
7639 
7640     if (!fp_access_check(s)) {
7641         return;
7642     }
7643 
7644     esize = 8 << size;
7645     elements = (is_q ? 128 : 64) / esize;
7646 
7647     tcg_res = tcg_temp_new_i64();
7648     tcg_elt = tcg_temp_new_i64();
7649 
7650     /* These instructions operate across all lanes of a vector
7651      * to produce a single result. We can guarantee that a 64
7652      * bit intermediate is sufficient:
7653      *  + for [US]ADDLV the maximum element size is 32 bits, and
7654      *    the result type is 64 bits
7655      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7656      *    same as the element size, which is 32 bits at most
7657      * For the integer operations we can choose to work at 64
7658      * or 32 bits and truncate at the end; for simplicity
7659      * we use 64 bits always. The floating point
7660      * ops do require 32 bit intermediates, though.
7661      */
7662     if (!is_fp) {
7663         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
7664 
7665         for (i = 1; i < elements; i++) {
7666             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
7667 
7668             switch (opcode) {
7669             case 0x03: /* SADDLV / UADDLV */
7670             case 0x1b: /* ADDV */
7671                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
7672                 break;
7673             case 0x0a: /* SMAXV / UMAXV */
7674                 if (is_u) {
7675                     tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
7676                 } else {
7677                     tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
7678                 }
7679                 break;
7680             case 0x1a: /* SMINV / UMINV */
7681                 if (is_u) {
7682                     tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
7683                 } else {
7684                     tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
7685                 }
7686                 break;
7687             default:
7688                 g_assert_not_reached();
7689             }
7690 
7691         }
7692     } else {
7693         /* Floating point vector reduction ops which work across 32
7694          * bit (single) or 16 bit (half-precision) intermediates.
7695          * Note that correct NaN propagation requires that we do these
7696          * operations in exactly the order specified by the pseudocode.
7697          */
7698         TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7699         int fpopcode = opcode | is_min << 4 | is_u << 5;
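         /* This reconstructs the case labels of do_reduction_op: eg
          * single-precision FMINV is 0x0f | 1 << 4 | 1 << 5, ie 0x3f.
          */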
7700         int vmap = (1 << elements) - 1;
7701         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
7702                                              (is_q ? 128 : 64), vmap, fpst);
7703         tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
7704     }
7705 
7706     /* Now truncate the result to the width required for the final output */
7707     if (opcode == 0x03) {
7708         /* SADDLV, UADDLV: result is 2*esize */
7709         size++;
7710     }
7711 
7712     switch (size) {
7713     case 0:
7714         tcg_gen_ext8u_i64(tcg_res, tcg_res);
7715         break;
7716     case 1:
7717         tcg_gen_ext16u_i64(tcg_res, tcg_res);
7718         break;
7719     case 2:
7720         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7721         break;
7722     case 3:
7723         break;
7724     default:
7725         g_assert_not_reached();
7726     }
7727 
7728     write_fp_dreg(s, rd, tcg_res);
7729 }
7730 
7731 /* DUP (Element, Vector)
7732  *
7733  *  31  30   29              21 20    16 15        10  9    5 4    0
7734  * +---+---+-------------------+--------+-------------+------+------+
7735  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7736  * +---+---+-------------------+--------+-------------+------+------+
7737  *
7738  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7739  */
7740 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7741                              int imm5)
7742 {
7743     int size = ctz32(imm5);
7744     int index;
7745 
7746     if (size > 3 || (size == 3 && !is_q)) {
7747         unallocated_encoding(s);
7748         return;
7749     }
7750 
7751     if (!fp_access_check(s)) {
7752         return;
7753     }
7754 
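     /*
      * imm5<size> is the lowest set bit and the bits above it give
      * the element index: eg imm5 == 0b01010 means 16-bit elements
      * (size 1) with index 2.
      */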
7755     index = imm5 >> (size + 1);
7756     tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7757                          vec_reg_offset(s, rn, index, size),
7758                          is_q ? 16 : 8, vec_full_reg_size(s));
7759 }
7760 
7761 /* DUP (element, scalar)
7762  *  31                   21 20    16 15        10  9    5 4    0
7763  * +-----------------------+--------+-------------+------+------+
7764  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7765  * +-----------------------+--------+-------------+------+------+
7766  */
7767 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7768                               int imm5)
7769 {
7770     int size = ctz32(imm5);
7771     int index;
7772     TCGv_i64 tmp;
7773 
7774     if (size > 3) {
7775         unallocated_encoding(s);
7776         return;
7777     }
7778 
7779     if (!fp_access_check(s)) {
7780         return;
7781     }
7782 
7783     index = imm5 >> (size + 1);
7784 
7785     /* This instruction just extracts the specified element and
7786      * zero-extends it into the bottom of the destination register.
7787      */
7788     tmp = tcg_temp_new_i64();
7789     read_vec_element(s, tmp, rn, index, size);
7790     write_fp_dreg(s, rd, tmp);
7791 }
7792 
7793 /* DUP (General)
7794  *
7795  *  31  30   29              21 20    16 15        10  9    5 4    0
7796  * +---+---+-------------------+--------+-------------+------+------+
7797  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
7798  * +---+---+-------------------+--------+-------------+------+------+
7799  *
7800  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7801  */
7802 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7803                              int imm5)
7804 {
7805     int size = ctz32(imm5);
7806     uint32_t dofs, oprsz, maxsz;
7807 
7808     if (size > 3 || ((size == 3) && !is_q)) {
7809         unallocated_encoding(s);
7810         return;
7811     }
7812 
7813     if (!fp_access_check(s)) {
7814         return;
7815     }
7816 
7817     dofs = vec_full_reg_offset(s, rd);
7818     oprsz = is_q ? 16 : 8;
7819     maxsz = vec_full_reg_size(s);
7820 
7821     tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7822 }
7823 
7824 /* INS (Element)
7825  *
7826  *  31                   21 20    16 15  14    11  10 9    5 4    0
7827  * +-----------------------+--------+---+--------+---+------+------+
7828  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
7829  * +-----------------------+--------+---+--------+---+------+------+
7830  *
7831  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7832  * index: encoded in imm5<4:size+1>
7833  */
7834 static void handle_simd_inse(DisasContext *s, int rd, int rn,
7835                              int imm4, int imm5)
7836 {
7837     int size = ctz32(imm5);
7838     int src_index, dst_index;
7839     TCGv_i64 tmp;
7840 
7841     if (size > 3) {
7842         unallocated_encoding(s);
7843         return;
7844     }
7845 
7846     if (!fp_access_check(s)) {
7847         return;
7848     }
7849 
7850     dst_index = extract32(imm5, 1 + size, 5);
7851     src_index = extract32(imm4, size, 4);
7852 
7853     tmp = tcg_temp_new_i64();
7854 
7855     read_vec_element(s, tmp, rn, src_index, size);
7856     write_vec_element(s, tmp, rd, dst_index, size);
7857 
7858     /* INS is considered a 128-bit write for SVE. */
7859     clear_vec_high(s, true, rd);
7860 }
7861 
7862 
7863 /* INS (General)
7864  *
7865  *  31                   21 20    16 15        10  9    5 4    0
7866  * +-----------------------+--------+-------------+------+------+
7867  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
7868  * +-----------------------+--------+-------------+------+------+
7869  *
7870  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7871  * index: encoded in imm5<4:size+1>
7872  */
7873 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
7874 {
7875     int size = ctz32(imm5);
7876     int idx;
7877 
7878     if (size > 3) {
7879         unallocated_encoding(s);
7880         return;
7881     }
7882 
7883     if (!fp_access_check(s)) {
7884         return;
7885     }
7886 
7887     idx = extract32(imm5, 1 + size, 4 - size);
7888     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
7889 
7890     /* INS is considered a 128-bit write for SVE. */
7891     clear_vec_high(s, true, rd);
7892 }
7893 
7894 /*
7895  * UMOV (General)
7896  * SMOV (General)
7897  *
7898  *  31  30   29              21 20    16 15    12   10 9    5 4    0
7899  * +---+---+-------------------+--------+-------------+------+------+
7900  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
7901  * +---+---+-------------------+--------+-------------+------+------+
7902  *
7903  * U: unsigned when set
7904  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7905  */
7906 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
7907                                   int rn, int rd, int imm5)
7908 {
7909     int size = ctz32(imm5);
7910     int element;
7911     TCGv_i64 tcg_rd;
7912 
7913     /* Check for UnallocatedEncodings */
7914     if (is_signed) {
7915         if (size > 2 || (size == 2 && !is_q)) {
7916             unallocated_encoding(s);
7917             return;
7918         }
7919     } else {
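         /* UMOV: 8/16/32-bit elements go to a W register (Q == 0);
          * only 64-bit elements may target an X register (Q == 1).
          */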
7920         if (size > 3
7921             || (size < 3 && is_q)
7922             || (size == 3 && !is_q)) {
7923             unallocated_encoding(s);
7924             return;
7925         }
7926     }
7927 
7928     if (!fp_access_check(s)) {
7929         return;
7930     }
7931 
7932     element = extract32(imm5, 1 + size, 4);
7933 
7934     tcg_rd = cpu_reg(s, rd);
7935     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
7936     if (is_signed && !is_q) {
7937         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7938     }
7939 }
7940 
7941 /* AdvSIMD copy
7942  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
7943  * +---+---+----+-----------------+------+---+------+---+------+------+
7944  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7945  * +---+---+----+-----------------+------+---+------+---+------+------+
7946  */
7947 static void disas_simd_copy(DisasContext *s, uint32_t insn)
7948 {
7949     int rd = extract32(insn, 0, 5);
7950     int rn = extract32(insn, 5, 5);
7951     int imm4 = extract32(insn, 11, 4);
7952     int op = extract32(insn, 29, 1);
7953     int is_q = extract32(insn, 30, 1);
7954     int imm5 = extract32(insn, 16, 5);
7955 
7956     if (op) {
7957         if (is_q) {
7958             /* INS (element) */
7959             handle_simd_inse(s, rd, rn, imm4, imm5);
7960         } else {
7961             unallocated_encoding(s);
7962         }
7963     } else {
7964         switch (imm4) {
7965         case 0:
7966             /* DUP (element - vector) */
7967             handle_simd_dupe(s, is_q, rd, rn, imm5);
7968             break;
7969         case 1:
7970             /* DUP (general) */
7971             handle_simd_dupg(s, is_q, rd, rn, imm5);
7972             break;
7973         case 3:
7974             if (is_q) {
7975                 /* INS (general) */
7976                 handle_simd_insg(s, rd, rn, imm5);
7977             } else {
7978                 unallocated_encoding(s);
7979             }
7980             break;
7981         case 5:
7982         case 7:
7983             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7984             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
7985             break;
7986         default:
7987             unallocated_encoding(s);
7988             break;
7989         }
7990     }
7991 }
7992 
7993 /* AdvSIMD modified immediate
7994  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
7995  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7996  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
7997  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7998  *
7999  * There are a number of operations that can be carried out here:
8000  *   MOVI - move (shifted) imm into register
8001  *   MVNI - move inverted (shifted) imm into register
8002  *   ORR  - bitwise OR of (shifted) imm with register
8003  *   BIC  - bitwise clear of (shifted) imm with register
8004  * With ARMv8.2 we also have:
8005  *   FMOV half-precision
8006  */
8007 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
8008 {
8009     int rd = extract32(insn, 0, 5);
8010     int cmode = extract32(insn, 12, 4);
8011     int o2 = extract32(insn, 11, 1);
8012     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
8013     bool is_neg = extract32(insn, 29, 1);
8014     bool is_q = extract32(insn, 30, 1);
8015     uint64_t imm = 0;
8016 
8017     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
8018         /* Check for FMOV (vector, immediate) - half-precision */
8019         if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
8020             unallocated_encoding(s);
8021             return;
8022         }
8023     }
8024 
8025     if (!fp_access_check(s)) {
8026         return;
8027     }
8028 
8029     if (cmode == 15 && o2 && !is_neg) {
8030         /* FMOV (vector, immediate) - half-precision */
8031         imm = vfp_expand_imm(MO_16, abcdefgh);
8032         /* now duplicate across the lanes */
8033         imm = dup_const(MO_16, imm);
8034     } else {
8035         imm = asimd_imm_const(abcdefgh, cmode, is_neg);
8036     }
8037 
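    /*
     * cmode patterns 0xx1 (32-bit shifted) and 10x1 (16-bit shifted)
     * are the ORR/BIC forms, which read-modify-write Rd; all other
     * patterns simply write the expanded immediate to Rd.
     */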
8038     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
8039         /* MOVI or MVNI, with MVNI negation handled above.  */
8040         tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
8041                              vec_full_reg_size(s), imm);
8042     } else {
8043         /* ORR or BIC, with BIC negation to AND handled above.  */
8044         if (is_neg) {
8045             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
8046         } else {
8047             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
8048         }
8049     }
8050 }
8051 
8052 /* AdvSIMD scalar copy
8053  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
8054  * +-----+----+-----------------+------+---+------+---+------+------+
8055  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
8056  * +-----+----+-----------------+------+---+------+---+------+------+
8057  */
8058 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
8059 {
8060     int rd = extract32(insn, 0, 5);
8061     int rn = extract32(insn, 5, 5);
8062     int imm4 = extract32(insn, 11, 4);
8063     int imm5 = extract32(insn, 16, 5);
8064     int op = extract32(insn, 29, 1);
8065 
8066     if (op != 0 || imm4 != 0) {
8067         unallocated_encoding(s);
8068         return;
8069     }
8070 
8071     /* DUP (element, scalar) */
8072     handle_simd_dupes(s, rd, rn, imm5);
8073 }
8074 
8075 /* AdvSIMD scalar pairwise
8076  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
8077  * +-----+---+-----------+------+-----------+--------+-----+------+------+
8078  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
8079  * +-----+---+-----------+------+-----------+--------+-----+------+------+
8080  */
8081 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
8082 {
8083     int u = extract32(insn, 29, 1);
8084     int size = extract32(insn, 22, 2);
8085     int opcode = extract32(insn, 12, 5);
8086     int rn = extract32(insn, 5, 5);
8087     int rd = extract32(insn, 0, 5);
8088     TCGv_ptr fpst;
8089 
8090     /* For some ops (the FP ones), size[1] is part of the encoding.
8091      * For ADDP strictly it is not but size[1] is always 1 for valid
8092      * encodings.
8093      */
8094     opcode |= (extract32(size, 1, 1) << 5);
8095 
8096     switch (opcode) {
8097     case 0x3b: /* ADDP */
8098         if (u || size != 3) {
8099             unallocated_encoding(s);
8100             return;
8101         }
8102         if (!fp_access_check(s)) {
8103             return;
8104         }
8105 
8106         fpst = NULL;
8107         break;
8108     case 0xc: /* FMAXNMP */
8109     case 0xd: /* FADDP */
8110     case 0xf: /* FMAXP */
8111     case 0x2c: /* FMINNMP */
8112     case 0x2f: /* FMINP */
        /* FP op, size[0] is 32 or 64 bit */
8114         if (!u) {
8115             if (!dc_isar_feature(aa64_fp16, s)) {
8116                 unallocated_encoding(s);
8117                 return;
8118             } else {
8119                 size = MO_16;
8120             }
8121         } else {
8122             size = extract32(size, 0, 1) ? MO_64 : MO_32;
8123         }
8124 
8125         if (!fp_access_check(s)) {
8126             return;
8127         }
8128 
8129         fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8130         break;
8131     default:
8132         unallocated_encoding(s);
8133         return;
8134     }
8135 
8136     if (size == MO_64) {
8137         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8138         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8139         TCGv_i64 tcg_res = tcg_temp_new_i64();
8140 
8141         read_vec_element(s, tcg_op1, rn, 0, MO_64);
8142         read_vec_element(s, tcg_op2, rn, 1, MO_64);
8143 
8144         switch (opcode) {
8145         case 0x3b: /* ADDP */
8146             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
8147             break;
8148         case 0xc: /* FMAXNMP */
8149             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8150             break;
8151         case 0xd: /* FADDP */
8152             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8153             break;
8154         case 0xf: /* FMAXP */
8155             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8156             break;
8157         case 0x2c: /* FMINNMP */
8158             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8159             break;
8160         case 0x2f: /* FMINP */
8161             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8162             break;
8163         default:
8164             g_assert_not_reached();
8165         }
8166 
8167         write_fp_dreg(s, rd, tcg_res);
8168     } else {
8169         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8170         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8171         TCGv_i32 tcg_res = tcg_temp_new_i32();
8172 
8173         read_vec_element_i32(s, tcg_op1, rn, 0, size);
8174         read_vec_element_i32(s, tcg_op2, rn, 1, size);
8175 
8176         if (size == MO_16) {
8177             switch (opcode) {
8178             case 0xc: /* FMAXNMP */
8179                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8180                 break;
8181             case 0xd: /* FADDP */
8182                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
8183                 break;
8184             case 0xf: /* FMAXP */
8185                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
8186                 break;
8187             case 0x2c: /* FMINNMP */
8188                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8189                 break;
8190             case 0x2f: /* FMINP */
8191                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
8192                 break;
8193             default:
8194                 g_assert_not_reached();
8195             }
8196         } else {
8197             switch (opcode) {
8198             case 0xc: /* FMAXNMP */
8199                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8200                 break;
8201             case 0xd: /* FADDP */
8202                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8203                 break;
8204             case 0xf: /* FMAXP */
8205                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8206                 break;
8207             case 0x2c: /* FMINNMP */
8208                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8209                 break;
8210             case 0x2f: /* FMINP */
8211                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8212                 break;
8213             default:
8214                 g_assert_not_reached();
8215             }
8216         }
8217 
8218         write_fp_sreg(s, rd, tcg_res);
8219     }
8220 }
8221 
8222 /*
8223  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
8224  *
 * This handles the common shifting logic and is used by both
 * the vector and scalar code.
8227  */
8228 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
8229                                     TCGv_i64 tcg_rnd, bool accumulate,
8230                                     bool is_u, int size, int shift)
8231 {
8232     bool extended_result = false;
8233     bool round = tcg_rnd != NULL;
8234     int ext_lshift = 0;
8235     TCGv_i64 tcg_src_hi;
8236 
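    /*
     * For 64-bit elements the addition of the rounding constant can
     * carry out of 64 bits, so the rounded value is kept as a 128-bit
     * quantity in {tcg_src_hi, tcg_src} and shifted at that precision.
     */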
8237     if (round && size == 3) {
8238         extended_result = true;
8239         ext_lshift = 64 - shift;
8240         tcg_src_hi = tcg_temp_new_i64();
8241     } else if (shift == 64) {
8242         if (!accumulate && is_u) {
8243             /* result is zero */
8244             tcg_gen_movi_i64(tcg_res, 0);
8245             return;
8246         }
8247     }
8248 
8249     /* Deal with the rounding step */
8250     if (round) {
8251         if (extended_result) {
8252             TCGv_i64 tcg_zero = tcg_constant_i64(0);
8253             if (!is_u) {
8254                 /* take care of sign extending tcg_res */
8255                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
8256                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8257                                  tcg_src, tcg_src_hi,
8258                                  tcg_rnd, tcg_zero);
8259             } else {
8260                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8261                                  tcg_src, tcg_zero,
8262                                  tcg_rnd, tcg_zero);
8263             }
8264         } else {
8265             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
8266         }
8267     }
8268 
8269     /* Now do the shift right */
8270     if (round && extended_result) {
8271         /* extended case, >64 bit precision required */
8272         if (ext_lshift == 0) {
8273             /* special case, only high bits matter */
8274             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
8275         } else {
8276             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8277             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
8278             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
8279         }
8280     } else {
8281         if (is_u) {
8282             if (shift == 64) {
8283                 /* essentially shifting in 64 zeros */
8284                 tcg_gen_movi_i64(tcg_src, 0);
8285             } else {
8286                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8287             }
8288         } else {
8289             if (shift == 64) {
8290                 /* effectively extending the sign-bit */
8291                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
8292             } else {
8293                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
8294             }
8295         }
8296     }
8297 
8298     if (accumulate) {
8299         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
8300     } else {
8301         tcg_gen_mov_i64(tcg_res, tcg_src);
8302     }
8303 }
8304 
8305 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
8306 static void handle_scalar_simd_shri(DisasContext *s,
8307                                     bool is_u, int immh, int immb,
8308                                     int opcode, int rn, int rd)
8309 {
8310     const int size = 3;
8311     int immhb = immh << 3 | immb;
8312     int shift = 2 * (8 << size) - immhb;
8313     bool accumulate = false;
8314     bool round = false;
8315     bool insert = false;
8316     TCGv_i64 tcg_rn;
8317     TCGv_i64 tcg_rd;
8318     TCGv_i64 tcg_round;
8319 
8320     if (!extract32(immh, 3, 1)) {
8321         unallocated_encoding(s);
8322         return;
8323     }
8324 
8325     if (!fp_access_check(s)) {
8326         return;
8327     }
8328 
8329     switch (opcode) {
8330     case 0x02: /* SSRA / USRA (accumulate) */
8331         accumulate = true;
8332         break;
8333     case 0x04: /* SRSHR / URSHR (rounding) */
8334         round = true;
8335         break;
8336     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8337         accumulate = round = true;
8338         break;
8339     case 0x08: /* SRI */
8340         insert = true;
8341         break;
8342     }
8343 
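    /*
     * The rounded shift variants add a bias of 1 << (shift - 1) before
     * shifting, so the result is rounded rather than truncated.
     */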
8344     if (round) {
8345         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8346     } else {
8347         tcg_round = NULL;
8348     }
8349 
8350     tcg_rn = read_fp_dreg(s, rn);
8351     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8352 
8353     if (insert) {
        /* A shift count equal to the element size is valid but does
         * nothing; special case it to avoid a potential shift by 64.
         */
8357         int esize = 8 << size;
8358         if (shift != esize) {
8359             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
8360             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
8361         }
8362     } else {
8363         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8364                                 accumulate, is_u, size, shift);
8365     }
8366 
8367     write_fp_dreg(s, rd, tcg_rd);
8368 }
8369 
8370 /* SHL/SLI - Scalar shift left */
8371 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
8372                                     int immh, int immb, int opcode,
8373                                     int rn, int rd)
8374 {
8375     int size = 32 - clz32(immh) - 1;
8376     int immhb = immh << 3 | immb;
8377     int shift = immhb - (8 << size);
8378     TCGv_i64 tcg_rn;
8379     TCGv_i64 tcg_rd;
8380 
8381     if (!extract32(immh, 3, 1)) {
8382         unallocated_encoding(s);
8383         return;
8384     }
8385 
8386     if (!fp_access_check(s)) {
8387         return;
8388     }
8389 
8390     tcg_rn = read_fp_dreg(s, rn);
8391     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8392 
8393     if (insert) {
8394         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
8395     } else {
8396         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
8397     }
8398 
8399     write_fp_dreg(s, rd, tcg_rd);
8400 }
8401 
8402 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8403  * (signed/unsigned) narrowing */
8404 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
8405                                    bool is_u_shift, bool is_u_narrow,
8406                                    int immh, int immb, int opcode,
8407                                    int rn, int rd)
8408 {
8409     int immhb = immh << 3 | immb;
8410     int size = 32 - clz32(immh) - 1;
8411     int esize = 8 << size;
8412     int shift = (2 * esize) - immhb;
8413     int elements = is_scalar ? 1 : (64 / esize);
8414     bool round = extract32(opcode, 0, 1);
8415     MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
8416     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
8417     TCGv_i32 tcg_rd_narrowed;
8418     TCGv_i64 tcg_final;
8419 
8420     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
8421         { gen_helper_neon_narrow_sat_s8,
8422           gen_helper_neon_unarrow_sat8 },
8423         { gen_helper_neon_narrow_sat_s16,
8424           gen_helper_neon_unarrow_sat16 },
8425         { gen_helper_neon_narrow_sat_s32,
8426           gen_helper_neon_unarrow_sat32 },
8427         { NULL, NULL },
8428     };
8429     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
8430         gen_helper_neon_narrow_sat_u8,
8431         gen_helper_neon_narrow_sat_u16,
8432         gen_helper_neon_narrow_sat_u32,
8433         NULL
8434     };
8435     NeonGenNarrowEnvFn *narrowfn;
8436 
8437     int i;
8438 
8439     assert(size < 4);
8440 
8441     if (extract32(immh, 3, 1)) {
8442         unallocated_encoding(s);
8443         return;
8444     }
8445 
8446     if (!fp_access_check(s)) {
8447         return;
8448     }
8449 
8450     if (is_u_shift) {
8451         narrowfn = unsigned_narrow_fns[size];
8452     } else {
8453         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
8454     }
8455 
8456     tcg_rn = tcg_temp_new_i64();
8457     tcg_rd = tcg_temp_new_i64();
8458     tcg_rd_narrowed = tcg_temp_new_i32();
8459     tcg_final = tcg_temp_new_i64();
8460 
8461     if (round) {
8462         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8463     } else {
8464         tcg_round = NULL;
8465     }
8466 
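    /*
     * Shift and saturating-narrow each element in turn, packing the
     * narrowed results into tcg_final; the packed 64 bits then go to
     * the low half of Rd, or the high half for the "2" variants.
     */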
8467     for (i = 0; i < elements; i++) {
8468         read_vec_element(s, tcg_rn, rn, i, ldop);
8469         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                false, is_u_shift, size + 1, shift);
8471         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
8472         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
8473         if (i == 0) {
8474             tcg_gen_mov_i64(tcg_final, tcg_rd);
8475         } else {
8476             tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8477         }
8478     }
8479 
8480     if (!is_q) {
8481         write_vec_element(s, tcg_final, rd, 0, MO_64);
8482     } else {
8483         write_vec_element(s, tcg_final, rd, 1, MO_64);
8484     }
8485     clear_vec_high(s, is_q, rd);
8486 }
8487 
8488 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8489 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
8490                              bool src_unsigned, bool dst_unsigned,
8491                              int immh, int immb, int rn, int rd)
8492 {
8493     int immhb = immh << 3 | immb;
8494     int size = 32 - clz32(immh) - 1;
8495     int shift = immhb - (8 << size);
8496     int pass;
8497 
8498     assert(immh != 0);
8499     assert(!(scalar && is_q));
8500 
8501     if (!scalar) {
8502         if (!is_q && extract32(immh, 3, 1)) {
8503             unallocated_encoding(s);
8504             return;
8505         }
8506 
8507         /* Since we use the variable-shift helpers we must
8508          * replicate the shift count into each element of
8509          * the tcg_shift value.
8510          */
8511         switch (size) {
8512         case 0:
8513             shift |= shift << 8;
8514             /* fall through */
8515         case 1:
8516             shift |= shift << 16;
8517             break;
8518         case 2:
8519         case 3:
8520             break;
8521         default:
8522             g_assert_not_reached();
8523         }
8524     }
8525 
8526     if (!fp_access_check(s)) {
8527         return;
8528     }
8529 
8530     if (size == 3) {
8531         TCGv_i64 tcg_shift = tcg_constant_i64(shift);
8532         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
8533             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
8534             { NULL, gen_helper_neon_qshl_u64 },
8535         };
8536         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
8537         int maxpass = is_q ? 2 : 1;
8538 
8539         for (pass = 0; pass < maxpass; pass++) {
8540             TCGv_i64 tcg_op = tcg_temp_new_i64();
8541 
8542             read_vec_element(s, tcg_op, rn, pass, MO_64);
8543             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8544             write_vec_element(s, tcg_op, rd, pass, MO_64);
8545         }
8546         clear_vec_high(s, is_q, rd);
8547     } else {
8548         TCGv_i32 tcg_shift = tcg_constant_i32(shift);
8549         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
8550             {
8551                 { gen_helper_neon_qshl_s8,
8552                   gen_helper_neon_qshl_s16,
8553                   gen_helper_neon_qshl_s32 },
8554                 { gen_helper_neon_qshlu_s8,
8555                   gen_helper_neon_qshlu_s16,
8556                   gen_helper_neon_qshlu_s32 }
8557             }, {
8558                 { NULL, NULL, NULL },
8559                 { gen_helper_neon_qshl_u8,
8560                   gen_helper_neon_qshl_u16,
8561                   gen_helper_neon_qshl_u32 }
8562             }
8563         };
8564         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
8565         MemOp memop = scalar ? size : MO_32;
8566         int maxpass = scalar ? 1 : is_q ? 4 : 2;
8567 
8568         for (pass = 0; pass < maxpass; pass++) {
8569             TCGv_i32 tcg_op = tcg_temp_new_i32();
8570 
8571             read_vec_element_i32(s, tcg_op, rn, pass, memop);
8572             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8573             if (scalar) {
8574                 switch (size) {
8575                 case 0:
8576                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
8577                     break;
8578                 case 1:
8579                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
8580                     break;
8581                 case 2:
8582                     break;
8583                 default:
8584                     g_assert_not_reached();
8585                 }
8586                 write_fp_sreg(s, rd, tcg_op);
8587             } else {
8588                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8589             }
8590         }
8591 
8592         if (!scalar) {
8593             clear_vec_high(s, is_q, rd);
8594         }
8595     }
8596 }
8597 
8598 /* Common vector code for handling integer to FP conversion */
8599 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
8600                                    int elements, int is_signed,
8601                                    int fracbits, int size)
8602 {
8603     TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8604     TCGv_i32 tcg_shift = NULL;
8605 
8606     MemOp mop = size | (is_signed ? MO_SIGN : 0);
8607     int pass;
8608 
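    /*
     * The 64-bit conversion helpers always take a shift argument, even
     * when fracbits is 0; the 32-bit and 16-bit paths instead have
     * separate no-shift helpers for the fracbits == 0 case.
     */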
8609     if (fracbits || size == MO_64) {
8610         tcg_shift = tcg_constant_i32(fracbits);
8611     }
8612 
8613     if (size == MO_64) {
8614         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
8615         TCGv_i64 tcg_double = tcg_temp_new_i64();
8616 
8617         for (pass = 0; pass < elements; pass++) {
8618             read_vec_element(s, tcg_int64, rn, pass, mop);
8619 
8620             if (is_signed) {
8621                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
8622                                      tcg_shift, tcg_fpst);
8623             } else {
8624                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
8625                                      tcg_shift, tcg_fpst);
8626             }
8627             if (elements == 1) {
8628                 write_fp_dreg(s, rd, tcg_double);
8629             } else {
8630                 write_vec_element(s, tcg_double, rd, pass, MO_64);
8631             }
8632         }
8633     } else {
8634         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
8635         TCGv_i32 tcg_float = tcg_temp_new_i32();
8636 
8637         for (pass = 0; pass < elements; pass++) {
8638             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
8639 
8640             switch (size) {
8641             case MO_32:
8642                 if (fracbits) {
8643                     if (is_signed) {
8644                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
8645                                              tcg_shift, tcg_fpst);
8646                     } else {
8647                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
8648                                              tcg_shift, tcg_fpst);
8649                     }
8650                 } else {
8651                     if (is_signed) {
8652                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
8653                     } else {
8654                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
8655                     }
8656                 }
8657                 break;
8658             case MO_16:
8659                 if (fracbits) {
8660                     if (is_signed) {
8661                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
8662                                              tcg_shift, tcg_fpst);
8663                     } else {
8664                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
8665                                              tcg_shift, tcg_fpst);
8666                     }
8667                 } else {
8668                     if (is_signed) {
8669                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
8670                     } else {
8671                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
8672                     }
8673                 }
8674                 break;
8675             default:
8676                 g_assert_not_reached();
8677             }
8678 
8679             if (elements == 1) {
8680                 write_fp_sreg(s, rd, tcg_float);
8681             } else {
8682                 write_vec_element_i32(s, tcg_float, rd, pass, size);
8683             }
8684         }
8685     }
8686 
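    /*
     * elements << size is the number of bytes written: only a full
     * 16-byte write counts as a quad operation, otherwise the high
     * 64 bits of the register must be zeroed.
     */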
8687     clear_vec_high(s, elements << size == 16, rd);
8688 }
8689 
8690 /* UCVTF/SCVTF - Integer to FP conversion */
8691 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8692                                          bool is_q, bool is_u,
8693                                          int immh, int immb, int opcode,
8694                                          int rn, int rd)
8695 {
8696     int size, elements, fracbits;
8697     int immhb = immh << 3 | immb;
8698 
8699     if (immh & 8) {
8700         size = MO_64;
8701         if (!is_scalar && !is_q) {
8702             unallocated_encoding(s);
8703             return;
8704         }
8705     } else if (immh & 4) {
8706         size = MO_32;
8707     } else if (immh & 2) {
8708         size = MO_16;
8709         if (!dc_isar_feature(aa64_fp16, s)) {
8710             unallocated_encoding(s);
8711             return;
8712         }
8713     } else {
8714         /* immh == 0 would be a failure of the decode logic */
8715         g_assert(immh == 1);
8716         unallocated_encoding(s);
8717         return;
8718     }
8719 
8720     if (is_scalar) {
8721         elements = 1;
8722     } else {
8723         elements = (8 << is_q) >> size;
8724     }
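    /* immhb encodes the shift as (2 * esize) - fracbits, giving 1..esize */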
8725     fracbits = (16 << size) - immhb;
8726 
8727     if (!fp_access_check(s)) {
8728         return;
8729     }
8730 
8731     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8732 }
8733 
/* FCVTZS, FCVTZU - FP to fixed-point conversion */
8735 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8736                                          bool is_q, bool is_u,
8737                                          int immh, int immb, int rn, int rd)
8738 {
8739     int immhb = immh << 3 | immb;
8740     int pass, size, fracbits;
8741     TCGv_ptr tcg_fpstatus;
8742     TCGv_i32 tcg_rmode, tcg_shift;
8743 
8744     if (immh & 0x8) {
8745         size = MO_64;
8746         if (!is_scalar && !is_q) {
8747             unallocated_encoding(s);
8748             return;
8749         }
8750     } else if (immh & 0x4) {
8751         size = MO_32;
8752     } else if (immh & 0x2) {
8753         size = MO_16;
8754         if (!dc_isar_feature(aa64_fp16, s)) {
8755             unallocated_encoding(s);
8756             return;
8757         }
8758     } else {
8759         /* Should have split out AdvSIMD modified immediate earlier.  */
8760         assert(immh == 1);
8761         unallocated_encoding(s);
8762         return;
8763     }
8764 
8765     if (!fp_access_check(s)) {
8766         return;
8767     }
8768 
8769     assert(!(is_scalar && is_q));
8770 
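    /*
     * FCVTZS/FCVTZU always round toward zero regardless of FPCR.RMode,
     * so force the rounding mode here and restore it when done.
     */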
8771     tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8772     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
8773     fracbits = (16 << size) - immhb;
8774     tcg_shift = tcg_constant_i32(fracbits);
8775 
8776     if (size == MO_64) {
8777         int maxpass = is_scalar ? 1 : 2;
8778 
8779         for (pass = 0; pass < maxpass; pass++) {
8780             TCGv_i64 tcg_op = tcg_temp_new_i64();
8781 
8782             read_vec_element(s, tcg_op, rn, pass, MO_64);
8783             if (is_u) {
8784                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8785             } else {
8786                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8787             }
8788             write_vec_element(s, tcg_op, rd, pass, MO_64);
8789         }
8790         clear_vec_high(s, is_q, rd);
8791     } else {
8792         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
8793         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
8794 
8795         switch (size) {
8796         case MO_16:
8797             if (is_u) {
8798                 fn = gen_helper_vfp_touhh;
8799             } else {
8800                 fn = gen_helper_vfp_toshh;
8801             }
8802             break;
8803         case MO_32:
8804             if (is_u) {
8805                 fn = gen_helper_vfp_touls;
8806             } else {
8807                 fn = gen_helper_vfp_tosls;
8808             }
8809             break;
8810         default:
8811             g_assert_not_reached();
8812         }
8813 
8814         for (pass = 0; pass < maxpass; pass++) {
8815             TCGv_i32 tcg_op = tcg_temp_new_i32();
8816 
8817             read_vec_element_i32(s, tcg_op, rn, pass, size);
8818             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8819             if (is_scalar) {
8820                 write_fp_sreg(s, rd, tcg_op);
8821             } else {
8822                 write_vec_element_i32(s, tcg_op, rd, pass, size);
8823             }
8824         }
8825         if (!is_scalar) {
8826             clear_vec_high(s, is_q, rd);
8827         }
8828     }
8829 
8830     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
8831 }
8832 
8833 /* AdvSIMD scalar shift by immediate
8834  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
8835  * +-----+---+-------------+------+------+--------+---+------+------+
8836  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8837  * +-----+---+-------------+------+------+--------+---+------+------+
8838  *
 * This is the scalar version, so it works on fixed size registers.
8840  */
8841 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8842 {
8843     int rd = extract32(insn, 0, 5);
8844     int rn = extract32(insn, 5, 5);
8845     int opcode = extract32(insn, 11, 5);
8846     int immb = extract32(insn, 16, 3);
8847     int immh = extract32(insn, 19, 4);
8848     bool is_u = extract32(insn, 29, 1);
8849 
8850     if (immh == 0) {
8851         unallocated_encoding(s);
8852         return;
8853     }
8854 
8855     switch (opcode) {
8856     case 0x08: /* SRI */
8857         if (!is_u) {
8858             unallocated_encoding(s);
8859             return;
8860         }
8861         /* fall through */
8862     case 0x00: /* SSHR / USHR */
8863     case 0x02: /* SSRA / USRA */
8864     case 0x04: /* SRSHR / URSHR */
8865     case 0x06: /* SRSRA / URSRA */
8866         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8867         break;
8868     case 0x0a: /* SHL / SLI */
8869         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8870         break;
8871     case 0x1c: /* SCVTF, UCVTF */
8872         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8873                                      opcode, rn, rd);
8874         break;
8875     case 0x10: /* SQSHRUN, SQSHRUN2 */
8876     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8877         if (!is_u) {
8878             unallocated_encoding(s);
8879             return;
8880         }
8881         handle_vec_simd_sqshrn(s, true, false, false, true,
8882                                immh, immb, opcode, rn, rd);
8883         break;
    case 0x12: /* SQSHRN, SQSHRN2, UQSHRN, UQSHRN2 */
8885     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8886         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8887                                immh, immb, opcode, rn, rd);
8888         break;
8889     case 0xc: /* SQSHLU */
8890         if (!is_u) {
8891             unallocated_encoding(s);
8892             return;
8893         }
8894         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8895         break;
8896     case 0xe: /* SQSHL, UQSHL */
8897         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8898         break;
8899     case 0x1f: /* FCVTZS, FCVTZU */
8900         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8901         break;
8902     default:
8903         unallocated_encoding(s);
8904         break;
8905     }
8906 }
8907 
8908 /* AdvSIMD scalar three different
8909  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8910  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8911  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8912  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8913  */
8914 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
8915 {
8916     bool is_u = extract32(insn, 29, 1);
8917     int size = extract32(insn, 22, 2);
8918     int opcode = extract32(insn, 12, 4);
8919     int rm = extract32(insn, 16, 5);
8920     int rn = extract32(insn, 5, 5);
8921     int rd = extract32(insn, 0, 5);
8922 
8923     if (is_u) {
8924         unallocated_encoding(s);
8925         return;
8926     }
8927 
8928     switch (opcode) {
8929     case 0x9: /* SQDMLAL, SQDMLAL2 */
8930     case 0xb: /* SQDMLSL, SQDMLSL2 */
8931     case 0xd: /* SQDMULL, SQDMULL2 */
8932         if (size == 0 || size == 3) {
8933             unallocated_encoding(s);
8934             return;
8935         }
8936         break;
8937     default:
8938         unallocated_encoding(s);
8939         return;
8940     }
8941 
8942     if (!fp_access_check(s)) {
8943         return;
8944     }
8945 
8946     if (size == 2) {
8947         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8948         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8949         TCGv_i64 tcg_res = tcg_temp_new_i64();
8950 
8951         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
8952         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
8953 
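        /*
         * SQDMULL doubles the product: implement the doubling as a
         * saturating add of the product to itself.
         */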
8954         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
8955         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
8956 
8957         switch (opcode) {
8958         case 0xd: /* SQDMULL, SQDMULL2 */
8959             break;
8960         case 0xb: /* SQDMLSL, SQDMLSL2 */
8961             tcg_gen_neg_i64(tcg_res, tcg_res);
8962             /* fall through */
8963         case 0x9: /* SQDMLAL, SQDMLAL2 */
8964             read_vec_element(s, tcg_op1, rd, 0, MO_64);
8965             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
8966                                               tcg_res, tcg_op1);
8967             break;
8968         default:
8969             g_assert_not_reached();
8970         }
8971 
8972         write_fp_dreg(s, rd, tcg_res);
8973     } else {
8974         TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
8975         TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
8976         TCGv_i64 tcg_res = tcg_temp_new_i64();
8977 
8978         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
8979         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
8980 
8981         switch (opcode) {
8982         case 0xd: /* SQDMULL, SQDMULL2 */
8983             break;
8984         case 0xb: /* SQDMLSL, SQDMLSL2 */
8985             gen_helper_neon_negl_u32(tcg_res, tcg_res);
8986             /* fall through */
8987         case 0x9: /* SQDMLAL, SQDMLAL2 */
8988         {
8989             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
8990             read_vec_element(s, tcg_op3, rd, 0, MO_32);
8991             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
8992                                               tcg_res, tcg_op3);
8993             break;
8994         }
8995         default:
8996             g_assert_not_reached();
8997         }
8998 
8999         tcg_gen_ext32u_i64(tcg_res, tcg_res);
9000         write_fp_dreg(s, rd, tcg_res);
9001     }
9002 }
9003 
9004 static void handle_3same_64(DisasContext *s, int opcode, bool u,
9005                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
9006 {
9007     /* Handle 64x64->64 opcodes which are shared between the scalar
9008      * and vector 3-same groups. We cover every opcode where size == 3
9009      * is valid in either the three-reg-same (integer, not pairwise)
9010      * or scalar-three-reg-same groups.
9011      */
9012     TCGCond cond;
9013 
9014     switch (opcode) {
9015     case 0x1: /* SQADD */
9016         if (u) {
9017             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9018         } else {
9019             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9020         }
9021         break;
9022     case 0x5: /* SQSUB */
9023         if (u) {
9024             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9025         } else {
9026             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9027         }
9028         break;
9029     case 0x6: /* CMGT, CMHI */
9030         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
9031          * We implement this using setcond (test) and then negating.
9032          */
9033         cond = u ? TCG_COND_GTU : TCG_COND_GT;
9034     do_cmop:
9035         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
9036         tcg_gen_neg_i64(tcg_rd, tcg_rd);
9037         break;
9038     case 0x7: /* CMGE, CMHS */
9039         cond = u ? TCG_COND_GEU : TCG_COND_GE;
9040         goto do_cmop;
9041     case 0x11: /* CMTST, CMEQ */
9042         if (u) {
9043             cond = TCG_COND_EQ;
9044             goto do_cmop;
9045         }
9046         gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
9047         break;
9048     case 0x8: /* SSHL, USHL */
9049         if (u) {
9050             gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
9051         } else {
9052             gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
9053         }
9054         break;
9055     case 0x9: /* SQSHL, UQSHL */
9056         if (u) {
9057             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9058         } else {
9059             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9060         }
9061         break;
9062     case 0xa: /* SRSHL, URSHL */
9063         if (u) {
9064             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
9065         } else {
9066             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
9067         }
9068         break;
9069     case 0xb: /* SQRSHL, UQRSHL */
9070         if (u) {
9071             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9072         } else {
9073             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9074         }
9075         break;
9076     case 0x10: /* ADD, SUB */
9077         if (u) {
9078             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
9079         } else {
9080             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
9081         }
9082         break;
9083     default:
9084         g_assert_not_reached();
9085     }
9086 }
9087 
9088 /* Handle the 3-same-operands float operations; shared by the scalar
9089  * and vector encodings. The caller must filter out any encodings
9090  * not allocated for the encoding it is dealing with.
9091  */
9092 static void handle_3same_float(DisasContext *s, int size, int elements,
9093                                int fpopcode, int rd, int rn, int rm)
9094 {
9095     int pass;
9096     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9097 
9098     for (pass = 0; pass < elements; pass++) {
9099         if (size) {
9100             /* Double */
9101             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9102             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9103             TCGv_i64 tcg_res = tcg_temp_new_i64();
9104 
9105             read_vec_element(s, tcg_op1, rn, pass, MO_64);
9106             read_vec_element(s, tcg_op2, rm, pass, MO_64);
9107 
9108             switch (fpopcode) {
9109             case 0x39: /* FMLS */
9110                 /* As usual for ARM, separate negation for fused multiply-add */
9111                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
9112                 /* fall through */
9113             case 0x19: /* FMLA */
9114                 read_vec_element(s, tcg_res, rd, pass, MO_64);
9115                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
9116                                        tcg_res, fpst);
9117                 break;
9118             case 0x18: /* FMAXNM */
9119                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
9120                 break;
9121             case 0x1a: /* FADD */
9122                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
9123                 break;
9124             case 0x1b: /* FMULX */
9125                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
9126                 break;
9127             case 0x1c: /* FCMEQ */
9128                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9129                 break;
9130             case 0x1e: /* FMAX */
9131                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
9132                 break;
9133             case 0x1f: /* FRECPS */
9134                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9135                 break;
9136             case 0x38: /* FMINNM */
9137                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
9138                 break;
9139             case 0x3a: /* FSUB */
9140                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
9141                 break;
9142             case 0x3e: /* FMIN */
9143                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
9144                 break;
9145             case 0x3f: /* FRSQRTS */
9146                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9147                 break;
9148             case 0x5b: /* FMUL */
9149                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
9150                 break;
9151             case 0x5c: /* FCMGE */
9152                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9153                 break;
9154             case 0x5d: /* FACGE */
9155                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9156                 break;
9157             case 0x5f: /* FDIV */
9158                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
9159                 break;
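            /*
             * FABD has no dedicated helper: compute a - b, then clear
             * the sign bit with the abs helper.
             */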
9160             case 0x7a: /* FABD */
9161                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
9162                 gen_helper_vfp_absd(tcg_res, tcg_res);
9163                 break;
9164             case 0x7c: /* FCMGT */
9165                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9166                 break;
9167             case 0x7d: /* FACGT */
9168                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9169                 break;
9170             default:
9171                 g_assert_not_reached();
9172             }
9173 
9174             write_vec_element(s, tcg_res, rd, pass, MO_64);
9175         } else {
9176             /* Single */
9177             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9178             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9179             TCGv_i32 tcg_res = tcg_temp_new_i32();
9180 
9181             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9182             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9183 
9184             switch (fpopcode) {
9185             case 0x39: /* FMLS */
9186                 /* As usual for ARM, separate negation for fused multiply-add */
9187                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
9188                 /* fall through */
9189             case 0x19: /* FMLA */
9190                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9191                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
9192                                        tcg_res, fpst);
9193                 break;
9194             case 0x1a: /* FADD */
9195                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
9196                 break;
9197             case 0x1b: /* FMULX */
9198                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
9199                 break;
9200             case 0x1c: /* FCMEQ */
9201                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9202                 break;
9203             case 0x1e: /* FMAX */
9204                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
9205                 break;
9206             case 0x1f: /* FRECPS */
9207                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9208                 break;
9209             case 0x18: /* FMAXNM */
9210                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
9211                 break;
9212             case 0x38: /* FMINNM */
9213                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
9214                 break;
9215             case 0x3a: /* FSUB */
9216                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9217                 break;
9218             case 0x3e: /* FMIN */
9219                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
9220                 break;
9221             case 0x3f: /* FRSQRTS */
9222                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9223                 break;
9224             case 0x5b: /* FMUL */
9225                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
9226                 break;
9227             case 0x5c: /* FCMGE */
9228                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9229                 break;
9230             case 0x5d: /* FACGE */
9231                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9232                 break;
9233             case 0x5f: /* FDIV */
9234                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
9235                 break;
9236             case 0x7a: /* FABD */
9237                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9238                 gen_helper_vfp_abss(tcg_res, tcg_res);
9239                 break;
9240             case 0x7c: /* FCMGT */
9241                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9242                 break;
9243             case 0x7d: /* FACGT */
9244                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9245                 break;
9246             default:
9247                 g_assert_not_reached();
9248             }
9249 
9250             if (elements == 1) {
9251                 /* scalar single so clear high part */
9252                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9253 
9254                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
9255                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
9256             } else {
9257                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9258             }
9259         }
9260     }
9261 
9262     clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
9263 }
9264 
9265 /* AdvSIMD scalar three same
9266  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
9267  * +-----+---+-----------+------+---+------+--------+---+------+------+
9268  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9269  * +-----+---+-----------+------+---+------+--------+---+------+------+
9270  */
9271 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
9272 {
9273     int rd = extract32(insn, 0, 5);
9274     int rn = extract32(insn, 5, 5);
9275     int opcode = extract32(insn, 11, 5);
9276     int rm = extract32(insn, 16, 5);
9277     int size = extract32(insn, 22, 2);
9278     bool u = extract32(insn, 29, 1);
9279     TCGv_i64 tcg_rd;
9280 
9281     if (opcode >= 0x18) {
9282         /* Floating point: U, size[1] and opcode indicate operation */
9283         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
9284         switch (fpopcode) {
9285         case 0x1b: /* FMULX */
9286         case 0x1f: /* FRECPS */
9287         case 0x3f: /* FRSQRTS */
9288         case 0x5d: /* FACGE */
9289         case 0x7d: /* FACGT */
9290         case 0x1c: /* FCMEQ */
9291         case 0x5c: /* FCMGE */
9292         case 0x7c: /* FCMGT */
9293         case 0x7a: /* FABD */
9294             break;
9295         default:
9296             unallocated_encoding(s);
9297             return;
9298         }
9299 
9300         if (!fp_access_check(s)) {
9301             return;
9302         }
9303 
9304         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
9305         return;
9306     }
9307 
9308     switch (opcode) {
9309     case 0x1: /* SQADD, UQADD */
9310     case 0x5: /* SQSUB, UQSUB */
9311     case 0x9: /* SQSHL, UQSHL */
9312     case 0xb: /* SQRSHL, UQRSHL */
9313         break;
9314     case 0x8: /* SSHL, USHL */
9315     case 0xa: /* SRSHL, URSHL */
9316     case 0x6: /* CMGT, CMHI */
9317     case 0x7: /* CMGE, CMHS */
9318     case 0x11: /* CMTST, CMEQ */
9319     case 0x10: /* ADD, SUB (vector) */
9320         if (size != 3) {
9321             unallocated_encoding(s);
9322             return;
9323         }
9324         break;
9325     case 0x16: /* SQDMULH, SQRDMULH (vector) */
9326         if (size != 1 && size != 2) {
9327             unallocated_encoding(s);
9328             return;
9329         }
9330         break;
9331     default:
9332         unallocated_encoding(s);
9333         return;
9334     }
9335 
9336     if (!fp_access_check(s)) {
9337         return;
9338     }
9339 
9340     tcg_rd = tcg_temp_new_i64();
9341 
9342     if (size == 3) {
9343         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9344         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
9345 
9346         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
9347     } else {
9348         /* Do a single operation on the lowest element in the vector.
9349          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
9350          * no side effects for all these operations.
9351          * OPTME: special-purpose helpers would avoid doing some
9352          * unnecessary work in the helper for the 8 and 16 bit cases.
9353          */
9354         NeonGenTwoOpEnvFn *genenvfn;
9355         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9356         TCGv_i32 tcg_rm = tcg_temp_new_i32();
9357         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
9358 
9359         read_vec_element_i32(s, tcg_rn, rn, 0, size);
9360         read_vec_element_i32(s, tcg_rm, rm, 0, size);
9361 
9362         switch (opcode) {
9363         case 0x1: /* SQADD, UQADD */
9364         {
9365             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9366                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9367                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9368                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9369             };
9370             genenvfn = fns[size][u];
9371             break;
9372         }
9373         case 0x5: /* SQSUB, UQSUB */
9374         {
9375             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9376                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9377                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9378                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9379             };
9380             genenvfn = fns[size][u];
9381             break;
9382         }
9383         case 0x9: /* SQSHL, UQSHL */
9384         {
9385             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9386                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9387                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9388                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9389             };
9390             genenvfn = fns[size][u];
9391             break;
9392         }
9393         case 0xb: /* SQRSHL, UQRSHL */
9394         {
9395             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9396                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9397                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9398                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9399             };
9400             genenvfn = fns[size][u];
9401             break;
9402         }
9403         case 0x16: /* SQDMULH, SQRDMULH */
9404         {
9405             static NeonGenTwoOpEnvFn * const fns[2][2] = {
9406                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9407                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9408             };
9409             assert(size == 1 || size == 2);
9410             genenvfn = fns[size - 1][u];
9411             break;
9412         }
9413         default:
9414             g_assert_not_reached();
9415         }
9416 
9417         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
9418         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
9419     }
9420 
9421     write_fp_dreg(s, rd, tcg_rd);
9422 }
9423 
9424 /* AdvSIMD scalar three same FP16
9425  *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
9426  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9427  * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
9428  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9429  * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9430  * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9431  */
9432 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9433                                                   uint32_t insn)
9434 {
9435     int rd = extract32(insn, 0, 5);
9436     int rn = extract32(insn, 5, 5);
9437     int opcode = extract32(insn, 11, 3);
9438     int rm = extract32(insn, 16, 5);
9439     bool u = extract32(insn, 29, 1);
9440     bool a = extract32(insn, 23, 1);
    int fpopcode = opcode | (a << 3) | (u << 4);
9442     TCGv_ptr fpst;
9443     TCGv_i32 tcg_op1;
9444     TCGv_i32 tcg_op2;
9445     TCGv_i32 tcg_res;
9446 
9447     switch (fpopcode) {
9448     case 0x03: /* FMULX */
9449     case 0x04: /* FCMEQ (reg) */
9450     case 0x07: /* FRECPS */
9451     case 0x0f: /* FRSQRTS */
9452     case 0x14: /* FCMGE (reg) */
9453     case 0x15: /* FACGE */
9454     case 0x1a: /* FABD */
9455     case 0x1c: /* FCMGT (reg) */
9456     case 0x1d: /* FACGT */
9457         break;
9458     default:
9459         unallocated_encoding(s);
9460         return;
9461     }
9462 
    if (!dc_isar_feature(aa64_fp16, s)) {
        unallocated_encoding(s);
        return;
    }
9466 
9467     if (!fp_access_check(s)) {
9468         return;
9469     }
9470 
9471     fpst = fpstatus_ptr(FPST_FPCR_F16);
9472 
9473     tcg_op1 = read_fp_hreg(s, rn);
9474     tcg_op2 = read_fp_hreg(s, rm);
9475     tcg_res = tcg_temp_new_i32();
9476 
9477     switch (fpopcode) {
9478     case 0x03: /* FMULX */
9479         gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9480         break;
9481     case 0x04: /* FCMEQ (reg) */
9482         gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9483         break;
9484     case 0x07: /* FRECPS */
9485         gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9486         break;
9487     case 0x0f: /* FRSQRTS */
9488         gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9489         break;
9490     case 0x14: /* FCMGE (reg) */
9491         gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9492         break;
9493     case 0x15: /* FACGE */
9494         gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9495         break;
9496     case 0x1a: /* FABD */
9497         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9498         tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9499         break;
9500     case 0x1c: /* FCMGT (reg) */
9501         gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9502         break;
9503     case 0x1d: /* FACGT */
9504         gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9505         break;
9506     default:
9507         g_assert_not_reached();
9508     }
9509 
9510     write_fp_sreg(s, rd, tcg_res);
9511 }
9512 
9513 /* AdvSIMD scalar three same extra
9514  *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
9515  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9516  * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
9517  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9518  */
9519 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9520                                                    uint32_t insn)
9521 {
9522     int rd = extract32(insn, 0, 5);
9523     int rn = extract32(insn, 5, 5);
9524     int opcode = extract32(insn, 11, 4);
9525     int rm = extract32(insn, 16, 5);
9526     int size = extract32(insn, 22, 2);
9527     bool u = extract32(insn, 29, 1);
9528     TCGv_i32 ele1, ele2, ele3;
9529     TCGv_i64 res;
9530     bool feature;
9531 
9532     switch (u * 16 + opcode) {
9533     case 0x10: /* SQRDMLAH (vector) */
9534     case 0x11: /* SQRDMLSH (vector) */
9535         if (size != 1 && size != 2) {
9536             unallocated_encoding(s);
9537             return;
9538         }
9539         feature = dc_isar_feature(aa64_rdm, s);
9540         break;
9541     default:
9542         unallocated_encoding(s);
9543         return;
9544     }
9545     if (!feature) {
9546         unallocated_encoding(s);
9547         return;
9548     }
9549     if (!fp_access_check(s)) {
9550         return;
9551     }
9552 
9553     /* Do a single operation on the lowest element in the vector.
9554      * We use the standard Neon helpers and rely on 0 OP 0 == 0
9555      * with no side effects for all these operations.
9556      * OPTME: special-purpose helpers would avoid doing some
9557      * unnecessary work in the helper for the 16 bit cases.
9558      */
9559     ele1 = tcg_temp_new_i32();
9560     ele2 = tcg_temp_new_i32();
9561     ele3 = tcg_temp_new_i32();
9562 
9563     read_vec_element_i32(s, ele1, rn, 0, size);
9564     read_vec_element_i32(s, ele2, rm, 0, size);
9565     read_vec_element_i32(s, ele3, rd, 0, size);
9566 
9567     switch (opcode) {
9568     case 0x0: /* SQRDMLAH */
9569         if (size == 1) {
9570             gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
9571         } else {
9572             gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
9573         }
9574         break;
9575     case 0x1: /* SQRDMLSH */
9576         if (size == 1) {
9577             gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
9578         } else {
9579             gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
9580         }
9581         break;
9582     default:
9583         g_assert_not_reached();
9584     }
9585 
9586     res = tcg_temp_new_i64();
9587     tcg_gen_extu_i32_i64(res, ele3);
9588     write_fp_dreg(s, rd, res);
9589 }
9590 
9591 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9592                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9593                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9594 {
9595     /* Handle 64->64 opcodes which are shared between the scalar and
9596      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9597      * is valid in either group and also the double-precision fp ops.
     * The caller need only provide tcg_rmode and tcg_fpstatus if the op
9599      * requires them.
9600      */
9601     TCGCond cond;
9602 
9603     switch (opcode) {
9604     case 0x4: /* CLS, CLZ */
9605         if (u) {
9606             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9607         } else {
9608             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9609         }
9610         break;
9611     case 0x5: /* NOT */
9612         /* This opcode is shared with CNT and RBIT but we have earlier
9613          * enforced that size == 3 if and only if this is the NOT insn.
9614          */
9615         tcg_gen_not_i64(tcg_rd, tcg_rn);
9616         break;
9617     case 0x7: /* SQABS, SQNEG */
9618         if (u) {
9619             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
9620         } else {
9621             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
9622         }
9623         break;
9624     case 0xa: /* CMLT */
9625         /* 64 bit integer comparison against zero, result is
         * test ? (2^64 - 1) : 0. We implement via setcond(test) and
         * then negating the 0/1 result.
9628          */
9629         cond = TCG_COND_LT;
9630     do_cmop:
9631         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
9632         tcg_gen_neg_i64(tcg_rd, tcg_rd);
9633         break;
9634     case 0x8: /* CMGT, CMGE */
9635         cond = u ? TCG_COND_GE : TCG_COND_GT;
9636         goto do_cmop;
9637     case 0x9: /* CMEQ, CMLE */
9638         cond = u ? TCG_COND_LE : TCG_COND_EQ;
9639         goto do_cmop;
9640     case 0xb: /* ABS, NEG */
9641         if (u) {
9642             tcg_gen_neg_i64(tcg_rd, tcg_rn);
9643         } else {
9644             tcg_gen_abs_i64(tcg_rd, tcg_rn);
9645         }
9646         break;
9647     case 0x2f: /* FABS */
9648         gen_helper_vfp_absd(tcg_rd, tcg_rn);
9649         break;
9650     case 0x6f: /* FNEG */
9651         gen_helper_vfp_negd(tcg_rd, tcg_rn);
9652         break;
9653     case 0x7f: /* FSQRT */
9654         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
9655         break;
9656     case 0x1a: /* FCVTNS */
9657     case 0x1b: /* FCVTMS */
9658     case 0x1c: /* FCVTAS */
9659     case 0x3a: /* FCVTPS */
9660     case 0x3b: /* FCVTZS */
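        /* The zero constant is the fixed-point shift count: no fraction
         * bits, i.e. a plain FP-to-integer conversion. The rounding mode
         * comes from tcg_fpstatus, which the caller has already set up.
         */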
9661         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9662         break;
9663     case 0x5a: /* FCVTNU */
9664     case 0x5b: /* FCVTMU */
9665     case 0x5c: /* FCVTAU */
9666     case 0x7a: /* FCVTPU */
9667     case 0x7b: /* FCVTZU */
9668         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9669         break;
9670     case 0x18: /* FRINTN */
9671     case 0x19: /* FRINTM */
9672     case 0x38: /* FRINTP */
9673     case 0x39: /* FRINTZ */
9674     case 0x58: /* FRINTA */
9675     case 0x79: /* FRINTI */
9676         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
9677         break;
9678     case 0x59: /* FRINTX */
9679         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
9680         break;
9681     case 0x1e: /* FRINT32Z */
9682     case 0x5e: /* FRINT32X */
9683         gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
9684         break;
9685     case 0x1f: /* FRINT64Z */
9686     case 0x5f: /* FRINT64X */
9687         gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
9688         break;
9689     default:
9690         g_assert_not_reached();
9691     }
9692 }
9693 
9694 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
9695                                    bool is_scalar, bool is_u, bool is_q,
9696                                    int size, int rn, int rd)
9697 {
9698     bool is_double = (size == MO_64);
9699     TCGv_ptr fpst;
9700 
9701     if (!fp_access_check(s)) {
9702         return;
9703     }
9704 
9705     fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9706 
9707     if (is_double) {
9708         TCGv_i64 tcg_op = tcg_temp_new_i64();
9709         TCGv_i64 tcg_zero = tcg_constant_i64(0);
9710         TCGv_i64 tcg_res = tcg_temp_new_i64();
9711         NeonGenTwoDoubleOpFn *genfn;
9712         bool swap = false;
9713         int pass;
9714 
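        /* FCMLT/FCMLE have no dedicated helpers: swap the operands and
         * use the GT/GE helpers instead.
         */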
9715         switch (opcode) {
9716         case 0x2e: /* FCMLT (zero) */
9717             swap = true;
            /* fall through */
9719         case 0x2c: /* FCMGT (zero) */
9720             genfn = gen_helper_neon_cgt_f64;
9721             break;
9722         case 0x2d: /* FCMEQ (zero) */
9723             genfn = gen_helper_neon_ceq_f64;
9724             break;
9725         case 0x6d: /* FCMLE (zero) */
9726             swap = true;
9727             /* fall through */
9728         case 0x6c: /* FCMGE (zero) */
9729             genfn = gen_helper_neon_cge_f64;
9730             break;
9731         default:
9732             g_assert_not_reached();
9733         }
9734 
9735         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9736             read_vec_element(s, tcg_op, rn, pass, MO_64);
9737             if (swap) {
9738                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9739             } else {
9740                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9741             }
9742             write_vec_element(s, tcg_res, rd, pass, MO_64);
9743         }
9744 
9745         clear_vec_high(s, !is_scalar, rd);
9746     } else {
9747         TCGv_i32 tcg_op = tcg_temp_new_i32();
9748         TCGv_i32 tcg_zero = tcg_constant_i32(0);
9749         TCGv_i32 tcg_res = tcg_temp_new_i32();
9750         NeonGenTwoSingleOpFn *genfn;
9751         bool swap = false;
9752         int pass, maxpasses;
9753 
9754         if (size == MO_16) {
9755             switch (opcode) {
9756             case 0x2e: /* FCMLT (zero) */
9757                 swap = true;
9758                 /* fall through */
9759             case 0x2c: /* FCMGT (zero) */
9760                 genfn = gen_helper_advsimd_cgt_f16;
9761                 break;
9762             case 0x2d: /* FCMEQ (zero) */
9763                 genfn = gen_helper_advsimd_ceq_f16;
9764                 break;
9765             case 0x6d: /* FCMLE (zero) */
9766                 swap = true;
9767                 /* fall through */
9768             case 0x6c: /* FCMGE (zero) */
9769                 genfn = gen_helper_advsimd_cge_f16;
9770                 break;
9771             default:
9772                 g_assert_not_reached();
9773             }
9774         } else {
9775             switch (opcode) {
9776             case 0x2e: /* FCMLT (zero) */
9777                 swap = true;
9778                 /* fall through */
9779             case 0x2c: /* FCMGT (zero) */
9780                 genfn = gen_helper_neon_cgt_f32;
9781                 break;
9782             case 0x2d: /* FCMEQ (zero) */
9783                 genfn = gen_helper_neon_ceq_f32;
9784                 break;
9785             case 0x6d: /* FCMLE (zero) */
9786                 swap = true;
9787                 /* fall through */
9788             case 0x6c: /* FCMGE (zero) */
9789                 genfn = gen_helper_neon_cge_f32;
9790                 break;
9791             default:
9792                 g_assert_not_reached();
9793             }
9794         }
9795 
9796         if (is_scalar) {
9797             maxpasses = 1;
9798         } else {
9799             int vector_size = 8 << is_q;
9800             maxpasses = vector_size >> size;
9801         }
9802 
9803         for (pass = 0; pass < maxpasses; pass++) {
9804             read_vec_element_i32(s, tcg_op, rn, pass, size);
9805             if (swap) {
9806                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9807             } else {
9808                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9809             }
9810             if (is_scalar) {
9811                 write_fp_sreg(s, rd, tcg_res);
9812             } else {
9813                 write_vec_element_i32(s, tcg_res, rd, pass, size);
9814             }
9815         }
9816 
9817         if (!is_scalar) {
9818             clear_vec_high(s, is_q, rd);
9819         }
9820     }
9821 }
9822 
9823 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
9824                                     bool is_scalar, bool is_u, bool is_q,
9825                                     int size, int rn, int rd)
9826 {
9827     bool is_double = (size == 3);
9828     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9829 
9830     if (is_double) {
9831         TCGv_i64 tcg_op = tcg_temp_new_i64();
9832         TCGv_i64 tcg_res = tcg_temp_new_i64();
9833         int pass;
9834 
9835         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9836             read_vec_element(s, tcg_op, rn, pass, MO_64);
9837             switch (opcode) {
9838             case 0x3d: /* FRECPE */
9839                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
9840                 break;
9841             case 0x3f: /* FRECPX */
9842                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
9843                 break;
9844             case 0x7d: /* FRSQRTE */
9845                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
9846                 break;
9847             default:
9848                 g_assert_not_reached();
9849             }
9850             write_vec_element(s, tcg_res, rd, pass, MO_64);
9851         }
9852         clear_vec_high(s, !is_scalar, rd);
9853     } else {
9854         TCGv_i32 tcg_op = tcg_temp_new_i32();
9855         TCGv_i32 tcg_res = tcg_temp_new_i32();
9856         int pass, maxpasses;
9857 
9858         if (is_scalar) {
9859             maxpasses = 1;
9860         } else {
9861             maxpasses = is_q ? 4 : 2;
9862         }
9863 
9864         for (pass = 0; pass < maxpasses; pass++) {
9865             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
9866 
9867             switch (opcode) {
9868             case 0x3c: /* URECPE */
9869                 gen_helper_recpe_u32(tcg_res, tcg_op);
9870                 break;
9871             case 0x3d: /* FRECPE */
9872                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
9873                 break;
9874             case 0x3f: /* FRECPX */
9875                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
9876                 break;
9877             case 0x7d: /* FRSQRTE */
9878                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
9879                 break;
9880             default:
9881                 g_assert_not_reached();
9882             }
9883 
9884             if (is_scalar) {
9885                 write_fp_sreg(s, rd, tcg_res);
9886             } else {
9887                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9888             }
9889         }
9890         if (!is_scalar) {
9891             clear_vec_high(s, is_q, rd);
9892         }
9893     }
9894 }
9895 
9896 static void handle_2misc_narrow(DisasContext *s, bool scalar,
9897                                 int opcode, bool u, bool is_q,
9898                                 int size, int rn, int rd)
9899 {
9900     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
9901      * in the source becomes a size element in the destination).
9902      */
9903     int pass;
9904     TCGv_i32 tcg_res[2];
9905     int destelt = is_q ? 2 : 0;
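    /* The "2" (second-part) forms write the high 64 bits of Rd and
     * preserve the low half; the base forms write the low half and
     * zero the rest.
     */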
9906     int passes = scalar ? 1 : 2;
9907 
9908     if (scalar) {
9909         tcg_res[1] = tcg_constant_i32(0);
9910     }
9911 
9912     for (pass = 0; pass < passes; pass++) {
9913         TCGv_i64 tcg_op = tcg_temp_new_i64();
9914         NeonGenNarrowFn *genfn = NULL;
9915         NeonGenNarrowEnvFn *genenvfn = NULL;
9916 
9917         if (scalar) {
9918             read_vec_element(s, tcg_op, rn, pass, size + 1);
9919         } else {
9920             read_vec_element(s, tcg_op, rn, pass, MO_64);
9921         }
9922         tcg_res[pass] = tcg_temp_new_i32();
9923 
9924         switch (opcode) {
9925         case 0x12: /* XTN, SQXTUN */
9926         {
9927             static NeonGenNarrowFn * const xtnfns[3] = {
9928                 gen_helper_neon_narrow_u8,
9929                 gen_helper_neon_narrow_u16,
9930                 tcg_gen_extrl_i64_i32,
9931             };
9932             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
9933                 gen_helper_neon_unarrow_sat8,
9934                 gen_helper_neon_unarrow_sat16,
9935                 gen_helper_neon_unarrow_sat32,
9936             };
9937             if (u) {
9938                 genenvfn = sqxtunfns[size];
9939             } else {
9940                 genfn = xtnfns[size];
9941             }
9942             break;
9943         }
9944         case 0x14: /* SQXTN, UQXTN */
9945         {
9946             static NeonGenNarrowEnvFn * const fns[3][2] = {
9947                 { gen_helper_neon_narrow_sat_s8,
9948                   gen_helper_neon_narrow_sat_u8 },
9949                 { gen_helper_neon_narrow_sat_s16,
9950                   gen_helper_neon_narrow_sat_u16 },
9951                 { gen_helper_neon_narrow_sat_s32,
9952                   gen_helper_neon_narrow_sat_u32 },
9953             };
9954             genenvfn = fns[size][u];
9955             break;
9956         }
9957         case 0x16: /* FCVTN, FCVTN2 */
9958             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9959             if (size == 2) {
9960                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
9961             } else {
9962                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9963                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9964                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9965                 TCGv_i32 ahp = get_ahp_flag();
9966 
9967                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
9968                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9969                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
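                /* Pack the two f16 results into one 32-bit destination lane. */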
9970                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9971             }
9972             break;
9973         case 0x36: /* BFCVTN, BFCVTN2 */
9974             {
9975                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9976                 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
9977             }
9978             break;
9979         case 0x56:  /* FCVTXN, FCVTXN2 */
9980             /* 64 bit to 32 bit float conversion
9981              * with von Neumann rounding (round to odd)
9982              */
9983             assert(size == 2);
9984             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
9985             break;
9986         default:
9987             g_assert_not_reached();
9988         }
9989 
9990         if (genfn) {
9991             genfn(tcg_res[pass], tcg_op);
9992         } else if (genenvfn) {
9993             genenvfn(tcg_res[pass], cpu_env, tcg_op);
9994         }
9995     }
9996 
9997     for (pass = 0; pass < 2; pass++) {
9998         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9999     }
10000     clear_vec_high(s, is_q, rd);
10001 }
10002 
10003 /* Remaining saturating accumulating ops */
10004 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
10005                                 bool is_q, int size, int rn, int rd)
10006 {
10007     bool is_double = (size == 3);
10008 
10009     if (is_double) {
10010         TCGv_i64 tcg_rn = tcg_temp_new_i64();
10011         TCGv_i64 tcg_rd = tcg_temp_new_i64();
10012         int pass;
10013 
10014         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10015             read_vec_element(s, tcg_rn, rn, pass, MO_64);
10016             read_vec_element(s, tcg_rd, rd, pass, MO_64);
10017 
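            /* USQADD adds the signed Rn element to the unsigned Rd
             * accumulator; SUQADD adds the unsigned Rn element to the
             * signed Rd accumulator. Each saturates to the type of Rd.
             */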
10018             if (is_u) { /* USQADD */
10019                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10020             } else { /* SUQADD */
10021                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10022             }
10023             write_vec_element(s, tcg_rd, rd, pass, MO_64);
10024         }
10025         clear_vec_high(s, !is_scalar, rd);
10026     } else {
10027         TCGv_i32 tcg_rn = tcg_temp_new_i32();
10028         TCGv_i32 tcg_rd = tcg_temp_new_i32();
10029         int pass, maxpasses;
10030 
10031         if (is_scalar) {
10032             maxpasses = 1;
10033         } else {
10034             maxpasses = is_q ? 4 : 2;
10035         }
10036 
10037         for (pass = 0; pass < maxpasses; pass++) {
10038             if (is_scalar) {
10039                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
10040                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
10041             } else {
10042                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
10043                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
10044             }
10045 
10046             if (is_u) { /* USQADD */
10047                 switch (size) {
10048                 case 0:
10049                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10050                     break;
10051                 case 1:
10052                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10053                     break;
10054                 case 2:
10055                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10056                     break;
10057                 default:
10058                     g_assert_not_reached();
10059                 }
10060             } else { /* SUQADD */
10061                 switch (size) {
10062                 case 0:
10063                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10064                     break;
10065                 case 1:
10066                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10067                     break;
10068                 case 2:
10069                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10070                     break;
10071                 default:
10072                     g_assert_not_reached();
10073                 }
10074             }
10075 
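            /* In the scalar case, zero the low 64 bits of Rd first so
             * that the 32-bit result write below leaves the rest clear.
             */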
10076             if (is_scalar) {
10077                 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64);
10078             }
10079             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
10080         }
10081         clear_vec_high(s, is_q, rd);
10082     }
10083 }
10084 
10085 /* AdvSIMD scalar two reg misc
10086  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
10087  * +-----+---+-----------+------+-----------+--------+-----+------+------+
10088  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10089  * +-----+---+-----------+------+-----------+--------+-----+------+------+
10090  */
10091 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
10092 {
10093     int rd = extract32(insn, 0, 5);
10094     int rn = extract32(insn, 5, 5);
10095     int opcode = extract32(insn, 12, 5);
10096     int size = extract32(insn, 22, 2);
10097     bool u = extract32(insn, 29, 1);
10098     bool is_fcvt = false;
10099     int rmode;
10100     TCGv_i32 tcg_rmode;
10101     TCGv_ptr tcg_fpstatus;
10102 
10103     switch (opcode) {
10104     case 0x3: /* USQADD / SUQADD*/
10105         if (!fp_access_check(s)) {
10106             return;
10107         }
10108         handle_2misc_satacc(s, true, u, false, size, rn, rd);
10109         return;
10110     case 0x7: /* SQABS / SQNEG */
10111         break;
10112     case 0xa: /* CMLT */
10113         if (u) {
10114             unallocated_encoding(s);
10115             return;
10116         }
10117         /* fall through */
10118     case 0x8: /* CMGT, CMGE */
10119     case 0x9: /* CMEQ, CMLE */
10120     case 0xb: /* ABS, NEG */
10121         if (size != 3) {
10122             unallocated_encoding(s);
10123             return;
10124         }
10125         break;
10126     case 0x12: /* SQXTUN */
10127         if (!u) {
10128             unallocated_encoding(s);
10129             return;
10130         }
10131         /* fall through */
10132     case 0x14: /* SQXTN, UQXTN */
10133         if (size == 3) {
10134             unallocated_encoding(s);
10135             return;
10136         }
10137         if (!fp_access_check(s)) {
10138             return;
10139         }
10140         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
10141         return;
10142     case 0xc ... 0xf:
10143     case 0x16 ... 0x1d:
10144     case 0x1f:
10145         /* Floating point: U, size[1] and opcode indicate operation;
10146          * size[0] indicates single or double precision.
10147          */
10148         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10149         size = extract32(size, 0, 1) ? 3 : 2;
10150         switch (opcode) {
10151         case 0x2c: /* FCMGT (zero) */
10152         case 0x2d: /* FCMEQ (zero) */
10153         case 0x2e: /* FCMLT (zero) */
10154         case 0x6c: /* FCMGE (zero) */
10155         case 0x6d: /* FCMLE (zero) */
10156             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
10157             return;
10158         case 0x1d: /* SCVTF */
10159         case 0x5d: /* UCVTF */
10160         {
10161             bool is_signed = (opcode == 0x1d);
10162             if (!fp_access_check(s)) {
10163                 return;
10164             }
10165             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
10166             return;
10167         }
10168         case 0x3d: /* FRECPE */
10169         case 0x3f: /* FRECPX */
10170         case 0x7d: /* FRSQRTE */
10171             if (!fp_access_check(s)) {
10172                 return;
10173             }
10174             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
10175             return;
10176         case 0x1a: /* FCVTNS */
10177         case 0x1b: /* FCVTMS */
10178         case 0x3a: /* FCVTPS */
10179         case 0x3b: /* FCVTZS */
10180         case 0x5a: /* FCVTNU */
10181         case 0x5b: /* FCVTMU */
10182         case 0x7a: /* FCVTPU */
10183         case 0x7b: /* FCVTZU */
10184             is_fcvt = true;
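            /* Opcode bits [5] and [0] select the rounding mode, matching
             * the FPROUNDING_* encoding: N -> TIEEVEN, M -> NEGINF,
             * P -> POSINF, Z -> ZERO.
             */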
10185             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10186             break;
10187         case 0x1c: /* FCVTAS */
10188         case 0x5c: /* FCVTAU */
10189             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
10190             is_fcvt = true;
10191             rmode = FPROUNDING_TIEAWAY;
10192             break;
10193         case 0x56: /* FCVTXN, FCVTXN2 */
10194             if (size == 2) {
10195                 unallocated_encoding(s);
10196                 return;
10197             }
10198             if (!fp_access_check(s)) {
10199                 return;
10200             }
10201             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
10202             return;
10203         default:
10204             unallocated_encoding(s);
10205             return;
10206         }
10207         break;
10208     default:
10209         unallocated_encoding(s);
10210         return;
10211     }
10212 
10213     if (!fp_access_check(s)) {
10214         return;
10215     }
10216 
10217     if (is_fcvt) {
10218         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
10219         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
10220     } else {
10221         tcg_fpstatus = NULL;
10222         tcg_rmode = NULL;
10223     }
10224 
10225     if (size == 3) {
10226         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
10227         TCGv_i64 tcg_rd = tcg_temp_new_i64();
10228 
10229         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
10230         write_fp_dreg(s, rd, tcg_rd);
10231     } else {
10232         TCGv_i32 tcg_rn = tcg_temp_new_i32();
10233         TCGv_i32 tcg_rd = tcg_temp_new_i32();
10234 
10235         read_vec_element_i32(s, tcg_rn, rn, 0, size);
10236 
10237         switch (opcode) {
10238         case 0x7: /* SQABS, SQNEG */
10239         {
10240             NeonGenOneOpEnvFn *genfn;
10241             static NeonGenOneOpEnvFn * const fns[3][2] = {
10242                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10243                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10244                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
10245             };
10246             genfn = fns[size][u];
10247             genfn(tcg_rd, cpu_env, tcg_rn);
10248             break;
10249         }
10250         case 0x1a: /* FCVTNS */
10251         case 0x1b: /* FCVTMS */
10252         case 0x1c: /* FCVTAS */
10253         case 0x3a: /* FCVTPS */
10254         case 0x3b: /* FCVTZS */
10255             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10256                                  tcg_fpstatus);
10257             break;
10258         case 0x5a: /* FCVTNU */
10259         case 0x5b: /* FCVTMU */
10260         case 0x5c: /* FCVTAU */
10261         case 0x7a: /* FCVTPU */
10262         case 0x7b: /* FCVTZU */
10263             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10264                                  tcg_fpstatus);
10265             break;
10266         default:
10267             g_assert_not_reached();
10268         }
10269 
10270         write_fp_sreg(s, rd, tcg_rd);
10271     }
10272 
10273     if (is_fcvt) {
10274         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
10275     }
10276 }
10277 
10278 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
10279 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
10280                                  int immh, int immb, int opcode, int rn, int rd)
10281 {
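    /* The position of the top set bit of immh encodes the element size;
     * immh:immb then encodes (2 * esize) - shift, so the shift amount
     * runs from 1 to esize.
     */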
10282     int size = 32 - clz32(immh) - 1;
10283     int immhb = immh << 3 | immb;
10284     int shift = 2 * (8 << size) - immhb;
10285     GVecGen2iFn *gvec_fn;
10286 
10287     if (extract32(immh, 3, 1) && !is_q) {
10288         unallocated_encoding(s);
10289         return;
10290     }
10291     tcg_debug_assert(size <= 3);
10292 
10293     if (!fp_access_check(s)) {
10294         return;
10295     }
10296 
10297     switch (opcode) {
10298     case 0x02: /* SSRA / USRA (accumulate) */
10299         gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
10300         break;
10301 
10302     case 0x08: /* SRI */
10303         gvec_fn = gen_gvec_sri;
10304         break;
10305 
10306     case 0x00: /* SSHR / USHR */
10307         if (is_u) {
10308             if (shift == 8 << size) {
10309                 /* Shift count the same size as element size produces zero.  */
10310                 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
10311                                      is_q ? 16 : 8, vec_full_reg_size(s), 0);
10312                 return;
10313             }
10314             gvec_fn = tcg_gen_gvec_shri;
10315         } else {
            /*
             * A shift count equal to the element size would fill every
             * lane with its sign bit; clamp to esize - 1, which gives
             * the same result.
             */
10317             if (shift == 8 << size) {
10318                 shift -= 1;
10319             }
10320             gvec_fn = tcg_gen_gvec_sari;
10321         }
10322         break;
10323 
10324     case 0x04: /* SRSHR / URSHR (rounding) */
10325         gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
10326         break;
10327 
10328     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10329         gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
10330         break;
10331 
10332     default:
10333         g_assert_not_reached();
10334     }
10335 
10336     gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
10337 }
10338 
10339 /* SHL/SLI - Vector shift left */
10340 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10341                                  int immh, int immb, int opcode, int rn, int rd)
10342 {
10343     int size = 32 - clz32(immh) - 1;
10344     int immhb = immh << 3 | immb;
10345     int shift = immhb - (8 << size);
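    /* For left shifts immh:immb encodes esize + shift, so the shift
     * amount runs from 0 to esize - 1.
     */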
10346 
10347     /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10348     assert(size >= 0 && size <= 3);
10349 
10350     if (extract32(immh, 3, 1) && !is_q) {
10351         unallocated_encoding(s);
10352         return;
10353     }
10354 
10355     if (!fp_access_check(s)) {
10356         return;
10357     }
10358 
10359     if (insert) {
10360         gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
10361     } else {
10362         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10363     }
10364 }
10365 
10366 /* USHLL/SHLL - Vector shift left with widening */
10367 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10368                                  int immh, int immb, int opcode, int rn, int rd)
10369 {
10370     int size = 32 - clz32(immh) - 1;
10371     int immhb = immh << 3 | immb;
10372     int shift = immhb - (8 << size);
10373     int dsize = 64;
10374     int esize = 8 << size;
    int elements = dsize / esize;
10376     TCGv_i64 tcg_rn = tcg_temp_new_i64();
10377     TCGv_i64 tcg_rd = tcg_temp_new_i64();
10378     int i;
10379 
10380     if (size >= 3) {
10381         unallocated_encoding(s);
10382         return;
10383     }
10384 
10385     if (!fp_access_check(s)) {
10386         return;
10387     }
10388 
10389     /* For the LL variants the store is larger than the load,
10390      * so if rd == rn we would overwrite parts of our input.
     * Load everything up front and use shifts in the main loop.
10392      */
10393     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10394 
10395     for (i = 0; i < elements; i++) {
10396         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
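        /* ext_and_shift_reg option values 0..2 select UXTB/UXTH/UXTW;
         * OR-ing in bit 2 for the signed case selects SXTB/SXTH/SXTW.
         */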
10397         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10398         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10399         write_vec_element(s, tcg_rd, rd, i, size + 1);
10400     }
10401 }
10402 
10403 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10404 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10405                                  int immh, int immb, int opcode, int rn, int rd)
10406 {
10407     int immhb = immh << 3 | immb;
10408     int size = 32 - clz32(immh) - 1;
10409     int dsize = 64;
10410     int esize = 8 << size;
    int elements = dsize / esize;
10412     int shift = (2 * esize) - immhb;
10413     bool round = extract32(opcode, 0, 1);
10414     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10415     TCGv_i64 tcg_round;
10416     int i;
10417 
10418     if (extract32(immh, 3, 1)) {
10419         unallocated_encoding(s);
10420         return;
10421     }
10422 
10423     if (!fp_access_check(s)) {
10424         return;
10425     }
10426 
10427     tcg_rn = tcg_temp_new_i64();
10428     tcg_rd = tcg_temp_new_i64();
10429     tcg_final = tcg_temp_new_i64();
10430     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10431 
10432     if (round) {
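        /* Rounding is done by adding half of the shifted-out range
         * before the shift.
         */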
10433         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
10434     } else {
10435         tcg_round = NULL;
10436     }
10437 
10438     for (i = 0; i < elements; i++) {
        read_vec_element(s, tcg_rn, rn, i, size + 1);
        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                false, true, size + 1, shift);
10442 
10443         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10444     }
10445 
    write_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10451 
10452     clear_vec_high(s, is_q, rd);
10453 }
10454 
10455 
10456 /* AdvSIMD shift by immediate
10457  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10458  * +---+---+---+-------------+------+------+--------+---+------+------+
10459  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10460  * +---+---+---+-------------+------+------+--------+---+------+------+
10461  */
10462 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10463 {
10464     int rd = extract32(insn, 0, 5);
10465     int rn = extract32(insn, 5, 5);
10466     int opcode = extract32(insn, 11, 5);
10467     int immb = extract32(insn, 16, 3);
10468     int immh = extract32(insn, 19, 4);
10469     bool is_u = extract32(insn, 29, 1);
10470     bool is_q = extract32(insn, 30, 1);
10471 
10472     /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
10473     assert(immh != 0);
10474 
10475     switch (opcode) {
10476     case 0x08: /* SRI */
10477         if (!is_u) {
10478             unallocated_encoding(s);
10479             return;
10480         }
10481         /* fall through */
10482     case 0x00: /* SSHR / USHR */
10483     case 0x02: /* SSRA / USRA (accumulate) */
10484     case 0x04: /* SRSHR / URSHR (rounding) */
10485     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10486         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10487         break;
10488     case 0x0a: /* SHL / SLI */
10489         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10490         break;
    case 0x10: /* SHRN / SQSHRUN */
10492     case 0x11: /* RSHRN / SQRSHRUN */
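        /* With U set these are SQSHRUN/SQRSHRUN: a signed source is
         * narrowed with unsigned saturation; without U they are the
         * plain non-saturating SHRN/RSHRN.
         */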
10493         if (is_u) {
10494             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10495                                    opcode, rn, rd);
10496         } else {
10497             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10498         }
10499         break;
10500     case 0x12: /* SQSHRN / UQSHRN */
10501     case 0x13: /* SQRSHRN / UQRSHRN */
10502         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10503                                opcode, rn, rd);
10504         break;
10505     case 0x14: /* SSHLL / USHLL */
10506         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10507         break;
10508     case 0x1c: /* SCVTF / UCVTF */
10509         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10510                                      opcode, rn, rd);
10511         break;
10512     case 0xc: /* SQSHLU */
10513         if (!is_u) {
10514             unallocated_encoding(s);
10515             return;
10516         }
10517         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10518         break;
10519     case 0xe: /* SQSHL, UQSHL */
10520         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10521         break;
10522     case 0x1f: /* FCVTZS/ FCVTZU */
10523         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10524         return;
10525     default:
10526         unallocated_encoding(s);
10527         return;
10528     }
10529 }
10530 
/* Generate code to do a "long" addition or subtraction, i.e. one done in
10532  * TCGv_i64 on vector lanes twice the width specified by size.
10533  */
10534 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10535                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10536 {
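    /* For size 0 and 1 the i64 holds four 16-bit or two 32-bit widened
     * lanes, which the helpers add or subtract independently; at size 2
     * a single widened lane fills the register, so plain i64 ops work.
     */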
10537     static NeonGenTwo64OpFn * const fns[3][2] = {
10538         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10539         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10540         { tcg_gen_add_i64, tcg_gen_sub_i64 },
10541     };
10542     NeonGenTwo64OpFn *genfn;
10543     assert(size < 3);
10544 
10545     genfn = fns[size][is_sub];
10546     genfn(tcg_res, tcg_op1, tcg_op2);
10547 }
10548 
10549 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
10550                                 int opcode, int rd, int rn, int rm)
10551 {
10552     /* 3-reg-different widening insns: 64 x 64 -> 128 */
10553     TCGv_i64 tcg_res[2];
10554     int pass, accop;
10555 
10556     tcg_res[0] = tcg_temp_new_i64();
10557     tcg_res[1] = tcg_temp_new_i64();
10558 
10559     /* Does this op do an adding accumulate, a subtracting accumulate,
10560      * or no accumulate at all?
10561      */
10562     switch (opcode) {
10563     case 5:
10564     case 8:
10565     case 9:
10566         accop = 1;
10567         break;
10568     case 10:
10569     case 11:
10570         accop = -1;
10571         break;
10572     default:
10573         accop = 0;
10574         break;
10575     }
10576 
10577     if (accop != 0) {
10578         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
10579         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
10580     }
10581 
10582     /* size == 2 means two 32x32->64 operations; this is worth special
10583      * casing because we can generally handle it inline.
10584      */
10585     if (size == 2) {
10586         for (pass = 0; pass < 2; pass++) {
10587             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10588             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10589             TCGv_i64 tcg_passres;
10590             MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
10591 
10592             int elt = pass + is_q * 2;
10593 
10594             read_vec_element(s, tcg_op1, rn, elt, memop);
10595             read_vec_element(s, tcg_op2, rm, elt, memop);
10596 
10597             if (accop == 0) {
10598                 tcg_passres = tcg_res[pass];
10599             } else {
10600                 tcg_passres = tcg_temp_new_i64();
10601             }
10602 
10603             switch (opcode) {
10604             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10605                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
10606                 break;
10607             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10608                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
10609                 break;
10610             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10611             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10612             {
10613                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
10614                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
10615 
10616                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
10617                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
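                /* Absolute difference: compute both a - b and b - a and
                 * use a conditional move to pick the non-negative one.
                 */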
10618                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
10619                                     tcg_passres,
10620                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
10621                 break;
10622             }
10623             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10624             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10625             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10626                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10627                 break;
10628             case 9: /* SQDMLAL, SQDMLAL2 */
10629             case 11: /* SQDMLSL, SQDMLSL2 */
10630             case 13: /* SQDMULL, SQDMULL2 */
10631                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10632                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10633                                                   tcg_passres, tcg_passres);
10634                 break;
10635             default:
10636                 g_assert_not_reached();
10637             }
10638 
10639             if (opcode == 9 || opcode == 11) {
10640                 /* saturating accumulate ops */
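                /* For the MLSL forms, negate the product so that the
                 * saturating add below performs a subtraction.
                 */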
10641                 if (accop < 0) {
10642                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10643                 }
10644                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10645                                                   tcg_res[pass], tcg_passres);
10646             } else if (accop > 0) {
10647                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10648             } else if (accop < 0) {
10649                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10650             }
10651         }
10652     } else {
10653         /* size 0 or 1, generally helper functions */
10654         for (pass = 0; pass < 2; pass++) {
10655             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10656             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10657             TCGv_i64 tcg_passres;
10658             int elt = pass + is_q * 2;
10659 
10660             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
10661             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
10662 
10663             if (accop == 0) {
10664                 tcg_passres = tcg_res[pass];
10665             } else {
10666                 tcg_passres = tcg_temp_new_i64();
10667             }
10668 
10669             switch (opcode) {
10670             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10671             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10672             {
10673                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
10674                 static NeonGenWidenFn * const widenfns[2][2] = {
10675                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10676                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10677                 };
10678                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
10679 
10680                 widenfn(tcg_op2_64, tcg_op2);
10681                 widenfn(tcg_passres, tcg_op1);
10682                 gen_neon_addl(size, (opcode == 2), tcg_passres,
10683                               tcg_passres, tcg_op2_64);
10684                 break;
10685             }
10686             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10687             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10688                 if (size == 0) {
10689                     if (is_u) {
10690                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
10691                     } else {
10692                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
10693                     }
10694                 } else {
10695                     if (is_u) {
10696                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
10697                     } else {
10698                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
10699                     }
10700                 }
10701                 break;
10702             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10703             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10704             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10705                 if (size == 0) {
10706                     if (is_u) {
10707                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
10708                     } else {
10709                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
10710                     }
10711                 } else {
10712                     if (is_u) {
10713                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
10714                     } else {
10715                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10716                     }
10717                 }
10718                 break;
10719             case 9: /* SQDMLAL, SQDMLAL2 */
10720             case 11: /* SQDMLSL, SQDMLSL2 */
10721             case 13: /* SQDMULL, SQDMULL2 */
10722                 assert(size == 1);
10723                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10724                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10725                                                   tcg_passres, tcg_passres);
10726                 break;
10727             default:
10728                 g_assert_not_reached();
10729             }
10730 
10731             if (accop != 0) {
10732                 if (opcode == 9 || opcode == 11) {
10733                     /* saturating accumulate ops */
10734                     if (accop < 0) {
10735                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10736                     }
10737                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10738                                                       tcg_res[pass],
10739                                                       tcg_passres);
10740                 } else {
10741                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
10742                                   tcg_res[pass], tcg_passres);
10743                 }
10744             }
10745         }
10746     }
10747 
10748     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
10749     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
10750 }
10751 
10752 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10753                             int opcode, int rd, int rn, int rm)
10754 {
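    /* Wide insns: each pass takes 64 bits of the wide Rn operand as-is
     * and widens the matching 32 bits of Rm to the same lane layout
     * before the lane-wise add or subtract.
     */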
10755     TCGv_i64 tcg_res[2];
10756     int part = is_q ? 2 : 0;
10757     int pass;
10758 
10759     for (pass = 0; pass < 2; pass++) {
10760         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10761         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10762         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10763         static NeonGenWidenFn * const widenfns[3][2] = {
10764             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10765             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10766             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10767         };
10768         NeonGenWidenFn *widenfn = widenfns[size][is_u];
10769 
10770         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10771         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10772         widenfn(tcg_op2_wide, tcg_op2);
10773         tcg_res[pass] = tcg_temp_new_i64();
10774         gen_neon_addl(size, (opcode == 3),
10775                       tcg_res[pass], tcg_op1, tcg_op2_wide);
10776     }
10777 
10778     for (pass = 0; pass < 2; pass++) {
10779         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10780     }
10781 }
10782 
10783 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
10784 {
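    /* Round to nearest by adding half of the discarded low 32 bits,
     * then keep only the high half.
     */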
10785     tcg_gen_addi_i64(in, in, 1U << 31);
10786     tcg_gen_extrh_i64_i32(res, in);
10787 }
10788 
10789 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10790                                  int opcode, int rd, int rn, int rm)
10791 {
10792     TCGv_i32 tcg_res[2];
10793     int part = is_q ? 2 : 0;
10794     int pass;
10795 
10796     for (pass = 0; pass < 2; pass++) {
10797         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10798         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10799         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10800         static NeonGenNarrowFn * const narrowfns[3][2] = {
10801             { gen_helper_neon_narrow_high_u8,
10802               gen_helper_neon_narrow_round_high_u8 },
10803             { gen_helper_neon_narrow_high_u16,
10804               gen_helper_neon_narrow_round_high_u16 },
10805             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10806         };
10807         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10808 
10809         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10810         read_vec_element(s, tcg_op2, rm, pass, MO_64);
10811 
10812         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10813 
10814         tcg_res[pass] = tcg_temp_new_i32();
10815         gennarrow(tcg_res[pass], tcg_wideres);
10816     }
10817 
10818     for (pass = 0; pass < 2; pass++) {
10819         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10820     }
10821     clear_vec_high(s, is_q, rd);
10822 }
10823 
10824 /* AdvSIMD three different
10825  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
10826  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10827  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
10828  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10829  */
10830 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10831 {
10832     /* Instructions in this group fall into three basic classes
10833      * (in each case with the operation working on each element in
10834      * the input vectors):
10835      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10836      *     128 bit input)
10837      * (2) wide 64 x 128 -> 128
10838      * (3) narrowing 128 x 128 -> 64
10839      * Here we do initial decode, catch unallocated cases and
10840      * dispatch to separate functions for each class.
10841      */
10842     int is_q = extract32(insn, 30, 1);
10843     int is_u = extract32(insn, 29, 1);
10844     int size = extract32(insn, 22, 2);
10845     int opcode = extract32(insn, 12, 4);
10846     int rm = extract32(insn, 16, 5);
10847     int rn = extract32(insn, 5, 5);
10848     int rd = extract32(insn, 0, 5);
10849 
10850     switch (opcode) {
10851     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10852     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10853         /* 64 x 128 -> 128 */
10854         if (size == 3) {
10855             unallocated_encoding(s);
10856             return;
10857         }
10858         if (!fp_access_check(s)) {
10859             return;
10860         }
10861         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10862         break;
10863     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10864     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10865         /* 128 x 128 -> 64 */
10866         if (size == 3) {
10867             unallocated_encoding(s);
10868             return;
10869         }
10870         if (!fp_access_check(s)) {
10871             return;
10872         }
10873         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10874         break;
10875     case 14: /* PMULL, PMULL2 */
10876         if (is_u) {
10877             unallocated_encoding(s);
10878             return;
10879         }
10880         switch (size) {
10881         case 0: /* PMULL.P8 */
10882             if (!fp_access_check(s)) {
10883                 return;
10884             }
10885             /* The Q field specifies lo/hi half input for this insn.  */
10886             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10887                              gen_helper_neon_pmull_h);
10888             break;
10889 
10890         case 3: /* PMULL.P64 */
10891             if (!dc_isar_feature(aa64_pmull, s)) {
10892                 unallocated_encoding(s);
10893                 return;
10894             }
10895             if (!fp_access_check(s)) {
10896                 return;
10897             }
10898             /* The Q field specifies lo/hi half input for this insn.  */
10899             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10900                              gen_helper_gvec_pmull_q);
10901             break;
10902 
10903         default:
10904             unallocated_encoding(s);
10905             break;
10906         }
10907         return;
10908     case 9: /* SQDMLAL, SQDMLAL2 */
10909     case 11: /* SQDMLSL, SQDMLSL2 */
10910     case 13: /* SQDMULL, SQDMULL2 */
10911         if (is_u || size == 0) {
10912             unallocated_encoding(s);
10913             return;
10914         }
10915         /* fall through */
10916     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10917     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10918     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10919     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10920     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10921     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10922     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10923         /* 64 x 64 -> 128 */
10924         if (size == 3) {
10925             unallocated_encoding(s);
10926             return;
10927         }
10928         if (!fp_access_check(s)) {
10929             return;
10930         }
10931 
10932         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10933         break;
10934     default:
10935         /* opcode 15 not allocated */
10936         unallocated_encoding(s);
10937         break;
10938     }
10939 }
10940 
10941 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10942 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10943 {
10944     int rd = extract32(insn, 0, 5);
10945     int rn = extract32(insn, 5, 5);
10946     int rm = extract32(insn, 16, 5);
10947     int size = extract32(insn, 22, 2);
10948     bool is_u = extract32(insn, 29, 1);
10949     bool is_q = extract32(insn, 30, 1);
10950 
10951     if (!fp_access_check(s)) {
10952         return;
10953     }
10954 
10955     switch (size + 4 * is_u) {
10956     case 0: /* AND */
10957         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10958         return;
10959     case 1: /* BIC */
10960         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10961         return;
10962     case 2: /* ORR */
10963         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10964         return;
10965     case 3: /* ORN */
10966         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10967         return;
10968     case 4: /* EOR */
10969         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10970         return;
10971 
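    /* BSL, BIT and BIF are all bit selects; they differ only in which
     * register provides the mask and in the (selector, true, false)
     * operand order passed to tcg_gen_gvec_bitsel.
     */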
10972     case 5: /* BSL bitwise select */
10973         gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
10974         return;
10975     case 6: /* BIT, bitwise insert if true */
10976         gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
10977         return;
10978     case 7: /* BIF, bitwise insert if false */
10979         gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
10980         return;
10981 
10982     default:
10983         g_assert_not_reached();
10984     }
10985 }
10986 
10987 /* Pairwise op subgroup of C3.6.16.
10988  *
10989  * This is called directly, and also from disas_simd_3same_float for
10990  * the float pairwise operations, where opcode and size are calculated differently.
10991  */
10992 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
10993                                    int size, int rn, int rm, int rd)
10994 {
10995     TCGv_ptr fpst;
10996     int pass;
10997 
10998     /* The floating-point opcodes (0x58 and up) need fpst; the integer pairwise ops do not. */
10999     if (opcode >= 0x58) {
11000         fpst = fpstatus_ptr(FPST_FPCR);
11001     } else {
11002         fpst = NULL;
11003     }
11004 
11005     if (!fp_access_check(s)) {
11006         return;
11007     }
11008 
11009     /* These operations work on the concatenated rm:rn, with each pair of
11010      * adjacent elements being operated on to produce an element in the result.
11011      */
11012     if (size == 3) {
11013         TCGv_i64 tcg_res[2];
11014 
11015         for (pass = 0; pass < 2; pass++) {
11016             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11017             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11018             int passreg = (pass == 0) ? rn : rm;
11019 
11020             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
11021             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
11022             tcg_res[pass] = tcg_temp_new_i64();
11023 
11024             switch (opcode) {
11025             case 0x17: /* ADDP */
11026                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11027                 break;
11028             case 0x58: /* FMAXNMP */
11029                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11030                 break;
11031             case 0x5a: /* FADDP */
11032                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11033                 break;
11034             case 0x5e: /* FMAXP */
11035                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11036                 break;
11037             case 0x78: /* FMINNMP */
11038                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11039                 break;
11040             case 0x7e: /* FMINP */
11041                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11042                 break;
11043             default:
11044                 g_assert_not_reached();
11045             }
11046         }
11047 
11048         for (pass = 0; pass < 2; pass++) {
11049             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11050         }
11051     } else {
11052         int maxpass = is_q ? 4 : 2;
11053         TCGv_i32 tcg_res[4];
11054 
11055         for (pass = 0; pass < maxpass; pass++) {
11056             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11057             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11058             NeonGenTwoOpFn *genfn = NULL;
11059             int passreg = pass < (maxpass / 2) ? rn : rm;
11060             int passelt = (is_q && (pass & 1)) ? 2 : 0;
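                  /*
                   * E.g. for 32-bit elements with is_q set (maxpass == 4):
                   * passes 0/1 read rn element pairs {0,1} and {2,3}, and
                   * passes 2/3 read the same pairs from rm.
                   */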
11061 
11062             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
11063             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
11064             tcg_res[pass] = tcg_temp_new_i32();
11065 
11066             switch (opcode) {
11067             case 0x17: /* ADDP */
11068             {
11069                 static NeonGenTwoOpFn * const fns[3] = {
11070                     gen_helper_neon_padd_u8,
11071                     gen_helper_neon_padd_u16,
11072                     tcg_gen_add_i32,
11073                 };
11074                 genfn = fns[size];
11075                 break;
11076             }
11077             case 0x14: /* SMAXP, UMAXP */
11078             {
11079                 static NeonGenTwoOpFn * const fns[3][2] = {
11080                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
11081                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
11082                     { tcg_gen_smax_i32, tcg_gen_umax_i32 },
11083                 };
11084                 genfn = fns[size][u];
11085                 break;
11086             }
11087             case 0x15: /* SMINP, UMINP */
11088             {
11089                 static NeonGenTwoOpFn * const fns[3][2] = {
11090                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
11091                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
11092                     { tcg_gen_smin_i32, tcg_gen_umin_i32 },
11093                 };
11094                 genfn = fns[size][u];
11095                 break;
11096             }
11097             /* The FP operations are all on single floats (32 bit) */
11098             case 0x58: /* FMAXNMP */
11099                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11100                 break;
11101             case 0x5a: /* FADDP */
11102                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11103                 break;
11104             case 0x5e: /* FMAXP */
11105                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11106                 break;
11107             case 0x78: /* FMINNMP */
11108                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11109                 break;
11110             case 0x7e: /* FMINP */
11111                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11112                 break;
11113             default:
11114                 g_assert_not_reached();
11115             }
11116 
11117             /* The FP ops above were emitted directly; for the integer ops, call genfn now. */
11118             if (genfn) {
11119                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
11120             }
11121         }
11122 
11123         for (pass = 0; pass < maxpass; pass++) {
11124             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11125         }
11126         clear_vec_high(s, is_q, rd);
11127     }
11128 }
11129 
11130 /* Floating point op subgroup of C3.6.16. */
11131 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
11132 {
11133     /* For floating point ops, the U, size[1] and opcode bits
11134      * together indicate the operation. size[0] indicates single
11135      * or double.
11136      */
11137     int fpopcode = extract32(insn, 11, 5)
11138         | (extract32(insn, 23, 1) << 5)
11139         | (extract32(insn, 29, 1) << 6);
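          /* E.g. FADDP is U=1, size[1]=0, opcode=0x1a, giving fpopcode 0x5a. */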
11140     int is_q = extract32(insn, 30, 1);
11141     int size = extract32(insn, 22, 1);
11142     int rm = extract32(insn, 16, 5);
11143     int rn = extract32(insn, 5, 5);
11144     int rd = extract32(insn, 0, 5);
11145 
11146     int datasize = is_q ? 128 : 64;
11147     int esize = 32 << size;
11148     int elements = datasize / esize;
11149 
11150     if (size == 1 && !is_q) {
11151         unallocated_encoding(s);
11152         return;
11153     }
11154 
11155     switch (fpopcode) {
11156     case 0x58: /* FMAXNMP */
11157     case 0x5a: /* FADDP */
11158     case 0x5e: /* FMAXP */
11159     case 0x78: /* FMINNMP */
11160     case 0x7e: /* FMINP */
11161         if (size && !is_q) {
11162             unallocated_encoding(s);
11163             return;
11164         }
11165         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
11166                                rn, rm, rd);
11167         return;
11168     case 0x1b: /* FMULX */
11169     case 0x1f: /* FRECPS */
11170     case 0x3f: /* FRSQRTS */
11171     case 0x5d: /* FACGE */
11172     case 0x7d: /* FACGT */
11173     case 0x19: /* FMLA */
11174     case 0x39: /* FMLS */
11175     case 0x18: /* FMAXNM */
11176     case 0x1a: /* FADD */
11177     case 0x1c: /* FCMEQ */
11178     case 0x1e: /* FMAX */
11179     case 0x38: /* FMINNM */
11180     case 0x3a: /* FSUB */
11181     case 0x3e: /* FMIN */
11182     case 0x5b: /* FMUL */
11183     case 0x5c: /* FCMGE */
11184     case 0x5f: /* FDIV */
11185     case 0x7a: /* FABD */
11186     case 0x7c: /* FCMGT */
11187         if (!fp_access_check(s)) {
11188             return;
11189         }
11190         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
11191         return;
11192 
11193     case 0x1d: /* FMLAL  */
11194     case 0x3d: /* FMLSL  */
11195     case 0x59: /* FMLAL2 */
11196     case 0x79: /* FMLSL2 */
11197         if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
11198             unallocated_encoding(s);
11199             return;
11200         }
11201         if (fp_access_check(s)) {
11202             int is_s = extract32(insn, 23, 1);
11203             int is_2 = extract32(insn, 29, 1);
11204             int data = (is_2 << 1) | is_s;
11205             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
11206                                vec_full_reg_offset(s, rn),
11207                                vec_full_reg_offset(s, rm), cpu_env,
11208                                is_q ? 16 : 8, vec_full_reg_size(s),
11209                                data, gen_helper_gvec_fmlal_a64);
11210         }
11211         return;
11212 
11213     default:
11214         unallocated_encoding(s);
11215         return;
11216     }
11217 }
11218 
11219 /* Integer op subgroup of C3.6.16. */
11220 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
11221 {
11222     int is_q = extract32(insn, 30, 1);
11223     int u = extract32(insn, 29, 1);
11224     int size = extract32(insn, 22, 2);
11225     int opcode = extract32(insn, 11, 5);
11226     int rm = extract32(insn, 16, 5);
11227     int rn = extract32(insn, 5, 5);
11228     int rd = extract32(insn, 0, 5);
11229     int pass;
11230     TCGCond cond;
11231 
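          /*
           * First pass over the opcode: reject size combinations that are
           * unallocated for this instruction before touching FP state.
           */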
11232     switch (opcode) {
11233     case 0x13: /* MUL, PMUL */
11234         if (u && size != 0) {
11235             unallocated_encoding(s);
11236             return;
11237         }
11238         /* fall through */
11239     case 0x0: /* SHADD, UHADD */
11240     case 0x2: /* SRHADD, URHADD */
11241     case 0x4: /* SHSUB, UHSUB */
11242     case 0xc: /* SMAX, UMAX */
11243     case 0xd: /* SMIN, UMIN */
11244     case 0xe: /* SABD, UABD */
11245     case 0xf: /* SABA, UABA */
11246     case 0x12: /* MLA, MLS */
11247         if (size == 3) {
11248             unallocated_encoding(s);
11249             return;
11250         }
11251         break;
11252     case 0x16: /* SQDMULH, SQRDMULH */
11253         if (size == 0 || size == 3) {
11254             unallocated_encoding(s);
11255             return;
11256         }
11257         break;
11258     default:
11259         if (size == 3 && !is_q) {
11260             unallocated_encoding(s);
11261             return;
11262         }
11263         break;
11264     }
11265 
11266     if (!fp_access_check(s)) {
11267         return;
11268     }
11269 
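          /*
           * Opcodes with a whole-vector (gvec) expansion are emitted here
           * and return early; anything left falls through to the
           * per-element loops below.
           */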
11270     switch (opcode) {
11271     case 0x01: /* SQADD, UQADD */
11272         if (u) {
11273             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
11274         } else {
11275             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
11276         }
11277         return;
11278     case 0x05: /* SQSUB, UQSUB */
11279         if (u) {
11280             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
11281         } else {
11282             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
11283         }
11284         return;
11285     case 0x08: /* SSHL, USHL */
11286         if (u) {
11287             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
11288         } else {
11289             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
11290         }
11291         return;
11292     case 0x0c: /* SMAX, UMAX */
11293         if (u) {
11294             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
11295         } else {
11296             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
11297         }
11298         return;
11299     case 0x0d: /* SMIN, UMIN */
11300         if (u) {
11301             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
11302         } else {
11303             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
11304         }
11305         return;
11306     case 0xe: /* SABD, UABD */
11307         if (u) {
11308             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
11309         } else {
11310             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
11311         }
11312         return;
11313     case 0xf: /* SABA, UABA */
11314         if (u) {
11315             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
11316         } else {
11317             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
11318         }
11319         return;
11320     case 0x10: /* ADD, SUB */
11321         if (u) {
11322             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
11323         } else {
11324             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
11325         }
11326         return;
11327     case 0x13: /* MUL, PMUL */
11328         if (!u) { /* MUL */
11329             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
11330         } else {  /* PMUL */
11331             gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
11332         }
11333         return;
11334     case 0x12: /* MLA, MLS */
11335         if (u) {
11336             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
11337         } else {
11338             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
11339         }
11340         return;
11341     case 0x16: /* SQDMULH, SQRDMULH */
11342         {
11343             static gen_helper_gvec_3_ptr * const fns[2][2] = {
11344                 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
11345                 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
11346             };
11347             gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
11348         }
11349         return;
11350     case 0x11:
11351         if (!u) { /* CMTST */
11352             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
11353             return;
11354         }
11355         /* else CMEQ */
11356         cond = TCG_COND_EQ;
11357         goto do_gvec_cmp;
11358     case 0x06: /* CMGT, CMHI */
11359         cond = u ? TCG_COND_GTU : TCG_COND_GT;
11360         goto do_gvec_cmp;
11361     case 0x07: /* CMGE, CMHS */
11362         cond = u ? TCG_COND_GEU : TCG_COND_GE;
11363     do_gvec_cmp:
11364         tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
11365                          vec_full_reg_offset(s, rn),
11366                          vec_full_reg_offset(s, rm),
11367                          is_q ? 16 : 8, vec_full_reg_size(s));
11368         return;
11369     }
11370 
11371     if (size == 3) {
11372         assert(is_q);
11373         for (pass = 0; pass < 2; pass++) {
11374             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11375             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11376             TCGv_i64 tcg_res = tcg_temp_new_i64();
11377 
11378             read_vec_element(s, tcg_op1, rn, pass, MO_64);
11379             read_vec_element(s, tcg_op2, rm, pass, MO_64);
11380 
11381             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
11382 
11383             write_vec_element(s, tcg_res, rd, pass, MO_64);
11384         }
11385     } else {
11386         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11387             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11388             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11389             TCGv_i32 tcg_res = tcg_temp_new_i32();
11390             NeonGenTwoOpFn *genfn = NULL;
11391             NeonGenTwoOpEnvFn *genenvfn = NULL;
11392 
11393             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
11394             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
11395 
11396             switch (opcode) {
11397             case 0x0: /* SHADD, UHADD */
11398             {
11399                 static NeonGenTwoOpFn * const fns[3][2] = {
11400                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
11401                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
11402                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
11403                 };
11404                 genfn = fns[size][u];
11405                 break;
11406             }
11407             case 0x2: /* SRHADD, URHADD */
11408             {
11409                 static NeonGenTwoOpFn * const fns[3][2] = {
11410                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
11411                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
11412                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
11413                 };
11414                 genfn = fns[size][u];
11415                 break;
11416             }
11417             case 0x4: /* SHSUB, UHSUB */
11418             {
11419                 static NeonGenTwoOpFn * const fns[3][2] = {
11420                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
11421                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
11422                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
11423                 };
11424                 genfn = fns[size][u];
11425                 break;
11426             }
11427             case 0x9: /* SQSHL, UQSHL */
11428             {
11429                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11430                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
11431                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
11432                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
11433                 };
11434                 genenvfn = fns[size][u];
11435                 break;
11436             }
11437             case 0xa: /* SRSHL, URSHL */
11438             {
11439                 static NeonGenTwoOpFn * const fns[3][2] = {
11440                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
11441                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
11442                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
11443                 };
11444                 genfn = fns[size][u];
11445                 break;
11446             }
11447             case 0xb: /* SQRSHL, UQRSHL */
11448             {
11449                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11450                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
11451                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
11452                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
11453                 };
11454                 genenvfn = fns[size][u];
11455                 break;
11456             }
11457             default:
11458                 g_assert_not_reached();
11459             }
11460 
11461             if (genenvfn) {
11462                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
11463             } else {
11464                 genfn(tcg_res, tcg_op1, tcg_op2);
11465             }
11466 
11467             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11468         }
11469     }
11470     clear_vec_high(s, is_q, rd);
11471 }
11472 
11473 /* AdvSIMD three same
11474  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11475  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11476  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11477  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11478  */
11479 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11480 {
11481     int opcode = extract32(insn, 11, 5);
11482 
11483     switch (opcode) {
11484     case 0x3: /* logic ops */
11485         disas_simd_3same_logic(s, insn);
11486         break;
11487     case 0x17: /* ADDP */
11488     case 0x14: /* SMAXP, UMAXP */
11489     case 0x15: /* SMINP, UMINP */
11490     {
11491         /* Pairwise operations */
11492         int is_q = extract32(insn, 30, 1);
11493         int u = extract32(insn, 29, 1);
11494         int size = extract32(insn, 22, 2);
11495         int rm = extract32(insn, 16, 5);
11496         int rn = extract32(insn, 5, 5);
11497         int rd = extract32(insn, 0, 5);
11498         if (opcode == 0x17) {
11499             if (u || (size == 3 && !is_q)) {
11500                 unallocated_encoding(s);
11501                 return;
11502             }
11503         } else {
11504             if (size == 3) {
11505                 unallocated_encoding(s);
11506                 return;
11507             }
11508         }
11509         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11510         break;
11511     }
11512     case 0x18 ... 0x31:
11513         /* floating point ops, sz[1] and U are part of opcode */
11514         disas_simd_3same_float(s, insn);
11515         break;
11516     default:
11517         disas_simd_3same_int(s, insn);
11518         break;
11519     }
11520 }
11521 
11522 /*
11523  * Advanced SIMD three same (ARMv8.2 FP16 variants)
11524  *
11525  *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11526  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11527  * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11528  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11529  *
11530  * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11531  * (register), FACGE, FABD, FCMGT (register) and FACGT.
11532  *
11533  */
11534 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11535 {
11536     int opcode = extract32(insn, 11, 3);
11537     int u = extract32(insn, 29, 1);
11538     int a = extract32(insn, 23, 1);
11539     int is_q = extract32(insn, 30, 1);
11540     int rm = extract32(insn, 16, 5);
11541     int rn = extract32(insn, 5, 5);
11542     int rd = extract32(insn, 0, 5);
11543     /*
11544      * For these floating point ops, the U, a and opcode bits
11545      * together indicate the operation.
11546      */
11547     int fpopcode = opcode | (a << 3) | (u << 4);
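          /* E.g. FADDP is u=1, a=0, opcode=2, giving fpopcode 0x12. */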
11548     int datasize = is_q ? 128 : 64;
11549     int elements = datasize / 16;
11550     bool pairwise;
11551     TCGv_ptr fpst;
11552     int pass;
11553 
11554     switch (fpopcode) {
11555     case 0x0: /* FMAXNM */
11556     case 0x1: /* FMLA */
11557     case 0x2: /* FADD */
11558     case 0x3: /* FMULX */
11559     case 0x4: /* FCMEQ */
11560     case 0x6: /* FMAX */
11561     case 0x7: /* FRECPS */
11562     case 0x8: /* FMINNM */
11563     case 0x9: /* FMLS */
11564     case 0xa: /* FSUB */
11565     case 0xe: /* FMIN */
11566     case 0xf: /* FRSQRTS */
11567     case 0x13: /* FMUL */
11568     case 0x14: /* FCMGE */
11569     case 0x15: /* FACGE */
11570     case 0x17: /* FDIV */
11571     case 0x1a: /* FABD */
11572     case 0x1c: /* FCMGT */
11573     case 0x1d: /* FACGT */
11574         pairwise = false;
11575         break;
11576     case 0x10: /* FMAXNMP */
11577     case 0x12: /* FADDP */
11578     case 0x16: /* FMAXP */
11579     case 0x18: /* FMINNMP */
11580     case 0x1e: /* FMINP */
11581         pairwise = true;
11582         break;
11583     default:
11584         unallocated_encoding(s);
11585         return;
11586     }
11587 
11588     if (!dc_isar_feature(aa64_fp16, s)) {
11589         unallocated_encoding(s);
11590         return;
11591     }
11592 
11593     if (!fp_access_check(s)) {
11594         return;
11595     }
11596 
11597     fpst = fpstatus_ptr(FPST_FPCR_F16);
11598 
11599     if (pairwise) {
11600         int maxpass = is_q ? 8 : 4;
11601         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11602         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11603         TCGv_i32 tcg_res[8];
11604 
11605         for (pass = 0; pass < maxpass; pass++) {
11606             int passreg = pass < (maxpass / 2) ? rn : rm;
11607             int passelt = (pass << 1) & (maxpass - 1);
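                  /*
                   * E.g. with is_q set (maxpass == 8): passes 0-3 read rn
                   * element pairs {0,1},{2,3},{4,5},{6,7}; passes 4-7 read
                   * the same pairs from rm.
                   */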
11608 
11609             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11610             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11611             tcg_res[pass] = tcg_temp_new_i32();
11612 
11613             switch (fpopcode) {
11614             case 0x10: /* FMAXNMP */
11615                 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11616                                            fpst);
11617                 break;
11618             case 0x12: /* FADDP */
11619                 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11620                 break;
11621             case 0x16: /* FMAXP */
11622                 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11623                 break;
11624             case 0x18: /* FMINNMP */
11625                 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11626                                            fpst);
11627                 break;
11628             case 0x1e: /* FMINP */
11629                 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11630                 break;
11631             default:
11632                 g_assert_not_reached();
11633             }
11634         }
11635 
11636         for (pass = 0; pass < maxpass; pass++) {
11637             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11638         }
11639     } else {
11640         for (pass = 0; pass < elements; pass++) {
11641             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11642             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11643             TCGv_i32 tcg_res = tcg_temp_new_i32();
11644 
11645             read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11646             read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11647 
11648             switch (fpopcode) {
11649             case 0x0: /* FMAXNM */
11650                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11651                 break;
11652             case 0x1: /* FMLA */
11653                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11654                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11655                                            fpst);
11656                 break;
11657             case 0x2: /* FADD */
11658                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11659                 break;
11660             case 0x3: /* FMULX */
11661                 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11662                 break;
11663             case 0x4: /* FCMEQ */
11664                 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11665                 break;
11666             case 0x6: /* FMAX */
11667                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11668                 break;
11669             case 0x7: /* FRECPS */
11670                 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11671                 break;
11672             case 0x8: /* FMINNM */
11673                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11674                 break;
11675             case 0x9: /* FMLS */
11676                 /* As usual for ARM, separate negation for fused multiply-add */
11677                 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11678                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11679                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11680                                            fpst);
11681                 break;
11682             case 0xa: /* FSUB */
11683                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11684                 break;
11685             case 0xe: /* FMIN */
11686                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11687                 break;
11688             case 0xf: /* FRSQRTS */
11689                 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11690                 break;
11691             case 0x13: /* FMUL */
11692                 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11693                 break;
11694             case 0x14: /* FCMGE */
11695                 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11696                 break;
11697             case 0x15: /* FACGE */
11698                 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11699                 break;
11700             case 0x17: /* FDIV */
11701                 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11702                 break;
11703             case 0x1a: /* FABD */
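                      /* Compute a - b, then clear the fp16 sign bit: fabs(a - b). */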
11704                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11705                 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
11706                 break;
11707             case 0x1c: /* FCMGT */
11708                 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11709                 break;
11710             case 0x1d: /* FACGT */
11711                 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11712                 break;
11713             default:
11714                 g_assert_not_reached();
11715             }
11716 
11717             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11718         }
11719     }
11720 
11721     clear_vec_high(s, is_q, rd);
11722 }
11723 
11724 /* AdvSIMD three same extra
11725  *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
11726  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11727  * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
11728  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11729  */
11730 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
11731 {
11732     int rd = extract32(insn, 0, 5);
11733     int rn = extract32(insn, 5, 5);
11734     int opcode = extract32(insn, 11, 4);
11735     int rm = extract32(insn, 16, 5);
11736     int size = extract32(insn, 22, 2);
11737     bool u = extract32(insn, 29, 1);
11738     bool is_q = extract32(insn, 30, 1);
11739     bool feature;
11740     int rot;
11741 
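          /*
           * The switch index combines U (as bit 4) with the four opcode bits,
           * e.g. SDOT is U=0/opcode=2 -> 0x02 and UDOT is U=1/opcode=2 -> 0x12.
           */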
11742     switch (u * 16 + opcode) {
11743     case 0x10: /* SQRDMLAH (vector) */
11744     case 0x11: /* SQRDMLSH (vector) */
11745         if (size != 1 && size != 2) {
11746             unallocated_encoding(s);
11747             return;
11748         }
11749         feature = dc_isar_feature(aa64_rdm, s);
11750         break;
11751     case 0x02: /* SDOT (vector) */
11752     case 0x12: /* UDOT (vector) */
11753         if (size != MO_32) {
11754             unallocated_encoding(s);
11755             return;
11756         }
11757         feature = dc_isar_feature(aa64_dp, s);
11758         break;
11759     case 0x03: /* USDOT */
11760         if (size != MO_32) {
11761             unallocated_encoding(s);
11762             return;
11763         }
11764         feature = dc_isar_feature(aa64_i8mm, s);
11765         break;
11766     case 0x04: /* SMMLA */
11767     case 0x14: /* UMMLA */
11768     case 0x05: /* USMMLA */
11769         if (!is_q || size != MO_32) {
11770             unallocated_encoding(s);
11771             return;
11772         }
11773         feature = dc_isar_feature(aa64_i8mm, s);
11774         break;
11775     case 0x18: /* FCMLA, #0 */
11776     case 0x19: /* FCMLA, #90 */
11777     case 0x1a: /* FCMLA, #180 */
11778     case 0x1b: /* FCMLA, #270 */
11779     case 0x1c: /* FCADD, #90 */
11780     case 0x1e: /* FCADD, #270 */
11781         if (size == 0
11782             || (size == 1 && !dc_isar_feature(aa64_fp16, s))
11783             || (size == 3 && !is_q)) {
11784             unallocated_encoding(s);
11785             return;
11786         }
11787         feature = dc_isar_feature(aa64_fcma, s);
11788         break;
11789     case 0x1d: /* BFMMLA */
11790         if (size != MO_16 || !is_q) {
11791             unallocated_encoding(s);
11792             return;
11793         }
11794         feature = dc_isar_feature(aa64_bf16, s);
11795         break;
11796     case 0x1f:
11797         switch (size) {
11798         case 1: /* BFDOT */
11799         case 3: /* BFMLAL{B,T} */
11800             feature = dc_isar_feature(aa64_bf16, s);
11801             break;
11802         default:
11803             unallocated_encoding(s);
11804             return;
11805         }
11806         break;
11807     default:
11808         unallocated_encoding(s);
11809         return;
11810     }
11811     if (!feature) {
11812         unallocated_encoding(s);
11813         return;
11814     }
11815     if (!fp_access_check(s)) {
11816         return;
11817     }
11818 
11819     switch (opcode) {
11820     case 0x0: /* SQRDMLAH (vector) */
11821         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
11822         return;
11823 
11824     case 0x1: /* SQRDMLSH (vector) */
11825         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
11826         return;
11827 
11828     case 0x2: /* SDOT / UDOT */
11829         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
11830                          u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
11831         return;
11832 
11833     case 0x3: /* USDOT */
11834         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
11835         return;
11836 
11837     case 0x04: /* SMMLA, UMMLA */
11838         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
11839                          u ? gen_helper_gvec_ummla_b
11840                          : gen_helper_gvec_smmla_b);
11841         return;
11842     case 0x05: /* USMMLA */
11843         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
11844         return;
11845 
11846     case 0x8: /* FCMLA, #0 */
11847     case 0x9: /* FCMLA, #90 */
11848     case 0xa: /* FCMLA, #180 */
11849     case 0xb: /* FCMLA, #270 */
11850         rot = extract32(opcode, 0, 2);
11851         switch (size) {
11852         case 1:
11853             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
11854                               gen_helper_gvec_fcmlah);
11855             break;
11856         case 2:
11857             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11858                               gen_helper_gvec_fcmlas);
11859             break;
11860         case 3:
11861             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11862                               gen_helper_gvec_fcmlad);
11863             break;
11864         default:
11865             g_assert_not_reached();
11866         }
11867         return;
11868 
11869     case 0xc: /* FCADD, #90 */
11870     case 0xe: /* FCADD, #270 */
11871         rot = extract32(opcode, 1, 1);
11872         switch (size) {
11873         case 1:
11874             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11875                               gen_helper_gvec_fcaddh);
11876             break;
11877         case 2:
11878             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11879                               gen_helper_gvec_fcadds);
11880             break;
11881         case 3:
11882             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11883                               gen_helper_gvec_fcaddd);
11884             break;
11885         default:
11886             g_assert_not_reached();
11887         }
11888         return;
11889 
11890     case 0xd: /* BFMMLA */
11891         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
11892         return;
11893     case 0xf:
11894         switch (size) {
11895         case 1: /* BFDOT */
11896             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
11897             break;
11898         case 3: /* BFMLAL{B,T} */
11899             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
11900                               gen_helper_gvec_bfmlal);
11901             break;
11902         default:
11903             g_assert_not_reached();
11904         }
11905         return;
11906 
11907     default:
11908         g_assert_not_reached();
11909     }
11910 }
11911 
11912 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11913                                   int size, int rn, int rd)
11914 {
11915     /* Handle 2-reg-misc ops which are widening (so each size element
11916      * in the source becomes a 2*size element in the destination).
11917      * The only instruction like this is FCVTL.
11918      */
11919     int pass;
11920 
11921     if (size == 3) {
11922         /* 32 -> 64 bit fp conversion */
11923         TCGv_i64 tcg_res[2];
11924         int srcelt = is_q ? 2 : 0;
11925 
11926         for (pass = 0; pass < 2; pass++) {
11927             TCGv_i32 tcg_op = tcg_temp_new_i32();
11928             tcg_res[pass] = tcg_temp_new_i64();
11929 
11930             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11931             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
11932         }
11933         for (pass = 0; pass < 2; pass++) {
11934             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11935         }
11936     } else {
11937         /* 16 -> 32 bit fp conversion */
11938         int srcelt = is_q ? 4 : 0;
11939         TCGv_i32 tcg_res[4];
11940         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
11941         TCGv_i32 ahp = get_ahp_flag();
11942 
11943         for (pass = 0; pass < 4; pass++) {
11944             tcg_res[pass] = tcg_temp_new_i32();
11945 
11946             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11947             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11948                                            fpst, ahp);
11949         }
11950         for (pass = 0; pass < 4; pass++) {
11951             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11952         }
11953     }
11954 }
11955 
11956 static void handle_rev(DisasContext *s, int opcode, bool u,
11957                        bool is_q, int size, int rn, int rd)
11958 {
11959     int op = (opcode << 1) | u;
11960     int opsz = op + size;
11961     int grp_size = 3 - opsz;
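          /*
           * grp_size is log2 of the number of elements in each reversed
           * group; the group spans 8 >> op bytes (REV64 -> 8, REV32 -> 4,
           * REV16 -> 2) regardless of element size.
           */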
11962     int dsize = is_q ? 128 : 64;
11963     int i;
11964 
11965     if (opsz >= 3) {
11966         unallocated_encoding(s);
11967         return;
11968     }
11969 
11970     if (!fp_access_check(s)) {
11971         return;
11972     }
11973 
11974     if (size == 0) {
11975         /* Special case for bytes: use a bswap op on each group of elements. */
11976         int groups = dsize / (8 << grp_size);
11977 
11978         for (i = 0; i < groups; i++) {
11979             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11980 
11981             read_vec_element(s, tcg_tmp, rn, i, grp_size);
11982             switch (grp_size) {
11983             case MO_16:
11984                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11985                 break;
11986             case MO_32:
11987                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11988                 break;
11989             case MO_64:
11990                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11991                 break;
11992             default:
11993                 g_assert_not_reached();
11994             }
11995             write_vec_element(s, tcg_tmp, rd, i, grp_size);
11996         }
11997         clear_vec_high(s, is_q, rd);
11998     } else {
11999         int revmask = (1 << grp_size) - 1;
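              /* XOR-ing the element index with revmask reverses the order within each group. */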
12000         int esize = 8 << size;
12001         int elements = dsize / esize;
12002         TCGv_i64 tcg_rn = tcg_temp_new_i64();
12003         TCGv_i64 tcg_rd[2];
12004 
12005         for (i = 0; i < 2; i++) {
12006             tcg_rd[i] = tcg_temp_new_i64();
12007             tcg_gen_movi_i64(tcg_rd[i], 0);
12008         }
12009 
12010         for (i = 0; i < elements; i++) {
12011             int e_rev = (i & 0xf) ^ revmask;
12012             int w = (e_rev * esize) / 64;
12013             int o = (e_rev * esize) % 64;
12014 
12015             read_vec_element(s, tcg_rn, rn, i, size);
12016             tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
12017         }
12018 
12019         for (i = 0; i < 2; i++) {
12020             write_vec_element(s, tcg_rd[i], rd, i, MO_64);
12021         }
12022         clear_vec_high(s, true, rd);
12023     }
12024 }
12025 
12026 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
12027                                   bool is_q, int size, int rn, int rd)
12028 {
12029     /* Implement the pairwise operations from 2-misc:
12030      * SADDLP, UADDLP, SADALP, UADALP.
12031      * These all add pairs of elements in the input to produce a
12032      * double-width result element in the output (possibly accumulating).
12033      */
12034     bool accum = (opcode == 0x6);
12035     int maxpass = is_q ? 2 : 1;
12036     int pass;
12037     TCGv_i64 tcg_res[2];
12038 
12039     if (size == 2) {
12040         /* 32 + 32 -> 64 op */
12041         MemOp memop = size + (u ? 0 : MO_SIGN);
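              /* For the signed ops (u == 0), MO_SIGN makes each 32-bit element sign-extend on read. */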
12042 
12043         for (pass = 0; pass < maxpass; pass++) {
12044             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
12045             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
12046 
12047             tcg_res[pass] = tcg_temp_new_i64();
12048 
12049             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
12050             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
12051             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
12052             if (accum) {
12053                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
12054                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
12055             }
12056         }
12057     } else {
12058         for (pass = 0; pass < maxpass; pass++) {
12059             TCGv_i64 tcg_op = tcg_temp_new_i64();
12060             NeonGenOne64OpFn *genfn;
12061             static NeonGenOne64OpFn * const fns[2][2] = {
12062                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
12063                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
12064             };
12065 
12066             genfn = fns[size][u];
12067 
12068             tcg_res[pass] = tcg_temp_new_i64();
12069 
12070             read_vec_element(s, tcg_op, rn, pass, MO_64);
12071             genfn(tcg_res[pass], tcg_op);
12072 
12073             if (accum) {
12074                 read_vec_element(s, tcg_op, rd, pass, MO_64);
12075                 if (size == 0) {
12076                     gen_helper_neon_addl_u16(tcg_res[pass],
12077                                              tcg_res[pass], tcg_op);
12078                 } else {
12079                     gen_helper_neon_addl_u32(tcg_res[pass],
12080                                              tcg_res[pass], tcg_op);
12081                 }
12082             }
12083         }
12084     }
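          /* For the 64-bit variant, writing zeroes to pass 1 clears the high half of rd. */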
12085     if (!is_q) {
12086         tcg_res[1] = tcg_constant_i64(0);
12087     }
12088     for (pass = 0; pass < 2; pass++) {
12089         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12090     }
12091 }
12092 
12093 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
12094 {
12095     /* Implement SHLL and SHLL2 */
12096     int pass;
12097     int part = is_q ? 2 : 0;
12098     TCGv_i64 tcg_res[2];
12099 
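          /*
           * Each of the two passes reads one 32-bit chunk of the source
           * (the high half for SHLL2, via 'part'), widens its elements to
           * double width, then shifts left by the element size (8 << size).
           */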
12100     for (pass = 0; pass < 2; pass++) {
12101         static NeonGenWidenFn * const widenfns[3] = {
12102             gen_helper_neon_widen_u8,
12103             gen_helper_neon_widen_u16,
12104             tcg_gen_extu_i32_i64,
12105         };
12106         NeonGenWidenFn *widenfn = widenfns[size];
12107         TCGv_i32 tcg_op = tcg_temp_new_i32();
12108 
12109         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
12110         tcg_res[pass] = tcg_temp_new_i64();
12111         widenfn(tcg_res[pass], tcg_op);
12112         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
12113     }
12114 
12115     for (pass = 0; pass < 2; pass++) {
12116         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12117     }
12118 }
12119 
12120 /* AdvSIMD two reg misc
12121  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
12122  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
12123  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12124  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
12125  */
12126 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
12127 {
12128     int size = extract32(insn, 22, 2);
12129     int opcode = extract32(insn, 12, 5);
12130     bool u = extract32(insn, 29, 1);
12131     bool is_q = extract32(insn, 30, 1);
12132     int rn = extract32(insn, 5, 5);
12133     int rd = extract32(insn, 0, 5);
12134     bool need_fpstatus = false;
12135     int rmode = -1;
12136     TCGv_i32 tcg_rmode;
12137     TCGv_ptr tcg_fpstatus;
12138 
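          /*
           * The first switch below handles decode checks and the opcodes
           * with dedicated handlers; remaining opcodes fall through to the
           * shared per-element code at the end of the function.
           */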
12139     switch (opcode) {
12140     case 0x0: /* REV64, REV32 */
12141     case 0x1: /* REV16 */
12142         handle_rev(s, opcode, u, is_q, size, rn, rd);
12143         return;
12144     case 0x5: /* CNT, NOT, RBIT */
12145         if (u && size == 0) {
12146             /* NOT */
12147             break;
12148         } else if (u && size == 1) {
12149             /* RBIT */
12150             break;
12151         } else if (!u && size == 0) {
12152             /* CNT */
12153             break;
12154         }
12155         unallocated_encoding(s);
12156         return;
12157     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
12158     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
12159         if (size == 3) {
12160             unallocated_encoding(s);
12161             return;
12162         }
12163         if (!fp_access_check(s)) {
12164             return;
12165         }
12166 
12167         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
12168         return;
12169     case 0x4: /* CLS, CLZ */
12170         if (size == 3) {
12171             unallocated_encoding(s);
12172             return;
12173         }
12174         break;
12175     case 0x2: /* SADDLP, UADDLP */
12176     case 0x6: /* SADALP, UADALP */
12177         if (size == 3) {
12178             unallocated_encoding(s);
12179             return;
12180         }
12181         if (!fp_access_check(s)) {
12182             return;
12183         }
12184         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
12185         return;
12186     case 0x13: /* SHLL, SHLL2 */
12187         if (u == 0 || size == 3) {
12188             unallocated_encoding(s);
12189             return;
12190         }
12191         if (!fp_access_check(s)) {
12192             return;
12193         }
12194         handle_shll(s, is_q, size, rn, rd);
12195         return;
12196     case 0xa: /* CMLT */
12197         if (u == 1) {
12198             unallocated_encoding(s);
12199             return;
12200         }
12201         /* fall through */
12202     case 0x8: /* CMGT, CMGE */
12203     case 0x9: /* CMEQ, CMLE */
12204     case 0xb: /* ABS, NEG */
12205         if (size == 3 && !is_q) {
12206             unallocated_encoding(s);
12207             return;
12208         }
12209         break;
12210     case 0x3: /* SUQADD, USQADD */
12211         if (size == 3 && !is_q) {
12212             unallocated_encoding(s);
12213             return;
12214         }
12215         if (!fp_access_check(s)) {
12216             return;
12217         }
12218         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
12219         return;
12220     case 0x7: /* SQABS, SQNEG */
12221         if (size == 3 && !is_q) {
12222             unallocated_encoding(s);
12223             return;
12224         }
12225         break;
12226     case 0xc ... 0xf:
12227     case 0x16 ... 0x1f:
12228     {
12229         /* Floating point: U, size[1] and opcode indicate operation;
12230          * size[0] indicates single or double precision.
12231          */
12232         int is_double = extract32(size, 0, 1);
12233         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
12234         size = is_double ? 3 : 2;
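              /* E.g. FNEG: base opcode 0xf, size[1]=1, U=1 -> opcode 0x6f below. */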
12235         switch (opcode) {
12236         case 0x2f: /* FABS */
12237         case 0x6f: /* FNEG */
12238             if (size == 3 && !is_q) {
12239                 unallocated_encoding(s);
12240                 return;
12241             }
12242             break;
12243         case 0x1d: /* SCVTF */
12244         case 0x5d: /* UCVTF */
12245         {
12246             bool is_signed = (opcode == 0x1d);
12247             int elements = is_double ? 2 : is_q ? 4 : 2;
12248             if (is_double && !is_q) {
12249                 unallocated_encoding(s);
12250                 return;
12251             }
12252             if (!fp_access_check(s)) {
12253                 return;
12254             }
12255             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12256             return;
12257         }
12258         case 0x2c: /* FCMGT (zero) */
12259         case 0x2d: /* FCMEQ (zero) */
12260         case 0x2e: /* FCMLT (zero) */
12261         case 0x6c: /* FCMGE (zero) */
12262         case 0x6d: /* FCMLE (zero) */
12263             if (size == 3 && !is_q) {
12264                 unallocated_encoding(s);
12265                 return;
12266             }
12267             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12268             return;
12269         case 0x7f: /* FSQRT */
12270             if (size == 3 && !is_q) {
12271                 unallocated_encoding(s);
12272                 return;
12273             }
12274             break;
12275         case 0x1a: /* FCVTNS */
12276         case 0x1b: /* FCVTMS */
12277         case 0x3a: /* FCVTPS */
12278         case 0x3b: /* FCVTZS */
12279         case 0x5a: /* FCVTNU */
12280         case 0x5b: /* FCVTMU */
12281         case 0x7a: /* FCVTPU */
12282         case 0x7b: /* FCVTZU */
12283             need_fpstatus = true;
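                  /* Bits <5> and <0> of the extended opcode give the rounding
                   * mode: N -> TIEEVEN(0), P -> POSINF(1), M -> NEGINF(2),
                   * Z -> ZERO(3).
                   */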
12284             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12285             if (size == 3 && !is_q) {
12286                 unallocated_encoding(s);
12287                 return;
12288             }
12289             break;
12290         case 0x5c: /* FCVTAU */
12291         case 0x1c: /* FCVTAS */
12292             need_fpstatus = true;
12293             rmode = FPROUNDING_TIEAWAY;
12294             if (size == 3 && !is_q) {
12295                 unallocated_encoding(s);
12296                 return;
12297             }
12298             break;
12299         case 0x3c: /* URECPE */
12300             if (size == 3) {
12301                 unallocated_encoding(s);
12302                 return;
12303             }
12304             /* fall through */
12305         case 0x3d: /* FRECPE */
12306         case 0x7d: /* FRSQRTE */
12307             if (size == 3 && !is_q) {
12308                 unallocated_encoding(s);
12309                 return;
12310             }
12311             if (!fp_access_check(s)) {
12312                 return;
12313             }
12314             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12315             return;
12316         case 0x56: /* FCVTXN, FCVTXN2 */
12317             if (size == 2) {
12318                 unallocated_encoding(s);
12319                 return;
12320             }
12321             /* fall through */
12322         case 0x16: /* FCVTN, FCVTN2 */
12323             /* handle_2misc_narrow does a 2*size -> size operation, but these
12324              * instructions encode the source size rather than dest size.
12325              */
12326             if (!fp_access_check(s)) {
12327                 return;
12328             }
12329             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12330             return;
12331         case 0x36: /* BFCVTN, BFCVTN2 */
12332             if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
12333                 unallocated_encoding(s);
12334                 return;
12335             }
12336             if (!fp_access_check(s)) {
12337                 return;
12338             }
12339             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12340             return;
12341         case 0x17: /* FCVTL, FCVTL2 */
12342             if (!fp_access_check(s)) {
12343                 return;
12344             }
12345             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12346             return;
12347         case 0x18: /* FRINTN */
12348         case 0x19: /* FRINTM */
12349         case 0x38: /* FRINTP */
12350         case 0x39: /* FRINTZ */
12351             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12352             /* fall through */
12353         case 0x59: /* FRINTX */
12354         case 0x79: /* FRINTI */
12355             need_fpstatus = true;
12356             if (size == 3 && !is_q) {
12357                 unallocated_encoding(s);
12358                 return;
12359             }
12360             break;
12361         case 0x58: /* FRINTA */
12362             rmode = FPROUNDING_TIEAWAY;
12363             need_fpstatus = true;
12364             if (size == 3 && !is_q) {
12365                 unallocated_encoding(s);
12366                 return;
12367             }
12368             break;
12369         case 0x7c: /* URSQRTE */
12370             if (size == 3) {
12371                 unallocated_encoding(s);
12372                 return;
12373             }
12374             break;
12375         case 0x1e: /* FRINT32Z */
12376         case 0x1f: /* FRINT64Z */
12377             rmode = FPROUNDING_ZERO;
12378             /* fall through */
12379         case 0x5e: /* FRINT32X */
12380         case 0x5f: /* FRINT64X */
12381             need_fpstatus = true;
12382             if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12383                 unallocated_encoding(s);
12384                 return;
12385             }
12386             break;
12387         default:
12388             unallocated_encoding(s);
12389             return;
12390         }
12391         break;
12392     }
12393     default:
12394         unallocated_encoding(s);
12395         return;
12396     }
12397 
12398     if (!fp_access_check(s)) {
12399         return;
12400     }
12401 
12402     if (need_fpstatus || rmode >= 0) {
12403         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
12404     } else {
12405         tcg_fpstatus = NULL;
12406     }
12407     if (rmode >= 0) {
12408         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12409     } else {
12410         tcg_rmode = NULL;
12411     }
12412 
12413     switch (opcode) {
12414     case 0x5:
12415         if (u && size == 0) { /* NOT */
12416             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12417             return;
12418         }
12419         break;
12420     case 0x8: /* CMGT, CMGE */
12421         if (u) {
12422             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
12423         } else {
12424             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
12425         }
12426         return;
12427     case 0x9: /* CMEQ, CMLE */
12428         if (u) {
12429             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
12430         } else {
12431             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
12432         }
12433         return;
12434     case 0xa: /* CMLT */
12435         gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
12436         return;
12437     case 0xb:
12438         if (u) { /* ABS, NEG */
12439             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12440         } else {
12441             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12442         }
12443         return;
12444     }
12445 
12446     if (size == 3) {
12447         /* All 64-bit element operations can be shared with scalar 2misc */
12448         int pass;
12449 
12450         /* Coverity claims (size == 3 && !is_q) has been eliminated
12451          * from all paths leading to here.
12452          */
12453         tcg_debug_assert(is_q);
12454         for (pass = 0; pass < 2; pass++) {
12455             TCGv_i64 tcg_op = tcg_temp_new_i64();
12456             TCGv_i64 tcg_res = tcg_temp_new_i64();
12457 
12458             read_vec_element(s, tcg_op, rn, pass, MO_64);
12459 
12460             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12461                             tcg_rmode, tcg_fpstatus);
12462 
12463             write_vec_element(s, tcg_res, rd, pass, MO_64);
12464         }
12465     } else {
12466         int pass;
12467 
12468         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12469             TCGv_i32 tcg_op = tcg_temp_new_i32();
12470             TCGv_i32 tcg_res = tcg_temp_new_i32();
12471 
12472             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12473 
12474             if (size == 2) {
12475                 /* Special cases for 32 bit elements */
12476                 switch (opcode) {
12477                 case 0x4: /* CLS, CLZ */
12478                     if (u) {
12479                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12480                     } else {
12481                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
12482                     }
12483                     break;
12484                 case 0x7: /* SQABS, SQNEG */
12485                     if (u) {
12486                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12487                     } else {
12488                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12489                     }
12490                     break;
12491                 case 0x2f: /* FABS */
12492                     gen_helper_vfp_abss(tcg_res, tcg_op);
12493                     break;
12494                 case 0x6f: /* FNEG */
12495                     gen_helper_vfp_negs(tcg_res, tcg_op);
12496                     break;
12497                 case 0x7f: /* FSQRT */
12498                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12499                     break;
12500                 case 0x1a: /* FCVTNS */
12501                 case 0x1b: /* FCVTMS */
12502                 case 0x1c: /* FCVTAS */
12503                 case 0x3a: /* FCVTPS */
12504                 case 0x3b: /* FCVTZS */
12505                     gen_helper_vfp_tosls(tcg_res, tcg_op,
12506                                          tcg_constant_i32(0), tcg_fpstatus);
12507                     break;
12508                 case 0x5a: /* FCVTNU */
12509                 case 0x5b: /* FCVTMU */
12510                 case 0x5c: /* FCVTAU */
12511                 case 0x7a: /* FCVTPU */
12512                 case 0x7b: /* FCVTZU */
12513                     gen_helper_vfp_touls(tcg_res, tcg_op,
12514                                          tcg_constant_i32(0), tcg_fpstatus);
12515                     break;
12516                 case 0x18: /* FRINTN */
12517                 case 0x19: /* FRINTM */
12518                 case 0x38: /* FRINTP */
12519                 case 0x39: /* FRINTZ */
12520                 case 0x58: /* FRINTA */
12521                 case 0x79: /* FRINTI */
12522                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12523                     break;
12524                 case 0x59: /* FRINTX */
12525                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12526                     break;
12527                 case 0x7c: /* URSQRTE */
12528                     gen_helper_rsqrte_u32(tcg_res, tcg_op);
12529                     break;
12530                 case 0x1e: /* FRINT32Z */
12531                 case 0x5e: /* FRINT32X */
12532                     gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12533                     break;
12534                 case 0x1f: /* FRINT64Z */
12535                 case 0x5f: /* FRINT64X */
12536                     gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12537                     break;
12538                 default:
12539                     g_assert_not_reached();
12540                 }
12541             } else {
12542                 /* Use helpers for 8 and 16 bit elements */
12543                 switch (opcode) {
12544                 case 0x5: /* CNT, RBIT */
12545                     /* For these two insns size is part of the opcode specifier
12546                      * (handled earlier); they always operate on byte elements.
12547                      */
12548                     if (u) {
12549                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12550                     } else {
12551                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12552                     }
12553                     break;
12554                 case 0x7: /* SQABS, SQNEG */
12555                 {
12556                     NeonGenOneOpEnvFn *genfn;
12557                     static NeonGenOneOpEnvFn * const fns[2][2] = {
12558                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12559                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12560                     };
12561                     genfn = fns[size][u];
12562                     genfn(tcg_res, cpu_env, tcg_op);
12563                     break;
12564                 }
12565                 case 0x4: /* CLS, CLZ */
12566                     if (u) {
12567                         if (size == 0) {
12568                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
12569                         } else {
12570                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
12571                         }
12572                     } else {
12573                         if (size == 0) {
12574                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
12575                         } else {
12576                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
12577                         }
12578                     }
12579                     break;
12580                 default:
12581                     g_assert_not_reached();
12582                 }
12583             }
12584 
12585             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12586         }
12587     }
12588     clear_vec_high(s, is_q, rd);
12589 
12590     if (tcg_rmode) {
12591         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12592     }
12593 }
12594 
12595 /* AdvSIMD [scalar] two register miscellaneous (FP16)
12596  *
12597  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
12598  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12599  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12600  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12601  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12602  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12603  *
12604  * This actually covers two groups where scalar access is governed by
12605  * This actually covers two groups, with scalar access governed by
12606  * bit 28. Several of the instructions (the float-to-integral ones)
12607  * exist only in the vector form and are unallocated in the scalar
12608  * decode; also, in the scalar decode Q is always 1.
12608  */
12609 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12610 {
12611     int fpop, opcode, a, u;
12612     int rn, rd;
12613     bool is_q;
12614     bool is_scalar;
12615     bool only_in_vector = false;
12616 
12617     int pass;
12618     TCGv_i32 tcg_rmode = NULL;
12619     TCGv_ptr tcg_fpstatus = NULL;
12620     bool need_fpst = true;
12621     int rmode = -1;
12622 
12623     if (!dc_isar_feature(aa64_fp16, s)) {
12624         unallocated_encoding(s);
12625         return;
12626     }
12627 
12628     rd = extract32(insn, 0, 5);
12629     rn = extract32(insn, 5, 5);
12630 
12631     a = extract32(insn, 23, 1);
12632     u = extract32(insn, 29, 1);
12633     is_scalar = extract32(insn, 28, 1);
12634     is_q = extract32(insn, 30, 1);
12635 
12636     opcode = extract32(insn, 12, 5);
12637     fpop = deposit32(opcode, 5, 1, a);
12638     fpop = deposit32(fpop, 6, 1, u);
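    /*
     * fpop stacks the a and u bits on top of the 5-bit opcode so that
     * the switch below can match a single value: e.g. FCVTZS is
     * opcode 0x1b with a = 1, u = 0, giving fpop 0x3b, and setting U
     * selects the unsigned variant FCVTZU (0x7b).
     */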
12639 
12640     switch (fpop) {
12641     case 0x1d: /* SCVTF */
12642     case 0x5d: /* UCVTF */
12643     {
12644         int elements;
12645 
12646         if (is_scalar) {
12647             elements = 1;
12648         } else {
12649             elements = (is_q ? 8 : 4);
12650         }
12651 
12652         if (!fp_access_check(s)) {
12653             return;
12654         }
12655         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12656         return;
12657     }
12659     case 0x2c: /* FCMGT (zero) */
12660     case 0x2d: /* FCMEQ (zero) */
12661     case 0x2e: /* FCMLT (zero) */
12662     case 0x6c: /* FCMGE (zero) */
12663     case 0x6d: /* FCMLE (zero) */
12664         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12665         return;
12666     case 0x3d: /* FRECPE */
12667     case 0x3f: /* FRECPX */
12668         break;
12669     case 0x18: /* FRINTN */
12670         only_in_vector = true;
12671         rmode = FPROUNDING_TIEEVEN;
12672         break;
12673     case 0x19: /* FRINTM */
12674         only_in_vector = true;
12675         rmode = FPROUNDING_NEGINF;
12676         break;
12677     case 0x38: /* FRINTP */
12678         only_in_vector = true;
12679         rmode = FPROUNDING_POSINF;
12680         break;
12681     case 0x39: /* FRINTZ */
12682         only_in_vector = true;
12683         rmode = FPROUNDING_ZERO;
12684         break;
12685     case 0x58: /* FRINTA */
12686         only_in_vector = true;
12687         rmode = FPROUNDING_TIEAWAY;
12688         break;
12689     case 0x59: /* FRINTX */
12690     case 0x79: /* FRINTI */
12691         only_in_vector = true;
12692         /* current rounding mode */
12693         break;
12694     case 0x1a: /* FCVTNS */
12695         rmode = FPROUNDING_TIEEVEN;
12696         break;
12697     case 0x1b: /* FCVTMS */
12698         rmode = FPROUNDING_NEGINF;
12699         break;
12700     case 0x1c: /* FCVTAS */
12701         rmode = FPROUNDING_TIEAWAY;
12702         break;
12703     case 0x3a: /* FCVTPS */
12704         rmode = FPROUNDING_POSINF;
12705         break;
12706     case 0x3b: /* FCVTZS */
12707         rmode = FPROUNDING_ZERO;
12708         break;
12709     case 0x5a: /* FCVTNU */
12710         rmode = FPROUNDING_TIEEVEN;
12711         break;
12712     case 0x5b: /* FCVTMU */
12713         rmode = FPROUNDING_NEGINF;
12714         break;
12715     case 0x5c: /* FCVTAU */
12716         rmode = FPROUNDING_TIEAWAY;
12717         break;
12718     case 0x7a: /* FCVTPU */
12719         rmode = FPROUNDING_POSINF;
12720         break;
12721     case 0x7b: /* FCVTZU */
12722         rmode = FPROUNDING_ZERO;
12723         break;
12724     case 0x2f: /* FABS */
12725     case 0x6f: /* FNEG */
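        /*
         * Pure bit operations on the half-precision sign bit (cleared
         * for FABS, flipped for FNEG), so no fpstatus is required.
         */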
12726         need_fpst = false;
12727         break;
12728     case 0x7d: /* FRSQRTE */
12729     case 0x7f: /* FSQRT (vector) */
12730         break;
12731     default:
12732         unallocated_encoding(s);
12733         return;
12734     }
12735 
12736 
12737     /* Check additional constraints for the scalar encoding */
12738     if (is_scalar) {
12739         if (!is_q) {
12740             unallocated_encoding(s);
12741             return;
12742         }
12743         /* FRINTxx is only in the vector form */
12744         if (only_in_vector) {
12745             unallocated_encoding(s);
12746             return;
12747         }
12748     }
12749 
12750     if (!fp_access_check(s)) {
12751         return;
12752     }
12753 
12754     if (rmode >= 0 || need_fpst) {
12755         tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
12756     }
12757 
12758     if (rmode >= 0) {
12759         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12760     }
12761 
12762     if (is_scalar) {
12763         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12764         TCGv_i32 tcg_res = tcg_temp_new_i32();
12765 
12766         switch (fpop) {
12767         case 0x1a: /* FCVTNS */
12768         case 0x1b: /* FCVTMS */
12769         case 0x1c: /* FCVTAS */
12770         case 0x3a: /* FCVTPS */
12771         case 0x3b: /* FCVTZS */
12772             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12773             break;
12774         case 0x3d: /* FRECPE */
12775             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12776             break;
12777         case 0x3f: /* FRECPX */
12778             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12779             break;
12780         case 0x5a: /* FCVTNU */
12781         case 0x5b: /* FCVTMU */
12782         case 0x5c: /* FCVTAU */
12783         case 0x7a: /* FCVTPU */
12784         case 0x7b: /* FCVTZU */
12785             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12786             break;
12787         case 0x6f: /* FNEG */
12788             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12789             break;
12790         case 0x7d: /* FRSQRTE */
12791             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12792             break;
12793         default:
12794             g_assert_not_reached();
12795         }
12796 
12797         /* limit any sign extension going on */
12798         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12799         write_fp_sreg(s, rd, tcg_res);
12800     } else {
12801         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12802             TCGv_i32 tcg_op = tcg_temp_new_i32();
12803             TCGv_i32 tcg_res = tcg_temp_new_i32();
12804 
12805             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12806 
12807             switch (fpop) {
12808             case 0x1a: /* FCVTNS */
12809             case 0x1b: /* FCVTMS */
12810             case 0x1c: /* FCVTAS */
12811             case 0x3a: /* FCVTPS */
12812             case 0x3b: /* FCVTZS */
12813                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12814                 break;
12815             case 0x3d: /* FRECPE */
12816                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12817                 break;
12818             case 0x5a: /* FCVTNU */
12819             case 0x5b: /* FCVTMU */
12820             case 0x5c: /* FCVTAU */
12821             case 0x7a: /* FCVTPU */
12822             case 0x7b: /* FCVTZU */
12823                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12824                 break;
12825             case 0x18: /* FRINTN */
12826             case 0x19: /* FRINTM */
12827             case 0x38: /* FRINTP */
12828             case 0x39: /* FRINTZ */
12829             case 0x58: /* FRINTA */
12830             case 0x79: /* FRINTI */
12831                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12832                 break;
12833             case 0x59: /* FRINTX */
12834                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12835                 break;
12836             case 0x2f: /* FABS */
12837                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12838                 break;
12839             case 0x6f: /* FNEG */
12840                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12841                 break;
12842             case 0x7d: /* FRSQRTE */
12843                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12844                 break;
12845             case 0x7f: /* FSQRT */
12846                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12847                 break;
12848             default:
12849                 g_assert_not_reached();
12850             }
12851 
12852             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12853         }
12854 
12855         clear_vec_high(s, is_q, rd);
12856     }
12857 
12858     if (tcg_rmode) {
12859         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12860     }
12861 }
12862 
12863 /* AdvSIMD scalar x indexed element
12864  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12865  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12866  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12867  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12868  * AdvSIMD vector x indexed element
12869  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12870  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12871  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12872  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12873  */
12874 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12875 {
12876     /* This encoding has two kinds of instruction:
12877      *  normal, where we perform elt x idxelt => elt for each
12878      *     element in the vector
12879      *  long, where we perform elt x idxelt and generate a result of
12880      *     double the width of the input element
12881      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
12882      */
12883     bool is_scalar = extract32(insn, 28, 1);
12884     bool is_q = extract32(insn, 30, 1);
12885     bool u = extract32(insn, 29, 1);
12886     int size = extract32(insn, 22, 2);
12887     int l = extract32(insn, 21, 1);
12888     int m = extract32(insn, 20, 1);
12889     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12890     int rm = extract32(insn, 16, 4);
12891     int opcode = extract32(insn, 12, 4);
12892     int h = extract32(insn, 11, 1);
12893     int rn = extract32(insn, 5, 5);
12894     int rd = extract32(insn, 0, 5);
12895     bool is_long = false;
12896     int is_fp = 0;
12897     bool is_fp16 = false;
12898     int index;
12899     TCGv_ptr fpst;
12900 
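    /*
     * Switch on U and opcode together: e.g. 0x08 is u = 0, opcode = 8
     * (MUL), while 0x19 is u = 1, opcode = 9 (FMULX).
     */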
12901     switch (16 * u + opcode) {
12902     case 0x08: /* MUL */
12903     case 0x10: /* MLA */
12904     case 0x14: /* MLS */
12905         if (is_scalar) {
12906             unallocated_encoding(s);
12907             return;
12908         }
12909         break;
12910     case 0x02: /* SMLAL, SMLAL2 */
12911     case 0x12: /* UMLAL, UMLAL2 */
12912     case 0x06: /* SMLSL, SMLSL2 */
12913     case 0x16: /* UMLSL, UMLSL2 */
12914     case 0x0a: /* SMULL, SMULL2 */
12915     case 0x1a: /* UMULL, UMULL2 */
12916         if (is_scalar) {
12917             unallocated_encoding(s);
12918             return;
12919         }
12920         is_long = true;
12921         break;
12922     case 0x03: /* SQDMLAL, SQDMLAL2 */
12923     case 0x07: /* SQDMLSL, SQDMLSL2 */
12924     case 0x0b: /* SQDMULL, SQDMULL2 */
12925         is_long = true;
12926         break;
12927     case 0x0c: /* SQDMULH */
12928     case 0x0d: /* SQRDMULH */
12929         break;
12930     case 0x01: /* FMLA */
12931     case 0x05: /* FMLS */
12932     case 0x09: /* FMUL */
12933     case 0x19: /* FMULX */
12934         is_fp = 1;
12935         break;
12936     case 0x1d: /* SQRDMLAH */
12937     case 0x1f: /* SQRDMLSH */
12938         if (!dc_isar_feature(aa64_rdm, s)) {
12939             unallocated_encoding(s);
12940             return;
12941         }
12942         break;
12943     case 0x0e: /* SDOT */
12944     case 0x1e: /* UDOT */
12945         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12946             unallocated_encoding(s);
12947             return;
12948         }
12949         break;
12950     case 0x0f:
12951         switch (size) {
12952         case 0: /* SUDOT */
12953         case 2: /* USDOT */
12954             if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
12955                 unallocated_encoding(s);
12956                 return;
12957             }
12958             size = MO_32;
12959             break;
12960         case 1: /* BFDOT */
12961             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12962                 unallocated_encoding(s);
12963                 return;
12964             }
12965             size = MO_32;
12966             break;
12967         case 3: /* BFMLAL{B,T} */
12968             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12969                 unallocated_encoding(s);
12970                 return;
12971             }
12972             /* can't set is_fp: its size checks are wrong for this insn */
12973             size = MO_16;
12974             break;
12975         default:
12976             unallocated_encoding(s);
12977             return;
12978         }
12979         break;
12980     case 0x11: /* FCMLA #0 */
12981     case 0x13: /* FCMLA #90 */
12982     case 0x15: /* FCMLA #180 */
12983     case 0x17: /* FCMLA #270 */
12984         if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
12985             unallocated_encoding(s);
12986             return;
12987         }
12988         is_fp = 2;
12989         break;
12990     case 0x00: /* FMLAL */
12991     case 0x04: /* FMLSL */
12992     case 0x18: /* FMLAL2 */
12993     case 0x1c: /* FMLSL2 */
12994         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
12995             unallocated_encoding(s);
12996             return;
12997         }
12998         size = MO_16;
12999         /* These are FP insns, but the helper takes cpu_env, not fp_status.  */
13000         break;
13001     default:
13002         unallocated_encoding(s);
13003         return;
13004     }
13005 
13006     switch (is_fp) {
13007     case 1: /* normal fp */
13008         /* convert insn encoded size to MemOp size */
13009         switch (size) {
13010         case 0: /* half-precision */
13011             size = MO_16;
13012             is_fp16 = true;
13013             break;
13014         case MO_32: /* single precision */
13015         case MO_64: /* double precision */
13016             break;
13017         default:
13018             unallocated_encoding(s);
13019             return;
13020         }
13021         break;
13022 
13023     case 2: /* complex fp */
13024         /* Each indexable element is a complex pair.  */
13025         size += 1;
13026         switch (size) {
13027         case MO_32:
13028             if (h && !is_q) {
13029                 unallocated_encoding(s);
13030                 return;
13031             }
13032             is_fp16 = true;
13033             break;
13034         case MO_64:
13035             break;
13036         default:
13037             unallocated_encoding(s);
13038             return;
13039         }
13040         break;
13041 
13042     default: /* integer */
13043         switch (size) {
13044         case MO_8:
13045         case MO_64:
13046             unallocated_encoding(s);
13047             return;
13048         }
13049         break;
13050     }
13051     if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
13052         unallocated_encoding(s);
13053         return;
13054     }
13055 
13056     /* Given MemOp size, adjust register and indexing.  */
13057     switch (size) {
13058     case MO_16:
13059         index = h << 2 | l << 1 | m;
13060         break;
13061     case MO_32:
13062         index = h << 1 | l;
13063         rm |= m << 4;
13064         break;
13065     case MO_64:
13066         if (l || !is_q) {
13067             unallocated_encoding(s);
13068             return;
13069         }
13070         index = h;
13071         rm |= m << 4;
13072         break;
13073     default:
13074         g_assert_not_reached();
13075     }
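    /*
     * So for MO_16 the index is the 3-bit value H:L:M, picking one of
     * eight 16-bit lanes of Rm, while for MO_32 and MO_64 the M bit is
     * instead used as bit 4 of the register number.
     */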
13076 
13077     if (!fp_access_check(s)) {
13078         return;
13079     }
13080 
13081     if (is_fp) {
13082         fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
13083     } else {
13084         fpst = NULL;
13085     }
13086 
13087     switch (16 * u + opcode) {
13088     case 0x0e: /* SDOT */
13089     case 0x1e: /* UDOT */
13090         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
13091                          u ? gen_helper_gvec_udot_idx_b
13092                          : gen_helper_gvec_sdot_idx_b);
13093         return;
13094     case 0x0f:
13095         switch (extract32(insn, 22, 2)) {
13096         case 0: /* SUDOT */
13097             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
13098                              gen_helper_gvec_sudot_idx_b);
13099             return;
13100         case 1: /* BFDOT */
13101             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
13102                              gen_helper_gvec_bfdot_idx);
13103             return;
13104         case 2: /* USDOT */
13105             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
13106                              gen_helper_gvec_usdot_idx_b);
13107             return;
13108         case 3: /* BFMLAL{B,T} */
13109             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
13110                               gen_helper_gvec_bfmlal_idx);
13111             return;
13112         }
13113         g_assert_not_reached();
13114     case 0x11: /* FCMLA #0 */
13115     case 0x13: /* FCMLA #90 */
13116     case 0x15: /* FCMLA #180 */
13117     case 0x17: /* FCMLA #270 */
13118         {
13119             int rot = extract32(insn, 13, 2);
13120             int data = (index << 2) | rot;
13121             tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
13122                                vec_full_reg_offset(s, rn),
13123                                vec_full_reg_offset(s, rm),
13124                                vec_full_reg_offset(s, rd), fpst,
13125                                is_q ? 16 : 8, vec_full_reg_size(s), data,
13126                                size == MO_64
13127                                ? gen_helper_gvec_fcmlas_idx
13128                                : gen_helper_gvec_fcmlah_idx);
13129         }
13130         return;
13131 
13132     case 0x00: /* FMLAL */
13133     case 0x04: /* FMLSL */
13134     case 0x18: /* FMLAL2 */
13135     case 0x1c: /* FMLSL2 */
13136         {
13137             int is_s = extract32(opcode, 2, 1);
13138             int is_2 = u;
13139             int data = (index << 2) | (is_2 << 1) | is_s;
13140             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
13141                                vec_full_reg_offset(s, rn),
13142                                vec_full_reg_offset(s, rm), cpu_env,
13143                                is_q ? 16 : 8, vec_full_reg_size(s),
13144                                data, gen_helper_gvec_fmlal_idx_a64);
13145         }
13146         return;
13147 
13148     case 0x08: /* MUL */
13149         if (!is_long && !is_scalar) {
13150             static gen_helper_gvec_3 * const fns[3] = {
13151                 gen_helper_gvec_mul_idx_h,
13152                 gen_helper_gvec_mul_idx_s,
13153                 gen_helper_gvec_mul_idx_d,
13154             };
13155             tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
13156                                vec_full_reg_offset(s, rn),
13157                                vec_full_reg_offset(s, rm),
13158                                is_q ? 16 : 8, vec_full_reg_size(s),
13159                                index, fns[size - 1]);
13160             return;
13161         }
13162         break;
13163 
13164     case 0x10: /* MLA */
13165         if (!is_long && !is_scalar) {
13166             static gen_helper_gvec_4 * const fns[3] = {
13167                 gen_helper_gvec_mla_idx_h,
13168                 gen_helper_gvec_mla_idx_s,
13169                 gen_helper_gvec_mla_idx_d,
13170             };
13171             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13172                                vec_full_reg_offset(s, rn),
13173                                vec_full_reg_offset(s, rm),
13174                                vec_full_reg_offset(s, rd),
13175                                is_q ? 16 : 8, vec_full_reg_size(s),
13176                                index, fns[size - 1]);
13177             return;
13178         }
13179         break;
13180 
13181     case 0x14: /* MLS */
13182         if (!is_long && !is_scalar) {
13183             static gen_helper_gvec_4 * const fns[3] = {
13184                 gen_helper_gvec_mls_idx_h,
13185                 gen_helper_gvec_mls_idx_s,
13186                 gen_helper_gvec_mls_idx_d,
13187             };
13188             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13189                                vec_full_reg_offset(s, rn),
13190                                vec_full_reg_offset(s, rm),
13191                                vec_full_reg_offset(s, rd),
13192                                is_q ? 16 : 8, vec_full_reg_size(s),
13193                                index, fns[size - 1]);
13194             return;
13195         }
13196         break;
13197     }
13198 
13199     if (size == 3) {
13200         TCGv_i64 tcg_idx = tcg_temp_new_i64();
13201         int pass;
13202 
13203         assert(is_fp && is_q && !is_long);
13204 
13205         read_vec_element(s, tcg_idx, rm, index, MO_64);
13206 
13207         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13208             TCGv_i64 tcg_op = tcg_temp_new_i64();
13209             TCGv_i64 tcg_res = tcg_temp_new_i64();
13210 
13211             read_vec_element(s, tcg_op, rn, pass, MO_64);
13212 
13213             switch (16 * u + opcode) {
13214             case 0x05: /* FMLS */
13215                 /* As usual for ARM, separate negation for fused multiply-add */
13216                 gen_helper_vfp_negd(tcg_op, tcg_op);
13217                 /* fall through */
13218             case 0x01: /* FMLA */
13219                 read_vec_element(s, tcg_res, rd, pass, MO_64);
13220                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
13221                 break;
13222             case 0x09: /* FMUL */
13223                 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
13224                 break;
13225             case 0x19: /* FMULX */
13226                 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
13227                 break;
13228             default:
13229                 g_assert_not_reached();
13230             }
13231 
13232             write_vec_element(s, tcg_res, rd, pass, MO_64);
13233         }
13234 
13235         clear_vec_high(s, !is_scalar, rd);
13236     } else if (!is_long) {
13237         /* 32 bit floating point, or 16 or 32 bit integer.
13238          * For the 16 bit scalar case we use the usual Neon helpers and
13239          * rely on the fact that 0 op 0 == 0 with no side effects.
13240          */
13241         TCGv_i32 tcg_idx = tcg_temp_new_i32();
13242         int pass, maxpasses;
13243 
13244         if (is_scalar) {
13245             maxpasses = 1;
13246         } else {
13247             maxpasses = is_q ? 4 : 2;
13248         }
13249 
13250         read_vec_element_i32(s, tcg_idx, rm, index, size);
13251 
13252         if (size == 1 && !is_scalar) {
13253             /* The simplest way to handle the 16x16 indexed ops is to duplicate
13254              * the index into both halves of the 32 bit tcg_idx and then use
13255              * the usual Neon helpers.
13256              */
13257             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
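            /*
             * e.g. an index element of 0x0000abcd becomes 0xabcdabcd,
             * so one 32-bit helper call multiplies both 16-bit lanes
             * by the same element.
             */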
13258         }
13259 
13260         for (pass = 0; pass < maxpasses; pass++) {
13261             TCGv_i32 tcg_op = tcg_temp_new_i32();
13262             TCGv_i32 tcg_res = tcg_temp_new_i32();
13263 
13264             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13265 
13266             switch (16 * u + opcode) {
13267             case 0x08: /* MUL */
13268             case 0x10: /* MLA */
13269             case 0x14: /* MLS */
13270             {
13271                 static NeonGenTwoOpFn * const fns[2][2] = {
13272                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13273                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
13274                 };
13275                 NeonGenTwoOpFn *genfn;
13276                 bool is_sub = opcode == 0x4;
13277 
13278                 if (size == 1) {
13279                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13280                 } else {
13281                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13282                 }
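                /* for MUL there is no accumulate step */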
13283                 if (opcode == 0x8) {
13284                     break;
13285                 }
13286                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13287                 genfn = fns[size - 1][is_sub];
13288                 genfn(tcg_res, tcg_op, tcg_res);
13289                 break;
13290             }
13291             case 0x05: /* FMLS */
13292             case 0x01: /* FMLA */
13293                 read_vec_element_i32(s, tcg_res, rd, pass,
13294                                      is_scalar ? size : MO_32);
13295                 switch (size) {
13296                 case 1:
13297                     if (opcode == 0x5) {
13298                         /* As usual for ARM, separate negation for fused
13299                          * multiply-add */
13300                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13301                     }
13302                     if (is_scalar) {
13303                         gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13304                                                    tcg_res, fpst);
13305                     } else {
13306                         gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13307                                                     tcg_res, fpst);
13308                     }
13309                     break;
13310                 case 2:
13311                     if (opcode == 0x5) {
13312                         /* As usual for ARM, separate negation for
13313                          * fused multiply-add */
13314                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13315                     }
13316                     gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13317                                            tcg_res, fpst);
13318                     break;
13319                 default:
13320                     g_assert_not_reached();
13321                 }
13322                 break;
13323             case 0x09: /* FMUL */
13324                 switch (size) {
13325                 case 1:
13326                     if (is_scalar) {
13327                         gen_helper_advsimd_mulh(tcg_res, tcg_op,
13328                                                 tcg_idx, fpst);
13329                     } else {
13330                         gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13331                                                  tcg_idx, fpst);
13332                     }
13333                     break;
13334                 case 2:
13335                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13336                     break;
13337                 default:
13338                     g_assert_not_reached();
13339                 }
13340                 break;
13341             case 0x19: /* FMULX */
13342                 switch (size) {
13343                 case 1:
13344                     if (is_scalar) {
13345                         gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13346                                                  tcg_idx, fpst);
13347                     } else {
13348                         gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13349                                                   tcg_idx, fpst);
13350                     }
13351                     break;
13352                 case 2:
13353                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13354                     break;
13355                 default:
13356                     g_assert_not_reached();
13357                 }
13358                 break;
13359             case 0x0c: /* SQDMULH */
13360                 if (size == 1) {
13361                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
13362                                                tcg_op, tcg_idx);
13363                 } else {
13364                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
13365                                                tcg_op, tcg_idx);
13366                 }
13367                 break;
13368             case 0x0d: /* SQRDMULH */
13369                 if (size == 1) {
13370                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
13371                                                 tcg_op, tcg_idx);
13372                 } else {
13373                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
13374                                                 tcg_op, tcg_idx);
13375                 }
13376                 break;
13377             case 0x1d: /* SQRDMLAH */
13378                 read_vec_element_i32(s, tcg_res, rd, pass,
13379                                      is_scalar ? size : MO_32);
13380                 if (size == 1) {
13381                     gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
13382                                                 tcg_op, tcg_idx, tcg_res);
13383                 } else {
13384                     gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
13385                                                 tcg_op, tcg_idx, tcg_res);
13386                 }
13387                 break;
13388             case 0x1f: /* SQRDMLSH */
13389                 read_vec_element_i32(s, tcg_res, rd, pass,
13390                                      is_scalar ? size : MO_32);
13391                 if (size == 1) {
13392                     gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
13393                                                 tcg_op, tcg_idx, tcg_res);
13394                 } else {
13395                     gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
13396                                                 tcg_op, tcg_idx, tcg_res);
13397                 }
13398                 break;
13399             default:
13400                 g_assert_not_reached();
13401             }
13402 
13403             if (is_scalar) {
13404                 write_fp_sreg(s, rd, tcg_res);
13405             } else {
13406                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13407             }
13408         }
13409 
13410         clear_vec_high(s, is_q, rd);
13411     } else {
13412         /* long ops: 16x16->32 or 32x32->64 */
13413         TCGv_i64 tcg_res[2];
13414         int pass;
13415         bool satop = extract32(opcode, 0, 1);
13416         MemOp memop = MO_32;
13417 
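        /*
         * satop covers the saturating-doubling SQDMULL/SQDMLAL/SQDMLSL
         * forms, which are always signed; for the rest, U distinguishes
         * the unsigned long ops from the signed ones.
         */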
13418         if (satop || !u) {
13419             memop |= MO_SIGN;
13420         }
13421 
13422         if (size == 2) {
13423             TCGv_i64 tcg_idx = tcg_temp_new_i64();
13424 
13425             read_vec_element(s, tcg_idx, rm, index, memop);
13426 
13427             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13428                 TCGv_i64 tcg_op = tcg_temp_new_i64();
13429                 TCGv_i64 tcg_passres;
13430                 int passelt;
13431 
13432                 if (is_scalar) {
13433                     passelt = 0;
13434                 } else {
13435                     passelt = pass + (is_q * 2);
13436                 }
13437 
13438                 read_vec_element(s, tcg_op, rn, passelt, memop);
13439 
13440                 tcg_res[pass] = tcg_temp_new_i64();
13441 
13442                 if (opcode == 0xa || opcode == 0xb) {
13443                     /* Non-accumulating ops */
13444                     tcg_passres = tcg_res[pass];
13445                 } else {
13446                     tcg_passres = tcg_temp_new_i64();
13447                 }
13448 
13449                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13450 
13451                 if (satop) {
13452                     /* saturating, doubling */
13453                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
13454                                                       tcg_passres, tcg_passres);
13455                 }
13456 
13457                 if (opcode == 0xa || opcode == 0xb) {
13458                     continue;
13459                 }
13460 
13461                 /* Accumulating op: handle accumulate step */
13462                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13463 
13464                 switch (opcode) {
13465                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13466                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13467                     break;
13468                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13469                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13470                     break;
13471                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13472                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
13473                     /* fall through */
13474                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13475                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
13476                                                       tcg_res[pass],
13477                                                       tcg_passres);
13478                     break;
13479                 default:
13480                     g_assert_not_reached();
13481                 }
13482             }
13483 
13484             clear_vec_high(s, !is_scalar, rd);
13485         } else {
13486             TCGv_i32 tcg_idx = tcg_temp_new_i32();
13487 
13488             assert(size == 1);
13489             read_vec_element_i32(s, tcg_idx, rm, index, size);
13490 
13491             if (!is_scalar) {
13492                 /* The simplest way to handle the 16x16 indexed ops is to
13493                  * duplicate the index into both halves of the 32 bit tcg_idx
13494                  * and then use the usual Neon helpers.
13495                  */
13496                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13497             }
13498 
13499             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13500                 TCGv_i32 tcg_op = tcg_temp_new_i32();
13501                 TCGv_i64 tcg_passres;
13502 
13503                 if (is_scalar) {
13504                     read_vec_element_i32(s, tcg_op, rn, pass, size);
13505                 } else {
13506                     read_vec_element_i32(s, tcg_op, rn,
13507                                          pass + (is_q * 2), MO_32);
13508                 }
13509 
13510                 tcg_res[pass] = tcg_temp_new_i64();
13511 
13512                 if (opcode == 0xa || opcode == 0xb) {
13513                     /* Non-accumulating ops */
13514                     tcg_passres = tcg_res[pass];
13515                 } else {
13516                     tcg_passres = tcg_temp_new_i64();
13517                 }
13518 
13519                 if (memop & MO_SIGN) {
13520                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13521                 } else {
13522                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13523                 }
13524                 if (satop) {
13525                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13526                                                       tcg_passres, tcg_passres);
13527                 }
13528 
13529                 if (opcode == 0xa || opcode == 0xb) {
13530                     continue;
13531                 }
13532 
13533                 /* Accumulating op: handle accumulate step */
13534                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13535 
13536                 switch (opcode) {
13537                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13538                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13539                                              tcg_passres);
13540                     break;
13541                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13542                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13543                                              tcg_passres);
13544                     break;
13545                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13546                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13547                     /* fall through */
13548                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13549                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13550                                                       tcg_res[pass],
13551                                                       tcg_passres);
13552                     break;
13553                 default:
13554                     g_assert_not_reached();
13555                 }
13556             }
13557 
13558             if (is_scalar) {
13559                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13560             }
13561         }
13562 
13563         if (is_scalar) {
13564             tcg_res[1] = tcg_constant_i64(0);
13565         }
13566 
13567         for (pass = 0; pass < 2; pass++) {
13568             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13569         }
13570     }
13571 }
13572 
13573 /* Crypto AES
13574  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13575  * +-----------------+------+-----------+--------+-----+------+------+
13576  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13577  * +-----------------+------+-----------+--------+-----+------+------+
13578  */
13579 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13580 {
13581     int size = extract32(insn, 22, 2);
13582     int opcode = extract32(insn, 12, 5);
13583     int rn = extract32(insn, 5, 5);
13584     int rd = extract32(insn, 0, 5);
13585     int decrypt;
13586     gen_helper_gvec_2 *genfn2 = NULL;
13587     gen_helper_gvec_3 *genfn3 = NULL;
13588 
13589     if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13590         unallocated_encoding(s);
13591         return;
13592     }
13593 
13594     switch (opcode) {
13595     case 0x4: /* AESE */
13596         decrypt = 0;
13597         genfn3 = gen_helper_crypto_aese;
13598         break;
13599     case 0x6: /* AESMC */
13600         decrypt = 0;
13601         genfn2 = gen_helper_crypto_aesmc;
13602         break;
13603     case 0x5: /* AESD */
13604         decrypt = 1;
13605         genfn3 = gen_helper_crypto_aese;
13606         break;
13607     case 0x7: /* AESIMC */
13608         decrypt = 1;
13609         genfn2 = gen_helper_crypto_aesmc;
13610         break;
13611     default:
13612         unallocated_encoding(s);
13613         return;
13614     }
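
    /*
     * AESE/AESD share one helper and AESMC/AESIMC another: the decrypt
     * flag is passed down as the helper's data immediate.
     */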
13615 
13616     if (!fp_access_check(s)) {
13617         return;
13618     }
13619     if (genfn2) {
13620         gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2);
13621     } else {
13622         gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3);
13623     }
13624 }
13625 
13626 /* Crypto three-reg SHA
13627  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
13628  * +-----------------+------+---+------+---+--------+-----+------+------+
13629  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
13630  * +-----------------+------+---+------+---+--------+-----+------+------+
13631  */
13632 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13633 {
13634     int size = extract32(insn, 22, 2);
13635     int opcode = extract32(insn, 12, 3);
13636     int rm = extract32(insn, 16, 5);
13637     int rn = extract32(insn, 5, 5);
13638     int rd = extract32(insn, 0, 5);
13639     gen_helper_gvec_3 *genfn;
13640     bool feature;
13641 
13642     if (size != 0) {
13643         unallocated_encoding(s);
13644         return;
13645     }
13646 
13647     switch (opcode) {
13648     case 0: /* SHA1C */
13649         genfn = gen_helper_crypto_sha1c;
13650         feature = dc_isar_feature(aa64_sha1, s);
13651         break;
13652     case 1: /* SHA1P */
13653         genfn = gen_helper_crypto_sha1p;
13654         feature = dc_isar_feature(aa64_sha1, s);
13655         break;
13656     case 2: /* SHA1M */
13657         genfn = gen_helper_crypto_sha1m;
13658         feature = dc_isar_feature(aa64_sha1, s);
13659         break;
13660     case 3: /* SHA1SU0 */
13661         genfn = gen_helper_crypto_sha1su0;
13662         feature = dc_isar_feature(aa64_sha1, s);
13663         break;
13664     case 4: /* SHA256H */
13665         genfn = gen_helper_crypto_sha256h;
13666         feature = dc_isar_feature(aa64_sha256, s);
13667         break;
13668     case 5: /* SHA256H2 */
13669         genfn = gen_helper_crypto_sha256h2;
13670         feature = dc_isar_feature(aa64_sha256, s);
13671         break;
13672     case 6: /* SHA256SU1 */
13673         genfn = gen_helper_crypto_sha256su1;
13674         feature = dc_isar_feature(aa64_sha256, s);
13675         break;
13676     default:
13677         unallocated_encoding(s);
13678         return;
13679     }
13680 
13681     if (!feature) {
13682         unallocated_encoding(s);
13683         return;
13684     }
13685 
13686     if (!fp_access_check(s)) {
13687         return;
13688     }
13689     gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
13690 }
13691 
13692 /* Crypto two-reg SHA
13693  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13694  * +-----------------+------+-----------+--------+-----+------+------+
13695  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13696  * +-----------------+------+-----------+--------+-----+------+------+
13697  */
13698 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13699 {
13700     int size = extract32(insn, 22, 2);
13701     int opcode = extract32(insn, 12, 5);
13702     int rn = extract32(insn, 5, 5);
13703     int rd = extract32(insn, 0, 5);
13704     gen_helper_gvec_2 *genfn;
13705     bool feature;
13706 
13707     if (size != 0) {
13708         unallocated_encoding(s);
13709         return;
13710     }
13711 
13712     switch (opcode) {
13713     case 0: /* SHA1H */
13714         feature = dc_isar_feature(aa64_sha1, s);
13715         genfn = gen_helper_crypto_sha1h;
13716         break;
13717     case 1: /* SHA1SU1 */
13718         feature = dc_isar_feature(aa64_sha1, s);
13719         genfn = gen_helper_crypto_sha1su1;
13720         break;
13721     case 2: /* SHA256SU0 */
13722         feature = dc_isar_feature(aa64_sha256, s);
13723         genfn = gen_helper_crypto_sha256su0;
13724         break;
13725     default:
13726         unallocated_encoding(s);
13727         return;
13728     }
13729 
13730     if (!feature) {
13731         unallocated_encoding(s);
13732         return;
13733     }
13734 
13735     if (!fp_access_check(s)) {
13736         return;
13737     }
13738     gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
13739 }
13740 
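/*
 * RAX1 (FEAT_SHA3): d = n ^ rol64(m, 1) per 64-bit element.  We provide
 * an i64 expansion, a TCG vector expansion and an out-of-line helper,
 * and let the gvec machinery pick whichever suits the host.
 */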
13741 static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
13742 {
13743     tcg_gen_rotli_i64(d, m, 1);
13744     tcg_gen_xor_i64(d, d, n);
13745 }
13746 
13747 static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
13748 {
13749     tcg_gen_rotli_vec(vece, d, m, 1);
13750     tcg_gen_xor_vec(vece, d, d, n);
13751 }
13752 
13753 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
13754                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
13755 {
13756     static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
13757     static const GVecGen3 op = {
13758         .fni8 = gen_rax1_i64,
13759         .fniv = gen_rax1_vec,
13760         .opt_opc = vecop_list,
13761         .fno = gen_helper_crypto_rax1,
13762         .vece = MO_64,
13763     };
13764     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
13765 }
13766 
13767 /* Crypto three-reg SHA512
13768  *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
13769  * +-----------------------+------+---+---+-----+--------+------+------+
13770  * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
13771  * +-----------------------+------+---+---+-----+--------+------+------+
13772  */
13773 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13774 {
13775     int opcode = extract32(insn, 10, 2);
13776     int o =  extract32(insn, 14, 1);
13777     int rm = extract32(insn, 16, 5);
13778     int rn = extract32(insn, 5, 5);
13779     int rd = extract32(insn, 0, 5);
13780     bool feature;
13781     gen_helper_gvec_3 *oolfn = NULL;
13782     GVecGen3Fn *gvecfn = NULL;
13783 
13784     if (o == 0) {
13785         switch (opcode) {
13786         case 0: /* SHA512H */
13787             feature = dc_isar_feature(aa64_sha512, s);
13788             oolfn = gen_helper_crypto_sha512h;
13789             break;
13790         case 1: /* SHA512H2 */
13791             feature = dc_isar_feature(aa64_sha512, s);
13792             oolfn = gen_helper_crypto_sha512h2;
13793             break;
13794         case 2: /* SHA512SU1 */
13795             feature = dc_isar_feature(aa64_sha512, s);
13796             oolfn = gen_helper_crypto_sha512su1;
13797             break;
13798         case 3: /* RAX1 */
13799             feature = dc_isar_feature(aa64_sha3, s);
13800             gvecfn = gen_gvec_rax1;
13801             break;
13802         default:
13803             g_assert_not_reached();
13804         }
13805     } else {
13806         switch (opcode) {
13807         case 0: /* SM3PARTW1 */
13808             feature = dc_isar_feature(aa64_sm3, s);
13809             oolfn = gen_helper_crypto_sm3partw1;
13810             break;
13811         case 1: /* SM3PARTW2 */
13812             feature = dc_isar_feature(aa64_sm3, s);
13813             oolfn = gen_helper_crypto_sm3partw2;
13814             break;
13815         case 2: /* SM4EKEY */
13816             feature = dc_isar_feature(aa64_sm4, s);
13817             oolfn = gen_helper_crypto_sm4ekey;
13818             break;
13819         default:
13820             unallocated_encoding(s);
13821             return;
13822         }
13823     }
13824 
13825     if (!feature) {
13826         unallocated_encoding(s);
13827         return;
13828     }
13829 
13830     if (!fp_access_check(s)) {
13831         return;
13832     }
13833 
13834     if (oolfn) {
13835         gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
13836     } else {
13837         gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
13838     }
13839 }
13840 
13841 /* Crypto two-reg SHA512
13842  *  31                                     12  11  10  9    5 4    0
13843  * +-----------------------------------------+--------+------+------+
13844  * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
13845  * +-----------------------------------------+--------+------+------+
13846  */
13847 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13848 {
13849     int opcode = extract32(insn, 10, 2);
13850     int rn = extract32(insn, 5, 5);
13851     int rd = extract32(insn, 0, 5);
13852     bool feature;
13853 
13854     switch (opcode) {
13855     case 0: /* SHA512SU0 */
13856         feature = dc_isar_feature(aa64_sha512, s);
13857         break;
13858     case 1: /* SM4E */
13859         feature = dc_isar_feature(aa64_sm4, s);
13860         break;
13861     default:
13862         unallocated_encoding(s);
13863         return;
13864     }
13865 
13866     if (!feature) {
13867         unallocated_encoding(s);
13868         return;
13869     }
13870 
13871     if (!fp_access_check(s)) {
13872         return;
13873     }
13874 
13875     switch (opcode) {
13876     case 0: /* SHA512SU0 */
13877         gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
13878         break;
13879     case 1: /* SM4E */
13880         gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
13881         break;
13882     default:
13883         g_assert_not_reached();
13884     }
13885 }
13886 
13887 /* Crypto four-register
13888  *  31               23 22 21 20  16 15  14  10 9    5 4    0
13889  * +-------------------+-----+------+---+------+------+------+
13890  * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
13891  * +-------------------+-----+------+---+------+------+------+
13892  */
13893 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13894 {
13895     int op0 = extract32(insn, 21, 2);
13896     int rm = extract32(insn, 16, 5);
13897     int ra = extract32(insn, 10, 5);
13898     int rn = extract32(insn, 5, 5);
13899     int rd = extract32(insn, 0, 5);
13900     bool feature;
13901 
13902     switch (op0) {
13903     case 0: /* EOR3 */
13904     case 1: /* BCAX */
13905         feature = dc_isar_feature(aa64_sha3, s);
13906         break;
13907     case 2: /* SM3SS1 */
13908         feature = dc_isar_feature(aa64_sm3, s);
13909         break;
13910     default:
13911         unallocated_encoding(s);
13912         return;
13913     }
13914 
13915     if (!feature) {
13916         unallocated_encoding(s);
13917         return;
13918     }
13919 
13920     if (!fp_access_check(s)) {
13921         return;
13922     }
13923 
13924     if (op0 < 2) {
13925         TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13926         int pass;
13927 
13928         tcg_op1 = tcg_temp_new_i64();
13929         tcg_op2 = tcg_temp_new_i64();
13930         tcg_op3 = tcg_temp_new_i64();
13931         tcg_res[0] = tcg_temp_new_i64();
13932         tcg_res[1] = tcg_temp_new_i64();
13933 
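        /*
         * Architecturally, per 64-bit pass over the vector:
         *   EOR3: Vd = Vn ^ Vm ^ Va
         *   BCAX: Vd = Vn ^ (Vm & ~Va)
         * The Vm/Va term is formed first below, then XORed with Vn.
         */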
13934         for (pass = 0; pass < 2; pass++) {
13935             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13936             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13937             read_vec_element(s, tcg_op3, ra, pass, MO_64);
13938 
13939             if (op0 == 0) {
13940                 /* EOR3 */
13941                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13942             } else {
13943                 /* BCAX */
13944                 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13945             }
13946             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13947         }
13948         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13949         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13950     } else {
13951         TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13952 
13953         tcg_op1 = tcg_temp_new_i32();
13954         tcg_op2 = tcg_temp_new_i32();
13955         tcg_op3 = tcg_temp_new_i32();
13956         tcg_res = tcg_temp_new_i32();
13957         tcg_zero = tcg_constant_i32(0);
13958 
13959         read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13960         read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13961         read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13962 
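        /*
         * SM3SS1: Vd.S[3] = ROL32(ROL32(Vn.S[3], 12) + Vm.S[3] + Va.S[3], 7),
         * with the other three lanes of Vd zeroed.  The rotate-lefts by
         * 12 and 7 appear below as rotate-rights by 20 and 25.
         */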
13963         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13964         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13965         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13966         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13967 
13968         write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13969         write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13970         write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13971         write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13972     }
13973 }
13974 
13975 /* Crypto XAR
13976  *  31                   21 20  16 15    10 9    5 4    0
13977  * +-----------------------+------+--------+------+------+
13978  * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13979  * +-----------------------+------+--------+------+------+
13980  */
13981 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13982 {
13983     int rm = extract32(insn, 16, 5);
13984     int imm6 = extract32(insn, 10, 6);
13985     int rn = extract32(insn, 5, 5);
13986     int rd = extract32(insn, 0, 5);
13987 
13988     if (!dc_isar_feature(aa64_sha3, s)) {
13989         unallocated_encoding(s);
13990         return;
13991     }
13992 
13993     if (!fp_access_check(s)) {
13994         return;
13995     }
13996 
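    /* XAR: each 64-bit lane of Vd = ROR64(Vn ^ Vm, imm6). */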
13997     gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
13998                  vec_full_reg_offset(s, rn),
13999                  vec_full_reg_offset(s, rm), imm6, 16,
14000                  vec_full_reg_size(s));
14001 }
14002 
14003 /* Crypto three-reg imm2
14004  *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
14005  * +-----------------------+------+-----+------+--------+------+------+
14006  * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
14007  * +-----------------------+------+-----+------+--------+------+------+
14008  */
14009 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
14010 {
14011     static gen_helper_gvec_3 * const fns[4] = {
14012         gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
14013         gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
14014     };
14015     int opcode = extract32(insn, 10, 2);
14016     int imm2 = extract32(insn, 12, 2);
14017     int rm = extract32(insn, 16, 5);
14018     int rn = extract32(insn, 5, 5);
14019     int rd = extract32(insn, 0, 5);
14020 
14021     if (!dc_isar_feature(aa64_sm3, s)) {
14022         unallocated_encoding(s);
14023         return;
14024     }
14025 
14026     if (!fp_access_check(s)) {
14027         return;
14028     }
14029 
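    /*
     * imm2 selects the 32-bit lane to operate on; it is passed through
     * to the helper as the out-of-line data argument.
     */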
14030     gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
14031 }
14032 
14033 /* C3.6 Data processing - SIMD, including Crypto
14034  *
14035  * As the decode gets a little complex, we use a table-based
14036  * approach for this part of the decode.
14037  */
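/*
 * An entry matches when (insn & mask) == pattern; lookup_disas_fn()
 * returns the first match, so more specific encodings must precede any
 * broader pattern that overlaps them (see simd_mod_imm below).
 */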
14038 static const AArch64DecodeTable data_proc_simd[] = {
14039     /* pattern  ,  mask     ,  fn                        */
14040     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
14041     { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
14042     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
14043     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
14044     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
14045     { 0x0e000400, 0x9fe08400, disas_simd_copy },
14046     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
14047     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
14048     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
14049     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
14050     { 0x0e000000, 0xbf208c00, disas_simd_tb },
14051     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
14052     { 0x2e000000, 0xbf208400, disas_simd_ext },
14053     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
14054     { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
14055     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
14056     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
14057     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
14058     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
14059     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
14060     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
14061     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
14062     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
14063     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
14064     { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
14065     { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
14066     { 0xce000000, 0xff808000, disas_crypto_four_reg },
14067     { 0xce800000, 0xffe00000, disas_crypto_xar },
14068     { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
14069     { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
14070     { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
14071     { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
14072     { 0x00000000, 0x00000000, NULL }
14073 };
14074 
14075 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
14076 {
14077     /* Note that this is called with all non-FP cases from
14078      * table C3-6, so it must UNDEF for entries not specifically
14079      * allocated to instructions in that table.
14080      */
14081     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
14082     if (fn) {
14083         fn(s, insn);
14084     } else {
14085         unallocated_encoding(s);
14086     }
14087 }
14088 
14089 /* C3.6 Data processing - SIMD and floating point */
14090 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
14091 {
14092     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
14093         disas_data_proc_fp(s, insn);
14094     } else {
14095         /* SIMD, including crypto */
14096         disas_data_proc_simd(s, insn);
14097     }
14098 }
14099 
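/*
 * Trans functions for the sme-fa64 decoder: instructions matched as
 * FAIL are not valid in streaming SVE mode, so they are flagged here
 * and the later access checks raise the SME non-streaming trap.
 */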
14100 static bool trans_OK(DisasContext *s, arg_OK *a)
14101 {
14102     return true;
14103 }
14104 
14105 static bool trans_FAIL(DisasContext *s, arg_OK *a)
14106 {
14107     s->is_nonstreaming = true;
14108     return true;
14109 }
14110 
14111 /**
14112  * is_guarded_page:
14113  * @env: The cpu environment
14114  * @s: The DisasContext
14115  *
14116  * Return true if the page is guarded.
14117  */
14118 static bool is_guarded_page(CPUARMState *env, DisasContext *s)
14119 {
14120     uint64_t addr = s->base.pc_first;
14121 #ifdef CONFIG_USER_ONLY
14122     return page_get_flags(addr) & PAGE_BTI;
14123 #else
14124     CPUTLBEntryFull *full;
14125     void *host;
14126     int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
14127     int flags;
14128 
14129     /*
14130      * We test this immediately after reading an insn, which means
14131      * that the TLB entry must be present and valid, and thus this
14132      * access will never raise an exception.
14133      */
14134     flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
14135                               false, &host, &full, 0);
14136     assert(!(flags & TLB_INVALID_MASK));
14137 
14138     return full->guarded;
14139 #endif
14140 }
14141 
14142 /**
14143  * btype_destination_ok:
14144  * @insn: The instruction at the branch destination
14145  * @bt: SCTLR_ELx.BT
14146  * @btype: PSTATE.BTYPE, which is known to be non-zero
14147  *
14148  * On a guarded page, there are a limited number of insns
14149  * that may be present at the branch target:
14150  *   - branch target identifiers,
14151  *   - PACIASP and PACIBSP,
14152  *   - the BRK insn,
14153  *   - the HLT insn.
14154  * Anything else causes a Branch Target Exception.
14155  *
14156  * Return true if the branch is compatible, false to raise BTITRAP.
14157  */
14158 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
14159 {
14160     if ((insn & 0xfffff01fu) == 0xd503201fu) {
14161         /* HINT space */
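        /* 0xd503201f is the architected NOP; CRm:op2 in bits [11:5] selects the hint. */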
14162         switch (extract32(insn, 5, 7)) {
14163         case 0b011001: /* PACIASP */
14164         case 0b011011: /* PACIBSP */
14165             /*
14166              * If SCTLR_ELx.BT, then PACI*SP are not compatible
14167              * with btype == 3.  Otherwise all btype are ok.
14168              */
14169             return !bt || btype != 3;
14170         case 0b100000: /* BTI */
14171             /* Not compatible with any btype.  */
14172             return false;
14173         case 0b100010: /* BTI c */
14174             /* Not compatible with btype == 3 */
14175             return btype != 3;
14176         case 0b100100: /* BTI j */
14177             /* Not compatible with btype == 2 */
14178             return btype != 2;
14179         case 0b100110: /* BTI jc */
14180             /* Compatible with any btype.  */
14181             return true;
14182         }
14183     } else {
14184         switch (insn & 0xffe0001fu) {
14185         case 0xd4200000u: /* BRK */
14186         case 0xd4400000u: /* HLT */
14187             /* Give priority to the breakpoint exception.  */
14188             return true;
14189         }
14190     }
14191     return false;
14192 }
14193 
14194 /* C3.1 A64 instruction index by encoding */
14195 static void disas_a64_legacy(DisasContext *s, uint32_t insn)
14196 {
14197     switch (extract32(insn, 25, 4)) {
14198     case 0x8: case 0x9: /* Data processing - immediate */
14199         disas_data_proc_imm(s, insn);
14200         break;
14201     case 0xa: case 0xb: /* Branch, exception generation and system insns */
14202         disas_b_exc_sys(s, insn);
14203         break;
14204     case 0x4:
14205     case 0x6:
14206     case 0xc:
14207     case 0xe:      /* Loads and stores */
14208         disas_ldst(s, insn);
14209         break;
14210     case 0x5:
14211     case 0xd:      /* Data processing - register */
14212         disas_data_proc_reg(s, insn);
14213         break;
14214     case 0x7:
14215     case 0xf:      /* Data processing - SIMD and floating point */
14216         disas_data_proc_simd_fp(s, insn);
14217         break;
14218     default:
14219         unallocated_encoding(s);
14220         break;
14221     }
14222 }
14223 
14224 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
14225                                           CPUState *cpu)
14226 {
14227     DisasContext *dc = container_of(dcbase, DisasContext, base);
14228     CPUARMState *env = cpu->env_ptr;
14229     ARMCPU *arm_cpu = env_archcpu(env);
14230     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
14231     int bound, core_mmu_idx;
14232 
14233     dc->isar = &arm_cpu->isar;
14234     dc->condjmp = 0;
14235     dc->pc_save = dc->base.pc_first;
14236     dc->aarch64 = true;
14237     dc->thumb = false;
14238     dc->sctlr_b = 0;
14239     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
14240     dc->condexec_mask = 0;
14241     dc->condexec_cond = 0;
14242     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
14243     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
14244     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
14245     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
14246     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
14247     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
14248 #if !defined(CONFIG_USER_ONLY)
14249     dc->user = (dc->current_el == 0);
14250 #endif
14251     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
14252     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
14253     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
14254     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
14255     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
14256     dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET);
14257     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
14258     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
14259     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
14260     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
14261     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
14262     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
14263     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
14264     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
14265     dc->ata = EX_TBFLAG_A64(tb_flags, ATA);
14266     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
14267     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
14268     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
14269     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
14270     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
14271     dc->vec_len = 0;
14272     dc->vec_stride = 0;
14273     dc->cp_regs = arm_cpu->cp_regs;
14274     dc->features = env->features;
14275     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
14276 
14277 #ifdef CONFIG_USER_ONLY
14278     /* In sve_probe_page, we assume TBI is enabled. */
14279     tcg_debug_assert(dc->tbid & 1);
14280 #endif
14281 
14282     /* Single step state. The code-generation logic here is:
14283      *  SS_ACTIVE == 0:
14284      *   generate code with no special handling for single-stepping (except
14285      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
14286      *   this happens anyway because those changes are all system register or
14287      *   PSTATE writes).
14288      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
14289      *   emit code for one insn
14290      *   emit code to clear PSTATE.SS
14291      *   emit code to generate software step exception for completed step
14292      *   end TB (as usual for having generated an exception)
14293      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
14294      *   emit code to generate a software step exception
14295      *   end the TB
14296      */
14297     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
14298     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
14299     dc->is_ldex = false;
14300 
14301     /* Bound the number of insns to execute to those left on the page.  */
14302     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
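    /*
     * E.g. with 4KiB pages and pc_first ending in 0xf80, the OR gives
     * 0x...ff80, so bound == 0x80 / 4 == 32 insns to the page boundary.
     */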
14303 
14304     /* If architectural single step active, limit to 1.  */
14305     if (dc->ss_active) {
14306         bound = 1;
14307     }
14308     dc->base.max_insns = MIN(dc->base.max_insns, bound);
14309 }
14310 
14311 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
14312 {
14313 }
14314 
14315 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
14316 {
14317     DisasContext *dc = container_of(dcbase, DisasContext, base);
14318     target_ulong pc_arg = dc->base.pc_next;
14319 
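    /*
     * With CF_PCREL the TB may run at any virtual page, so record only
     * the in-page offset; the page base is recovered from the CPU state
     * when the insn start is restored.
     */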
14320     if (tb_cflags(dcbase->tb) & CF_PCREL) {
14321         pc_arg &= ~TARGET_PAGE_MASK;
14322     }
14323     tcg_gen_insn_start(pc_arg, 0, 0);
14324     dc->insn_start = tcg_last_op();
14325 }
14326 
14327 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
14328 {
14329     DisasContext *s = container_of(dcbase, DisasContext, base);
14330     CPUARMState *env = cpu->env_ptr;
14331     uint64_t pc = s->base.pc_next;
14332     uint32_t insn;
14333 
14334     /* Singlestep exceptions have the highest priority. */
14335     if (s->ss_active && !s->pstate_ss) {
14336         /* Singlestep state is Active-pending.
14337          * If we're in this state at the start of a TB then either
14338          *  a) we just took an exception to an EL which is being debugged
14339          *     and this is the first insn in the exception handler
14340          *  b) debug exceptions were masked and we just unmasked them
14341          *     without changing EL (eg by clearing PSTATE.D)
14342          * In either case we're going to take a swstep exception in the
14343          * "did not step an insn" case, and so the syndrome ISV and EX
14344          * bits should be zero.
14345          */
14346         assert(s->base.num_insns == 1);
14347         gen_swstep_exception(s, 0, 0);
14348         s->base.is_jmp = DISAS_NORETURN;
14349         s->base.pc_next = pc + 4;
14350         return;
14351     }
14352 
14353     if (pc & 3) {
14354         /*
14355          * PC alignment fault.  This has priority over the instruction abort
14356          * that we would receive from a translation fault via arm_ldl_code.
14357          * This should only be possible after an indirect branch, at the
14358          * start of the TB.
14359          */
14360         assert(s->base.num_insns == 1);
14361         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
14362         s->base.is_jmp = DISAS_NORETURN;
14363         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
14364         return;
14365     }
14366 
14367     s->pc_curr = pc;
14368     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
14369     s->insn = insn;
14370     s->base.pc_next = pc + 4;
14371 
14372     s->fp_access_checked = false;
14373     s->sve_access_checked = false;
14374 
14375     if (s->pstate_il) {
14376         /*
14377          * Illegal execution state. This has priority over BTI
14378          * exceptions, but comes after instruction abort exceptions.
14379          */
14380         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
14381         return;
14382     }
14383 
14384     if (dc_isar_feature(aa64_bti, s)) {
14385         if (s->base.num_insns == 1) {
14386             /*
14387              * At the first insn of the TB, compute s->guarded_page.
14388              * We delayed computing this until successfully reading
14389              * the first insn of the TB, above.  This (mostly) ensures
14390              * that the softmmu tlb entry has been populated, and the
14391              * page table GP bit is available.
14392              *
14393              * Note that we need to compute this even if btype == 0,
14394              * because this value is used for BR instructions later
14395              * where ENV is not available.
14396              */
14397             s->guarded_page = is_guarded_page(env, s);
14398 
14399             /* First insn can have btype set to non-zero.  */
14400             tcg_debug_assert(s->btype >= 0);
14401 
14402             /*
14403              * Note that the Branch Target Exception has fairly high
14404              * priority -- below debugging exceptions but above almost
14405              * everything else.  This allows us to handle it now
14406              * instead of waiting until the insn is otherwise decoded.
14407              */
14408             if (s->btype != 0
14409                 && s->guarded_page
14410                 && !btype_destination_ok(insn, s->bt, s->btype)) {
14411                 gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
14412                 return;
14413             }
14414         } else {
14415             /* Not the first insn: btype must be 0.  */
14416             tcg_debug_assert(s->btype == 0);
14417         }
14418     }
14419 
14420     s->is_nonstreaming = false;
14421     if (s->sme_trap_nonstreaming) {
14422         disas_sme_fa64(s, insn);
14423     }
14424 
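    /*
     * Try the decodetree decoders first (base A64, then SME, then SVE);
     * anything they do not claim falls through to the legacy
     * hand-written decoder.
     */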
14425     if (!disas_a64(s, insn) &&
14426         !disas_sme(s, insn) &&
14427         !disas_sve(s, insn)) {
14428         disas_a64_legacy(s, insn);
14429     }
14430 
14431     /*
14432      * After execution of most insns, btype is reset to 0.
14433      * Note that we set btype == -1 when the insn sets btype.
14434      */
14435     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
14436         reset_btype(s);
14437     }
14438 }
14439 
14440 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
14441 {
14442     DisasContext *dc = container_of(dcbase, DisasContext, base);
14443 
14444     if (unlikely(dc->ss_active)) {
14445         /* Note that this means single-stepping WFI doesn't halt the CPU.
14446          * For conditional branch insns this is harmless unreachable code, as
14447          * gen_goto_tb() has already handled emitting the debug exception
14448          * (and thus a tb-jump is not possible when singlestepping).
14449          */
14450         switch (dc->base.is_jmp) {
14451         default:
14452             gen_a64_update_pc(dc, 4);
14453             /* fall through */
14454         case DISAS_EXIT:
14455         case DISAS_JUMP:
14456             gen_step_complete_exception(dc);
14457             break;
14458         case DISAS_NORETURN:
14459             break;
14460         }
14461     } else {
14462         switch (dc->base.is_jmp) {
14463         case DISAS_NEXT:
14464         case DISAS_TOO_MANY:
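            /* Advance PC past the final 4-byte insn and chain to the next TB. */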
14465             gen_goto_tb(dc, 1, 4);
14466             break;
14467         default:
14468         case DISAS_UPDATE_EXIT:
14469             gen_a64_update_pc(dc, 4);
14470             /* fall through */
14471         case DISAS_EXIT:
14472             tcg_gen_exit_tb(NULL, 0);
14473             break;
14474         case DISAS_UPDATE_NOCHAIN:
14475             gen_a64_update_pc(dc, 4);
14476             /* fall through */
14477         case DISAS_JUMP:
14478             tcg_gen_lookup_and_goto_ptr();
14479             break;
14480         case DISAS_NORETURN:
14481         case DISAS_SWI:
14482             break;
14483         case DISAS_WFE:
14484             gen_a64_update_pc(dc, 4);
14485             gen_helper_wfe(cpu_env);
14486             break;
14487         case DISAS_YIELD:
14488             gen_a64_update_pc(dc, 4);
14489             gen_helper_yield(cpu_env);
14490             break;
14491         case DISAS_WFI:
14492             /*
14493              * This is a special case because we don't want to just halt
14494              * the CPU if trying to debug across a WFI.
14495              */
14496             gen_a64_update_pc(dc, 4);
14497             gen_helper_wfi(cpu_env, tcg_constant_i32(4));
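            /* The 4 is the WFI insn length, used for the syndrome if WFI traps. */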
14498             /*
14499              * The helper doesn't necessarily throw an exception, but we
14500              * must go back to the main loop to check for interrupts anyway.
14501              */
14502             tcg_gen_exit_tb(NULL, 0);
14503             break;
14504         }
14505     }
14506 }
14507 
14508 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
14509                                  CPUState *cpu, FILE *logfile)
14510 {
14511     DisasContext *dc = container_of(dcbase, DisasContext, base);
14512 
14513     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
14514     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
14515 }
14516 
14517 const TranslatorOps aarch64_translator_ops = {
14518     .init_disas_context = aarch64_tr_init_disas_context,
14519     .tb_start           = aarch64_tr_tb_start,
14520     .insn_start         = aarch64_tr_insn_start,
14521     .translate_insn     = aarch64_tr_translate_insn,
14522     .tb_stop            = aarch64_tr_tb_stop,
14523     .disas_log          = aarch64_tr_disas_log,
14524 };
14525