/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "qemu/host-utils.h"
#include "semihosting/semihost.h"
#include "exec/gen-icount.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "cpregs.h"
#include "translate-a64.h"
#include "qemu/atomic128.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
 */
static int get_a64_user_mem_index(DisasContext *s)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), cpu_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}
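
/*
 * A worked example of the two cases above (values invented, for
 * illustration only): suppose cpu_pc was last written with the value
 * 0x0ff8 (so pc_save == 0x0ff8), the current insn is at pc_curr ==
 * 0x1000, and diff == 8.  With CF_PCREL we cannot bake the absolute
 * address into the TB, so we emit
 *     dest = cpu_pc + (0x1000 - 0x0ff8) + 8,  i.e. dest = cpu_pc + 0x10
 * whereas without CF_PCREL the TB is tied to its virtual address and
 * we can simply emit the constant
 *     dest = 0x1008
 */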

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}
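
/*
 * A worked example for the tbi == 1 case above (TBI0 set, TBI1 clear;
 * addresses invented for illustration): after the sextract, dst holds
 * src with bit 55 replicated into bits [63:56].
 *
 *   src = 0xab00_1234_5678_9abc (bit 55 == 0, so TBI0 applies):
 *     sextract -> 0x0000_1234_5678_9abc; the AND with src leaves the
 *     tag byte clear, i.e. the extension is used.
 *   src = 0xab80_1234_5678_9abc (bit 55 == 1, TBI1 does not apply):
 *     sextract -> 0xff80_1234_5678_9abc; the AND with src restores
 *     the original top byte 0xab, i.e. the address is unmodified.
 *
 * The tbi == 2 case is the mirror image, using OR so that only
 * negative (bit 55 == 1) addresses take the extension.
 */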

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register.  But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(cpu_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access.  This probes a single
 * address, the exact one specified.  The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      int log2_size, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int log2_size)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int size)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}
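
/*
 * A sketch of how a caller might use the check above (hypothetical
 * variable names; the real call sites appear later in this file).
 * For a load/store pair of two registers of (1 << size) bytes each,
 * the whole transfer is tag-checked as one sequential access of
 * 2 << size bytes:
 *
 *     clean_addr = gen_mte_checkN(s, dirty_addr, is_store,
 *                                 tag_checked, 2 << size);
 *
 * On return, clean_addr is a fresh temporary with TBI resolved, so
 * the caller may increment it between the two accesses without
 * disturbing the writeback address held in dirty_addr.
 */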

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}
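
/*
 * To see why the sign extension matters, consider the 32-bit value
 * 0x8000_0000 (INT32_MIN).  Zero-extended to 64 bits it would become
 * the large positive value 0x0000_0000_8000_0000 and a GE/LT
 * comparison against zero would go the wrong way; sign-extended it
 * becomes 0xffff_ffff_8000_0000 and still compares as negative, as
 * required.  EQ/NE are unaffected since either extension preserves
 * (non-)zeroness.
 */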

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(cpu_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments.  For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path.  A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}
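
/*
 * Example of the ZR behaviour (a sketch, not the decoder itself): for
 * SUBS XZR, X0, X1 -- i.e. CMP X0, X1 -- the decoder writes the
 * result through cpu_reg(s, 31).  That write lands in the discardable
 * temporary returned above, so the architectural effect is the flag
 * update only, with no register modified.
 */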

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}
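
/*
 * For example (assuming an SVE vector length of 256 bits, so
 * vec_full_reg_size() == 32): with is_q false, the gvec mov copies
 * bytes [0, 8) of the register onto themselves and, as a side effect
 * of oprsz (8) being smaller than maxsz (32), zeroes bytes [8, 32).
 */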

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, cpu_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper.  */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + qc + operation using an out-of-line helper.  */
static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
                            int rm, gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), qc_ptr,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
}

/* Expand a 4-operand operation using an out-of-line helper.  */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
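
/*
 * A worked example of the trick above: for result ==
 * 0xffff_ffff_0000_0000, extr sets ZF = 0x0000_0000 (low half) and
 * NF = 0xffff_ffff (high half); OR-ing the halves leaves ZF nonzero,
 * correctly encoding Z clear (QEMU's cpu_ZF means "Z is set iff this
 * is zero"), while bit 31 of NF gives N == 1.  Only result == 0 can
 * make both halves, and hence ZF, zero.
 */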

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
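
/*
 * A worked example of the V computation above (64-bit case): for
 * t0 = t1 = 0x4000_0000_0000_0000 the sum is 0x8000_0000_0000_0000,
 * so flag = result ^ t0 = 0xc000_...; tmp = t0 ^ t1 = 0, and the
 * andc leaves flag = 0xc000_....  Bit 63, extracted into bit 31 of
 * cpu_VF, is set: two positive operands produced a negative result,
 * which is exactly signed overflow.  When the operands have opposite
 * signs, tmp has bit 63 set and the andc masks the overflow bit off,
 * as no overflow is possible in that case.
 */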

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    MemOp mop;

    tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));

    if (size < 4) {
        mop = finalize_memop(s, size);
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
        TCGv_i64 tmphi = tcg_temp_new_i64();

        tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));

        mop = s->be_data | MO_UQ;
        tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr,
                            get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;
    MemOp mop;

    if (size < 4) {
        mop = finalize_memop(s, size);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        mop = s->be_data | MO_UQ;
        tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr,
                            get_mem_index(s), mop);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = true;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        assert(dc_isar_feature(aa64_sme, s));
        if (!sme_sm_enabled_check(s)) {
            goto fail_exit;
        }
    } else if (s->sve_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        goto fail_exit;
    }
    s->sve_access_checked = true;
    return fp_access_check(s);

 fail_exit:
    /* Assert that we only raise one exception per instruction. */
    assert(!s->sve_access_checked);
    s->sve_access_checked = true;
    return false;
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority.  This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        s->fp_access_checked = true;
        return sme_access_check(s);
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}
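
/*
 * Example (a sketch of one caller's expectations): for
 * ADD X0, X1, W2, UXTW #2 the decoder would call this with
 * option == 0b010 (is_signed == 0, extsize == 2) and shift == 2,
 * producing
 *     tcg_out = (uint64_t)(uint32_t)tcg_in << 2
 * i.e. a zero-extended word shifted left by two.
 */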

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table based lookup decoder. It is
 * intended to be used when the relevant bits for decode are too
 * awkwardly placed and switch/if based logic would be confusing and
 * deeply nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0).
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}
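
/*
 * Usage sketch (hypothetical pattern/mask values and handler name;
 * the real tables appear further down in this file):
 *
 *   static const AArch64DecodeTable example_table[] = {
 *       { 0x0e200400, 0x9f200400, disas_example },
 *       { 0x00000000, 0x00000000, NULL },       <- mask 0 terminates
 *   };
 *
 *   AArch64DecodeFn *fn = lookup_disas_fn(example_table, insn);
 *   if (fn) {
 *       fn(s, insn);
 *   } else {
 *       unallocated_encoding(s);
 *   }
 */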

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

/* Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    int64_t diff = sextract32(insn, 0, 26) * 4;

    if (insn & (1U << 31)) {
        /* BL Branch with link */
        gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    }

    /* B Branch / BL Branch with link */
    reset_btype(s);
    gen_goto_tb(s, 0, diff);
}
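
/*
 * Decode examples (for illustration): insn 0x14000001 has bit 31
 * clear and imm26 == 1, so it is a plain B to pc_curr + 4; insn
 * 0x94000001 is the same branch as a BL, which additionally writes
 * the return address (the following insn) to X30 via cpu_reg(s, 30).
 * The signed imm26, scaled by 4, gives the architectural +/-128MB
 * branch range.
 */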

/* Compare and branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    int64_t diff;
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    diff = sextract32(insn, 5, 19) * 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, diff);
}

/* Test and branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    int64_t diff;
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    diff = sextract32(insn, 5, 14) * 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, diff);
}

/* Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    int64_t diff;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    diff = sextract32(insn, 5, 19) * 4;
    cond = extract32(insn, 0, 4);

    reset_btype(s);
    if (cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, diff);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, diff);
    }
}

/* HINT instruction group, including various allocated HINTs */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0b00000: /* NOP */
        break;
    case 0b00011: /* WFI */
        s->base.is_jmp = DISAS_WFI;
        break;
    case 0b00001: /* YIELD */
        /* When running in MTTCG we don't generate jumps to the yield and
         * WFE helpers as it won't affect the scheduling of other vCPUs.
         * If we wanted to more completely model WFE/SEV so we don't busy
         * spin unnecessarily we would need to do something more involved.
         */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_YIELD;
        }
        break;
    case 0b00010: /* WFE */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_WFE;
        }
        break;
    case 0b00100: /* SEV */
    case 0b00101: /* SEVL */
    case 0b00110: /* DGH */
        /* we treat all as NOP at least for now */
        break;
    case 0b00111: /* XPACLRI */
        if (s->pauth_active) {
            gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
        }
        break;
    case 0b01000: /* PACIA1716 */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01010: /* PACIB1716 */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01100: /* AUTIA1716 */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01110: /* AUTIB1716 */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b10000: /* ESB */
        /* Without RAS, we must implement this as NOP. */
        if (dc_isar_feature(aa64_ras, s)) {
            /*
             * QEMU does not have a source of physical SErrors,
             * so we are only concerned with virtual SErrors.
             * The pseudocode in the ARM for this case is
             *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
             *      AArch64.vESBOperation();
             * Most of the condition can be evaluated at translation time.
             * Test for EL2 present, and defer test for SEL2 to runtime.
             */
            if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
                gen_helper_vesb(cpu_env);
            }
        }
        break;
    case 0b11000: /* PACIAZ */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11001: /* PACIASP */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11010: /* PACIBZ */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11011: /* PACIBSP */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11100: /* AUTIAZ */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11101: /* AUTIASP */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11110: /* AUTIBZ */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11111: /* AUTIBSP */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    default:
        /* default specified as NOP equivalent */
        break;
    }
}

static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}

/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGBar bar;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
        switch (crm & 3) {
        case 1: /* MBReqTypes_Reads */
            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
            break;
        case 2: /* MBReqTypes_Writes */
            bar = TCG_BAR_SC | TCG_MO_ST_ST;
            break;
        default: /* MBReqTypes_All */
            bar = TCG_BAR_SC | TCG_MO_ALL;
            break;
        }
        tcg_gen_mb(bar);
        return;
    case 6: /* ISB */
1580         /* We need to break the TB after this insn to execute
1581          * self-modifying code correctly and also to take
1582          * any pending interrupts immediately.
1583          */
1584         reset_btype(s);
1585         gen_goto_tb(s, 0, 4);
1586         return;
1587 
1588     case 7: /* SB */
1589         if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
1590             goto do_unallocated;
1591         }
1592         /*
1593          * TODO: There is no speculation barrier opcode for TCG;
1594          * MB and end the TB instead.
1595          */
1596         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1597         gen_goto_tb(s, 0, 4);
1598         return;
1599 
1600     default:
1601     do_unallocated:
1602         unallocated_encoding(s);
1603         return;
1604     }
1605 }
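
/*
 * Worked example (informational): "DMB ISHLD" encodes CRm = 0b1001,
 * so (crm & 3) == 1 above and we emit TCG_BAR_SC | TCG_MO_LD_LD |
 * TCG_MO_LD_ST, ordering earlier loads against later loads and stores.
 */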
1606 
1607 static void gen_xaflag(void)
1608 {
1609     TCGv_i32 z = tcg_temp_new_i32();
1610 
1611     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1612 
1613     /*
1614      * (!C & !Z) << 31
1615      * (!(C | Z)) << 31
1616      * ~((C | Z) << 31)
1617      * ~-(C | Z)
1618      * (C | Z) - 1
1619      */
1620     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1621     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1622 
1623     /* !(Z & C) */
1624     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1625     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1626 
1627     /* (!C & Z) << 31 -> -(Z & ~C) */
1628     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1629     tcg_gen_neg_i32(cpu_VF, cpu_VF);
1630 
1631     /* C | Z */
1632     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1633 }
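
/*
 * Sanity check of the identity above (informational): QEMU keeps the
 * architectural N flag in bit 31 of NF, so only bit 31 of the result
 * matters.  With C and Z each 0 or 1:
 *
 *     (C | Z) == 0:  (C | Z) - 1 == 0xffffffff, bit 31 set   (!C && !Z)
 *     (C | Z) == 1:  (C | Z) - 1 == 0x00000000, bit 31 clear
 */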
1634 
1635 static void gen_axflag(void)
1636 {
1637     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
1638     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
1639 
1640     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1641     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1642 
1643     tcg_gen_movi_i32(cpu_NF, 0);
1644     tcg_gen_movi_i32(cpu_VF, 0);
1645 }
1646 
1647 /* MSR (immediate) - move immediate to processor state field */
1648 static void handle_msr_i(DisasContext *s, uint32_t insn,
1649                          unsigned int op1, unsigned int op2, unsigned int crm)
1650 {
1651     int op = op1 << 3 | op2;
1652 
1653     /* End the TB by default, chaining is ok.  */
1654     s->base.is_jmp = DISAS_TOO_MANY;
1655 
1656     switch (op) {
1657     case 0x00: /* CFINV */
1658         if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
1659             goto do_unallocated;
1660         }
1661         tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1662         s->base.is_jmp = DISAS_NEXT;
1663         break;
1664 
1665     case 0x01: /* XAFlag */
1666         if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1667             goto do_unallocated;
1668         }
1669         gen_xaflag();
1670         s->base.is_jmp = DISAS_NEXT;
1671         break;
1672 
1673     case 0x02: /* AXFlag */
1674         if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1675             goto do_unallocated;
1676         }
1677         gen_axflag();
1678         s->base.is_jmp = DISAS_NEXT;
1679         break;
1680 
1681     case 0x03: /* UAO */
1682         if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
1683             goto do_unallocated;
1684         }
1685         if (crm & 1) {
1686             set_pstate_bits(PSTATE_UAO);
1687         } else {
1688             clear_pstate_bits(PSTATE_UAO);
1689         }
1690         gen_rebuild_hflags(s);
1691         break;
1692 
1693     case 0x04: /* PAN */
1694         if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
1695             goto do_unallocated;
1696         }
1697         if (crm & 1) {
1698             set_pstate_bits(PSTATE_PAN);
1699         } else {
1700             clear_pstate_bits(PSTATE_PAN);
1701         }
1702         gen_rebuild_hflags(s);
1703         break;
1704 
1705     case 0x05: /* SPSel */
1706         if (s->current_el == 0) {
1707             goto do_unallocated;
1708         }
1709         gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(crm & PSTATE_SP));
1710         break;
1711 
1712     case 0x19: /* SSBS */
1713         if (!dc_isar_feature(aa64_ssbs, s)) {
1714             goto do_unallocated;
1715         }
1716         if (crm & 1) {
1717             set_pstate_bits(PSTATE_SSBS);
1718         } else {
1719             clear_pstate_bits(PSTATE_SSBS);
1720         }
1721         /* Don't need to rebuild hflags since SSBS is a nop */
1722         break;
1723 
1724     case 0x1a: /* DIT */
1725         if (!dc_isar_feature(aa64_dit, s)) {
1726             goto do_unallocated;
1727         }
1728         if (crm & 1) {
1729             set_pstate_bits(PSTATE_DIT);
1730         } else {
1731             clear_pstate_bits(PSTATE_DIT);
1732         }
1733         /* There's no need to rebuild hflags because DIT is a nop */
1734         break;
1735 
1736     case 0x1e: /* DAIFSet */
1737         gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(crm));
1738         break;
1739 
1740     case 0x1f: /* DAIFClear */
1741         gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(crm));
1742         /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
1743         s->base.is_jmp = DISAS_UPDATE_EXIT;
1744         break;
1745 
1746     case 0x1c: /* TCO */
1747         if (dc_isar_feature(aa64_mte, s)) {
1748             /* Full MTE is enabled -- set the TCO bit as directed. */
1749             if (crm & 1) {
1750                 set_pstate_bits(PSTATE_TCO);
1751             } else {
1752                 clear_pstate_bits(PSTATE_TCO);
1753             }
1754             gen_rebuild_hflags(s);
1755             /* Many factors, including TCO, go into MTE_ACTIVE. */
1756             s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
1757         } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
1758             /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
1759             s->base.is_jmp = DISAS_NEXT;
1760         } else {
1761             goto do_unallocated;
1762         }
1763         break;
1764 
1765     case 0x1b: /* SVCR* */
1766         if (!dc_isar_feature(aa64_sme, s) || crm < 2 || crm > 7) {
1767             goto do_unallocated;
1768         }
1769         if (sme_access_check(s)) {
1770             int old = s->pstate_sm | (s->pstate_za << 1);
1771             int new = (crm & 1) * 3;
1772             int msk = (crm >> 1) & 3;
1773 
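            /*
             * Worked example (informational): SMSTART SM is
             * MSR SVCRSM, #1, i.e. crm == 0b0011, so msk == 1 and
             * new == 3: only the PSTATE.SM bit is tested and set.
             */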
1774             if ((old ^ new) & msk) {
1775                 /* At least one bit changes. */
1776                 gen_helper_set_svcr(cpu_env, tcg_constant_i32(new),
1777                                     tcg_constant_i32(msk));
1778             } else {
1779                 s->base.is_jmp = DISAS_NEXT;
1780             }
1781         }
1782         break;
1783 
1784     default:
1785     do_unallocated:
1786         unallocated_encoding(s);
1787         return;
1788     }
1789 }
1790 
1791 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1792 {
1793     TCGv_i32 tmp = tcg_temp_new_i32();
1794     TCGv_i32 nzcv = tcg_temp_new_i32();
1795 
1796     /* build bit 31, N */
1797     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1798     /* build bit 30, Z */
1799     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1800     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1801     /* build bit 29, C */
1802     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1803     /* build bit 28, V */
1804     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1805     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1806     /* generate result */
1807     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1808 }
1809 
1810 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1811 {
1812     TCGv_i32 nzcv = tcg_temp_new_i32();
1813 
1814     /* take NZCV from R[t] */
1815     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1816 
1817     /* bit 31, N */
1818     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1819     /* bit 30, Z */
1820     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1821     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1822     /* bit 29, C */
1823     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1824     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1825     /* bit 28, V */
1826     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1827     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1828 }
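
/*
 * Informational: both helpers use the packed layout of the MRS/MSR
 * NZCV register, N:Z:C:V in bits [31:28].  For example N=1, C=1 reads
 * back via gen_get_nzcv() as 0xa0000000.
 */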
1829 
1830 static void gen_sysreg_undef(DisasContext *s, bool isread,
1831                              uint8_t op0, uint8_t op1, uint8_t op2,
1832                              uint8_t crn, uint8_t crm, uint8_t rt)
1833 {
1834     /*
1835      * Generate code to emit an UNDEF with correct syndrome
1836      * information for a failed system register access.
1837      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
1838      * but if FEAT_IDST is implemented then read accesses to registers
1839      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
1840      * syndrome.
1841      */
1842     uint32_t syndrome;
1843 
1844     if (isread && dc_isar_feature(aa64_ids, s) &&
1845         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
1846         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1847     } else {
1848         syndrome = syn_uncategorized();
1849     }
1850     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
1851 }
1852 
1853 /* MRS - move from system register
1854  * MSR (register) - move to system register
1855  * SYS
1856  * SYSL
1857  * These are all essentially the same insn in 'read' and 'write'
1858  * versions, with varying op0 fields.
1859  */
1860 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1861                        unsigned int op0, unsigned int op1, unsigned int op2,
1862                        unsigned int crn, unsigned int crm, unsigned int rt)
1863 {
1864     uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1865                                       crn, crm, op0, op1, op2);
1866     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
1867     TCGv_ptr tcg_ri = NULL;
1868     TCGv_i64 tcg_rt;
1869 
1870     if (!ri) {
1871         /* Unknown register; this might be a guest error or a QEMU
1872          * unimplemented feature.
1873          */
1874         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1875                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1876                       isread ? "read" : "write", op0, op1, crn, crm, op2);
1877         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
1878         return;
1879     }
1880 
1881     /* Check access permissions */
1882     if (!cp_access_ok(s->current_el, ri, isread)) {
1883         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
1884         return;
1885     }
1886 
1887     if (ri->accessfn || (ri->fgt && s->fgt_active)) {
1888         /* Emit code to perform further access permissions checks at
1889          * runtime; this may result in an exception.
1890          */
1891         uint32_t syndrome;
1892 
1893         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1894         gen_a64_update_pc(s, 0);
1895         tcg_ri = tcg_temp_new_ptr();
1896         gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
1897                                        tcg_constant_i32(key),
1898                                        tcg_constant_i32(syndrome),
1899                                        tcg_constant_i32(isread));
1900     } else if (ri->type & ARM_CP_RAISES_EXC) {
1901         /*
1902          * The readfn or writefn might raise an exception;
1903          * synchronize the CPU state in case it does.
1904          */
1905         gen_a64_update_pc(s, 0);
1906     }
1907 
1908     /* Handle special cases first */
1909     switch (ri->type & ARM_CP_SPECIAL_MASK) {
1910     case 0:
1911         break;
1912     case ARM_CP_NOP:
1913         return;
1914     case ARM_CP_NZCV:
1915         tcg_rt = cpu_reg(s, rt);
1916         if (isread) {
1917             gen_get_nzcv(tcg_rt);
1918         } else {
1919             gen_set_nzcv(tcg_rt);
1920         }
1921         return;
1922     case ARM_CP_CURRENTEL:
1923         /* Reads as current EL value from pstate, which is
1924          * guaranteed to be constant by the tb flags.
1925          */
1926         tcg_rt = cpu_reg(s, rt);
1927         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1928         return;
1929     case ARM_CP_DC_ZVA:
1930         /* Writes clear the aligned block of memory which rt points into. */
1931         if (s->mte_active[0]) {
1932             int desc = 0;
1933 
1934             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
1935             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
1936             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
1937 
1938             tcg_rt = tcg_temp_new_i64();
1939             gen_helper_mte_check_zva(tcg_rt, cpu_env,
1940                                      tcg_constant_i32(desc), cpu_reg(s, rt));
1941         } else {
1942             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
1943         }
1944         gen_helper_dc_zva(cpu_env, tcg_rt);
1945         return;
1946     case ARM_CP_DC_GVA:
1947         {
1948             TCGv_i64 clean_addr, tag;
1949 
1950             /*
1951              * DC_GVA, like DC_ZVA, requires that we supply the original
1952              * pointer for an invalid page.  Probe that address first.
1953              */
1954             tcg_rt = cpu_reg(s, rt);
1955             clean_addr = clean_data_tbi(s, tcg_rt);
1956             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
1957 
1958             if (s->ata) {
1959                 /* Extract the tag from the register to match STZGM.  */
1960                 tag = tcg_temp_new_i64();
1961                 tcg_gen_shri_i64(tag, tcg_rt, 56);
1962                 gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
1963             }
1964         }
1965         return;
1966     case ARM_CP_DC_GZVA:
1967         {
1968             TCGv_i64 clean_addr, tag;
1969 
1970             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
1971             tcg_rt = cpu_reg(s, rt);
1972             clean_addr = clean_data_tbi(s, tcg_rt);
1973             gen_helper_dc_zva(cpu_env, clean_addr);
1974 
1975             if (s->ata) {
1976                 /* Extract the tag from the register to match STZGM.  */
1977                 tag = tcg_temp_new_i64();
1978                 tcg_gen_shri_i64(tag, tcg_rt, 56);
1979                 gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
1980             }
1981         }
1982         return;
1983     default:
1984         g_assert_not_reached();
1985     }
1986     if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
1987         return;
1988     } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
1989         return;
1990     } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
1991         return;
1992     }
1993 
1994     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1995         gen_io_start();
1996     }
1997 
1998     tcg_rt = cpu_reg(s, rt);
1999 
2000     if (isread) {
2001         if (ri->type & ARM_CP_CONST) {
2002             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2003         } else if (ri->readfn) {
2004             if (!tcg_ri) {
2005                 tcg_ri = gen_lookup_cp_reg(key);
2006             }
2007             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tcg_ri);
2008         } else {
2009             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
2010         }
2011     } else {
2012         if (ri->type & ARM_CP_CONST) {
2013             /* If not forbidden by access permissions, treat as WI */
2014             return;
2015         } else if (ri->writefn) {
2016             if (!tcg_ri) {
2017                 tcg_ri = gen_lookup_cp_reg(key);
2018             }
2019             gen_helper_set_cp_reg64(cpu_env, tcg_ri, tcg_rt);
2020         } else {
2021             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
2022         }
2023     }
2024 
2025     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
2026         /* I/O operations must end the TB here (whether read or write) */
2027         s->base.is_jmp = DISAS_UPDATE_EXIT;
2028     }
2029     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2030         /*
2031          * A write to any coprocessor register that ends a TB
2032          * must rebuild the hflags for the next TB.
2033          */
2034         gen_rebuild_hflags(s);
2035         /*
2036          * We default to ending the TB on a coprocessor register write,
2037          * but allow this to be suppressed by the register definition
2038          * (usually only necessary to work around guest bugs).
2039          */
2040         s->base.is_jmp = DISAS_UPDATE_EXIT;
2041     }
2042 }
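
/*
 * Example (informational): "mrs x0, CurrentEL" resolves to a reginfo
 * of type ARM_CP_CURRENTEL above, so the translator folds in the value
 * (s->current_el << 2) as a constant with no helper call.
 */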
2043 
2044 /* System
2045  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
2046  * +---------------------+---+-----+-----+-------+-------+-----+------+
2047  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
2048  * +---------------------+---+-----+-----+-------+-------+-----+------+
2049  */
2050 static void disas_system(DisasContext *s, uint32_t insn)
2051 {
2052     unsigned int l, op0, op1, crn, crm, op2, rt;
2053     l = extract32(insn, 21, 1);
2054     op0 = extract32(insn, 19, 2);
2055     op1 = extract32(insn, 16, 3);
2056     crn = extract32(insn, 12, 4);
2057     crm = extract32(insn, 8, 4);
2058     op2 = extract32(insn, 5, 3);
2059     rt = extract32(insn, 0, 5);
2060 
2061     if (op0 == 0) {
2062         if (l || rt != 31) {
2063             unallocated_encoding(s);
2064             return;
2065         }
2066         switch (crn) {
2067         case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
2068             handle_hint(s, insn, op1, op2, crm);
2069             break;
2070         case 3: /* CLREX, DSB, DMB, ISB */
2071             handle_sync(s, insn, op1, op2, crm);
2072             break;
2073         case 4: /* MSR (immediate) */
2074             handle_msr_i(s, insn, op1, op2, crm);
2075             break;
2076         default:
2077             unallocated_encoding(s);
2078             break;
2079         }
2080         return;
2081     }
2082     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
2083 }
2084 
2085 /* Exception generation
2086  *
2087  *  31             24 23 21 20                     5 4   2 1  0
2088  * +-----------------+-----+------------------------+-----+----+
2089  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
2090  * +-----------------+-----+------------------------+-----+----+
2091  */
2092 static void disas_exc(DisasContext *s, uint32_t insn)
2093 {
2094     int opc = extract32(insn, 21, 3);
2095     int op2_ll = extract32(insn, 0, 5);
2096     int imm16 = extract32(insn, 5, 16);
2097     uint32_t syndrome;
2098 
2099     switch (opc) {
2100     case 0:
2101         /* For SVC, HVC and SMC we advance the single-step state
2102          * machine before taking the exception. This is architecturally
2103          * mandated, to ensure that single-stepping a system call
2104          * instruction works properly.
2105          */
2106         switch (op2_ll) {
2107         case 1:                                                     /* SVC */
2108             syndrome = syn_aa64_svc(imm16);
2109             if (s->fgt_svc) {
2110                 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2111                 break;
2112             }
2113             gen_ss_advance(s);
2114             gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2115             break;
2116         case 2:                                                     /* HVC */
2117             if (s->current_el == 0) {
2118                 unallocated_encoding(s);
2119                 break;
2120             }
2121             /* The pre HVC helper handles cases when HVC gets trapped
2122              * as an undefined insn by runtime configuration.
2123              */
2124             gen_a64_update_pc(s, 0);
2125             gen_helper_pre_hvc(cpu_env);
2126             gen_ss_advance(s);
2127             gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(imm16), 2);
2128             break;
2129         case 3:                                                     /* SMC */
2130             if (s->current_el == 0) {
2131                 unallocated_encoding(s);
2132                 break;
2133             }
2134             gen_a64_update_pc(s, 0);
2135             gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(imm16)));
2136             gen_ss_advance(s);
2137             gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(imm16), 3);
2138             break;
2139         default:
2140             unallocated_encoding(s);
2141             break;
2142         }
2143         break;
2144     case 1:
2145         if (op2_ll != 0) {
2146             unallocated_encoding(s);
2147             break;
2148         }
2149         /* BRK */
2150         gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
2151         break;
2152     case 2:
2153         if (op2_ll != 0) {
2154             unallocated_encoding(s);
2155             break;
2156         }
2157         /* HLT. This has two purposes.
2158          * Architecturally, it is an external halting debug instruction.
2159          * Since QEMU doesn't implement external debug, we treat this as
2160          * the architecture requires when halting debug is disabled: it UNDEFs.
2161          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2162          */
2163         if (semihosting_enabled(s->current_el == 0) && imm16 == 0xf000) {
2164             gen_exception_internal_insn(s, EXCP_SEMIHOST);
2165         } else {
2166             unallocated_encoding(s);
2167         }
2168         break;
2169     case 5:
2170         if (op2_ll < 1 || op2_ll > 3) {
2171             unallocated_encoding(s);
2172             break;
2173         }
2174         /* DCPS1, DCPS2, DCPS3 */
2175         unallocated_encoding(s);
2176         break;
2177     default:
2178         unallocated_encoding(s);
2179         break;
2180     }
2181 }
2182 
2183 /* Unconditional branch (register)
2184  *  31           25 24   21 20   16 15   10 9    5 4     0
2185  * +---------------+-------+-------+-------+------+-------+
2186  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
2187  * +---------------+-------+-------+-------+------+-------+
2188  */
2189 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
2190 {
2191     unsigned int opc, op2, op3, rn, op4;
2192     unsigned btype_mod = 2;   /* 0: BR, 1: BLR, 2: other */
2193     TCGv_i64 dst;
2194     TCGv_i64 modifier;
2195 
2196     opc = extract32(insn, 21, 4);
2197     op2 = extract32(insn, 16, 5);
2198     op3 = extract32(insn, 10, 6);
2199     rn = extract32(insn, 5, 5);
2200     op4 = extract32(insn, 0, 5);
2201 
2202     if (op2 != 0x1f) {
2203         goto do_unallocated;
2204     }
2205 
2206     switch (opc) {
2207     case 0: /* BR */
2208     case 1: /* BLR */
2209     case 2: /* RET */
2210         btype_mod = opc;
2211         switch (op3) {
2212         case 0:
2213             /* BR, BLR, RET */
2214             if (op4 != 0) {
2215                 goto do_unallocated;
2216             }
2217             dst = cpu_reg(s, rn);
2218             break;
2219 
2220         case 2:
2221         case 3:
2222             if (!dc_isar_feature(aa64_pauth, s)) {
2223                 goto do_unallocated;
2224             }
2225             if (opc == 2) {
2226                 /* RETAA, RETAB */
2227                 if (rn != 0x1f || op4 != 0x1f) {
2228                     goto do_unallocated;
2229                 }
2230                 rn = 30;
2231                 modifier = cpu_X[31];
2232             } else {
2233                 /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */
2234                 if (op4 != 0x1f) {
2235                     goto do_unallocated;
2236                 }
2237                 modifier = tcg_constant_i64(0);
2238             }
2239             if (s->pauth_active) {
2240                 dst = tcg_temp_new_i64();
2241                 if (op3 == 2) {
2242                     gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2243                 } else {
2244                     gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2245                 }
2246             } else {
2247                 dst = cpu_reg(s, rn);
2248             }
2249             break;
2250 
2251         default:
2252             goto do_unallocated;
2253         }
2254         /* BLR also needs to load return address */
2255         if (opc == 1) {
2256             TCGv_i64 lr = cpu_reg(s, 30);
2257             if (dst == lr) {
2258                 TCGv_i64 tmp = tcg_temp_new_i64();
2259                 tcg_gen_mov_i64(tmp, dst);
2260                 dst = tmp;
2261             }
2262             gen_pc_plus_diff(s, lr, curr_insn_len(s));
2263         }
2264         gen_a64_set_pc(s, dst);
2265         break;
2266 
2267     case 8: /* BRAA */
2268     case 9: /* BLRAA */
2269         if (!dc_isar_feature(aa64_pauth, s)) {
2270             goto do_unallocated;
2271         }
2272         if ((op3 & ~1) != 2) {
2273             goto do_unallocated;
2274         }
2275         btype_mod = opc & 1;
2276         if (s->pauth_active) {
2277             dst = tcg_temp_new_i64();
2278             modifier = cpu_reg_sp(s, op4);
2279             if (op3 == 2) {
2280                 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2281             } else {
2282                 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2283             }
2284         } else {
2285             dst = cpu_reg(s, rn);
2286         }
2287         /* BLRAA also needs to load return address */
2288         if (opc == 9) {
2289             TCGv_i64 lr = cpu_reg(s, 30);
2290             if (dst == lr) {
2291                 TCGv_i64 tmp = tcg_temp_new_i64();
2292                 tcg_gen_mov_i64(tmp, dst);
2293                 dst = tmp;
2294             }
2295             gen_pc_plus_diff(s, lr, curr_insn_len(s));
2296         }
2297         gen_a64_set_pc(s, dst);
2298         break;
2299 
2300     case 4: /* ERET */
2301         if (s->current_el == 0) {
2302             goto do_unallocated;
2303         }
2304         switch (op3) {
2305         case 0: /* ERET */
2306             if (op4 != 0) {
2307                 goto do_unallocated;
2308             }
2309             if (s->fgt_eret) {
2310                 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(op3), 2);
2311                 return;
2312             }
2313             dst = tcg_temp_new_i64();
2314             tcg_gen_ld_i64(dst, cpu_env,
2315                            offsetof(CPUARMState, elr_el[s->current_el]));
2316             break;
2317 
2318         case 2: /* ERETAA */
2319         case 3: /* ERETAB */
2320             if (!dc_isar_feature(aa64_pauth, s)) {
2321                 goto do_unallocated;
2322             }
2323             if (rn != 0x1f || op4 != 0x1f) {
2324                 goto do_unallocated;
2325             }
2326             /* The FGT trap takes precedence over an auth trap. */
2327             if (s->fgt_eret) {
2328                 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(op3), 2);
2329                 return;
2330             }
2331             dst = tcg_temp_new_i64();
2332             tcg_gen_ld_i64(dst, cpu_env,
2333                            offsetof(CPUARMState, elr_el[s->current_el]));
2334             if (s->pauth_active) {
2335                 modifier = cpu_X[31];
2336                 if (op3 == 2) {
2337                     gen_helper_autia(dst, cpu_env, dst, modifier);
2338                 } else {
2339                     gen_helper_autib(dst, cpu_env, dst, modifier);
2340                 }
2341             }
2342             break;
2343 
2344         default:
2345             goto do_unallocated;
2346         }
2347         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2348             gen_io_start();
2349         }
2350 
2351         gen_helper_exception_return(cpu_env, dst);
2352         /* Must exit loop to check un-masked IRQs */
2353         s->base.is_jmp = DISAS_EXIT;
2354         return;
2355 
2356     case 5: /* DRPS */
2357         if (op3 != 0 || op4 != 0 || rn != 0x1f) {
2358             goto do_unallocated;
2359         } else {
2360             unallocated_encoding(s);
2361         }
2362         return;
2363 
2364     default:
2365     do_unallocated:
2366         unallocated_encoding(s);
2367         return;
2368     }
2369 
2370     switch (btype_mod) {
2371     case 0: /* BR */
2372         if (dc_isar_feature(aa64_bti, s)) {
2373             /* BR to {x16,x17} or !guard -> 1, else 3.  */
2374             set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
2375         }
2376         break;
2377 
2378     case 1: /* BLR */
2379         if (dc_isar_feature(aa64_bti, s)) {
2380             /* BLR sets BTYPE to 2, regardless of source guarded page.  */
2381             set_btype(s, 2);
2382         }
2383         break;
2384 
2385     default: /* RET or none of the above.  */
2386         /* BTYPE will be set to 0 by normal end-of-insn processing.  */
2387         break;
2388     }
2389 
2390     s->base.is_jmp = DISAS_JUMP;
2391 }
2392 
2393 /* Branches, exception generating and system instructions */
2394 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2395 {
2396     switch (extract32(insn, 25, 7)) {
2397     case 0x0a: case 0x0b:
2398     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
2399         disas_uncond_b_imm(s, insn);
2400         break;
2401     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
2402         disas_comp_b_imm(s, insn);
2403         break;
2404     case 0x1b: case 0x5b: /* Test & branch (immediate) */
2405         disas_test_b_imm(s, insn);
2406         break;
2407     case 0x2a: /* Conditional branch (immediate) */
2408         disas_cond_b_imm(s, insn);
2409         break;
2410     case 0x6a: /* Exception generation / System */
2411         if (insn & (1 << 24)) {
2412             if (extract32(insn, 22, 2) == 0) {
2413                 disas_system(s, insn);
2414             } else {
2415                 unallocated_encoding(s);
2416             }
2417         } else {
2418             disas_exc(s, insn);
2419         }
2420         break;
2421     case 0x6b: /* Unconditional branch (register) */
2422         disas_uncond_b_reg(s, insn);
2423         break;
2424     default:
2425         unallocated_encoding(s);
2426         break;
2427     }
2428 }
2429 
2430 /*
2431  * Load/Store exclusive instructions are implemented by remembering
2432  * the value/address loaded, and seeing if these are the same
2433  * when the store is performed. This is not actually the architecturally
2434  * mandated semantics, but it works for typical guest code sequences
2435  * and avoids having to monitor regular stores.
2436  *
2437  * The store exclusive uses the atomic cmpxchg primitives to avoid
2438  * races in multi-threaded linux-user and when MTTCG softmmu is
2439  * enabled.
2440  */
2441 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
2442                                TCGv_i64 addr, int size, bool is_pair)
2443 {
2444     int idx = get_mem_index(s);
2445     MemOp memop = s->be_data;
2446 
2447     g_assert(size <= 3);
2448     if (is_pair) {
2449         g_assert(size >= 2);
2450         if (size == 2) {
2451             /* The pair must be single-copy atomic for the doubleword.  */
2452             memop |= MO_64 | MO_ALIGN;
2453             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2454             if (s->be_data == MO_LE) {
2455                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2456                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2457             } else {
2458                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2459                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2460             }
2461         } else {
2462             /* The pair must be single-copy atomic for *each* doubleword,
2463                not the entire quadword; however, it must be quadword aligned.  */
2464             memop |= MO_64;
2465             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
2466                                 memop | MO_ALIGN_16);
2467 
2468             TCGv_i64 addr2 = tcg_temp_new_i64();
2469             tcg_gen_addi_i64(addr2, addr, 8);
2470             tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
2471 
2472             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2473             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2474         }
2475     } else {
2476         memop |= size | MO_ALIGN;
2477         tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2478         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2479     }
2480     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
2481 }
2482 
2483 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2484                                 TCGv_i64 addr, int size, int is_pair)
2485 {
2486     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2487      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2488      *     [addr] = {Rt};
2489      *     if (is_pair) {
2490      *         [addr + datasize] = {Rt2};
2491      *     }
2492      *     {Rd} = 0;
2493      * } else {
2494      *     {Rd} = 1;
2495      * }
2496      * env->exclusive_addr = -1;
2497      */
2498     TCGLabel *fail_label = gen_new_label();
2499     TCGLabel *done_label = gen_new_label();
2500     TCGv_i64 tmp;
2501 
2502     tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
2503 
2504     tmp = tcg_temp_new_i64();
2505     if (is_pair) {
2506         if (size == 2) {
2507             if (s->be_data == MO_LE) {
2508                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2509             } else {
2510                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2511             }
2512             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2513                                        cpu_exclusive_val, tmp,
2514                                        get_mem_index(s),
2515                                        MO_64 | MO_ALIGN | s->be_data);
2516             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2517         } else {
2518             TCGv_i128 t16 = tcg_temp_new_i128();
2519             TCGv_i128 c16 = tcg_temp_new_i128();
2520             TCGv_i64 a, b;
2521 
2522             if (s->be_data == MO_LE) {
2523                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2524                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2525                                         cpu_exclusive_high);
2526             } else {
2527                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2528                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2529                                         cpu_exclusive_val);
2530             }
2531 
2532             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
2533                                         get_mem_index(s),
2534                                         MO_128 | MO_ALIGN | s->be_data);
2535 
2536             a = tcg_temp_new_i64();
2537             b = tcg_temp_new_i64();
2538             if (s->be_data == MO_LE) {
2539                 tcg_gen_extr_i128_i64(a, b, t16);
2540             } else {
2541                 tcg_gen_extr_i128_i64(b, a, t16);
2542             }
2543 
2544             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
2545             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
2546             tcg_gen_or_i64(tmp, a, b);
2547 
2548             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
2549         }
2550     } else {
2551         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2552                                    cpu_reg(s, rt), get_mem_index(s),
2553                                    size | MO_ALIGN | s->be_data);
2554         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2555     }
2556     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2557     tcg_gen_br(done_label);
2558 
2559     gen_set_label(fail_label);
2560     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2561     gen_set_label(done_label);
2562     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2563 }
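
/*
 * For reference (informational), the guest sequence these two helpers
 * model is the usual exclusive retry loop:
 *
 *     retry:
 *         ldxr  x0, [x2]        // gen_load_exclusive records addr/value
 *         add   x0, x0, #1
 *         stxr  w1, x0, [x2]    // gen_store_exclusive does the cmpxchg
 *         cbnz  w1, retry       // w1 == 1 means the store failed
 */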
2564 
2565 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2566                                  int rn, int size)
2567 {
2568     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2569     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2570     int memidx = get_mem_index(s);
2571     TCGv_i64 clean_addr;
2572 
2573     if (rn == 31) {
2574         gen_check_sp_alignment(s);
2575     }
2576     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size);
2577     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
2578                                size | MO_ALIGN | s->be_data);
2579 }
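
/*
 * Informational: for "cas x0, x1, [x2]" this emits one cmpxchg
 * operation: x0 supplies the comparison value and receives the old
 * memory contents, while x1 supplies the value stored on a match.
 */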
2580 
2581 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2582                                       int rn, int size)
2583 {
2584     TCGv_i64 s1 = cpu_reg(s, rs);
2585     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2586     TCGv_i64 t1 = cpu_reg(s, rt);
2587     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2588     TCGv_i64 clean_addr;
2589     int memidx = get_mem_index(s);
2590 
2591     if (rn == 31) {
2592         gen_check_sp_alignment(s);
2593     }
2594 
2595     /* This is a single atomic access, despite the "pair". */
2596     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1);
2597 
2598     if (size == 2) {
2599         TCGv_i64 cmp = tcg_temp_new_i64();
2600         TCGv_i64 val = tcg_temp_new_i64();
2601 
2602         if (s->be_data == MO_LE) {
2603             tcg_gen_concat32_i64(val, t1, t2);
2604             tcg_gen_concat32_i64(cmp, s1, s2);
2605         } else {
2606             tcg_gen_concat32_i64(val, t2, t1);
2607             tcg_gen_concat32_i64(cmp, s2, s1);
2608         }
2609 
2610         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
2611                                    MO_64 | MO_ALIGN | s->be_data);
2612 
2613         if (s->be_data == MO_LE) {
2614             tcg_gen_extr32_i64(s1, s2, cmp);
2615         } else {
2616             tcg_gen_extr32_i64(s2, s1, cmp);
2617         }
2618     } else {
2619         TCGv_i128 cmp = tcg_temp_new_i128();
2620         TCGv_i128 val = tcg_temp_new_i128();
2621 
2622         if (s->be_data == MO_LE) {
2623             tcg_gen_concat_i64_i128(val, t1, t2);
2624             tcg_gen_concat_i64_i128(cmp, s1, s2);
2625         } else {
2626             tcg_gen_concat_i64_i128(val, t2, t1);
2627             tcg_gen_concat_i64_i128(cmp, s2, s1);
2628         }
2629 
2630         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx,
2631                                     MO_128 | MO_ALIGN | s->be_data);
2632 
2633         if (s->be_data == MO_LE) {
2634             tcg_gen_extr_i128_i64(s1, s2, cmp);
2635         } else {
2636             tcg_gen_extr_i128_i64(s2, s1, cmp);
2637         }
2638     }
2639 }
2640 
2641 /* Compute the Sixty-Four bit (SF) register size. This logic is derived
2642  * from the ARMv8 specs for LDR (Shared decode for all encodings).
2643  */
2644 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
2645 {
2646     int opc0 = extract32(opc, 0, 1);
2647     int regsize;
2648 
2649     if (is_signed) {
2650         regsize = opc0 ? 32 : 64;
2651     } else {
2652         regsize = size == 3 ? 64 : 32;
2653     }
2654     return regsize == 64;
2655 }
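
/*
 * Example (informational): LDRSW has opc == 0b10, so is_signed is true
 * and opc0 == 0, giving regsize == 64 and hence ISS.SF == 1.
 */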
2656 
2657 /* Load/store exclusive
2658  *
2659  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2660  * +-----+-------------+----+---+----+------+----+-------+------+------+
2661  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2662  * +-----+-------------+----+---+----+------+----+-------+------+------+
2663  *
2664  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2665  *   L: 0 -> store, 1 -> load
2666  *  o2: 0 -> exclusive, 1 -> not
2667  *  o1: 0 -> single register, 1 -> register pair
2668  *  o0: 1 -> load-acquire/store-release, 0 -> not
2669  */
2670 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2671 {
2672     int rt = extract32(insn, 0, 5);
2673     int rn = extract32(insn, 5, 5);
2674     int rt2 = extract32(insn, 10, 5);
2675     int rs = extract32(insn, 16, 5);
2676     int is_lasr = extract32(insn, 15, 1);
2677     int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2678     int size = extract32(insn, 30, 2);
2679     TCGv_i64 clean_addr;
2680 
2681     switch (o2_L_o1_o0) {
2682     case 0x0: /* STXR */
2683     case 0x1: /* STLXR */
2684         if (rn == 31) {
2685             gen_check_sp_alignment(s);
2686         }
2687         if (is_lasr) {
2688             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2689         }
2690         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2691                                     true, rn != 31, size);
2692         gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
2693         return;
2694 
2695     case 0x4: /* LDXR */
2696     case 0x5: /* LDAXR */
2697         if (rn == 31) {
2698             gen_check_sp_alignment(s);
2699         }
2700         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2701                                     false, rn != 31, size);
2702         s->is_ldex = true;
2703         gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
2704         if (is_lasr) {
2705             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2706         }
2707         return;
2708 
2709     case 0x8: /* STLLR */
2710         if (!dc_isar_feature(aa64_lor, s)) {
2711             break;
2712         }
2713         /* StoreLORelease is the same as Store-Release for QEMU.  */
2714         /* fall through */
2715     case 0x9: /* STLR */
2716         /* Generate ISS for non-exclusive accesses including LASR.  */
2717         if (rn == 31) {
2718             gen_check_sp_alignment(s);
2719         }
2720         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2721         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2722                                     true, rn != 31, size);
2723         /* TODO: ARMv8.4-LSE SCTLR.nAA */
2724         do_gpr_st(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, true, rt,
2725                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2726         return;
2727 
2728     case 0xc: /* LDLAR */
2729         if (!dc_isar_feature(aa64_lor, s)) {
2730             break;
2731         }
2732         /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2733         /* fall through */
2734     case 0xd: /* LDAR */
2735         /* Generate ISS for non-exclusive accesses including LASR.  */
2736         if (rn == 31) {
2737             gen_check_sp_alignment(s);
2738         }
2739         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2740                                     false, rn != 31, size);
2741         /* TODO: ARMv8.4-LSE SCTLR.nAA */
2742         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, false, true,
2743                   rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2744         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2745         return;
2746 
2747     case 0x2: case 0x3: /* CASP / STXP */
2748         if (size & 2) { /* STXP / STLXP */
2749             if (rn == 31) {
2750                 gen_check_sp_alignment(s);
2751             }
2752             if (is_lasr) {
2753                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2754             }
2755             clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2756                                         true, rn != 31, size);
2757             gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
2758             return;
2759         }
2760         if (rt2 == 31
2761             && ((rt | rs) & 1) == 0
2762             && dc_isar_feature(aa64_atomics, s)) {
2763             /* CASP / CASPL */
2764             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2765             return;
2766         }
2767         break;
2768 
2769     case 0x6: case 0x7: /* CASPA / LDXP */
2770         if (size & 2) { /* LDXP / LDAXP */
2771             if (rn == 31) {
2772                 gen_check_sp_alignment(s);
2773             }
2774             clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2775                                         false, rn != 31, size);
2776             s->is_ldex = true;
2777             gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
2778             if (is_lasr) {
2779                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2780             }
2781             return;
2782         }
2783         if (rt2 == 31
2784             && ((rt | rs) & 1) == 0
2785             && dc_isar_feature(aa64_atomics, s)) {
2786             /* CASPA / CASPAL */
2787             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2788             return;
2789         }
2790         break;
2791 
2792     case 0xa: /* CAS */
2793     case 0xb: /* CASL */
2794     case 0xe: /* CASA */
2795     case 0xf: /* CASAL */
2796         if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
2797             gen_compare_and_swap(s, rs, rt, rn, size);
2798             return;
2799         }
2800         break;
2801     }
2802     unallocated_encoding(s);
2803 }
2804 
2805 /*
2806  * Load register (literal)
2807  *
2808  *  31 30 29   27  26 25 24 23                5 4     0
2809  * +-----+-------+---+-----+-------------------+-------+
2810  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2811  * +-----+-------+---+-----+-------------------+-------+
2812  *
2813  * V: 1 -> vector (simd/fp)
2814  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2815  *                   10-> 32 bit signed, 11 -> prefetch
2816  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2817  */
2818 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2819 {
2820     int rt = extract32(insn, 0, 5);
2821     int64_t imm = sextract32(insn, 5, 19) << 2;
2822     bool is_vector = extract32(insn, 26, 1);
2823     int opc = extract32(insn, 30, 2);
2824     bool is_signed = false;
2825     int size = 2;
2826     TCGv_i64 tcg_rt, clean_addr;
2827 
2828     if (is_vector) {
2829         if (opc == 3) {
2830             unallocated_encoding(s);
2831             return;
2832         }
2833         size = 2 + opc;
2834         if (!fp_access_check(s)) {
2835             return;
2836         }
2837     } else {
2838         if (opc == 3) {
2839             /* PRFM (literal) : prefetch */
2840             return;
2841         }
2842         size = 2 + extract32(opc, 0, 1);
2843         is_signed = extract32(opc, 1, 1);
2844     }
2845 
2846     tcg_rt = cpu_reg(s, rt);
2847 
2848     clean_addr = tcg_temp_new_i64();
2849     gen_pc_plus_diff(s, clean_addr, imm);
2850     if (is_vector) {
2851         do_fp_ld(s, rt, clean_addr, size);
2852     } else {
2853         /* Only unsigned 32bit loads target 32bit registers.  */
2854         bool iss_sf = opc != 0;
2855 
2856         do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
2857                   false, true, rt, iss_sf, false);
2858     }
2859 }
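
/*
 * Informational: the literal offset is imm19 scaled by 4, so e.g.
 * "ldr x0, lbl" with lbl 8 bytes ahead (imm19 == 2, opc == 01)
 * performs a 64-bit load from PC + 8.
 */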
2860 
2861 /*
2862  * LDNP (Load Pair - non-temporal hint)
2863  * LDP (Load Pair - non vector)
2864  * LDPSW (Load Pair Signed Word - non vector)
2865  * STNP (Store Pair - non-temporal hint)
2866  * STP (Store Pair - non vector)
2867  * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2868  * LDP (Load Pair of SIMD&FP)
2869  * STNP (Store Pair of SIMD&FP - non-temporal hint)
2870  * STP (Store Pair of SIMD&FP)
2871  *
2872  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2873  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2874  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2875  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2876  *
2877  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2878  *      LDPSW/STGP               01
2879  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2880  *   V: 0 -> GPR, 1 -> Vector
2881  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2882  *      10 -> signed offset, 11 -> pre-index
2883  *   L: 0 -> Store 1 -> Load
2884  *
2885  * Rt, Rt2 = GPR or SIMD registers to be stored
2886  * Rn = general purpose register containing address
2887  * imm7 = signed offset (multiple of 4 or 8 depending on size)
2888  */
2889 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2890 {
2891     int rt = extract32(insn, 0, 5);
2892     int rn = extract32(insn, 5, 5);
2893     int rt2 = extract32(insn, 10, 5);
2894     uint64_t offset = sextract64(insn, 15, 7);
2895     int index = extract32(insn, 23, 2);
2896     bool is_vector = extract32(insn, 26, 1);
2897     bool is_load = extract32(insn, 22, 1);
2898     int opc = extract32(insn, 30, 2);
2899 
2900     bool is_signed = false;
2901     bool postindex = false;
2902     bool wback = false;
2903     bool set_tag = false;
2904 
2905     TCGv_i64 clean_addr, dirty_addr;
2906 
2907     int size;
2908 
2909     if (opc == 3) {
2910         unallocated_encoding(s);
2911         return;
2912     }
2913 
2914     if (is_vector) {
2915         size = 2 + opc;
2916     } else if (opc == 1 && !is_load) {
2917         /* STGP */
2918         if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) {
2919             unallocated_encoding(s);
2920             return;
2921         }
2922         size = 3;
2923         set_tag = true;
2924     } else {
2925         size = 2 + extract32(opc, 1, 1);
2926         is_signed = extract32(opc, 0, 1);
2927         if (!is_load && is_signed) {
2928             unallocated_encoding(s);
2929             return;
2930         }
2931     }
2932 
2933     switch (index) {
2934     case 1: /* post-index */
2935         postindex = true;
2936         wback = true;
2937         break;
2938     case 0:
2939         /* signed offset with "non-temporal" hint. Since we don't emulate
2940          * caches we don't care about hints to the cache system about
2941          * data access patterns, and handle this identically to plain
2942          * signed offset.
2943          */
2944         if (is_signed) {
2945             /* There is no non-temporal-hint version of LDPSW */
2946             unallocated_encoding(s);
2947             return;
2948         }
2949         postindex = false;
2950         break;
2951     case 2: /* signed offset, rn not updated */
2952         postindex = false;
2953         break;
2954     case 3: /* pre-index */
2955         postindex = false;
2956         wback = true;
2957         break;
2958     }
2959 
2960     if (is_vector && !fp_access_check(s)) {
2961         return;
2962     }
2963 
2964     offset <<= (set_tag ? LOG2_TAG_GRANULE : size);
2965 
2966     if (rn == 31) {
2967         gen_check_sp_alignment(s);
2968     }
2969 
2970     dirty_addr = read_cpu_reg_sp(s, rn, 1);
2971     if (!postindex) {
2972         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2973     }
2974 
2975     if (set_tag) {
2976         if (!s->ata) {
2977             /*
2978              * TODO: We could rely on the stores below, at least for
2979              * system mode, if we arrange to add MO_ALIGN_16.
2980              */
2981             gen_helper_stg_stub(cpu_env, dirty_addr);
2982         } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2983             gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
2984         } else {
2985             gen_helper_stg(cpu_env, dirty_addr, dirty_addr);
2986         }
2987     }
2988 
2989     clean_addr = gen_mte_checkN(s, dirty_addr, !is_load,
2990                                 (wback || rn != 31) && !set_tag, 2 << size);
2991 
2992     if (is_vector) {
2993         if (is_load) {
2994             do_fp_ld(s, rt, clean_addr, size);
2995         } else {
2996             do_fp_st(s, rt, clean_addr, size);
2997         }
2998         tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2999         if (is_load) {
3000             do_fp_ld(s, rt2, clean_addr, size);
3001         } else {
3002             do_fp_st(s, rt2, clean_addr, size);
3003         }
3004     } else {
3005         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3006         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
3007 
3008         if (is_load) {
3009             TCGv_i64 tmp = tcg_temp_new_i64();
3010 
3011             /* Do not modify tcg_rt before recognizing any exception
3012              * from the second load.
3013              */
3014             do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN,
3015                       false, false, 0, false, false);
3016             tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
3017             do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN,
3018                       false, false, 0, false, false);
3019 
3020             tcg_gen_mov_i64(tcg_rt, tmp);
3021         } else {
3022             do_gpr_st(s, tcg_rt, clean_addr, size,
3023                       false, 0, false, false);
3024             tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
3025             do_gpr_st(s, tcg_rt2, clean_addr, size,
3026                       false, 0, false, false);
3027         }
3028     }
3029 
3030     if (wback) {
3031         if (postindex) {
3032             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3033         }
3034         tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3035     }
3036 }
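
/*
 * Worked example (informational): "ldp x0, x1, [sp], #16" is
 * post-indexed (index == 1) with imm7 == 2 scaled by 8; both loads use
 * the original SP and the writeback adds 16 afterwards.
 */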
3037 
3038 /*
3039  * Load/store (immediate post-indexed)
3040  * Load/store (immediate pre-indexed)
3041  * Load/store (unscaled immediate)
3042  *
3043  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
3044  * +----+-------+---+-----+-----+---+--------+-----+------+------+
3045  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
3046  * +----+-------+---+-----+-----+---+--------+-----+------+------+
3047  *
3048  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
3049  *       10 -> unprivileged
3050  * V = 0 -> non-vector
3051  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
3052  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3053  */
3054 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
3055                                 int opc,
3056                                 int size,
3057                                 int rt,
3058                                 bool is_vector)
3059 {
3060     int rn = extract32(insn, 5, 5);
3061     int imm9 = sextract32(insn, 12, 9);
3062     int idx = extract32(insn, 10, 2);
3063     bool is_signed = false;
3064     bool is_store = false;
3065     bool is_extended = false;
3066     bool is_unpriv = (idx == 2);
3067     bool iss_valid;
3068     bool post_index;
3069     bool writeback;
3070     int memidx;
3071 
3072     TCGv_i64 clean_addr, dirty_addr;
3073 
3074     if (is_vector) {
3075         size |= (opc & 2) << 1;
3076         if (size > 4 || is_unpriv) {
3077             unallocated_encoding(s);
3078             return;
3079         }
3080         is_store = ((opc & 1) == 0);
3081         if (!fp_access_check(s)) {
3082             return;
3083         }
3084     } else {
3085         if (size == 3 && opc == 2) {
3086             /* PRFM - prefetch */
3087             if (idx != 0) {
3088                 unallocated_encoding(s);
3089                 return;
3090             }
3091             return;
3092         }
3093         if (opc == 3 && size > 1) {
3094             unallocated_encoding(s);
3095             return;
3096         }
3097         is_store = (opc == 0);
3098         is_signed = extract32(opc, 1, 1);
3099         is_extended = (size < 3) && extract32(opc, 0, 1);
3100     }
3101 
3102     switch (idx) {
3103     case 0:
3104     case 2:
3105         post_index = false;
3106         writeback = false;
3107         break;
3108     case 1:
3109         post_index = true;
3110         writeback = true;
3111         break;
3112     case 3:
3113         post_index = false;
3114         writeback = true;
3115         break;
3116     default:
3117         g_assert_not_reached();
3118     }
3119 
3120     iss_valid = !is_vector && !writeback;
3121 
3122     if (rn == 31) {
3123         gen_check_sp_alignment(s);
3124     }
3125 
3126     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3127     if (!post_index) {
3128         tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3129     }
3130 
3131     memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
3132     clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store,
3133                                        writeback || rn != 31,
3134                                        size, is_unpriv, memidx);
3135 
3136     if (is_vector) {
3137         if (is_store) {
3138             do_fp_st(s, rt, clean_addr, size);
3139         } else {
3140             do_fp_ld(s, rt, clean_addr, size);
3141         }
3142     } else {
3143         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3144         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3145 
3146         if (is_store) {
3147             do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
3148                              iss_valid, rt, iss_sf, false);
3149         } else {
3150             do_gpr_ld_memidx(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
3151                              is_extended, memidx,
3152                              iss_valid, rt, iss_sf, false);
3153         }
3154     }
3155 
3156     if (writeback) {
3157         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3158         if (post_index) {
3159             tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3160         }
3161         tcg_gen_mov_i64(tcg_rn, dirty_addr);
3162     }
3163 }
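
/*
 * Illustrative decode (not referenced by the code above):
 * LDR x0, [x1, #-8]! has size = 3, opc = 01, idx = 11 (pre-indexed),
 * imm9 = -8, so post_index = false and writeback = true: dirty_addr
 * is advanced by imm9 before the access and copied back to X1 after
 * it.  The unprivileged form LDTR x0, [x1] instead has idx = 10,
 * which selects get_a64_user_mem_index() and has no writeback.
 */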
3164 
3165 /*
3166  * Load/store (register offset)
3167  *
3168  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
3169  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3170  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
3171  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3172  *
3173  * For non-vector:
3174  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3175  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3176  * For vector:
3177  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3178  *   opc<0>: 0 -> store, 1 -> load
3179  * V: 1 -> vector/simd
3180  * opt: extend encoding (see DecodeRegExtend)
3181  * S: if S=1 then scale (essentially index by sizeof(size))
3182  * Rt: register to transfer into/out of
3183  * Rn: address register or SP for base
3184  * Rm: offset register or ZR for offset
3185  */
3186 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
3187                                    int opc,
3188                                    int size,
3189                                    int rt,
3190                                    bool is_vector)
3191 {
3192     int rn = extract32(insn, 5, 5);
3193     int shift = extract32(insn, 12, 1);
3194     int rm = extract32(insn, 16, 5);
3195     int opt = extract32(insn, 13, 3);
3196     bool is_signed = false;
3197     bool is_store = false;
3198     bool is_extended = false;
3199 
3200     TCGv_i64 tcg_rm, clean_addr, dirty_addr;
3201 
3202     if (extract32(opt, 1, 1) == 0) {
3203         unallocated_encoding(s);
3204         return;
3205     }
3206 
3207     if (is_vector) {
3208         size |= (opc & 2) << 1;
3209         if (size > 4) {
3210             unallocated_encoding(s);
3211             return;
3212         }
3213         is_store = !extract32(opc, 0, 1);
3214         if (!fp_access_check(s)) {
3215             return;
3216         }
3217     } else {
3218         if (size == 3 && opc == 2) {
3219             /* PRFM - prefetch */
3220             return;
3221         }
3222         if (opc == 3 && size > 1) {
3223             unallocated_encoding(s);
3224             return;
3225         }
3226         is_store = (opc == 0);
3227         is_signed = extract32(opc, 1, 1);
3228         is_extended = (size < 3) && extract32(opc, 0, 1);
3229     }
3230 
3231     if (rn == 31) {
3232         gen_check_sp_alignment(s);
3233     }
3234     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3235 
3236     tcg_rm = read_cpu_reg(s, rm, 1);
3237     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
3238 
3239     tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
3240     clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size);
3241 
3242     if (is_vector) {
3243         if (is_store) {
3244             do_fp_st(s, rt, clean_addr, size);
3245         } else {
3246             do_fp_ld(s, rt, clean_addr, size);
3247         }
3248     } else {
3249         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3250         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3251         if (is_store) {
3252             do_gpr_st(s, tcg_rt, clean_addr, size,
3253                       true, rt, iss_sf, false);
3254         } else {
3255             do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
3256                       is_extended, true, rt, iss_sf, false);
3257         }
3258     }
3259 }
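
/*
 * Illustrative decode: LDR w0, [x1, w2, UXTW #2] has size = 2,
 * opt = 010 (UXTW) and S = 1, so ext_and_shift_reg() zero-extends
 * W2 to 64 bits and shifts it left by size, i.e. the offset added
 * to X1 is W2 scaled by the 4-byte access size.
 */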
3260 
3261 /*
3262  * Load/store (unsigned immediate)
3263  *
3264  * 31 30 29   27  26 25 24 23 22 21        10 9     5
3265  * +----+-------+---+-----+-----+------------+-------+------+
3266  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
3267  * +----+-------+---+-----+-----+------------+-------+------+
3268  *
3269  * For non-vector:
3270  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3271  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3272  * For vector:
3273  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3274  *   opc<0>: 0 -> store, 1 -> load
3275  * Rn: base address register (inc SP)
3276  * Rt: target register
3277  */
3278 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
3279                                         int opc,
3280                                         int size,
3281                                         int rt,
3282                                         bool is_vector)
3283 {
3284     int rn = extract32(insn, 5, 5);
3285     unsigned int imm12 = extract32(insn, 10, 12);
3286     unsigned int offset;
3287 
3288     TCGv_i64 clean_addr, dirty_addr;
3289 
3290     bool is_store;
3291     bool is_signed = false;
3292     bool is_extended = false;
3293 
3294     if (is_vector) {
3295         size |= (opc & 2) << 1;
3296         if (size > 4) {
3297             unallocated_encoding(s);
3298             return;
3299         }
3300         is_store = !extract32(opc, 0, 1);
3301         if (!fp_access_check(s)) {
3302             return;
3303         }
3304     } else {
3305         if (size == 3 && opc == 2) {
3306             /* PRFM - prefetch */
3307             return;
3308         }
3309         if (opc == 3 && size > 1) {
3310             unallocated_encoding(s);
3311             return;
3312         }
3313         is_store = (opc == 0);
3314         is_signed = extract32(opc, 1, 1);
3315         is_extended = (size < 3) && extract32(opc, 0, 1);
3316     }
3317 
3318     if (rn == 31) {
3319         gen_check_sp_alignment(s);
3320     }
3321     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3322     offset = imm12 << size;
3323     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3324     clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size);
3325 
3326     if (is_vector) {
3327         if (is_store) {
3328             do_fp_st(s, rt, clean_addr, size);
3329         } else {
3330             do_fp_ld(s, rt, clean_addr, size);
3331         }
3332     } else {
3333         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3334         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3335         if (is_store) {
3336             do_gpr_st(s, tcg_rt, clean_addr, size,
3337                       true, rt, iss_sf, false);
3338         } else {
3339             do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
3340                       is_extended, true, rt, iss_sf, false);
3341         }
3342     }
3343 }
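
/*
 * Worked example of the scaling above: LDR x0, [x1, #16] encodes
 * imm12 = 2 with size = 3, giving offset = 2 << 3 = 16.  This form
 * reaches only non-negative, size-aligned offsets; assemblers use
 * the unscaled imm9 encodings (LDUR/STUR) for anything else.
 */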
3344 
3345 /* Atomic memory operations
3346  *
3347  *  31  30      27  26    24    22  21   16   15    12    10    5     0
3348  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3349  * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn |  Rt |
3350  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3351  *
3352  * Rt: the result register
3353  * Rn: base address or SP
3354  * Rs: the source register for the operation
3355  * V: vector flag (always 0 as of v8.3)
3356  * A: acquire flag
3357  * R: release flag
3358  */
3359 static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
3360                               int size, int rt, bool is_vector)
3361 {
3362     int rs = extract32(insn, 16, 5);
3363     int rn = extract32(insn, 5, 5);
3364     int o3_opc = extract32(insn, 12, 4);
3365     bool r = extract32(insn, 22, 1);
3366     bool a = extract32(insn, 23, 1);
3367     TCGv_i64 tcg_rs, tcg_rt, clean_addr;
3368     AtomicThreeOpFn *fn = NULL;
3369     MemOp mop = s->be_data | size | MO_ALIGN;
3370 
3371     if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
3372         unallocated_encoding(s);
3373         return;
3374     }
3375     switch (o3_opc) {
3376     case 000: /* LDADD */
3377         fn = tcg_gen_atomic_fetch_add_i64;
3378         break;
3379     case 001: /* LDCLR */
3380         fn = tcg_gen_atomic_fetch_and_i64;
3381         break;
3382     case 002: /* LDEOR */
3383         fn = tcg_gen_atomic_fetch_xor_i64;
3384         break;
3385     case 003: /* LDSET */
3386         fn = tcg_gen_atomic_fetch_or_i64;
3387         break;
3388     case 004: /* LDSMAX */
3389         fn = tcg_gen_atomic_fetch_smax_i64;
3390         mop |= MO_SIGN;
3391         break;
3392     case 005: /* LDSMIN */
3393         fn = tcg_gen_atomic_fetch_smin_i64;
3394         mop |= MO_SIGN;
3395         break;
3396     case 006: /* LDUMAX */
3397         fn = tcg_gen_atomic_fetch_umax_i64;
3398         break;
3399     case 007: /* LDUMIN */
3400         fn = tcg_gen_atomic_fetch_umin_i64;
3401         break;
3402     case 010: /* SWP */
3403         fn = tcg_gen_atomic_xchg_i64;
3404         break;
3405     case 014: /* LDAPR, LDAPRH, LDAPRB */
3406         if (!dc_isar_feature(aa64_rcpc_8_3, s) ||
3407             rs != 31 || a != 1 || r != 0) {
3408             unallocated_encoding(s);
3409             return;
3410         }
3411         break;
3412     default:
3413         unallocated_encoding(s);
3414         return;
3415     }
3416 
3417     if (rn == 31) {
3418         gen_check_sp_alignment(s);
3419     }
3420     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size);
3421 
3422     if (o3_opc == 014) {
3423         /*
3424          * LDAPR* are a special case because they are a simple load, not a
3425          * fetch-and-do-something op.
3426          * The architectural consistency requirements here are weaker than
3427          * full load-acquire (we only need "load-acquire processor consistent"),
3428          * but we choose to implement them as full LDAQ.
3429          */
3430         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false,
3431                   true, rt, disas_ldst_compute_iss_sf(size, false, 0), true);
3432         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3433         return;
3434     }
3435 
3436     tcg_rs = read_cpu_reg(s, rs, true);
3437     tcg_rt = cpu_reg(s, rt);
3438 
3439     if (o3_opc == 1) { /* LDCLR */
3440         tcg_gen_not_i64(tcg_rs, tcg_rs);
3441     }
3442 
3443     /* The tcg atomic primitives are all full barriers.  Therefore we
3444      * can ignore the Acquire and Release bits of this instruction.
3445      */
3446     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3447 
3448     if ((mop & MO_SIGN) && size != MO_64) {
3449         tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3450     }
3451 }
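
/*
 * Concrete expansion (illustration only): LDCLR w2, w0, [x1] is an
 * atomic bit-clear, architecturally
 *     old = *x1;  *x1 = old & ~w2;  w0 = old;
 * TCG has no fetch-and-andc primitive, which is why the code above
 * complements Rs first and then uses tcg_gen_atomic_fetch_and_i64.
 */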
3452 
3453 /*
3454  * PAC memory operations
3455  *
3456  *  31  30      27  26    24    22  21       12  11  10    5     0
3457  * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3458  * | size | 1 1 1 | V | 0 0 | M S | 1 |  imm9  | W | 1 | Rn |  Rt |
3459  * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3460  *
3461  * Rt: the result register
3462  * Rn: base address or SP
3463  * V: vector flag (always 0 as of v8.3)
3464  * M: clear for key DA, set for key DB
3465  * W: pre-indexing flag
3466  * S: sign for imm9.
3467  */
3468 static void disas_ldst_pac(DisasContext *s, uint32_t insn,
3469                            int size, int rt, bool is_vector)
3470 {
3471     int rn = extract32(insn, 5, 5);
3472     bool is_wback = extract32(insn, 11, 1);
3473     bool use_key_a = !extract32(insn, 23, 1);
3474     int offset;
3475     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3476 
3477     if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
3478         unallocated_encoding(s);
3479         return;
3480     }
3481 
3482     if (rn == 31) {
3483         gen_check_sp_alignment(s);
3484     }
3485     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3486 
3487     if (s->pauth_active) {
3488         if (use_key_a) {
3489             gen_helper_autda(dirty_addr, cpu_env, dirty_addr,
3490                              tcg_constant_i64(0));
3491         } else {
3492             gen_helper_autdb(dirty_addr, cpu_env, dirty_addr,
3493                              tcg_constant_i64(0));
3494         }
3495     }
3496 
3497     /* Form the 10-bit signed, scaled offset.  */
3498     offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
3499     offset = sextract32(offset << size, 0, 10 + size);
3500     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3501 
3502     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3503     clean_addr = gen_mte_check1(s, dirty_addr, false,
3504                                 is_wback || rn != 31, size);
3505 
3506     tcg_rt = cpu_reg(s, rt);
3507     do_gpr_ld(s, tcg_rt, clean_addr, size,
3508               /* extend */ false, /* iss_valid */ !is_wback,
3509               /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
3510 
3511     if (is_wback) {
3512         tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3513     }
3514 }
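
/*
 * Worked offset example: with size = 3, S = 0 and imm9 = 1 give
 * offset = sextract32(1 << 3, 0, 13) = 8, i.e. LDRAA x0, [x1, #8].
 * The 10-bit S:imm9 field scaled by 8 spans -4096..+4088.
 */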
3515 
3516 /*
3517  * LDAPR/STLR (unscaled immediate)
3518  *
3519  *  31  30            24    22  21       12    10    5     0
3520  * +------+-------------+-----+---+--------+-----+----+-----+
3521  * | size | 0 1 1 0 0 1 | opc | 0 |  imm9  | 0 0 | Rn |  Rt |
3522  * +------+-------------+-----+---+--------+-----+----+-----+
3523  *
3524  * Rt: source or destination register
3525  * Rn: base register
3526  * imm9: unscaled immediate offset
3527  * opc: 00: STLUR*, 01/10/11: various LDAPUR*
3528  * size: size of load/store
3529  */
3530 static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn)
3531 {
3532     int rt = extract32(insn, 0, 5);
3533     int rn = extract32(insn, 5, 5);
3534     int offset = sextract32(insn, 12, 9);
3535     int opc = extract32(insn, 22, 2);
3536     int size = extract32(insn, 30, 2);
3537     TCGv_i64 clean_addr, dirty_addr;
3538     bool is_store = false;
3539     bool extend = false;
3540     bool iss_sf;
3541     MemOp mop;
3542 
3543     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3544         unallocated_encoding(s);
3545         return;
3546     }
3547 
3548     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3549     mop = size | MO_ALIGN;
3550 
3551     switch (opc) {
3552     case 0: /* STLURB */
3553         is_store = true;
3554         break;
3555     case 1: /* LDAPUR* */
3556         break;
3557     case 2: /* LDAPURS* 64-bit variant */
3558         if (size == 3) {
3559             unallocated_encoding(s);
3560             return;
3561         }
3562         mop |= MO_SIGN;
3563         break;
3564     case 3: /* LDAPURS* 32-bit variant */
3565         if (size > 1) {
3566             unallocated_encoding(s);
3567             return;
3568         }
3569         mop |= MO_SIGN;
3570         extend = true; /* zero-extend 32->64 after signed load */
3571         break;
3572     default:
3573         g_assert_not_reached();
3574     }
3575 
3576     iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc);
3577 
3578     if (rn == 31) {
3579         gen_check_sp_alignment(s);
3580     }
3581 
3582     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3583     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3584     clean_addr = clean_data_tbi(s, dirty_addr);
3585 
3586     if (is_store) {
3587         /* Store-Release semantics */
3588         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3589         do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true);
3590     } else {
3591         /*
3592          * Load-AcquirePC semantics; we implement as the slightly more
3593          * restrictive Load-Acquire.
3594          */
3595         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop,
3596                   extend, true, rt, iss_sf, true);
3597         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3598     }
3599 }
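
/*
 * For example, LDAPURSB w0, [x1] (opc = 3, size = 0) takes the
 * mop |= MO_SIGN, extend = true path: the byte is sign-extended
 * and the result then zero-extended from 32 to 64 bits, matching
 * the W-register destination semantics.
 */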
3600 
3601 /* Load/store register (all forms) */
3602 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
3603 {
3604     int rt = extract32(insn, 0, 5);
3605     int opc = extract32(insn, 22, 2);
3606     bool is_vector = extract32(insn, 26, 1);
3607     int size = extract32(insn, 30, 2);
3608 
3609     switch (extract32(insn, 24, 2)) {
3610     case 0:
3611         if (extract32(insn, 21, 1) == 0) {
3612             /* Load/store register (unscaled immediate)
3613              * Load/store immediate pre/post-indexed
3614              * Load/store register unprivileged
3615              */
3616             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
3617             return;
3618         }
3619         switch (extract32(insn, 10, 2)) {
3620         case 0:
3621             disas_ldst_atomic(s, insn, size, rt, is_vector);
3622             return;
3623         case 2:
3624             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
3625             return;
3626         default:
3627             disas_ldst_pac(s, insn, size, rt, is_vector);
3628             return;
3629         }
3630         break;
3631     case 1:
3632         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
3633         return;
3634     }
3635     unallocated_encoding(s);
3636 }
3637 
3638 /* AdvSIMD load/store multiple structures
3639  *
3640  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
3641  * +---+---+---------------+---+-------------+--------+------+------+------+
3642  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
3643  * +---+---+---------------+---+-------------+--------+------+------+------+
3644  *
3645  * AdvSIMD load/store multiple structures (post-indexed)
3646  *
3647  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
3648  * +---+---+---------------+---+---+---------+--------+------+------+------+
3649  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
3650  * +---+---+---------------+---+---+---------+--------+------+------+------+
3651  *
3652  * Rt: first (or only) SIMD&FP register to be transferred
3653  * Rn: base address or SP
3654  * Rm (post-index only): post-index register (when !31) or size dependent #imm
3655  */
3656 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
3657 {
3658     int rt = extract32(insn, 0, 5);
3659     int rn = extract32(insn, 5, 5);
3660     int rm = extract32(insn, 16, 5);
3661     int size = extract32(insn, 10, 2);
3662     int opcode = extract32(insn, 12, 4);
3663     bool is_store = !extract32(insn, 22, 1);
3664     bool is_postidx = extract32(insn, 23, 1);
3665     bool is_q = extract32(insn, 30, 1);
3666     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3667     MemOp endian, align, mop;
3668 
3669     int total;    /* total bytes */
3670     int elements; /* elements per vector */
3671     int rpt;    /* num iterations */
3672     int selem;  /* structure elements */
3673     int r;
3674 
3675     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
3676         unallocated_encoding(s);
3677         return;
3678     }
3679 
3680     if (!is_postidx && rm != 0) {
3681         unallocated_encoding(s);
3682         return;
3683     }
3684 
3685     /* From the shared decode logic: opcode -> (rpt, selem) */
3686     switch (opcode) {
3687     case 0x0:
3688         rpt = 1;
3689         selem = 4;
3690         break;
3691     case 0x2:
3692         rpt = 4;
3693         selem = 1;
3694         break;
3695     case 0x4:
3696         rpt = 1;
3697         selem = 3;
3698         break;
3699     case 0x6:
3700         rpt = 3;
3701         selem = 1;
3702         break;
3703     case 0x7:
3704         rpt = 1;
3705         selem = 1;
3706         break;
3707     case 0x8:
3708         rpt = 1;
3709         selem = 2;
3710         break;
3711     case 0xa:
3712         rpt = 2;
3713         selem = 1;
3714         break;
3715     default:
3716         unallocated_encoding(s);
3717         return;
3718     }
3719 
3720     if (size == 3 && !is_q && selem != 1) {
3721         /* reserved */
3722         unallocated_encoding(s);
3723         return;
3724     }
3725 
3726     if (!fp_access_check(s)) {
3727         return;
3728     }
3729 
3730     if (rn == 31) {
3731         gen_check_sp_alignment(s);
3732     }
3733 
3734     /* For our purposes, bytes are always little-endian.  */
3735     endian = s->be_data;
3736     if (size == 0) {
3737         endian = MO_LE;
3738     }
3739 
3740     total = rpt * selem * (is_q ? 16 : 8);
3741     tcg_rn = cpu_reg_sp(s, rn);
3742 
3743     /*
3744      * Issue the MTE check vs the logical repeat count, before we
3745      * promote consecutive little-endian elements below.
3746      */
3747     clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31,
3748                                 total);
3749 
3750     /*
3751      * Consecutive little-endian elements from a single register
3752      * can be promoted to a larger little-endian operation.
3753      */
3754     align = MO_ALIGN;
3755     if (selem == 1 && endian == MO_LE) {
3756         align = pow2_align(size);
3757         size = 3;
3758     }
3759     if (!s->align_mem) {
3760         align = 0;
3761     }
3762     mop = endian | size | align;
3763 
3764     elements = (is_q ? 16 : 8) >> size;
3765     tcg_ebytes = tcg_constant_i64(1 << size);
3766     for (r = 0; r < rpt; r++) {
3767         int e;
3768         for (e = 0; e < elements; e++) {
3769             int xs;
3770             for (xs = 0; xs < selem; xs++) {
3771                 int tt = (rt + r + xs) % 32;
3772                 if (is_store) {
3773                     do_vec_st(s, tt, e, clean_addr, mop);
3774                 } else {
3775                     do_vec_ld(s, tt, e, clean_addr, mop);
3776                 }
3777                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3778             }
3779         }
3780     }
3781 
3782     if (!is_store) {
3783         /* For non-quad operations, setting a slice of the low
3784          * 64 bits of the register clears the high 64 bits (in
3785          * the ARM ARM pseudocode this is implicit in the fact
3786          * that 'rval' is a 64 bit wide variable).
3787          * For quad operations, we might still need to zero the
3788          * high bits of SVE.
3789          */
3790         for (r = 0; r < rpt * selem; r++) {
3791             int tt = (rt + r) % 32;
3792             clear_vec_high(s, is_q, tt);
3793         }
3794     }
3795 
3796     if (is_postidx) {
3797         if (rm == 31) {
3798             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3799         } else {
3800             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3801         }
3802     }
3803 }
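
/*
 * Decode example: LD4 {v0.4s-v3.4s}, [x0] has Q = 1, opcode = 0x0 and
 * size = 2, so rpt = 1, selem = 4, elements = 16 >> 2 = 4 and
 * total = 1 * 4 * 16 = 64 bytes; each pass of the xs loop above
 * de-interleaves one 32-bit element into v0..v3.  Conversely, for
 * LD1 of a single register (selem = 1) the little-endian promotion
 * turns e.g. sixteen byte loads into two 64-bit loads.
 */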
3804 
3805 /* AdvSIMD load/store single structure
3806  *
3807  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3808  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3809  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
3810  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3811  *
3812  * AdvSIMD load/store single structure (post-indexed)
3813  *
3814  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3815  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3816  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
3817  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3818  *
3819  * Rt: first (or only) SIMD&FP register to be transferred
3820  * Rn: base address or SP
3821  * Rm (post-index only): post-index register (when !31) or size dependent #imm
3822  * index = encoded in Q:S:size dependent on size
3823  *
3824  * lane_size = encoded in R, opc
3825  * transfer width = encoded in opc, S, size
3826  */
3827 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
3828 {
3829     int rt = extract32(insn, 0, 5);
3830     int rn = extract32(insn, 5, 5);
3831     int rm = extract32(insn, 16, 5);
3832     int size = extract32(insn, 10, 2);
3833     int S = extract32(insn, 12, 1);
3834     int opc = extract32(insn, 13, 3);
3835     int R = extract32(insn, 21, 1);
3836     int is_load = extract32(insn, 22, 1);
3837     int is_postidx = extract32(insn, 23, 1);
3838     int is_q = extract32(insn, 30, 1);
3839 
3840     int scale = extract32(opc, 1, 2);
3841     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
3842     bool replicate = false;
3843     int index = is_q << 3 | S << 2 | size;
3844     int xs, total;
3845     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3846     MemOp mop;
3847 
3848     if (extract32(insn, 31, 1)) {
3849         unallocated_encoding(s);
3850         return;
3851     }
3852     if (!is_postidx && rm != 0) {
3853         unallocated_encoding(s);
3854         return;
3855     }
3856 
3857     switch (scale) {
3858     case 3:
3859         if (!is_load || S) {
3860             unallocated_encoding(s);
3861             return;
3862         }
3863         scale = size;
3864         replicate = true;
3865         break;
3866     case 0:
3867         break;
3868     case 1:
3869         if (extract32(size, 0, 1)) {
3870             unallocated_encoding(s);
3871             return;
3872         }
3873         index >>= 1;
3874         break;
3875     case 2:
3876         if (extract32(size, 1, 1)) {
3877             unallocated_encoding(s);
3878             return;
3879         }
3880         if (!extract32(size, 0, 1)) {
3881             index >>= 2;
3882         } else {
3883             if (S) {
3884                 unallocated_encoding(s);
3885                 return;
3886             }
3887             index >>= 3;
3888             scale = 3;
3889         }
3890         break;
3891     default:
3892         g_assert_not_reached();
3893     }
3894 
3895     if (!fp_access_check(s)) {
3896         return;
3897     }
3898 
3899     if (rn == 31) {
3900         gen_check_sp_alignment(s);
3901     }
3902 
3903     total = selem << scale;
3904     tcg_rn = cpu_reg_sp(s, rn);
3905 
3906     clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31,
3907                                 total);
3908     mop = finalize_memop(s, scale);
3909 
3910     tcg_ebytes = tcg_constant_i64(1 << scale);
3911     for (xs = 0; xs < selem; xs++) {
3912         if (replicate) {
3913             /* Load and replicate to all elements */
3914             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3915 
3916             tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
3917             tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
3918                                  (is_q + 1) * 8, vec_full_reg_size(s),
3919                                  tcg_tmp);
3920         } else {
3921             /* Load/store one element per register */
3922             if (is_load) {
3923                 do_vec_ld(s, rt, index, clean_addr, mop);
3924             } else {
3925                 do_vec_st(s, rt, index, clean_addr, mop);
3926             }
3927         }
3928         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3929         rt = (rt + 1) % 32;
3930     }
3931 
3932     if (is_postidx) {
3933         if (rm == 31) {
3934             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3935         } else {
3936             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3937         }
3938     }
3939 }
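
/*
 * Index decode example: LD1 {v0.s}[3], [x0] encodes opc = 100
 * (scale = 2), S = 1, size = 00 and Q = 1, so index = Q:S:size = 0b1100
 * shifted right by 2, selecting element 3 of the 32-bit lanes.  The
 * replicating forms (LD1R etc., scale field = 3) instead load once
 * and splat via tcg_gen_gvec_dup_i64 above.
 */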
3940 
3941 /*
3942  * Load/Store memory tags
3943  *
3944  *  31 30 29         24     22  21     12    10      5      0
3945  * +-----+-------------+-----+---+------+-----+------+------+
3946  * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 |  Rn  |  Rt  |
3947  * +-----+-------------+-----+---+------+-----+------+------+
3948  */
3949 static void disas_ldst_tag(DisasContext *s, uint32_t insn)
3950 {
3951     int rt = extract32(insn, 0, 5);
3952     int rn = extract32(insn, 5, 5);
3953     uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
3954     int op2 = extract32(insn, 10, 2);
3955     int op1 = extract32(insn, 22, 2);
3956     bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
3957     int index = 0;
3958     TCGv_i64 addr, clean_addr, tcg_rt;
3959 
3960     /* We checked insn bits [29:24,21] in the caller.  */
3961     if (extract32(insn, 30, 2) != 3) {
3962         goto do_unallocated;
3963     }
3964 
3965     /*
3966      * @index is a tri-state variable which has 3 states:
3967      * < 0 : post-index, writeback
3968      * = 0 : signed offset
3969      * > 0 : pre-index, writeback
3970      */
3971     switch (op1) {
3972     case 0:
3973         if (op2 != 0) {
3974             /* STG */
3975             index = op2 - 2;
3976         } else {
3977             /* STZGM */
3978             if (s->current_el == 0 || offset != 0) {
3979                 goto do_unallocated;
3980             }
3981             is_mult = is_zero = true;
3982         }
3983         break;
3984     case 1:
3985         if (op2 != 0) {
3986             /* STZG */
3987             is_zero = true;
3988             index = op2 - 2;
3989         } else {
3990             /* LDG */
3991             is_load = true;
3992         }
3993         break;
3994     case 2:
3995         if (op2 != 0) {
3996             /* ST2G */
3997             is_pair = true;
3998             index = op2 - 2;
3999         } else {
4000             /* STGM */
4001             if (s->current_el == 0 || offset != 0) {
4002                 goto do_unallocated;
4003             }
4004             is_mult = true;
4005         }
4006         break;
4007     case 3:
4008         if (op2 != 0) {
4009             /* STZ2G */
4010             is_pair = is_zero = true;
4011             index = op2 - 2;
4012         } else {
4013             /* LDGM */
4014             if (s->current_el == 0 || offset != 0) {
4015                 goto do_unallocated;
4016             }
4017             is_mult = is_load = true;
4018         }
4019         break;
4020 
4021     default:
4022     do_unallocated:
4023         unallocated_encoding(s);
4024         return;
4025     }
4026 
4027     if (is_mult
4028         ? !dc_isar_feature(aa64_mte, s)
4029         : !dc_isar_feature(aa64_mte_insn_reg, s)) {
4030         goto do_unallocated;
4031     }
4032 
4033     if (rn == 31) {
4034         gen_check_sp_alignment(s);
4035     }
4036 
4037     addr = read_cpu_reg_sp(s, rn, true);
4038     if (index >= 0) {
4039         /* pre-index or signed offset */
4040         tcg_gen_addi_i64(addr, addr, offset);
4041     }
4042 
4043     if (is_mult) {
4044         tcg_rt = cpu_reg(s, rt);
4045 
4046         if (is_zero) {
4047             int size = 4 << s->dcz_blocksize;
4048 
4049             if (s->ata) {
4050                 gen_helper_stzgm_tags(cpu_env, addr, tcg_rt);
4051             }
4052             /*
4053              * The non-tags portion of STZGM is mostly like DC_ZVA,
4054              * except the alignment happens before the access.
4055              */
4056             clean_addr = clean_data_tbi(s, addr);
4057             tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4058             gen_helper_dc_zva(cpu_env, clean_addr);
4059         } else if (s->ata) {
4060             if (is_load) {
4061                 gen_helper_ldgm(tcg_rt, cpu_env, addr);
4062             } else {
4063                 gen_helper_stgm(cpu_env, addr, tcg_rt);
4064             }
4065         } else {
4066             MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
4067             int size = 4 << GMID_EL1_BS;
4068 
4069             clean_addr = clean_data_tbi(s, addr);
4070             tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4071             gen_probe_access(s, clean_addr, acc, size);
4072 
4073             if (is_load) {
4074                 /* The result tags are zeros.  */
4075                 tcg_gen_movi_i64(tcg_rt, 0);
4076             }
4077         }
4078         return;
4079     }
4080 
4081     if (is_load) {
4082         tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4083         tcg_rt = cpu_reg(s, rt);
4084         if (s->ata) {
4085             gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt);
4086         } else {
4087             clean_addr = clean_data_tbi(s, addr);
4088             gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4089             gen_address_with_allocation_tag0(tcg_rt, addr);
4090         }
4091     } else {
4092         tcg_rt = cpu_reg_sp(s, rt);
4093         if (!s->ata) {
4094             /*
4095              * For STG and ST2G, we need to check alignment and probe memory.
4096              * TODO: For STZG and STZ2G, we could rely on the stores below,
4097              * at least for system mode; user-only won't enforce alignment.
4098              */
4099             if (is_pair) {
4100                 gen_helper_st2g_stub(cpu_env, addr);
4101             } else {
4102                 gen_helper_stg_stub(cpu_env, addr);
4103             }
4104         } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4105             if (is_pair) {
4106                 gen_helper_st2g_parallel(cpu_env, addr, tcg_rt);
4107             } else {
4108                 gen_helper_stg_parallel(cpu_env, addr, tcg_rt);
4109             }
4110         } else {
4111             if (is_pair) {
4112                 gen_helper_st2g(cpu_env, addr, tcg_rt);
4113             } else {
4114                 gen_helper_stg(cpu_env, addr, tcg_rt);
4115             }
4116         }
4117     }
4118 
4119     if (is_zero) {
4120         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4121         TCGv_i64 tcg_zero = tcg_constant_i64(0);
4122         int mem_index = get_mem_index(s);
4123         int i, n = (1 + is_pair) << LOG2_TAG_GRANULE;
4124 
4125         tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index,
4126                             MO_UQ | MO_ALIGN_16);
4127         for (i = 8; i < n; i += 8) {
4128             tcg_gen_addi_i64(clean_addr, clean_addr, 8);
4129             tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_UQ);
4130         }
4131     }
4132 
4133     if (index != 0) {
4134         /* pre-index or post-index */
4135         if (index < 0) {
4136             /* post-index */
4137             tcg_gen_addi_i64(addr, addr, offset);
4138         }
4139         tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr);
4140     }
4141 }
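
/*
 * Offset example: STG x0, [x1, #32]! encodes imm9 = 2 (scaled by the
 * 16-byte TAG_GRANULE) with op2 = 3, hence index = 1 (pre-index):
 * the allocation tag held in x0 is stored for the granule at
 * x1 + 32 and the updated address is written back to x1.
 */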
4142 
4143 /* Loads and stores */
4144 static void disas_ldst(DisasContext *s, uint32_t insn)
4145 {
4146     switch (extract32(insn, 24, 6)) {
4147     case 0x08: /* Load/store exclusive */
4148         disas_ldst_excl(s, insn);
4149         break;
4150     case 0x18: case 0x1c: /* Load register (literal) */
4151         disas_ld_lit(s, insn);
4152         break;
4153     case 0x28: case 0x29:
4154     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
4155         disas_ldst_pair(s, insn);
4156         break;
4157     case 0x38: case 0x39:
4158     case 0x3c: case 0x3d: /* Load/store register (all forms) */
4159         disas_ldst_reg(s, insn);
4160         break;
4161     case 0x0c: /* AdvSIMD load/store multiple structures */
4162         disas_ldst_multiple_struct(s, insn);
4163         break;
4164     case 0x0d: /* AdvSIMD load/store single structure */
4165         disas_ldst_single_struct(s, insn);
4166         break;
4167     case 0x19:
4168         if (extract32(insn, 21, 1) != 0) {
4169             disas_ldst_tag(s, insn);
4170         } else if (extract32(insn, 10, 2) == 0) {
4171             disas_ldst_ldapr_stlr(s, insn);
4172         } else {
4173             unallocated_encoding(s);
4174         }
4175         break;
4176     default:
4177         unallocated_encoding(s);
4178         break;
4179     }
4180 }
4181 
4182 /* PC-rel. addressing
4183  *   31  30   29 28       24 23                5 4    0
4184  * +----+-------+-----------+-------------------+------+
4185  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
4186  * +----+-------+-----------+-------------------+------+
4187  */
4188 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
4189 {
4190     unsigned int page, rd;
4191     int64_t offset;
4192 
4193     page = extract32(insn, 31, 1);
4194     /* SignExtend(immhi:immlo) -> offset */
4195     offset = sextract64(insn, 5, 19);
4196     offset = offset << 2 | extract32(insn, 29, 2);
4197     rd = extract32(insn, 0, 5);
4198 
4199     if (page) {
4200         /* ADRP (page based) */
4201         offset <<= 12;
4202         /* The page offset is ok for CF_PCREL. */
4203         offset -= s->pc_curr & 0xfff;
4204     }
4205 
4206     gen_pc_plus_diff(s, cpu_reg(s, rd), offset);
4207 }
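
/*
 * Numeric example: at pc_curr = 0x40001234, ADRP with immhi:immlo = 1
 * yields offset = (1 << 12) - 0x234 = 0xdcc, so gen_pc_plus_diff()
 * produces 0x40001234 + 0xdcc = 0x40002000: the target page base,
 * with the low 12 bits clear as the architecture requires.
 */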
4208 
4209 /*
4210  * Add/subtract (immediate)
4211  *
4212  *  31 30 29 28         23 22 21         10 9   5 4   0
4213  * +--+--+--+-------------+--+-------------+-----+-----+
4214  * |sf|op| S| 1 0 0 0 1 0 |sh|    imm12    |  Rn | Rd  |
4215  * +--+--+--+-------------+--+-------------+-----+-----+
4216  *
4217  *    sf: 0 -> 32bit, 1 -> 64bit
4218  *    op: 0 -> add  , 1 -> sub
4219  *     S: 1 -> set flags
4220  *    sh: 1 -> LSL imm by 12
4221  */
4222 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
4223 {
4224     int rd = extract32(insn, 0, 5);
4225     int rn = extract32(insn, 5, 5);
4226     uint64_t imm = extract32(insn, 10, 12);
4227     bool shift = extract32(insn, 22, 1);
4228     bool setflags = extract32(insn, 29, 1);
4229     bool sub_op = extract32(insn, 30, 1);
4230     bool is_64bit = extract32(insn, 31, 1);
4231 
4232     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
4233     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
4234     TCGv_i64 tcg_result;
4235 
4236     if (shift) {
4237         imm <<= 12;
4238     }
4239 
4240     tcg_result = tcg_temp_new_i64();
4241     if (!setflags) {
4242         if (sub_op) {
4243             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
4244         } else {
4245             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
4246         }
4247     } else {
4248         TCGv_i64 tcg_imm = tcg_constant_i64(imm);
4249         if (sub_op) {
4250             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
4251         } else {
4252             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
4253         }
4254     }
4255 
4256     if (is_64bit) {
4257         tcg_gen_mov_i64(tcg_rd, tcg_result);
4258     } else {
4259         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4260     }
4261 }
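
/*
 * Alias note: CMP x1, #4 is SUBS xzr, x1, #4.  With setflags set and
 * rd = 31 the destination above is cpu_reg(s, 31), i.e. XZR, so the
 * subtraction result is discarded and only NZCV is updated.
 */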
4262 
4263 /*
4264  * Add/subtract (immediate, with tags)
4265  *
4266  *  31 30 29 28         23 22 21     16 14      10 9   5 4   0
4267  * +--+--+--+-------------+--+---------+--+-------+-----+-----+
4268  * |sf|op| S| 1 0 0 0 1 1 |o2|  uimm6  |o3| uimm4 |  Rn | Rd  |
4269  * +--+--+--+-------------+--+---------+--+-------+-----+-----+
4270  *
4271  *    op: 0 -> add, 1 -> sub
4272  */
4273 static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn)
4274 {
4275     int rd = extract32(insn, 0, 5);
4276     int rn = extract32(insn, 5, 5);
4277     int uimm4 = extract32(insn, 10, 4);
4278     int uimm6 = extract32(insn, 16, 6);
4279     bool sub_op = extract32(insn, 30, 1);
4280     TCGv_i64 tcg_rn, tcg_rd;
4281     int imm;
4282 
4283     /* Test all of sf=1, S=0, o2=0, o3=0.  */
4284     if ((insn & 0xa040c000u) != 0x80000000u ||
4285         !dc_isar_feature(aa64_mte_insn_reg, s)) {
4286         unallocated_encoding(s);
4287         return;
4288     }
4289 
4290     imm = uimm6 << LOG2_TAG_GRANULE;
4291     if (sub_op) {
4292         imm = -imm;
4293     }
4294 
4295     tcg_rn = cpu_reg_sp(s, rn);
4296     tcg_rd = cpu_reg_sp(s, rd);
4297 
4298     if (s->ata) {
4299         gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn,
4300                            tcg_constant_i32(imm),
4301                            tcg_constant_i32(uimm4));
4302     } else {
4303         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4304         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4305     }
4306 }
4307 
4308 /* The input should be a value in the bottom e bits (with higher
4309  * bits zero); returns that value replicated into every element
4310  * of size e in a 64 bit integer.
4311  */
4312 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4313 {
4314     assert(e != 0);
4315     while (e < 64) {
4316         mask |= mask << e;
4317         e *= 2;
4318     }
4319     return mask;
4320 }
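
/*
 * For instance, bitfield_replicate(0x01, 8) doubles the mask three
 * times: 0x0101, 0x01010101, then 0x0101010101010101.
 */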
4321 
4322 /* Return a value with the bottom len bits set (where 0 < len <= 64) */
4323 static inline uint64_t bitmask64(unsigned int length)
4324 {
4325     assert(length > 0 && length <= 64);
4326     return ~0ULL >> (64 - length);
4327 }
4328 
4329 /* Simplified variant of pseudocode DecodeBitMasks() for the case where we
4330  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4331  * value (ie should cause a guest UNDEF exception), and true if they are
4332  * valid, in which case the decoded bit pattern is written to result.
4333  */
4334 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4335                             unsigned int imms, unsigned int immr)
4336 {
4337     uint64_t mask;
4338     unsigned e, levels, s, r;
4339     int len;
4340 
4341     assert(immn < 2 && imms < 64 && immr < 64);
4342 
4343     /* The bit patterns we create here are 64 bit patterns which
4344      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4345      * 64 bits each. Each element contains the same value: a run
4346      * of between 1 and e-1 non-zero bits, rotated within the
4347      * element by between 0 and e-1 bits.
4348      *
4349      * The element size and run length are encoded into immn (1 bit)
4350      * and imms (6 bits) as follows:
4351      * 64 bit elements: immn = 1, imms = <length of run - 1>
4352      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4353      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4354      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4355      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4356      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4357      * Notice that immn = 0, imms = 11111x is the only combination
4358      * not covered by one of the above options; this is reserved.
4359      * Further, <length of run - 1> all-ones is a reserved pattern.
4360      *
4361      * In all cases the rotation is by immr % e (and immr is 6 bits).
4362      */
4363 
4364     /* First determine the element size */
4365     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4366     if (len < 1) {
4367         /* This is the immn == 0, imms == 11111x case */
4368         return false;
4369     }
4370     e = 1 << len;
4371 
4372     levels = e - 1;
4373     s = imms & levels;
4374     r = immr & levels;
4375 
4376     if (s == levels) {
4377         /* <length of run - 1> mustn't be all-ones. */
4378         return false;
4379     }
4380 
4381     /* Create the value of one element: s+1 set bits rotated
4382      * by r within the element (which is e bits wide)...
4383      */
4384     mask = bitmask64(s + 1);
4385     if (r) {
4386         mask = (mask >> r) | (mask << (e - r));
4387         mask &= bitmask64(e);
4388     }
4389     /* ...then replicate the element over the whole 64 bit value */
4390     mask = bitfield_replicate(mask, e);
4391     *result = mask;
4392     return true;
4393 }
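
/*
 * Worked decode (illustration): immn = 0, imms = 0b110000, immr = 0.
 * len = 31 - clz32(~0x30 & 0x3f) = 3, so e = 8; s = 0 encodes a run
 * of one set bit, and with r = 0 the element 0x01 replicates to
 * 0x0101010101010101, the immediate of e.g.
 * ORR x0, xzr, #0x0101010101010101.
 */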
4394 
4395 /* Logical (immediate)
4396  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
4397  * +----+-----+-------------+---+------+------+------+------+
4398  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
4399  * +----+-----+-------------+---+------+------+------+------+
4400  */
4401 static void disas_logic_imm(DisasContext *s, uint32_t insn)
4402 {
4403     unsigned int sf, opc, is_n, immr, imms, rn, rd;
4404     TCGv_i64 tcg_rd, tcg_rn;
4405     uint64_t wmask;
4406     bool is_and = false;
4407 
4408     sf = extract32(insn, 31, 1);
4409     opc = extract32(insn, 29, 2);
4410     is_n = extract32(insn, 22, 1);
4411     immr = extract32(insn, 16, 6);
4412     imms = extract32(insn, 10, 6);
4413     rn = extract32(insn, 5, 5);
4414     rd = extract32(insn, 0, 5);
4415 
4416     if (!sf && is_n) {
4417         unallocated_encoding(s);
4418         return;
4419     }
4420 
4421     if (opc == 0x3) { /* ANDS */
4422         tcg_rd = cpu_reg(s, rd);
4423     } else {
4424         tcg_rd = cpu_reg_sp(s, rd);
4425     }
4426     tcg_rn = cpu_reg(s, rn);
4427 
4428     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
4429         /* some immediate field values are reserved */
4430         unallocated_encoding(s);
4431         return;
4432     }
4433 
4434     if (!sf) {
4435         wmask &= 0xffffffff;
4436     }
4437 
4438     switch (opc) {
4439     case 0x3: /* ANDS */
4440     case 0x0: /* AND */
4441         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
4442         is_and = true;
4443         break;
4444     case 0x1: /* ORR */
4445         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
4446         break;
4447     case 0x2: /* EOR */
4448         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
4449         break;
4450     default:
4451         g_assert_not_reached(); /* must handle all above */
4452         break;
4453     }
4454 
4455     if (!sf && !is_and) {
4456         /* zero extend final result; we know we can skip this for AND
4457          * since the immediate had the high 32 bits clear.
4458          */
4459         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4460     }
4461 
4462     if (opc == 3) { /* ANDS */
4463         gen_logic_CC(sf, tcg_rd);
4464     }
4465 }
4466 
4467 /*
4468  * Move wide (immediate)
4469  *
4470  *  31 30 29 28         23 22 21 20             5 4    0
4471  * +--+-----+-------------+-----+----------------+------+
4472  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
4473  * +--+-----+-------------+-----+----------------+------+
4474  *
4475  * sf: 0 -> 32 bit, 1 -> 64 bit
4476  * opc: 00 -> MOVN, 10 -> MOVZ, 11 -> MOVK (01 unallocated)
4477  * hw: shift/16 (0 or 16; 32 and 48 are valid only when sf = 1)
4478  */
4479 static void disas_movw_imm(DisasContext *s, uint32_t insn)
4480 {
4481     int rd = extract32(insn, 0, 5);
4482     uint64_t imm = extract32(insn, 5, 16);
4483     int sf = extract32(insn, 31, 1);
4484     int opc = extract32(insn, 29, 2);
4485     int pos = extract32(insn, 21, 2) << 4;
4486     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4487 
4488     if (!sf && (pos >= 32)) {
4489         unallocated_encoding(s);
4490         return;
4491     }
4492 
4493     switch (opc) {
4494     case 0: /* MOVN */
4495     case 2: /* MOVZ */
4496         imm <<= pos;
4497         if (opc == 0) {
4498             imm = ~imm;
4499         }
4500         if (!sf) {
4501             imm &= 0xffffffffu;
4502         }
4503         tcg_gen_movi_i64(tcg_rd, imm);
4504         break;
4505     case 3: /* MOVK */
4506         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_constant_i64(imm), pos, 16);
4507         if (!sf) {
4508             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4509         }
4510         break;
4511     default:
4512         unallocated_encoding(s);
4513         break;
4514     }
4515 }
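
/*
 * Typical 64-bit constant build, one MOVZ plus three MOVKs:
 *     movz x0, #0xdef0           -> x0 = 0x000000000000def0
 *     movk x0, #0x9abc, lsl #16  -> x0 = 0x000000009abcdef0
 *     movk x0, #0x5678, lsl #32  -> x0 = 0x000056789abcdef0
 *     movk x0, #0x1234, lsl #48  -> x0 = 0x123456789abcdef0
 * Each MOVK is the deposit path above, which leaves the other
 * 16-bit fields of the destination untouched.
 */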
4516 
4517 /* Bitfield
4518  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
4519  * +----+-----+-------------+---+------+------+------+------+
4520  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
4521  * +----+-----+-------------+---+------+------+------+------+
4522  */
4523 static void disas_bitfield(DisasContext *s, uint32_t insn)
4524 {
4525     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
4526     TCGv_i64 tcg_rd, tcg_tmp;
4527 
4528     sf = extract32(insn, 31, 1);
4529     opc = extract32(insn, 29, 2);
4530     n = extract32(insn, 22, 1);
4531     ri = extract32(insn, 16, 6);
4532     si = extract32(insn, 10, 6);
4533     rn = extract32(insn, 5, 5);
4534     rd = extract32(insn, 0, 5);
4535     bitsize = sf ? 64 : 32;
4536 
4537     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
4538         unallocated_encoding(s);
4539         return;
4540     }
4541 
4542     tcg_rd = cpu_reg(s, rd);
4543 
4544     /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
4545        to be smaller than bitsize, we'll never reference data outside the
4546        low 32-bits anyway.  */
4547     tcg_tmp = read_cpu_reg(s, rn, 1);
4548 
4549     /* Recognize simple(r) extractions.  */
4550     if (si >= ri) {
4551         /* Wd<s-r:0> = Wn<s:r> */
4552         len = (si - ri) + 1;
4553         if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
4554             tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4555             goto done;
4556         } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
4557             tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4558             return;
4559         }
4560         /* opc == 1, BFXIL fall through to deposit */
4561         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4562         pos = 0;
4563     } else {
4564         /* Handle the ri > si case with a deposit
4565          * Wd<32+s-r,32-r> = Wn<s:0>
4566          */
4567         len = si + 1;
4568         pos = (bitsize - ri) & (bitsize - 1);
4569     }
4570 
4571     if (opc == 0 && len < ri) {
4572         /* SBFM: sign extend the destination field from len to fill
4573            the balance of the word.  Let the deposit below insert all
4574            of those sign bits.  */
4575         tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4576         len = ri;
4577     }
4578 
4579     if (opc == 1) { /* BFM, BFXIL */
4580         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4581     } else {
4582         /* SBFM or UBFM: We start with zero, and we haven't modified
4583            any bits outside bitsize, therefore the zero-extension
4584            below is unneeded.  */
4585         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4586         return;
4587     }
4588 
4589  done:
4590     if (!sf) { /* zero extend final result */
4591         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4592     }
4593 }
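
/*
 * Example mapping: UBFX x0, x1, #8, #4 is UBFM with ri = 8, si = 11.
 * Since si >= ri this takes the simple-extraction path with
 * len = si - ri + 1 = 4, i.e. tcg_gen_extract_i64(rd, rn, 8, 4).
 */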
4594 
4595 /* Extract
4596  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
4597  * +----+------+-------------+---+----+------+--------+------+------+
4598  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
4599  * +----+------+-------------+---+----+------+--------+------+------+
4600  */
4601 static void disas_extract(DisasContext *s, uint32_t insn)
4602 {
4603     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
4604 
4605     sf = extract32(insn, 31, 1);
4606     n = extract32(insn, 22, 1);
4607     rm = extract32(insn, 16, 5);
4608     imm = extract32(insn, 10, 6);
4609     rn = extract32(insn, 5, 5);
4610     rd = extract32(insn, 0, 5);
4611     op21 = extract32(insn, 29, 2);
4612     op0 = extract32(insn, 21, 1);
4613     bitsize = sf ? 64 : 32;
4614 
4615     if (sf != n || op21 || op0 || imm >= bitsize) {
4616         unallocated_encoding(s);
4617     } else {
4618         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4619 
4620         tcg_rd = cpu_reg(s, rd);
4621 
4622         if (unlikely(imm == 0)) {
4623             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4624              * so an extract from bit 0 is a special case.
4625              */
4626             if (sf) {
4627                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
4628             } else {
4629                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
4630             }
4631         } else {
4632             tcg_rm = cpu_reg(s, rm);
4633             tcg_rn = cpu_reg(s, rn);
4634 
4635             if (sf) {
4636                 /* Specialization to ROR happens in EXTRACT2.  */
4637                 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm);
4638             } else {
4639                 TCGv_i32 t0 = tcg_temp_new_i32();
4640 
4641                 tcg_gen_extrl_i64_i32(t0, tcg_rm);
4642                 if (rm == rn) {
4643                     tcg_gen_rotri_i32(t0, t0, imm);
4644                 } else {
4645                     TCGv_i32 t1 = tcg_temp_new_i32();
4646                     tcg_gen_extrl_i64_i32(t1, tcg_rn);
4647                     tcg_gen_extract2_i32(t0, t0, t1, imm);
4648                 }
4649                 tcg_gen_extu_i32_i64(tcg_rd, t0);
4650             }
4651         }
4652     }
4653 }
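
/*
 * Semantics sketch: EXTR x0, x1, x2, #8 returns bits 71:8 of the
 * 128-bit value X1:X2, i.e. (x2 >> 8) | (x1 << 56), which is exactly
 * tcg_gen_extract2_i64(rd, rm, rn, 8).  When Rn == Rm the operation
 * degenerates to a rotate, hence the rotri special case in the
 * 32-bit path.
 */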
4654 
4655 /* Data processing - immediate */
4656 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
4657 {
4658     switch (extract32(insn, 23, 6)) {
4659     case 0x20: case 0x21: /* PC-rel. addressing */
4660         disas_pc_rel_adr(s, insn);
4661         break;
4662     case 0x22: /* Add/subtract (immediate) */
4663         disas_add_sub_imm(s, insn);
4664         break;
4665     case 0x23: /* Add/subtract (immediate, with tags) */
4666         disas_add_sub_imm_with_tags(s, insn);
4667         break;
4668     case 0x24: /* Logical (immediate) */
4669         disas_logic_imm(s, insn);
4670         break;
4671     case 0x25: /* Move wide (immediate) */
4672         disas_movw_imm(s, insn);
4673         break;
4674     case 0x26: /* Bitfield */
4675         disas_bitfield(s, insn);
4676         break;
4677     case 0x27: /* Extract */
4678         disas_extract(s, insn);
4679         break;
4680     default:
4681         unallocated_encoding(s);
4682         break;
4683     }
4684 }
4685 
4686 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
4687  * Note that it is the caller's responsibility to ensure that the
4688  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4689  * mandated semantics for out of range shifts.
4690  */
4691 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4692                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4693 {
4694     switch (shift_type) {
4695     case A64_SHIFT_TYPE_LSL:
4696         tcg_gen_shl_i64(dst, src, shift_amount);
4697         break;
4698     case A64_SHIFT_TYPE_LSR:
4699         tcg_gen_shr_i64(dst, src, shift_amount);
4700         break;
4701     case A64_SHIFT_TYPE_ASR:
4702         if (!sf) {
4703             tcg_gen_ext32s_i64(dst, src);
4704         }
4705         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4706         break;
4707     case A64_SHIFT_TYPE_ROR:
4708         if (sf) {
4709             tcg_gen_rotr_i64(dst, src, shift_amount);
4710         } else {
4711             TCGv_i32 t0, t1;
4712             t0 = tcg_temp_new_i32();
4713             t1 = tcg_temp_new_i32();
4714             tcg_gen_extrl_i64_i32(t0, src);
4715             tcg_gen_extrl_i64_i32(t1, shift_amount);
4716             tcg_gen_rotr_i32(t0, t0, t1);
4717             tcg_gen_extu_i32_i64(dst, t0);
4718         }
4719         break;
4720     default:
4721         g_assert_not_reached(); /* all shift types should be handled */
4722         break;
4723     }
4724 
4725     if (!sf) { /* zero extend final result */
4726         tcg_gen_ext32u_i64(dst, dst);
4727     }
4728 }
4729 
4730 /* Shift a TCGv src by immediate, put result in dst.
4731  * The shift amount must be in range (this should always be true as the
4732  * relevant instructions will UNDEF on bad shift immediates).
4733  */
4734 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4735                           enum a64_shift_type shift_type, unsigned int shift_i)
4736 {
4737     assert(shift_i < (sf ? 64 : 32));
4738 
4739     if (shift_i == 0) {
4740         tcg_gen_mov_i64(dst, src);
4741     } else {
4742         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
4743     }
4744 }
4745 
4746 /* Logical (shifted register)
4747  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4748  * +----+-----+-----------+-------+---+------+--------+------+------+
4749  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4750  * +----+-----+-----------+-------+---+------+--------+------+------+
4751  */
4752 static void disas_logic_reg(DisasContext *s, uint32_t insn)
4753 {
4754     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4755     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4756 
4757     sf = extract32(insn, 31, 1);
4758     opc = extract32(insn, 29, 2);
4759     shift_type = extract32(insn, 22, 2);
4760     invert = extract32(insn, 21, 1);
4761     rm = extract32(insn, 16, 5);
4762     shift_amount = extract32(insn, 10, 6);
4763     rn = extract32(insn, 5, 5);
4764     rd = extract32(insn, 0, 5);
4765 
4766     if (!sf && (shift_amount & (1 << 5))) {
4767         unallocated_encoding(s);
4768         return;
4769     }
4770 
4771     tcg_rd = cpu_reg(s, rd);
4772 
4773     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4774         /* Unshifted ORR and ORN with WZR/XZR are the standard encodings
4775          * for register-register MOV and MVN, so they are worth special-casing.
4776          */
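             /* Illustrative examples: "MOV X0, X1" assembles as
              * "ORR X0, XZR, X1", and "MVN W0, W1" as "ORN W0, WZR, W1".
              */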
4777         tcg_rm = cpu_reg(s, rm);
4778         if (invert) {
4779             tcg_gen_not_i64(tcg_rd, tcg_rm);
4780             if (!sf) {
4781                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4782             }
4783         } else {
4784             if (sf) {
4785                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
4786             } else {
4787                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4788             }
4789         }
4790         return;
4791     }
4792 
4793     tcg_rm = read_cpu_reg(s, rm, sf);
4794 
4795     if (shift_amount) {
4796         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4797     }
4798 
4799     tcg_rn = cpu_reg(s, rn);
4800 
4801     switch (opc | (invert << 2)) {
4802     case 0: /* AND */
4803     case 3: /* ANDS */
4804         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4805         break;
4806     case 1: /* ORR */
4807         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4808         break;
4809     case 2: /* EOR */
4810         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4811         break;
4812     case 4: /* BIC */
4813     case 7: /* BICS */
4814         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4815         break;
4816     case 5: /* ORN */
4817         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4818         break;
4819     case 6: /* EON */
4820         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4821         break;
4822     default:
4823         g_assert_not_reached();
4824         break;
4825     }
4826 
4827     if (!sf) {
4828         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4829     }
4830 
4831     if (opc == 3) {
4832         gen_logic_CC(sf, tcg_rd);
4833     }
4834 }
4835 
4836 /*
4837  * Add/subtract (extended register)
4838  *
4839  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4840  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4841  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4842  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4843  *
4844  *  sf: 0 -> 32bit, 1 -> 64bit
4845  *  op: 0 -> add  , 1 -> sub
4846  *   S: 1 -> set flags
4847  * opt: 00
4848  * option: extension type (see DecodeRegExtend)
4849  * imm3: optional shift to Rm
4850  *
4851  * Rd = Rn + LSL(extend(Rm), amount)
4852  */
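     /*
      * Illustrative example: "ADD X0, SP, W1, UXTW #2" computes
      * X0 = SP + (ZeroExtend(W1) << 2), with option == 0b010 (UXTW)
      * and imm3 == 2.
      */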
4853 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4854 {
4855     int rd = extract32(insn, 0, 5);
4856     int rn = extract32(insn, 5, 5);
4857     int imm3 = extract32(insn, 10, 3);
4858     int option = extract32(insn, 13, 3);
4859     int rm = extract32(insn, 16, 5);
4860     int opt = extract32(insn, 22, 2);
4861     bool setflags = extract32(insn, 29, 1);
4862     bool sub_op = extract32(insn, 30, 1);
4863     bool sf = extract32(insn, 31, 1);
4864 
4865     TCGv_i64 tcg_rm, tcg_rn; /* temps */
4866     TCGv_i64 tcg_rd;
4867     TCGv_i64 tcg_result;
4868 
4869     if (imm3 > 4 || opt != 0) {
4870         unallocated_encoding(s);
4871         return;
4872     }
4873 
4874     /* non-flag setting ops may use SP */
4875     if (!setflags) {
4876         tcg_rd = cpu_reg_sp(s, rd);
4877     } else {
4878         tcg_rd = cpu_reg(s, rd);
4879     }
4880     tcg_rn = read_cpu_reg_sp(s, rn, sf);
4881 
4882     tcg_rm = read_cpu_reg(s, rm, sf);
4883     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4884 
4885     tcg_result = tcg_temp_new_i64();
4886 
4887     if (!setflags) {
4888         if (sub_op) {
4889             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4890         } else {
4891             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4892         }
4893     } else {
4894         if (sub_op) {
4895             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4896         } else {
4897             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4898         }
4899     }
4900 
4901     if (sf) {
4902         tcg_gen_mov_i64(tcg_rd, tcg_result);
4903     } else {
4904         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4905     }
4906 }
4907 
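     /*
      * Note the SP handling: this form reads register 31 as SP for Rn
      * (and for Rd when S == 0), hence cpu_reg_sp()/read_cpu_reg_sp()
      * above, whereas the shifted-register form below treats register 31
      * as XZR throughout.
      */
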
4908 /*
4909  * Add/subtract (shifted register)
4910  *
4911  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4912  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4913  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4914  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4915  *
4916  *    sf: 0 -> 32bit, 1 -> 64bit
4917  *    op: 0 -> add  , 1 -> sub
4918  *     S: 1 -> set flags
4919  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4920  *  imm6: Shift amount to apply to Rm before the add/sub
4921  */
4922 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4923 {
4924     int rd = extract32(insn, 0, 5);
4925     int rn = extract32(insn, 5, 5);
4926     int imm6 = extract32(insn, 10, 6);
4927     int rm = extract32(insn, 16, 5);
4928     int shift_type = extract32(insn, 22, 2);
4929     bool setflags = extract32(insn, 29, 1);
4930     bool sub_op = extract32(insn, 30, 1);
4931     bool sf = extract32(insn, 31, 1);
4932 
4933     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4934     TCGv_i64 tcg_rn, tcg_rm;
4935     TCGv_i64 tcg_result;
4936 
4937     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4938         unallocated_encoding(s);
4939         return;
4940     }
4941 
4942     tcg_rn = read_cpu_reg(s, rn, sf);
4943     tcg_rm = read_cpu_reg(s, rm, sf);
4944 
4945     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4946 
4947     tcg_result = tcg_temp_new_i64();
4948 
4949     if (!setflags) {
4950         if (sub_op) {
4951             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4952         } else {
4953             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4954         }
4955     } else {
4956         if (sub_op) {
4957             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4958         } else {
4959             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4960         }
4961     }
4962 
4963     if (sf) {
4964         tcg_gen_mov_i64(tcg_rd, tcg_result);
4965     } else {
4966         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4967     }
4968 }
4969 
4970 /* Data-processing (3 source)
4971  *
4972  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
4973  *  +--+------+-----------+------+------+----+------+------+------+
4974  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4975  *  +--+------+-----------+------+------+----+------+------+------+
4976  */
4977 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4978 {
4979     int rd = extract32(insn, 0, 5);
4980     int rn = extract32(insn, 5, 5);
4981     int ra = extract32(insn, 10, 5);
4982     int rm = extract32(insn, 16, 5);
4983     int op_id = (extract32(insn, 29, 3) << 4) |
4984         (extract32(insn, 21, 3) << 1) |
4985         extract32(insn, 15, 1);
4986     bool sf = extract32(insn, 31, 1);
4987     bool is_sub = extract32(op_id, 0, 1);
4988     bool is_high = extract32(op_id, 2, 1);
4989     bool is_signed = false;
4990     TCGv_i64 tcg_op1;
4991     TCGv_i64 tcg_op2;
4992     TCGv_i64 tcg_tmp;
4993 
4994     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4995     switch (op_id) {
4996     case 0x42: /* SMADDL */
4997     case 0x43: /* SMSUBL */
4998     case 0x44: /* SMULH */
4999         is_signed = true;
5000         break;
5001     case 0x0: /* MADD (32bit) */
5002     case 0x1: /* MSUB (32bit) */
5003     case 0x40: /* MADD (64bit) */
5004     case 0x41: /* MSUB (64bit) */
5005     case 0x4a: /* UMADDL */
5006     case 0x4b: /* UMSUBL */
5007     case 0x4c: /* UMULH */
5008         break;
5009     default:
5010         unallocated_encoding(s);
5011         return;
5012     }
5013 
5014     if (is_high) {
5015         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
5016         TCGv_i64 tcg_rd = cpu_reg(s, rd);
5017         TCGv_i64 tcg_rn = cpu_reg(s, rn);
5018         TCGv_i64 tcg_rm = cpu_reg(s, rm);
5019 
5020         if (is_signed) {
5021             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
5022         } else {
5023             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
5024         }
5025         return;
5026     }
5027 
5028     tcg_op1 = tcg_temp_new_i64();
5029     tcg_op2 = tcg_temp_new_i64();
5030     tcg_tmp = tcg_temp_new_i64();
5031 
5032     if (op_id < 0x42) {
5033         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
5034         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
5035     } else {
5036         if (is_signed) {
5037             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
5038             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
5039         } else {
5040             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
5041             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
5042         }
5043     }
5044 
5045     if (ra == 31 && !is_sub) {
5046         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
5047         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
5048     } else {
5049         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
5050         if (is_sub) {
5051             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
5052         } else {
5053             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
5054         }
5055     }
5056 
5057     if (!sf) {
5058         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
5059     }
5060 }
5061 
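     /*
      * Worked op_id example: SMULH has sf=1, op54=00, op31=010, o0=0,
      * giving (0b100 << 4) | (0b010 << 1) | 0 == 0x44, matching the
      * case label above.
      */
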
5062 /* Add/subtract (with carry)
5063  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
5064  * +--+--+--+------------------------+------+-------------+------+-----+
5065  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
5066  * +--+--+--+------------------------+------+-------------+------+-----+
5067  */
5068 
5069 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
5070 {
5071     unsigned int sf, op, setflags, rm, rn, rd;
5072     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
5073 
5074     sf = extract32(insn, 31, 1);
5075     op = extract32(insn, 30, 1);
5076     setflags = extract32(insn, 29, 1);
5077     rm = extract32(insn, 16, 5);
5078     rn = extract32(insn, 5, 5);
5079     rd = extract32(insn, 0, 5);
5080 
5081     tcg_rd = cpu_reg(s, rd);
5082     tcg_rn = cpu_reg(s, rn);
5083 
5084     if (op) {
5085         tcg_y = tcg_temp_new_i64();
5086         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
5087     } else {
5088         tcg_y = cpu_reg(s, rm);
5089     }
5090 
5091     if (setflags) {
5092         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
5093     } else {
5094         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
5095     }
5096 }
5097 
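     /*
      * Note: SBC computes Rn + NOT(Rm) + C, the two's-complement
      * identity for Rn - Rm - (1 - C); that is why op == 1 merely
      * inverts Rm and then shares the gen_adc* path with ADC.
      */
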
5098 /*
5099  * Rotate right into flags
5100  *  31 30 29                21       15          10      5  4      0
5101  * +--+--+--+-----------------+--------+-----------+------+--+------+
5102  * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
5103  * +--+--+--+-----------------+--------+-----------+------+--+------+
5104  */
5105 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
5106 {
5107     int mask = extract32(insn, 0, 4);
5108     int o2 = extract32(insn, 4, 1);
5109     int rn = extract32(insn, 5, 5);
5110     int imm6 = extract32(insn, 15, 6);
5111     int sf_op_s = extract32(insn, 29, 3);
5112     TCGv_i64 tcg_rn;
5113     TCGv_i32 nzcv;
5114 
5115     if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
5116         unallocated_encoding(s);
5117         return;
5118     }
5119 
5120     tcg_rn = read_cpu_reg(s, rn, 1);
5121     tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
5122 
5123     nzcv = tcg_temp_new_i32();
5124     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
5125 
5126     if (mask & 8) { /* N */
5127         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
5128     }
5129     if (mask & 4) { /* Z */
5130         tcg_gen_not_i32(cpu_ZF, nzcv);
5131         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
5132     }
5133     if (mask & 2) { /* C */
5134         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
5135     }
5136     if (mask & 1) { /* V */
5137         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
5138     }
5139 }
5140 
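     /*
      * For reference: RMIF rotates Xn right by imm6 and copies bits
      * [3:0] of the result into the flags selected by mask.  The shift
      * counts above follow QEMU's flag representation: NF and VF keep
      * their flag in bit 31, ZF is zero exactly when Z is set, and CF
      * holds 0 or 1.
      */
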
5141 /*
5142  * Evaluate into flags
5143  *  31 30 29                21        15   14        10      5  4      0
5144  * +--+--+--+-----------------+---------+----+---------+------+--+------+
5145  * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
5146  * +--+--+--+-----------------+---------+----+---------+------+--+------+
5147  */
5148 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
5149 {
5150     int o3_mask = extract32(insn, 0, 5);
5151     int rn = extract32(insn, 5, 5);
5152     int o2 = extract32(insn, 15, 6);
5153     int sz = extract32(insn, 14, 1);
5154     int sf_op_s = extract32(insn, 29, 3);
5155     TCGv_i32 tmp;
5156     int shift;
5157 
5158     if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
5159         !dc_isar_feature(aa64_condm_4, s)) {
5160         unallocated_encoding(s);
5161         return;
5162     }
5163     shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
5164 
5165     tmp = tcg_temp_new_i32();
5166     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
5167     tcg_gen_shli_i32(cpu_NF, tmp, shift);
5168     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
5169     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
5170     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
5171 }
5172 
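     /*
      * Worked example: SETF8 Wn yields N = Wn[7], Z = (Wn[7:0] == 0),
      * V = Wn[8] EOR Wn[7], with C unchanged.  Shifting left by 24 puts
      * bit 7 into NF bit 31 and makes ZF zero exactly when the low byte
      * is zero; the shift by 23 lines bit 8 up with bit 7 for the XOR
      * that computes V.
      */
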
5173 /* Conditional compare (immediate / register)
5174  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
5175  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
5176  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
5177  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
5178  *        [1]                             y                [0]       [0]
5179  */
5180 static void disas_cc(DisasContext *s, uint32_t insn)
5181 {
5182     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
5183     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
5184     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
5185     DisasCompare c;
5186 
5187     if (!extract32(insn, 29, 1)) {
5188         unallocated_encoding(s);
5189         return;
5190     }
5191     if (insn & (1 << 10 | 1 << 4)) {
5192         unallocated_encoding(s);
5193         return;
5194     }
5195     sf = extract32(insn, 31, 1);
5196     op = extract32(insn, 30, 1);
5197     is_imm = extract32(insn, 11, 1);
5198     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
5199     cond = extract32(insn, 12, 4);
5200     rn = extract32(insn, 5, 5);
5201     nzcv = extract32(insn, 0, 4);
5202 
5203     /* Set T0 = !COND.  */
5204     tcg_t0 = tcg_temp_new_i32();
5205     arm_test_cc(&c, cond);
5206     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
5207 
5208     /* Load the arguments for the new comparison.  */
5209     if (is_imm) {
5210         tcg_y = tcg_temp_new_i64();
5211         tcg_gen_movi_i64(tcg_y, y);
5212     } else {
5213         tcg_y = cpu_reg(s, y);
5214     }
5215     tcg_rn = cpu_reg(s, rn);
5216 
5217     /* Set the flags for the new comparison.  */
5218     tcg_tmp = tcg_temp_new_i64();
5219     if (op) {
5220         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5221     } else {
5222         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5223     }
5224 
5225     /* If COND was false, force the flags to #nzcv.  Compute two masks
5226      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
5227      * For tcg hosts that support ANDC, we can make do with just T1.
5228      * In either case, allow the tcg optimizer to delete any unused mask.
5229      */
5230     tcg_t1 = tcg_temp_new_i32();
5231     tcg_t2 = tcg_temp_new_i32();
5232     tcg_gen_neg_i32(tcg_t1, tcg_t0);
5233     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
5234 
5235     if (nzcv & 8) { /* N */
5236         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
5237     } else {
5238         if (TCG_TARGET_HAS_andc_i32) {
5239             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
5240         } else {
5241             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
5242         }
5243     }
5244     if (nzcv & 4) { /* Z */
5245         if (TCG_TARGET_HAS_andc_i32) {
5246             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
5247         } else {
5248             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
5249         }
5250     } else {
5251         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
5252     }
5253     if (nzcv & 2) { /* C */
5254         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
5255     } else {
5256         if (TCG_TARGET_HAS_andc_i32) {
5257             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
5258         } else {
5259             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
5260         }
5261     }
5262     if (nzcv & 1) { /* V */
5263         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
5264     } else {
5265         if (TCG_TARGET_HAS_andc_i32) {
5266             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
5267         } else {
5268             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
5269         }
5270     }
5271 }
5272 
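     /*
      * Illustrative example: "CCMP X0, X1, #4, EQ" sets the flags from
      * (X0 - X1) when EQ holds and otherwise forces NZCV = 0b0100,
      * i.e. only Z set.
      */
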
5273 /* Conditional select
5274  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
5275  * +----+----+---+-----------------+------+------+-----+------+------+
5276  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
5277  * +----+----+---+-----------------+------+------+-----+------+------+
5278  */
5279 static void disas_cond_select(DisasContext *s, uint32_t insn)
5280 {
5281     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
5282     TCGv_i64 tcg_rd, zero;
5283     DisasCompare64 c;
5284 
5285     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
5286         /* S == 1 or op2<1> == 1 */
5287         unallocated_encoding(s);
5288         return;
5289     }
5290     sf = extract32(insn, 31, 1);
5291     else_inv = extract32(insn, 30, 1);
5292     rm = extract32(insn, 16, 5);
5293     cond = extract32(insn, 12, 4);
5294     else_inc = extract32(insn, 10, 1);
5295     rn = extract32(insn, 5, 5);
5296     rd = extract32(insn, 0, 5);
5297 
5298     tcg_rd = cpu_reg(s, rd);
5299 
5300     a64_test_cc(&c, cond);
5301     zero = tcg_constant_i64(0);
5302 
5303     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
5304         /* CSET & CSETM.  */
5305         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
5306         if (else_inv) {
5307             tcg_gen_neg_i64(tcg_rd, tcg_rd);
5308         }
5309     } else {
5310         TCGv_i64 t_true = cpu_reg(s, rn);
5311         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
5312         if (else_inv && else_inc) {
5313             tcg_gen_neg_i64(t_false, t_false);
5314         } else if (else_inv) {
5315             tcg_gen_not_i64(t_false, t_false);
5316         } else if (else_inc) {
5317             tcg_gen_addi_i64(t_false, t_false, 1);
5318         }
5319         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
5320     }
5321 
5322     if (!sf) {
5323         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5324     }
5325 }
5326 
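     /*
      * Alias examples: "CSET Wd, cond" is "CSINC Wd, WZR, WZR,
      * invert(cond)" and "CINC Wd, Wn, cond" is "CSINC Wd, Wn, Wn,
      * invert(cond)"; the rn == rm == 31 test above catches the
      * CSET/CSETM forms.
      */
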
5327 static void handle_clz(DisasContext *s, unsigned int sf,
5328                        unsigned int rn, unsigned int rd)
5329 {
5330     TCGv_i64 tcg_rd, tcg_rn;
5331     tcg_rd = cpu_reg(s, rd);
5332     tcg_rn = cpu_reg(s, rn);
5333 
5334     if (sf) {
5335         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
5336     } else {
5337         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5338         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5339         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
5340         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5341     }
5342 }
5343 
5344 static void handle_cls(DisasContext *s, unsigned int sf,
5345                        unsigned int rn, unsigned int rd)
5346 {
5347     TCGv_i64 tcg_rd, tcg_rn;
5348     tcg_rd = cpu_reg(s, rd);
5349     tcg_rn = cpu_reg(s, rn);
5350 
5351     if (sf) {
5352         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
5353     } else {
5354         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5355         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5356         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
5357         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5358     }
5359 }
5360 
5361 static void handle_rbit(DisasContext *s, unsigned int sf,
5362                         unsigned int rn, unsigned int rd)
5363 {
5364     TCGv_i64 tcg_rd, tcg_rn;
5365     tcg_rd = cpu_reg(s, rd);
5366     tcg_rn = cpu_reg(s, rn);
5367 
5368     if (sf) {
5369         gen_helper_rbit64(tcg_rd, tcg_rn);
5370     } else {
5371         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5372         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5373         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
5374         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5375     }
5376 }
5377 
5378 /* REV with sf==1, opcode==3 ("REV64") */
5379 static void handle_rev64(DisasContext *s, unsigned int sf,
5380                          unsigned int rn, unsigned int rd)
5381 {
5382     if (!sf) {
5383         unallocated_encoding(s);
5384         return;
5385     }
5386     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
5387 }
5388 
5389 /* REV (sf==0, opcode==2)
5390  * REV32 (sf==1, opcode==2)
5391  */
5392 static void handle_rev32(DisasContext *s, unsigned int sf,
5393                          unsigned int rn, unsigned int rd)
5394 {
5395     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5396     TCGv_i64 tcg_rn = cpu_reg(s, rn);
5397 
5398     if (sf) {
5399         tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
5400         tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
5401     } else {
5402         tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
5403     }
5404 }
5405 
5406 /* REV16 (opcode==1) */
5407 static void handle_rev16(DisasContext *s, unsigned int sf,
5408                          unsigned int rn, unsigned int rd)
5409 {
5410     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5411     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5412     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5413     TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
5414 
5415     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
5416     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
5417     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
5418     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
5419     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
5420 }
5421 
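     /*
      * Worked example, Wn = 0x11223344:
      *   tcg_tmp = (rn >> 8) & mask = 0x00110033
      *   tcg_rd  = (rn & mask) << 8 = 0x22004400
      *   result  = 0x22114433, i.e. each halfword byte-swapped.
      */
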
5422 /* Data-processing (1 source)
5423  *   31  30  29  28             21 20     16 15    10 9    5 4    0
5424  * +----+---+---+-----------------+---------+--------+------+------+
5425  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
5426  * +----+---+---+-----------------+---------+--------+------+------+
5427  */
5428 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
5429 {
5430     unsigned int sf, opcode, opcode2, rn, rd;
5431     TCGv_i64 tcg_rd;
5432 
5433     if (extract32(insn, 29, 1)) {
5434         unallocated_encoding(s);
5435         return;
5436     }
5437 
5438     sf = extract32(insn, 31, 1);
5439     opcode = extract32(insn, 10, 6);
5440     opcode2 = extract32(insn, 16, 5);
5441     rn = extract32(insn, 5, 5);
5442     rd = extract32(insn, 0, 5);
5443 
5444 #define MAP(SF, O2, O1) ((SF) | ((O1) << 1) | ((O2) << 7))
5445 
5446     switch (MAP(sf, opcode2, opcode)) {
5447     case MAP(0, 0x00, 0x00): /* RBIT */
5448     case MAP(1, 0x00, 0x00):
5449         handle_rbit(s, sf, rn, rd);
5450         break;
5451     case MAP(0, 0x00, 0x01): /* REV16 */
5452     case MAP(1, 0x00, 0x01):
5453         handle_rev16(s, sf, rn, rd);
5454         break;
5455     case MAP(0, 0x00, 0x02): /* REV/REV32 */
5456     case MAP(1, 0x00, 0x02):
5457         handle_rev32(s, sf, rn, rd);
5458         break;
5459     case MAP(1, 0x00, 0x03): /* REV64 */
5460         handle_rev64(s, sf, rn, rd);
5461         break;
5462     case MAP(0, 0x00, 0x04): /* CLZ */
5463     case MAP(1, 0x00, 0x04):
5464         handle_clz(s, sf, rn, rd);
5465         break;
5466     case MAP(0, 0x00, 0x05): /* CLS */
5467     case MAP(1, 0x00, 0x05):
5468         handle_cls(s, sf, rn, rd);
5469         break;
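         /*
          * The pointer-authentication cases below share a pattern: with
          * pauth implemented and enabled, apply the helper; implemented
          * but disabled, the insn is a NOP; not implemented at all, it
          * is UNALLOCATED.
          */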
5470     case MAP(1, 0x01, 0x00): /* PACIA */
5471         if (s->pauth_active) {
5472             tcg_rd = cpu_reg(s, rd);
5473             gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5474         } else if (!dc_isar_feature(aa64_pauth, s)) {
5475             goto do_unallocated;
5476         }
5477         break;
5478     case MAP(1, 0x01, 0x01): /* PACIB */
5479         if (s->pauth_active) {
5480             tcg_rd = cpu_reg(s, rd);
5481             gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5482         } else if (!dc_isar_feature(aa64_pauth, s)) {
5483             goto do_unallocated;
5484         }
5485         break;
5486     case MAP(1, 0x01, 0x02): /* PACDA */
5487         if (s->pauth_active) {
5488             tcg_rd = cpu_reg(s, rd);
5489             gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5490         } else if (!dc_isar_feature(aa64_pauth, s)) {
5491             goto do_unallocated;
5492         }
5493         break;
5494     case MAP(1, 0x01, 0x03): /* PACDB */
5495         if (s->pauth_active) {
5496             tcg_rd = cpu_reg(s, rd);
5497             gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5498         } else if (!dc_isar_feature(aa64_pauth, s)) {
5499             goto do_unallocated;
5500         }
5501         break;
5502     case MAP(1, 0x01, 0x04): /* AUTIA */
5503         if (s->pauth_active) {
5504             tcg_rd = cpu_reg(s, rd);
5505             gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5506         } else if (!dc_isar_feature(aa64_pauth, s)) {
5507             goto do_unallocated;
5508         }
5509         break;
5510     case MAP(1, 0x01, 0x05): /* AUTIB */
5511         if (s->pauth_active) {
5512             tcg_rd = cpu_reg(s, rd);
5513             gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5514         } else if (!dc_isar_feature(aa64_pauth, s)) {
5515             goto do_unallocated;
5516         }
5517         break;
5518     case MAP(1, 0x01, 0x06): /* AUTDA */
5519         if (s->pauth_active) {
5520             tcg_rd = cpu_reg(s, rd);
5521             gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5522         } else if (!dc_isar_feature(aa64_pauth, s)) {
5523             goto do_unallocated;
5524         }
5525         break;
5526     case MAP(1, 0x01, 0x07): /* AUTDB */
5527         if (s->pauth_active) {
5528             tcg_rd = cpu_reg(s, rd);
5529             gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5530         } else if (!dc_isar_feature(aa64_pauth, s)) {
5531             goto do_unallocated;
5532         }
5533         break;
5534     case MAP(1, 0x01, 0x08): /* PACIZA */
5535         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5536             goto do_unallocated;
5537         } else if (s->pauth_active) {
5538             tcg_rd = cpu_reg(s, rd);
5539             gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5540         }
5541         break;
5542     case MAP(1, 0x01, 0x09): /* PACIZB */
5543         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5544             goto do_unallocated;
5545         } else if (s->pauth_active) {
5546             tcg_rd = cpu_reg(s, rd);
5547             gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5548         }
5549         break;
5550     case MAP(1, 0x01, 0x0a): /* PACDZA */
5551         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5552             goto do_unallocated;
5553         } else if (s->pauth_active) {
5554             tcg_rd = cpu_reg(s, rd);
5555             gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5556         }
5557         break;
5558     case MAP(1, 0x01, 0x0b): /* PACDZB */
5559         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5560             goto do_unallocated;
5561         } else if (s->pauth_active) {
5562             tcg_rd = cpu_reg(s, rd);
5563             gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5564         }
5565         break;
5566     case MAP(1, 0x01, 0x0c): /* AUTIZA */
5567         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5568             goto do_unallocated;
5569         } else if (s->pauth_active) {
5570             tcg_rd = cpu_reg(s, rd);
5571             gen_helper_autia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5572         }
5573         break;
5574     case MAP(1, 0x01, 0x0d): /* AUTIZB */
5575         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5576             goto do_unallocated;
5577         } else if (s->pauth_active) {
5578             tcg_rd = cpu_reg(s, rd);
5579             gen_helper_autib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5580         }
5581         break;
5582     case MAP(1, 0x01, 0x0e): /* AUTDZA */
5583         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5584             goto do_unallocated;
5585         } else if (s->pauth_active) {
5586             tcg_rd = cpu_reg(s, rd);
5587             gen_helper_autda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5588         }
5589         break;
5590     case MAP(1, 0x01, 0x0f): /* AUTDZB */
5591         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5592             goto do_unallocated;
5593         } else if (s->pauth_active) {
5594             tcg_rd = cpu_reg(s, rd);
5595             gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5596         }
5597         break;
5598     case MAP(1, 0x01, 0x10): /* XPACI */
5599         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5600             goto do_unallocated;
5601         } else if (s->pauth_active) {
5602             tcg_rd = cpu_reg(s, rd);
5603             gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
5604         }
5605         break;
5606     case MAP(1, 0x01, 0x11): /* XPACD */
5607         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5608             goto do_unallocated;
5609         } else if (s->pauth_active) {
5610             tcg_rd = cpu_reg(s, rd);
5611             gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
5612         }
5613         break;
5614     default:
5615     do_unallocated:
5616         unallocated_encoding(s);
5617         break;
5618     }
5619 
5620 #undef MAP
5621 }
5622 
5623 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5624                        unsigned int rm, unsigned int rn, unsigned int rd)
5625 {
5626     TCGv_i64 tcg_n, tcg_m, tcg_rd;
5627     tcg_rd = cpu_reg(s, rd);
5628 
5629     if (!sf && is_signed) {
5630         tcg_n = tcg_temp_new_i64();
5631         tcg_m = tcg_temp_new_i64();
5632         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5633         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5634     } else {
5635         tcg_n = read_cpu_reg(s, rn, sf);
5636         tcg_m = read_cpu_reg(s, rm, sf);
5637     }
5638 
5639     if (is_signed) {
5640         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5641     } else {
5642         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5643     }
5644 
5645     if (!sf) { /* zero extend final result */
5646         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5647     }
5648 }
5649 
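     /*
      * Note: the sdiv64/udiv64 helpers implement the architectural
      * non-trapping semantics: division by zero returns 0 and the
      * INT64_MIN / -1 case wraps, so no exception is ever generated.
      */
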
5650 /* LSLV, LSRV, ASRV, RORV */
5651 static void handle_shift_reg(DisasContext *s,
5652                              enum a64_shift_type shift_type, unsigned int sf,
5653                              unsigned int rm, unsigned int rn, unsigned int rd)
5654 {
5655     TCGv_i64 tcg_shift = tcg_temp_new_i64();
5656     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5657     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5658 
5659     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5660     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5661 }
5662 
5663 /* CRC32[BHWX], CRC32C[BHWX] */
5664 static void handle_crc32(DisasContext *s,
5665                          unsigned int sf, unsigned int sz, bool crc32c,
5666                          unsigned int rm, unsigned int rn, unsigned int rd)
5667 {
5668     TCGv_i64 tcg_acc, tcg_val;
5669     TCGv_i32 tcg_bytes;
5670 
5671     if (!dc_isar_feature(aa64_crc32, s)
5672         || (sf == 1 && sz != 3)
5673         || (sf == 0 && sz == 3)) {
5674         unallocated_encoding(s);
5675         return;
5676     }
5677 
5678     if (sz == 3) {
5679         tcg_val = cpu_reg(s, rm);
5680     } else {
5681         uint64_t mask;
5682         switch (sz) {
5683         case 0:
5684             mask = 0xFF;
5685             break;
5686         case 1:
5687             mask = 0xFFFF;
5688             break;
5689         case 2:
5690             mask = 0xFFFFFFFF;
5691             break;
5692         default:
5693             g_assert_not_reached();
5694         }
5695         tcg_val = tcg_temp_new_i64();
5696         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5697     }
5698 
5699     tcg_acc = cpu_reg(s, rn);
5700     tcg_bytes = tcg_constant_i32(1 << sz);
5701 
5702     if (crc32c) {
5703         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5704     } else {
5705         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5706     }
5707 }
5708 
5709 /* Data-processing (2 source)
5710  *   31   30  29 28             21 20  16 15    10 9    5 4    0
5711  * +----+---+---+-----------------+------+--------+------+------+
5712  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
5713  * +----+---+---+-----------------+------+--------+------+------+
5714  */
5715 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5716 {
5717     unsigned int sf, rm, opcode, rn, rd, setflag;
5718     sf = extract32(insn, 31, 1);
5719     setflag = extract32(insn, 29, 1);
5720     rm = extract32(insn, 16, 5);
5721     opcode = extract32(insn, 10, 6);
5722     rn = extract32(insn, 5, 5);
5723     rd = extract32(insn, 0, 5);
5724 
5725     if (setflag && opcode != 0) {
5726         unallocated_encoding(s);
5727         return;
5728     }
5729 
5730     switch (opcode) {
5731     case 0: /* SUBP(S) */
5732         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5733             goto do_unallocated;
5734         } else {
5735             TCGv_i64 tcg_n, tcg_m, tcg_d;
5736 
5737             tcg_n = read_cpu_reg_sp(s, rn, true);
5738             tcg_m = read_cpu_reg_sp(s, rm, true);
5739             tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
5740             tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
5741             tcg_d = cpu_reg(s, rd);
5742 
5743             if (setflag) {
5744                 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
5745             } else {
5746                 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
5747             }
5748         }
5749         break;
5750     case 2: /* UDIV */
5751         handle_div(s, false, sf, rm, rn, rd);
5752         break;
5753     case 3: /* SDIV */
5754         handle_div(s, true, sf, rm, rn, rd);
5755         break;
5756     case 4: /* IRG */
5757         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5758             goto do_unallocated;
5759         }
5760         if (s->ata) {
5761             gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
5762                            cpu_reg_sp(s, rn), cpu_reg(s, rm));
5763         } else {
5764             gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
5765                                              cpu_reg_sp(s, rn));
5766         }
5767         break;
5768     case 5: /* GMI */
5769         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5770             goto do_unallocated;
5771         } else {
5772             TCGv_i64 t = tcg_temp_new_i64();
5773 
5774             tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
5775             tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
5776             tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
5777         }
5778         break;
5779     case 8: /* LSLV */
5780         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5781         break;
5782     case 9: /* LSRV */
5783         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5784         break;
5785     case 10: /* ASRV */
5786         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5787         break;
5788     case 11: /* RORV */
5789         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5790         break;
5791     case 12: /* PACGA */
5792         if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5793             goto do_unallocated;
5794         }
5795         gen_helper_pacga(cpu_reg(s, rd), cpu_env,
5796                          cpu_reg(s, rn), cpu_reg_sp(s, rm));
5797         break;
5798     case 16:
5799     case 17:
5800     case 18:
5801     case 19:
5802     case 20:
5803     case 21:
5804     case 22:
5805     case 23: /* CRC32 */
5806     {
5807         int sz = extract32(opcode, 0, 2);
5808         bool crc32c = extract32(opcode, 2, 1);
5809         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5810         break;
5811     }
5812     default:
5813     do_unallocated:
5814         unallocated_encoding(s);
5815         break;
5816     }
5817 }
5818 
5819 /*
5820  * Data processing - register
5821  *  31  30 29  28      25    21  20  16      10         0
5822  * +--+---+--+---+-------+-----+-------+-------+---------+
5823  * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
5824  * +--+---+--+---+-------+-----+-------+-------+---------+
5825  */
5826 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5827 {
5828     int op0 = extract32(insn, 30, 1);
5829     int op1 = extract32(insn, 28, 1);
5830     int op2 = extract32(insn, 21, 4);
5831     int op3 = extract32(insn, 10, 6);
5832 
5833     if (!op1) {
5834         if (op2 & 8) {
5835             if (op2 & 1) {
5836                 /* Add/sub (extended register) */
5837                 disas_add_sub_ext_reg(s, insn);
5838             } else {
5839                 /* Add/sub (shifted register) */
5840                 disas_add_sub_reg(s, insn);
5841             }
5842         } else {
5843             /* Logical (shifted register) */
5844             disas_logic_reg(s, insn);
5845         }
5846         return;
5847     }
5848 
5849     switch (op2) {
5850     case 0x0:
5851         switch (op3) {
5852         case 0x00: /* Add/subtract (with carry) */
5853             disas_adc_sbc(s, insn);
5854             break;
5855 
5856         case 0x01: /* Rotate right into flags */
5857         case 0x21:
5858             disas_rotate_right_into_flags(s, insn);
5859             break;
5860 
5861         case 0x02: /* Evaluate into flags */
5862         case 0x12:
5863         case 0x22:
5864         case 0x32:
5865             disas_evaluate_into_flags(s, insn);
5866             break;
5867 
5868         default:
5869             goto do_unallocated;
5870         }
5871         break;
5872 
5873     case 0x2: /* Conditional compare */
5874         disas_cc(s, insn); /* both imm and reg forms */
5875         break;
5876 
5877     case 0x4: /* Conditional select */
5878         disas_cond_select(s, insn);
5879         break;
5880 
5881     case 0x6: /* Data-processing */
5882         if (op0) {    /* (1 source) */
5883             disas_data_proc_1src(s, insn);
5884         } else {      /* (2 source) */
5885             disas_data_proc_2src(s, insn);
5886         }
5887         break;
5888     case 0x8 ... 0xf: /* (3 source) */
5889         disas_data_proc_3src(s, insn);
5890         break;
5891 
5892     default:
5893     do_unallocated:
5894         unallocated_encoding(s);
5895         break;
5896     }
5897 }
5898 
5899 static void handle_fp_compare(DisasContext *s, int size,
5900                               unsigned int rn, unsigned int rm,
5901                               bool cmp_with_zero, bool signal_all_nans)
5902 {
5903     TCGv_i64 tcg_flags = tcg_temp_new_i64();
5904     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
5905 
5906     if (size == MO_64) {
5907         TCGv_i64 tcg_vn, tcg_vm;
5908 
5909         tcg_vn = read_fp_dreg(s, rn);
5910         if (cmp_with_zero) {
5911             tcg_vm = tcg_constant_i64(0);
5912         } else {
5913             tcg_vm = read_fp_dreg(s, rm);
5914         }
5915         if (signal_all_nans) {
5916             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5917         } else {
5918             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5919         }
5920     } else {
5921         TCGv_i32 tcg_vn = tcg_temp_new_i32();
5922         TCGv_i32 tcg_vm = tcg_temp_new_i32();
5923 
5924         read_vec_element_i32(s, tcg_vn, rn, 0, size);
5925         if (cmp_with_zero) {
5926             tcg_gen_movi_i32(tcg_vm, 0);
5927         } else {
5928             read_vec_element_i32(s, tcg_vm, rm, 0, size);
5929         }
5930 
5931         switch (size) {
5932         case MO_32:
5933             if (signal_all_nans) {
5934                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5935             } else {
5936                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5937             }
5938             break;
5939         case MO_16:
5940             if (signal_all_nans) {
5941                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5942             } else {
5943                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5944             }
5945             break;
5946         default:
5947             g_assert_not_reached();
5948         }
5949     }
5950 
5951     gen_set_nzcv(tcg_flags);
5952 }
5953 
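     /*
      * For reference: the "cmpe" helper variants implement FCMPE, which
      * raises Invalid Operation for any NaN operand; the plain "cmp"
      * variants (FCMP) signal only on signalling NaNs.
      */
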
5954 /* Floating point compare
5955  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
5956  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5957  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
5958  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5959  */
5960 static void disas_fp_compare(DisasContext *s, uint32_t insn)
5961 {
5962     unsigned int mos, type, rm, op, rn, opc, op2r;
5963     int size;
5964 
5965     mos = extract32(insn, 29, 3);
5966     type = extract32(insn, 22, 2);
5967     rm = extract32(insn, 16, 5);
5968     op = extract32(insn, 14, 2);
5969     rn = extract32(insn, 5, 5);
5970     opc = extract32(insn, 3, 2);
5971     op2r = extract32(insn, 0, 3);
5972 
5973     if (mos || op || op2r) {
5974         unallocated_encoding(s);
5975         return;
5976     }
5977 
5978     switch (type) {
5979     case 0:
5980         size = MO_32;
5981         break;
5982     case 1:
5983         size = MO_64;
5984         break;
5985     case 3:
5986         size = MO_16;
5987         if (dc_isar_feature(aa64_fp16, s)) {
5988             break;
5989         }
5990         /* fallthru */
5991     default:
5992         unallocated_encoding(s);
5993         return;
5994     }
5995 
5996     if (!fp_access_check(s)) {
5997         return;
5998     }
5999 
6000     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
6001 }
6002 
6003 /* Floating point conditional compare
6004  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
6005  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
6006  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
6007  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
6008  */
6009 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
6010 {
6011     unsigned int mos, type, rm, cond, rn, op, nzcv;
6012     TCGLabel *label_continue = NULL;
6013     int size;
6014 
6015     mos = extract32(insn, 29, 3);
6016     type = extract32(insn, 22, 2);
6017     rm = extract32(insn, 16, 5);
6018     cond = extract32(insn, 12, 4);
6019     rn = extract32(insn, 5, 5);
6020     op = extract32(insn, 4, 1);
6021     nzcv = extract32(insn, 0, 4);
6022 
6023     if (mos) {
6024         unallocated_encoding(s);
6025         return;
6026     }
6027 
6028     switch (type) {
6029     case 0:
6030         size = MO_32;
6031         break;
6032     case 1:
6033         size = MO_64;
6034         break;
6035     case 3:
6036         size = MO_16;
6037         if (dc_isar_feature(aa64_fp16, s)) {
6038             break;
6039         }
6040         /* fallthru */
6041     default:
6042         unallocated_encoding(s);
6043         return;
6044     }
6045 
6046     if (!fp_access_check(s)) {
6047         return;
6048     }
6049 
6050     if (cond < 0x0e) { /* not always */
6051         TCGLabel *label_match = gen_new_label();
6052         label_continue = gen_new_label();
6053         arm_gen_test_cc(cond, label_match);
6054         /* nomatch: */
6055         gen_set_nzcv(tcg_constant_i64(nzcv << 28));
6056         tcg_gen_br(label_continue);
6057         gen_set_label(label_match);
6058     }
6059 
6060     handle_fp_compare(s, size, rn, rm, false, op);
6061 
6062     if (cond < 0x0e) {
6063         gen_set_label(label_continue);
6064     }
6065 }
6066 
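     /*
      * Illustrative example: "FCCMP S0, S1, #8, GE" compares S0 with S1
      * when GE holds and otherwise forces NZCV = 0b1000 (only N set);
      * "nzcv << 28" places the 4-bit immediate where gen_set_nzcv()
      * expects the flags.
      */
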
6067 /* Floating point conditional select
6068  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
6069  * +---+---+---+-----------+------+---+------+------+-----+------+------+
6070  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
6071  * +---+---+---+-----------+------+---+------+------+-----+------+------+
6072  */
6073 static void disas_fp_csel(DisasContext *s, uint32_t insn)
6074 {
6075     unsigned int mos, type, rm, cond, rn, rd;
6076     TCGv_i64 t_true, t_false;
6077     DisasCompare64 c;
6078     MemOp sz;
6079 
6080     mos = extract32(insn, 29, 3);
6081     type = extract32(insn, 22, 2);
6082     rm = extract32(insn, 16, 5);
6083     cond = extract32(insn, 12, 4);
6084     rn = extract32(insn, 5, 5);
6085     rd = extract32(insn, 0, 5);
6086 
6087     if (mos) {
6088         unallocated_encoding(s);
6089         return;
6090     }
6091 
6092     switch (type) {
6093     case 0:
6094         sz = MO_32;
6095         break;
6096     case 1:
6097         sz = MO_64;
6098         break;
6099     case 3:
6100         sz = MO_16;
6101         if (dc_isar_feature(aa64_fp16, s)) {
6102             break;
6103         }
6104         /* fallthru */
6105     default:
6106         unallocated_encoding(s);
6107         return;
6108     }
6109 
6110     if (!fp_access_check(s)) {
6111         return;
6112     }
6113 
6114     /* Zero extend sreg & hreg inputs to 64 bits now.  */
6115     t_true = tcg_temp_new_i64();
6116     t_false = tcg_temp_new_i64();
6117     read_vec_element(s, t_true, rn, 0, sz);
6118     read_vec_element(s, t_false, rm, 0, sz);
6119 
6120     a64_test_cc(&c, cond);
6121     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
6122                         t_true, t_false);
6123 
6124     /* Note that sregs & hregs write back zeros to the high bits,
6125        and we've already done the zero-extension.  */
6126     write_fp_dreg(s, rd, t_true);
6127 }
6128 
6129 /* Floating-point data-processing (1 source) - half precision */
6130 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
6131 {
6132     TCGv_ptr fpst = NULL;
6133     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
6134     TCGv_i32 tcg_res = tcg_temp_new_i32();
6135 
6136     switch (opcode) {
6137     case 0x0: /* FMOV */
6138         tcg_gen_mov_i32(tcg_res, tcg_op);
6139         break;
6140     case 0x1: /* FABS */
6141         tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
6142         break;
6143     case 0x2: /* FNEG */
6144         tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
6145         break;
6146     case 0x3: /* FSQRT */
6147         fpst = fpstatus_ptr(FPST_FPCR_F16);
6148         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
6149         break;
6150     case 0x8: /* FRINTN */
6151     case 0x9: /* FRINTP */
6152     case 0xa: /* FRINTM */
6153     case 0xb: /* FRINTZ */
6154     case 0xc: /* FRINTA */
6155     {
6156         TCGv_i32 tcg_rmode;
6157 
6158         fpst = fpstatus_ptr(FPST_FPCR_F16);
6159         tcg_rmode = gen_set_rmode(opcode & 7, fpst);
6160         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
6161         gen_restore_rmode(tcg_rmode, fpst);
6162         break;
6163     }
6164     case 0xe: /* FRINTX */
6165         fpst = fpstatus_ptr(FPST_FPCR_F16);
6166         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
6167         break;
6168     case 0xf: /* FRINTI */
6169         fpst = fpstatus_ptr(FPST_FPCR_F16);
6170         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
6171         break;
6172     default:
6173         g_assert_not_reached();
6174     }
6175 
6176     write_fp_sreg(s, rd, tcg_res);
6177 }
6178 
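     /*
      * Note: for half precision, FMOV, FABS and FNEG above are pure bit
      * operations on the 16-bit format (copy, clear or flip sign bit 15),
      * so they need no fpst; only FSQRT and the FRINT cases consult the
      * FP status.
      */
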
6179 /* Floating-point data-processing (1 source) - single precision */
6180 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
6181 {
6182     void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
6183     TCGv_i32 tcg_op, tcg_res;
6184     TCGv_ptr fpst;
6185     int rmode = -1;
6186 
6187     tcg_op = read_fp_sreg(s, rn);
6188     tcg_res = tcg_temp_new_i32();
6189 
6190     switch (opcode) {
6191     case 0x0: /* FMOV */
6192         tcg_gen_mov_i32(tcg_res, tcg_op);
6193         goto done;
6194     case 0x1: /* FABS */
6195         gen_helper_vfp_abss(tcg_res, tcg_op);
6196         goto done;
6197     case 0x2: /* FNEG */
6198         gen_helper_vfp_negs(tcg_res, tcg_op);
6199         goto done;
6200     case 0x3: /* FSQRT */
6201         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
6202         goto done;
6203     case 0x6: /* BFCVT */
6204         gen_fpst = gen_helper_bfcvt;
6205         break;
6206     case 0x8: /* FRINTN */
6207     case 0x9: /* FRINTP */
6208     case 0xa: /* FRINTM */
6209     case 0xb: /* FRINTZ */
6210     case 0xc: /* FRINTA */
6211         rmode = opcode & 7;
6212         gen_fpst = gen_helper_rints;
6213         break;
6214     case 0xe: /* FRINTX */
6215         gen_fpst = gen_helper_rints_exact;
6216         break;
6217     case 0xf: /* FRINTI */
6218         gen_fpst = gen_helper_rints;
6219         break;
6220     case 0x10: /* FRINT32Z */
6221         rmode = FPROUNDING_ZERO;
6222         gen_fpst = gen_helper_frint32_s;
6223         break;
6224     case 0x11: /* FRINT32X */
6225         gen_fpst = gen_helper_frint32_s;
6226         break;
6227     case 0x12: /* FRINT64Z */
6228         rmode = FPROUNDING_ZERO;
6229         gen_fpst = gen_helper_frint64_s;
6230         break;
6231     case 0x13: /* FRINT64X */
6232         gen_fpst = gen_helper_frint64_s;
6233         break;
6234     default:
6235         g_assert_not_reached();
6236     }
6237 
6238     fpst = fpstatus_ptr(FPST_FPCR);
6239     if (rmode >= 0) {
6240         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6241         gen_fpst(tcg_res, tcg_op, fpst);
6242         gen_restore_rmode(tcg_rmode, fpst);
6243     } else {
6244         gen_fpst(tcg_res, tcg_op, fpst);
6245     }
6246 
6247  done:
6248     write_fp_sreg(s, rd, tcg_res);
6249 }
6250 
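     /*
      * Note: for FRINTN/P/M/Z/A, "opcode & 7" yields 0..4, which matches
      * the FPRounding encoding (TIEEVEN, POSINF, NEGINF, ZERO, TIEAWAY)
      * consumed by gen_set_rmode().
      */
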
6251 /* Floating-point data-processing (1 source) - double precision */
6252 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
6253 {
6254     void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
6255     TCGv_i64 tcg_op, tcg_res;
6256     TCGv_ptr fpst;
6257     int rmode = -1;
6258 
6259     switch (opcode) {
6260     case 0x0: /* FMOV */
6261         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
6262         return;
6263     }
6264 
6265     tcg_op = read_fp_dreg(s, rn);
6266     tcg_res = tcg_temp_new_i64();
6267 
6268     switch (opcode) {
6269     case 0x1: /* FABS */
6270         gen_helper_vfp_absd(tcg_res, tcg_op);
6271         goto done;
6272     case 0x2: /* FNEG */
6273         gen_helper_vfp_negd(tcg_res, tcg_op);
6274         goto done;
6275     case 0x3: /* FSQRT */
6276         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
6277         goto done;
6278     case 0x8: /* FRINTN */
6279     case 0x9: /* FRINTP */
6280     case 0xa: /* FRINTM */
6281     case 0xb: /* FRINTZ */
6282     case 0xc: /* FRINTA */
6283         rmode = opcode & 7;
6284         gen_fpst = gen_helper_rintd;
6285         break;
6286     case 0xe: /* FRINTX */
6287         gen_fpst = gen_helper_rintd_exact;
6288         break;
6289     case 0xf: /* FRINTI */
6290         gen_fpst = gen_helper_rintd;
6291         break;
6292     case 0x10: /* FRINT32Z */
6293         rmode = FPROUNDING_ZERO;
6294         gen_fpst = gen_helper_frint32_d;
6295         break;
6296     case 0x11: /* FRINT32X */
6297         gen_fpst = gen_helper_frint32_d;
6298         break;
6299     case 0x12: /* FRINT64Z */
6300         rmode = FPROUNDING_ZERO;
6301         gen_fpst = gen_helper_frint64_d;
6302         break;
6303     case 0x13: /* FRINT64X */
6304         gen_fpst = gen_helper_frint64_d;
6305         break;
6306     default:
6307         g_assert_not_reached();
6308     }
6309 
6310     fpst = fpstatus_ptr(FPST_FPCR);
6311     if (rmode >= 0) {
6312         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6313         gen_fpst(tcg_res, tcg_op, fpst);
6314         gen_restore_rmode(tcg_rmode, fpst);
6315     } else {
6316         gen_fpst(tcg_res, tcg_op, fpst);
6317     }
6318 
6319  done:
6320     write_fp_dreg(s, rd, tcg_res);
6321 }
6322 
6323 static void handle_fp_fcvt(DisasContext *s, int opcode,
6324                            int rd, int rn, int dtype, int ntype)
6325 {
6326     switch (ntype) {
6327     case 0x0:
6328     {
6329         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6330         if (dtype == 1) {
6331             /* Single to double */
6332             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6333             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
6334             write_fp_dreg(s, rd, tcg_rd);
6335         } else {
6336             /* Single to half */
6337             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6338             TCGv_i32 ahp = get_ahp_flag();
6339             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6340 
6341             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6342             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6343             write_fp_sreg(s, rd, tcg_rd);
6344         }
6345         break;
6346     }
6347     case 0x1:
6348     {
6349         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
6350         TCGv_i32 tcg_rd = tcg_temp_new_i32();
6351         if (dtype == 0) {
6352             /* Double to single */
6353             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
6354         } else {
6355             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6356             TCGv_i32 ahp = get_ahp_flag();
6357             /* Double to half */
6358             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6359             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6360         }
6361         write_fp_sreg(s, rd, tcg_rd);
6362         break;
6363     }
6364     case 0x3:
6365     {
6366         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6367         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
6368         TCGv_i32 tcg_ahp = get_ahp_flag();
6369         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
6370         if (dtype == 0) {
6371             /* Half to single */
6372             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6373             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6374             write_fp_sreg(s, rd, tcg_rd);
6375         } else {
6376             /* Half to double */
6377             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6378             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6379             write_fp_dreg(s, rd, tcg_rd);
6380         }
6381         break;
6382     }
6383     default:
6384         g_assert_not_reached();
6385     }
6386 }
6387 
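     /*
      * Note: get_ahp_flag() reads FPCR.AHP, which selects between IEEE
      * half precision and the Alternative Half Precision format for the
      * f16 conversions in handle_fp_fcvt() above.
      */
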
6388 /* Floating point data-processing (1 source)
6389  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
6390  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6391  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
6392  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6393  */
6394 static void disas_fp_1src(DisasContext *s, uint32_t insn)
6395 {
6396     int mos = extract32(insn, 29, 3);
6397     int type = extract32(insn, 22, 2);
6398     int opcode = extract32(insn, 15, 6);
6399     int rn = extract32(insn, 5, 5);
6400     int rd = extract32(insn, 0, 5);
6401 
6402     if (mos) {
6403         goto do_unallocated;
6404     }
6405 
6406     switch (opcode) {
6407     case 0x4: case 0x5: case 0x7:
6408     {
6409         /* FCVT between half, single and double precision */
6410         int dtype = extract32(opcode, 0, 2);
6411         if (type == 2 || dtype == type) {
6412             goto do_unallocated;
6413         }
6414         if (!fp_access_check(s)) {
6415             return;
6416         }
6417 
6418         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
6419         break;
6420     }
6421 
6422     case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
6423         if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
6424             goto do_unallocated;
6425         }
6426         /* fall through */
6427     case 0x0 ... 0x3:
6428     case 0x8 ... 0xc:
6429     case 0xe ... 0xf:
6430         /* same-precision ops: source and destination size match */
6431         switch (type) {
6432         case 0:
6433             if (!fp_access_check(s)) {
6434                 return;
6435             }
6436             handle_fp_1src_single(s, opcode, rd, rn);
6437             break;
6438         case 1:
6439             if (!fp_access_check(s)) {
6440                 return;
6441             }
6442             handle_fp_1src_double(s, opcode, rd, rn);
6443             break;
6444         case 3:
6445             if (!dc_isar_feature(aa64_fp16, s)) {
6446                 goto do_unallocated;
6447             }
6448 
6449             if (!fp_access_check(s)) {
6450                 return;
6451             }
6452             handle_fp_1src_half(s, opcode, rd, rn);
6453             break;
6454         default:
6455             goto do_unallocated;
6456         }
6457         break;
6458 
6459     case 0x6:
6460         switch (type) {
6461         case 1: /* BFCVT */
6462             if (!dc_isar_feature(aa64_bf16, s)) {
6463                 goto do_unallocated;
6464             }
6465             if (!fp_access_check(s)) {
6466                 return;
6467             }
6468             handle_fp_1src_single(s, opcode, rd, rn);
6469             break;
6470         default:
6471             goto do_unallocated;
6472         }
6473         break;
6474 
6475     default:
6476     do_unallocated:
6477         unallocated_encoding(s);
6478         break;
6479     }
6480 }
6481 
6482 /* Floating-point data-processing (2 source) - single precision */
6483 static void handle_fp_2src_single(DisasContext *s, int opcode,
6484                                   int rd, int rn, int rm)
6485 {
6486     TCGv_i32 tcg_op1;
6487     TCGv_i32 tcg_op2;
6488     TCGv_i32 tcg_res;
6489     TCGv_ptr fpst;
6490 
6491     tcg_res = tcg_temp_new_i32();
6492     fpst = fpstatus_ptr(FPST_FPCR);
6493     tcg_op1 = read_fp_sreg(s, rn);
6494     tcg_op2 = read_fp_sreg(s, rm);
6495 
6496     switch (opcode) {
6497     case 0x0: /* FMUL */
6498         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6499         break;
6500     case 0x1: /* FDIV */
6501         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
6502         break;
6503     case 0x2: /* FADD */
6504         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6505         break;
6506     case 0x3: /* FSUB */
6507         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
6508         break;
6509     case 0x4: /* FMAX */
6510         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6511         break;
6512     case 0x5: /* FMIN */
6513         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6514         break;
6515     case 0x6: /* FMAXNM */
6516         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6517         break;
6518     case 0x7: /* FMINNM */
6519         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6520         break;
6521     case 0x8: /* FNMUL */
6522         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6523         gen_helper_vfp_negs(tcg_res, tcg_res);
6524         break;
6525     }
6526 
6527     write_fp_sreg(s, rd, tcg_res);
6528 }
6529 
6530 /* Floating-point data-processing (2 source) - double precision */
6531 static void handle_fp_2src_double(DisasContext *s, int opcode,
6532                                   int rd, int rn, int rm)
6533 {
6534     TCGv_i64 tcg_op1;
6535     TCGv_i64 tcg_op2;
6536     TCGv_i64 tcg_res;
6537     TCGv_ptr fpst;
6538 
6539     tcg_res = tcg_temp_new_i64();
6540     fpst = fpstatus_ptr(FPST_FPCR);
6541     tcg_op1 = read_fp_dreg(s, rn);
6542     tcg_op2 = read_fp_dreg(s, rm);
6543 
6544     switch (opcode) {
6545     case 0x0: /* FMUL */
6546         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6547         break;
6548     case 0x1: /* FDIV */
6549         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
6550         break;
6551     case 0x2: /* FADD */
6552         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6553         break;
6554     case 0x3: /* FSUB */
6555         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
6556         break;
6557     case 0x4: /* FMAX */
6558         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6559         break;
6560     case 0x5: /* FMIN */
6561         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6562         break;
6563     case 0x6: /* FMAXNM */
6564         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6565         break;
6566     case 0x7: /* FMINNM */
6567         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6568         break;
6569     case 0x8: /* FNMUL */
6570         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6571         gen_helper_vfp_negd(tcg_res, tcg_res);
6572         break;
6573     }
6574 
6575     write_fp_dreg(s, rd, tcg_res);
6576 }
6577 
6578 /* Floating-point data-processing (2 source) - half precision */
6579 static void handle_fp_2src_half(DisasContext *s, int opcode,
6580                                 int rd, int rn, int rm)
6581 {
6582     TCGv_i32 tcg_op1;
6583     TCGv_i32 tcg_op2;
6584     TCGv_i32 tcg_res;
6585     TCGv_ptr fpst;
6586 
6587     tcg_res = tcg_temp_new_i32();
6588     fpst = fpstatus_ptr(FPST_FPCR_F16);
6589     tcg_op1 = read_fp_hreg(s, rn);
6590     tcg_op2 = read_fp_hreg(s, rm);
6591 
6592     switch (opcode) {
6593     case 0x0: /* FMUL */
6594         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6595         break;
6596     case 0x1: /* FDIV */
6597         gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
6598         break;
6599     case 0x2: /* FADD */
6600         gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6601         break;
6602     case 0x3: /* FSUB */
6603         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6604         break;
6605     case 0x4: /* FMAX */
6606         gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6607         break;
6608     case 0x5: /* FMIN */
6609         gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6610         break;
6611     case 0x6: /* FMAXNM */
6612         gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6613         break;
6614     case 0x7: /* FMINNM */
6615         gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6616         break;
6617     case 0x8: /* FNMUL */
6618         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6619         tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6620         break;
6621     default:
6622         g_assert_not_reached();
6623     }
6624 
6625     write_fp_sreg(s, rd, tcg_res);
6626 }
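
/*
 * Note on the FNMUL case above: there is no dedicated half-precision
 * negate helper, so the sign is flipped by XORing bit 15, the sign bit
 * of an IEEE binary16 value. E.g. 1.0 is 0x3c00, and
 * 0x3c00 ^ 0x8000 == 0xbc00, which is -1.0.
 */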
6627 
6628 /* Floating point data-processing (2 source)
6629  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6630  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6631  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
6632  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6633  */
6634 static void disas_fp_2src(DisasContext *s, uint32_t insn)
6635 {
6636     int mos = extract32(insn, 29, 3);
6637     int type = extract32(insn, 22, 2);
6638     int rd = extract32(insn, 0, 5);
6639     int rn = extract32(insn, 5, 5);
6640     int rm = extract32(insn, 16, 5);
6641     int opcode = extract32(insn, 12, 4);
6642 
6643     if (opcode > 8 || mos) {
6644         unallocated_encoding(s);
6645         return;
6646     }
6647 
6648     switch (type) {
6649     case 0:
6650         if (!fp_access_check(s)) {
6651             return;
6652         }
6653         handle_fp_2src_single(s, opcode, rd, rn, rm);
6654         break;
6655     case 1:
6656         if (!fp_access_check(s)) {
6657             return;
6658         }
6659         handle_fp_2src_double(s, opcode, rd, rn, rm);
6660         break;
6661     case 3:
6662         if (!dc_isar_feature(aa64_fp16, s)) {
6663             unallocated_encoding(s);
6664             return;
6665         }
6666         if (!fp_access_check(s)) {
6667             return;
6668         }
6669         handle_fp_2src_half(s, opcode, rd, rn, rm);
6670         break;
6671     default:
6672         unallocated_encoding(s);
6673     }
6674 }
6675 
6676 /* Floating-point data-processing (3 source) - single precision */
6677 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6678                                   int rd, int rn, int rm, int ra)
6679 {
6680     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6681     TCGv_i32 tcg_res = tcg_temp_new_i32();
6682     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6683 
6684     tcg_op1 = read_fp_sreg(s, rn);
6685     tcg_op2 = read_fp_sreg(s, rm);
6686     tcg_op3 = read_fp_sreg(s, ra);
6687 
6688     /* These are fused multiply-add, and must be done as one
6689      * floating point operation with no rounding between the
6690      * multiplication and addition steps.
6691      * NB that doing the negations here as separate steps is
6692      * correct: an input NaN should come out with its sign bit
6693      * flipped if it is a negated input.
6694      */
6695     if (o1) {
6696         gen_helper_vfp_negs(tcg_op3, tcg_op3);
6697     }
6698 
6699     if (o0 != o1) {
6700         gen_helper_vfp_negs(tcg_op1, tcg_op1);
6701     }
6702 
6703     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6704 
6705     write_fp_sreg(s, rd, tcg_res);
6706 }
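
/*
 * The o0/o1 combinations and the two conditional negations above give
 * the four fused ops (the same scheme applies to the double and half
 * precision versions below):
 *
 *   o1 o0   negate ra?  negate rn?   insn     result
 *    0  0   no          no           FMADD     ra + rn * rm
 *    0  1   no          yes          FMSUB     ra - rn * rm
 *    1  0   yes         yes          FNMADD   -ra - rn * rm
 *    1  1   yes         no           FNMSUB   -ra + rn * rm
 */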
6707 
6708 /* Floating-point data-processing (3 source) - double precision */
6709 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6710                                   int rd, int rn, int rm, int ra)
6711 {
6712     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6713     TCGv_i64 tcg_res = tcg_temp_new_i64();
6714     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6715 
6716     tcg_op1 = read_fp_dreg(s, rn);
6717     tcg_op2 = read_fp_dreg(s, rm);
6718     tcg_op3 = read_fp_dreg(s, ra);
6719 
6720     /* These are fused multiply-add, and must be done as one
6721      * floating point operation with no rounding between the
6722      * multiplication and addition steps.
6723      * NB that doing the negations here as separate steps is
6724      * correct: an input NaN should come out with its sign bit
6725      * flipped if it is a negated input.
6726      */
6727     if (o1) {
6728         gen_helper_vfp_negd(tcg_op3, tcg_op3);
6729     }
6730 
6731     if (o0 != o1) {
6732         gen_helper_vfp_negd(tcg_op1, tcg_op1);
6733     }
6734 
6735     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6736 
6737     write_fp_dreg(s, rd, tcg_res);
6738 }
6739 
6740 /* Floating-point data-processing (3 source) - half precision */
6741 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6742                                 int rd, int rn, int rm, int ra)
6743 {
6744     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6745     TCGv_i32 tcg_res = tcg_temp_new_i32();
6746     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
6747 
6748     tcg_op1 = read_fp_hreg(s, rn);
6749     tcg_op2 = read_fp_hreg(s, rm);
6750     tcg_op3 = read_fp_hreg(s, ra);
6751 
6752     /* These are fused multiply-add, and must be done as one
6753      * floating point operation with no rounding between the
6754      * multiplication and addition steps.
6755      * NB that doing the negations here as separate steps is
6756      * correct: an input NaN should come out with its sign bit
6757      * flipped if it is a negated input.
6758      */
6759     if (o1) {
6760         tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6761     }
6762 
6763     if (o0 != o1) {
6764         tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6765     }
6766 
6767     gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6768 
6769     write_fp_sreg(s, rd, tcg_res);
6770 }
6771 
6772 /* Floating point data-processing (3 source)
6773  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
6774  * +---+---+---+-----------+------+----+------+----+------+------+------+
6775  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6776  * +---+---+---+-----------+------+----+------+----+------+------+------+
6777  */
6778 static void disas_fp_3src(DisasContext *s, uint32_t insn)
6779 {
6780     int mos = extract32(insn, 29, 3);
6781     int type = extract32(insn, 22, 2);
6782     int rd = extract32(insn, 0, 5);
6783     int rn = extract32(insn, 5, 5);
6784     int ra = extract32(insn, 10, 5);
6785     int rm = extract32(insn, 16, 5);
6786     bool o0 = extract32(insn, 15, 1);
6787     bool o1 = extract32(insn, 21, 1);
6788 
6789     if (mos) {
6790         unallocated_encoding(s);
6791         return;
6792     }
6793 
6794     switch (type) {
6795     case 0:
6796         if (!fp_access_check(s)) {
6797             return;
6798         }
6799         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6800         break;
6801     case 1:
6802         if (!fp_access_check(s)) {
6803             return;
6804         }
6805         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6806         break;
6807     case 3:
6808         if (!dc_isar_feature(aa64_fp16, s)) {
6809             unallocated_encoding(s);
6810             return;
6811         }
6812         if (!fp_access_check(s)) {
6813             return;
6814         }
6815         handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6816         break;
6817     default:
6818         unallocated_encoding(s);
6819     }
6820 }
6821 
6822 /* Floating point immediate
6823  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
6824  * +---+---+---+-----------+------+---+------------+-------+------+------+
6825  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
6826  * +---+---+---+-----------+------+---+------------+-------+------+------+
6827  */
6828 static void disas_fp_imm(DisasContext *s, uint32_t insn)
6829 {
6830     int rd = extract32(insn, 0, 5);
6831     int imm5 = extract32(insn, 5, 5);
6832     int imm8 = extract32(insn, 13, 8);
6833     int type = extract32(insn, 22, 2);
6834     int mos = extract32(insn, 29, 3);
6835     uint64_t imm;
6836     MemOp sz;
6837 
6838     if (mos || imm5) {
6839         unallocated_encoding(s);
6840         return;
6841     }
6842 
6843     switch (type) {
6844     case 0:
6845         sz = MO_32;
6846         break;
6847     case 1:
6848         sz = MO_64;
6849         break;
6850     case 3:
6851         sz = MO_16;
6852         if (dc_isar_feature(aa64_fp16, s)) {
6853             break;
6854         }
6855         /* fallthru */
6856     default:
6857         unallocated_encoding(s);
6858         return;
6859     }
6860 
6861     if (!fp_access_check(s)) {
6862         return;
6863     }
6864 
6865     imm = vfp_expand_imm(sz, imm8);
6866     write_fp_dreg(s, rd, tcg_constant_i64(imm));
6867 }
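
/*
 * vfp_expand_imm() implements the ARM ARM VFPExpandImm() expansion of
 * the 8-bit immediate. E.g. for sz == MO_32, imm8 == 0x70 expands to
 * 0x3f800000, i.e. 1.0f; the value is passed as a 64-bit constant, so
 * narrower floats land zero-extended in the register's low 64 bits.
 */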
6868 
6869 /* Handle floating point <=> fixed point conversions. Note that we can
6870  * also deal with fp <=> integer conversions as a special case (scale == 64).
6871  * OPTME: consider handling that case separately, or at least skipping
6872  * the call to scalbn in the helpers when the shift is zero.
6873  */
6874 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
6875                            bool itof, int rmode, int scale, int sf, int type)
6876 {
6877     bool is_signed = !(opcode & 1);
6878     TCGv_ptr tcg_fpstatus;
6879     TCGv_i32 tcg_shift, tcg_single;
6880     TCGv_i64 tcg_double;
6881 
6882     tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
6883 
6884     tcg_shift = tcg_constant_i32(64 - scale);
6885 
6886     if (itof) {
6887         TCGv_i64 tcg_int = cpu_reg(s, rn);
6888         if (!sf) {
6889             TCGv_i64 tcg_extend = tcg_temp_new_i64();
6890 
6891             if (is_signed) {
6892                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
6893             } else {
6894                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
6895             }
6896 
6897             tcg_int = tcg_extend;
6898         }
6899 
6900         switch (type) {
6901         case 1: /* float64 */
6902             tcg_double = tcg_temp_new_i64();
6903             if (is_signed) {
6904                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6905                                      tcg_shift, tcg_fpstatus);
6906             } else {
6907                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6908                                      tcg_shift, tcg_fpstatus);
6909             }
6910             write_fp_dreg(s, rd, tcg_double);
6911             break;
6912 
6913         case 0: /* float32 */
6914             tcg_single = tcg_temp_new_i32();
6915             if (is_signed) {
6916                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6917                                      tcg_shift, tcg_fpstatus);
6918             } else {
6919                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6920                                      tcg_shift, tcg_fpstatus);
6921             }
6922             write_fp_sreg(s, rd, tcg_single);
6923             break;
6924 
6925         case 3: /* float16 */
6926             tcg_single = tcg_temp_new_i32();
6927             if (is_signed) {
6928                 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
6929                                      tcg_shift, tcg_fpstatus);
6930             } else {
6931                 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
6932                                      tcg_shift, tcg_fpstatus);
6933             }
6934             write_fp_sreg(s, rd, tcg_single);
6935             break;
6936 
6937         default:
6938             g_assert_not_reached();
6939         }
6940     } else {
6941         TCGv_i64 tcg_int = cpu_reg(s, rd);
6942         TCGv_i32 tcg_rmode;
6943 
6944         if (extract32(opcode, 2, 1)) {
6945             /* The 2-bit rmode field cannot encode every rounding mode,
6946              * so FCVTA[US] (ties-away) is a special case.
6947              */
6948             rmode = FPROUNDING_TIEAWAY;
6949         }
6950 
6951         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
6952 
6953         switch (type) {
6954         case 1: /* float64 */
6955             tcg_double = read_fp_dreg(s, rn);
6956             if (is_signed) {
6957                 if (!sf) {
6958                     gen_helper_vfp_tosld(tcg_int, tcg_double,
6959                                          tcg_shift, tcg_fpstatus);
6960                 } else {
6961                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
6962                                          tcg_shift, tcg_fpstatus);
6963                 }
6964             } else {
6965                 if (!sf) {
6966                     gen_helper_vfp_tould(tcg_int, tcg_double,
6967                                          tcg_shift, tcg_fpstatus);
6968                 } else {
6969                     gen_helper_vfp_touqd(tcg_int, tcg_double,
6970                                          tcg_shift, tcg_fpstatus);
6971                 }
6972             }
6973             if (!sf) {
6974                 tcg_gen_ext32u_i64(tcg_int, tcg_int);
6975             }
6976             break;
6977 
6978         case 0: /* float32 */
6979             tcg_single = read_fp_sreg(s, rn);
6980             if (sf) {
6981                 if (is_signed) {
6982                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
6983                                          tcg_shift, tcg_fpstatus);
6984                 } else {
6985                     gen_helper_vfp_touqs(tcg_int, tcg_single,
6986                                          tcg_shift, tcg_fpstatus);
6987                 }
6988             } else {
6989                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6990                 if (is_signed) {
6991                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
6992                                          tcg_shift, tcg_fpstatus);
6993                 } else {
6994                     gen_helper_vfp_touls(tcg_dest, tcg_single,
6995                                          tcg_shift, tcg_fpstatus);
6996                 }
6997                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6998             }
6999             break;
7000 
7001         case 3: /* float16 */
7002             tcg_single = read_fp_sreg(s, rn);
7003             if (sf) {
7004                 if (is_signed) {
7005                     gen_helper_vfp_tosqh(tcg_int, tcg_single,
7006                                          tcg_shift, tcg_fpstatus);
7007                 } else {
7008                     gen_helper_vfp_touqh(tcg_int, tcg_single,
7009                                          tcg_shift, tcg_fpstatus);
7010                 }
7011             } else {
7012                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
7013                 if (is_signed) {
7014                     gen_helper_vfp_toslh(tcg_dest, tcg_single,
7015                                          tcg_shift, tcg_fpstatus);
7016                 } else {
7017                     gen_helper_vfp_toulh(tcg_dest, tcg_single,
7018                                          tcg_shift, tcg_fpstatus);
7019                 }
7020                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
7021             }
7022             break;
7023 
7024         default:
7025             g_assert_not_reached();
7026         }
7027 
7028         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
7029     }
7030 }
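
/*
 * On the fixed-point scaling: tcg_shift is the number of fractional
 * bits (64 - scale). For itof the helpers convert the integer and then
 * scale the result down by 2^shift, so e.g. UCVTF with 16 fractional
 * bits turns 0x8000 into 0.5; for ftoi they scale up by 2^shift before
 * rounding. A shift of zero (the scale == 64 integer case) is a plain
 * conversion.
 */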
7031 
7032 /* Floating point <-> fixed point conversions
7033  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
7034  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
7035  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
7036  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
7037  */
7038 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
7039 {
7040     int rd = extract32(insn, 0, 5);
7041     int rn = extract32(insn, 5, 5);
7042     int scale = extract32(insn, 10, 6);
7043     int opcode = extract32(insn, 16, 3);
7044     int rmode = extract32(insn, 19, 2);
7045     int type = extract32(insn, 22, 2);
7046     bool sbit = extract32(insn, 29, 1);
7047     bool sf = extract32(insn, 31, 1);
7048     bool itof;
7049 
7050     if (sbit || (!sf && scale < 32)) {
7051         unallocated_encoding(s);
7052         return;
7053     }
7054 
7055     switch (type) {
7056     case 0: /* float32 */
7057     case 1: /* float64 */
7058         break;
7059     case 3: /* float16 */
7060         if (dc_isar_feature(aa64_fp16, s)) {
7061             break;
7062         }
7063         /* fallthru */
7064     default:
7065         unallocated_encoding(s);
7066         return;
7067     }
7068 
7069     switch ((rmode << 3) | opcode) {
7070     case 0x2: /* SCVTF */
7071     case 0x3: /* UCVTF */
7072         itof = true;
7073         break;
7074     case 0x18: /* FCVTZS */
7075     case 0x19: /* FCVTZU */
7076         itof = false;
7077         break;
7078     default:
7079         unallocated_encoding(s);
7080         return;
7081     }
7082 
7083     if (!fp_access_check(s)) {
7084         return;
7085     }
7086 
7087     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
7088 }
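
/* The (rmode << 3) | opcode keys above decode as:
 *   0x02, 0x03 : rmode == 0, opcode == 2/3 -> SCVTF/UCVTF (int to fp)
 *   0x18, 0x19 : rmode == 3, opcode == 0/1 -> FCVTZS/FCVTZU (fp to int)
 * everything else in this space is unallocated.
 */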
7089 
7090 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
7091 {
7092     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
7093      * without conversion.
7094      */
7095 
7096     if (itof) {
7097         TCGv_i64 tcg_rn = cpu_reg(s, rn);
7098         TCGv_i64 tmp;
7099 
7100         switch (type) {
7101         case 0:
7102             /* 32 bit */
7103             tmp = tcg_temp_new_i64();
7104             tcg_gen_ext32u_i64(tmp, tcg_rn);
7105             write_fp_dreg(s, rd, tmp);
7106             break;
7107         case 1:
7108             /* 64 bit */
7109             write_fp_dreg(s, rd, tcg_rn);
7110             break;
7111         case 2:
7112             /* 64 bit to top half. */
7113             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
7114             clear_vec_high(s, true, rd);
7115             break;
7116         case 3:
7117             /* 16 bit */
7118             tmp = tcg_temp_new_i64();
7119             tcg_gen_ext16u_i64(tmp, tcg_rn);
7120             write_fp_dreg(s, rd, tmp);
7121             break;
7122         default:
7123             g_assert_not_reached();
7124         }
7125     } else {
7126         TCGv_i64 tcg_rd = cpu_reg(s, rd);
7127 
7128         switch (type) {
7129         case 0:
7130             /* 32 bit */
7131             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
7132             break;
7133         case 1:
7134             /* 64 bit */
7135             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
7136             break;
7137         case 2:
7138             /* 64 bits from top half */
7139             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
7140             break;
7141         case 3:
7142             /* 16 bit */
7143             tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
7144             break;
7145         default:
7146             g_assert_not_reached();
7147         }
7148     }
7149 }
7150 
7151 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
7152 {
7153     TCGv_i64 t = read_fp_dreg(s, rn);
7154     TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
7155 
7156     gen_helper_fjcvtzs(t, t, fpstatus);
7157 
7158     tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
7159     tcg_gen_extrh_i64_i32(cpu_ZF, t);
7160     tcg_gen_movi_i32(cpu_CF, 0);
7161     tcg_gen_movi_i32(cpu_NF, 0);
7162     tcg_gen_movi_i32(cpu_VF, 0);
7163 }
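
/*
 * gen_helper_fjcvtzs() packs the 32-bit converted result into the low
 * half of t and the value for cpu_ZF into the high half. Since QEMU
 * stores Z inverted (the flag is set when cpu_ZF == 0), the high half
 * is zero exactly when the conversion was exact; N, C and V are always
 * cleared, matching the architected nzcv = 0b0Z00 result.
 */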
7164 
7165 /* Floating point <-> integer conversions
7166  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
7167  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
7168  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
7169  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
7170  */
7171 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
7172 {
7173     int rd = extract32(insn, 0, 5);
7174     int rn = extract32(insn, 5, 5);
7175     int opcode = extract32(insn, 16, 3);
7176     int rmode = extract32(insn, 19, 2);
7177     int type = extract32(insn, 22, 2);
7178     bool sbit = extract32(insn, 29, 1);
7179     bool sf = extract32(insn, 31, 1);
7180     bool itof = false;
7181 
7182     if (sbit) {
7183         goto do_unallocated;
7184     }
7185 
7186     switch (opcode) {
7187     case 2: /* SCVTF */
7188     case 3: /* UCVTF */
7189         itof = true;
7190         /* fallthru */
7191     case 4: /* FCVTAS */
7192     case 5: /* FCVTAU */
7193         if (rmode != 0) {
7194             goto do_unallocated;
7195         }
7196         /* fallthru */
7197     case 0: /* FCVT[NPMZ]S */
7198     case 1: /* FCVT[NPMZ]U */
7199         switch (type) {
7200         case 0: /* float32 */
7201         case 1: /* float64 */
7202             break;
7203         case 3: /* float16 */
7204             if (!dc_isar_feature(aa64_fp16, s)) {
7205                 goto do_unallocated;
7206             }
7207             break;
7208         default:
7209             goto do_unallocated;
7210         }
7211         if (!fp_access_check(s)) {
7212             return;
7213         }
7214         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
7215         break;
7216 
7217     default:
7218         switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
7219         case 0b01100110: /* FMOV half <-> 32-bit int */
7220         case 0b01100111:
7221         case 0b11100110: /* FMOV half <-> 64-bit int */
7222         case 0b11100111:
7223             if (!dc_isar_feature(aa64_fp16, s)) {
7224                 goto do_unallocated;
7225             }
7226             /* fallthru */
7227         case 0b00000110: /* FMOV 32-bit */
7228         case 0b00000111:
7229         case 0b10100110: /* FMOV 64-bit */
7230         case 0b10100111:
7231         case 0b11001110: /* FMOV top half of 128-bit */
7232         case 0b11001111:
7233             if (!fp_access_check(s)) {
7234                 return;
7235             }
7236             itof = opcode & 1;
7237             handle_fmov(s, rd, rn, type, itof);
7238             break;
7239 
7240         case 0b00111110: /* FJCVTZS */
7241             if (!dc_isar_feature(aa64_jscvt, s)) {
7242                 goto do_unallocated;
7243             } else if (fp_access_check(s)) {
7244                 handle_fjcvtzs(s, rd, rn);
7245             }
7246             break;
7247 
7248         default:
7249         do_unallocated:
7250             unallocated_encoding(s);
7251             return;
7252         }
7253         break;
7254     }
7255 }
7256 
7257 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
7258  *   31  30  29 28     25 24                          0
7259  * +---+---+---+---------+-----------------------------+
7260  * |   | 0 |   | 1 1 1 1 |                             |
7261  * +---+---+---+---------+-----------------------------+
7262  */
7263 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
7264 {
7265     if (extract32(insn, 24, 1)) {
7266         /* Floating point data-processing (3 source) */
7267         disas_fp_3src(s, insn);
7268     } else if (extract32(insn, 21, 1) == 0) {
7269         /* Floating point to fixed point conversions */
7270         disas_fp_fixed_conv(s, insn);
7271     } else {
7272         switch (extract32(insn, 10, 2)) {
7273         case 1:
7274             /* Floating point conditional compare */
7275             disas_fp_ccomp(s, insn);
7276             break;
7277         case 2:
7278             /* Floating point data-processing (2 source) */
7279             disas_fp_2src(s, insn);
7280             break;
7281         case 3:
7282             /* Floating point conditional select */
7283             disas_fp_csel(s, insn);
7284             break;
7285         case 0:
7286             switch (ctz32(extract32(insn, 12, 4))) {
7287             case 0: /* [15:12] == xxx1 */
7288                 /* Floating point immediate */
7289                 disas_fp_imm(s, insn);
7290                 break;
7291             case 1: /* [15:12] == xx10 */
7292                 /* Floating point compare */
7293                 disas_fp_compare(s, insn);
7294                 break;
7295             case 2: /* [15:12] == x100 */
7296                 /* Floating point data-processing (1 source) */
7297                 disas_fp_1src(s, insn);
7298                 break;
7299             case 3: /* [15:12] == 1000 */
7300                 unallocated_encoding(s);
7301                 break;
7302             default: /* [15:12] == 0000 */
7303                 /* Floating point <-> integer conversions */
7304                 disas_fp_int_conv(s, insn);
7305                 break;
7306             }
7307             break;
7308         }
7309     }
7310 }
7311 
7312 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
7313                      int pos)
7314 {
7315     /* Extract 64 bits from the middle of two concatenated 64 bit
7316      * vector register slices left:right. The extracted bits start
7317      * at 'pos' bits into the right (least significant) side.
7318      * We return the result in tcg_right, and guarantee not to
7319      * trash tcg_left.
7320      */
7321     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7322     assert(pos > 0 && pos < 64);
7323 
7324     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
7325     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
7326     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
7327 }
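
/*
 * Worked example: with pos == 24, tcg_right >> 24 leaves right[63:24]
 * in bits [39:0], tcg_tmp = left << 40 puts left[23:0] in bits [63:40],
 * and the OR yields (left:right)[87:24], i.e. 64 bits starting 24 bits
 * into the concatenation, as the comment above requires.
 */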
7328 
7329 /* EXT
7330  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
7331  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7332  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
7333  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7334  */
7335 static void disas_simd_ext(DisasContext *s, uint32_t insn)
7336 {
7337     int is_q = extract32(insn, 30, 1);
7338     int op2 = extract32(insn, 22, 2);
7339     int imm4 = extract32(insn, 11, 4);
7340     int rm = extract32(insn, 16, 5);
7341     int rn = extract32(insn, 5, 5);
7342     int rd = extract32(insn, 0, 5);
7343     int pos = imm4 << 3;
7344     TCGv_i64 tcg_resl, tcg_resh;
7345 
7346     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
7347         unallocated_encoding(s);
7348         return;
7349     }
7350 
7351     if (!fp_access_check(s)) {
7352         return;
7353     }
7354 
7355     tcg_resh = tcg_temp_new_i64();
7356     tcg_resl = tcg_temp_new_i64();
7357 
7358     /* Vd gets bits starting at pos bits into Vm:Vn. This is
7359      * either extracting 128 bits from a 128:128 concatenation, or
7360      * extracting 64 bits from a 64:64 concatenation.
7361      */
7362     if (!is_q) {
7363         read_vec_element(s, tcg_resl, rn, 0, MO_64);
7364         if (pos != 0) {
7365             read_vec_element(s, tcg_resh, rm, 0, MO_64);
7366             do_ext64(s, tcg_resh, tcg_resl, pos);
7367         }
7368     } else {
7369         TCGv_i64 tcg_hh;
7370         typedef struct {
7371             int reg;
7372             int elt;
7373         } EltPosns;
7374         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
7375         EltPosns *elt = eltposns;
7376 
7377         if (pos >= 64) {
7378             elt++;
7379             pos -= 64;
7380         }
7381 
7382         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
7383         elt++;
7384         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
7385         elt++;
7386         if (pos != 0) {
7387             do_ext64(s, tcg_resh, tcg_resl, pos);
7388             tcg_hh = tcg_temp_new_i64();
7389             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
7390             do_ext64(s, tcg_hh, tcg_resh, pos);
7391         }
7392     }
7393 
7394     write_vec_element(s, tcg_resl, rd, 0, MO_64);
7395     if (is_q) {
7396         write_vec_element(s, tcg_resh, rd, 1, MO_64);
7397     }
7398     clear_vec_high(s, is_q, rd);
7399 }
7400 
7401 /* TBL/TBX
7402  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
7403  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7404  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
7405  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7406  */
7407 static void disas_simd_tb(DisasContext *s, uint32_t insn)
7408 {
7409     int op2 = extract32(insn, 22, 2);
7410     int is_q = extract32(insn, 30, 1);
7411     int rm = extract32(insn, 16, 5);
7412     int rn = extract32(insn, 5, 5);
7413     int rd = extract32(insn, 0, 5);
7414     int is_tbx = extract32(insn, 12, 1);
7415     int len = (extract32(insn, 13, 2) + 1) * 16;
7416 
7417     if (op2 != 0) {
7418         unallocated_encoding(s);
7419         return;
7420     }
7421 
7422     if (!fp_access_check(s)) {
7423         return;
7424     }
7425 
7426     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
7427                        vec_full_reg_offset(s, rm), cpu_env,
7428                        is_q ? 16 : 8, vec_full_reg_size(s),
7429                        (len << 6) | (is_tbx << 5) | rn,
7430                        gen_helper_simd_tblx);
7431 }
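
/*
 * All the TBL/TBX state is packed into the gvec "data" immediate for
 * gen_helper_simd_tblx: the table length in bytes (16 per register, so
 * 16..64), the TBX flag (out-of-range indices preserve the destination
 * for TBX, zero it for TBL), and the index of the first table register.
 */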
7432 
7433 /* ZIP/UZP/TRN
7434  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
7435  * +---+---+-------------+------+---+------+---+------------------+------+
7436  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
7437  * +---+---+-------------+------+---+------+---+------------------+------+
7438  */
7439 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
7440 {
7441     int rd = extract32(insn, 0, 5);
7442     int rn = extract32(insn, 5, 5);
7443     int rm = extract32(insn, 16, 5);
7444     int size = extract32(insn, 22, 2);
7445     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
7446      * bit 2 indicates 1 vs 2 variant of the insn.
7447      */
7448     int opcode = extract32(insn, 12, 2);
7449     bool part = extract32(insn, 14, 1);
7450     bool is_q = extract32(insn, 30, 1);
7451     int esize = 8 << size;
7452     int i;
7453     int datasize = is_q ? 128 : 64;
7454     int elements = datasize / esize;
7455     TCGv_i64 tcg_res[2], tcg_ele;
7456 
7457     if (opcode == 0 || (size == 3 && !is_q)) {
7458         unallocated_encoding(s);
7459         return;
7460     }
7461 
7462     if (!fp_access_check(s)) {
7463         return;
7464     }
7465 
7466     tcg_res[0] = tcg_temp_new_i64();
7467     tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
7468     tcg_ele = tcg_temp_new_i64();
7469 
7470     for (i = 0; i < elements; i++) {
7471         int o, w;
7472 
7473         switch (opcode) {
7474         case 1: /* UZP1/2 */
7475         {
7476             int midpoint = elements / 2;
7477             if (i < midpoint) {
7478                 read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
7479             } else {
7480                 read_vec_element(s, tcg_ele, rm,
7481                                  2 * (i - midpoint) + part, size);
7482             }
7483             break;
7484         }
7485         case 2: /* TRN1/2 */
7486             if (i & 1) {
7487                 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
7488             } else {
7489                 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
7490             }
7491             break;
7492         case 3: /* ZIP1/2 */
7493         {
7494             int base = part * elements / 2;
7495             if (i & 1) {
7496                 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
7497             } else {
7498                 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
7499             }
7500             break;
7501         }
7502         default:
7503             g_assert_not_reached();
7504         }
7505 
7506         w = (i * esize) / 64;
7507         o = (i * esize) % 64;
7508         if (o == 0) {
7509             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
7510         } else {
7511             tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
7512             tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
7513         }
7514     }
7515 
7516     for (i = 0; i <= is_q; ++i) {
7517         write_vec_element(s, tcg_res[i], rd, i, MO_64);
7518     }
7519     clear_vec_high(s, is_q, rd);
7520 }
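
/*
 * Worked examples of the three permutes, for part == 0 (the "1"
 * variants):
 *   UZP1 Vd = { Vn[0], Vn[2], ..., Vm[0], Vm[2], ... }  (even elements)
 *   TRN1 Vd = { Vn[0], Vm[0], Vn[2], Vm[2], ... }       (even pairs)
 *   ZIP1 Vd = { Vn[0], Vm[0], Vn[1], Vm[1], ... }       (low halves)
 * part == 1 gives the odd elements (UZP2), odd pairs (TRN2) or high
 * halves (ZIP2).
 */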
7521 
7522 /*
7523  * do_reduction_op helper
7524  *
7525  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7526  * important for correct NaN propagation that we do these
7527  * operations in exactly the order specified by the pseudocode.
7528  *
7529  * This is a recursive function; TCG temps should be freed by the
7530  * calling function once it is done with the values.
7531  */
7532 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
7533                                 int esize, int size, int vmap, TCGv_ptr fpst)
7534 {
7535     if (esize == size) {
7536         int element;
7537         MemOp msize = esize == 16 ? MO_16 : MO_32;
7538         TCGv_i32 tcg_elem;
7539 
7540         /* We should have exactly one element left here */
7541         assert(ctpop8(vmap) == 1);
7542         element = ctz32(vmap);
7543         assert(element < 8);
7544 
7545         tcg_elem = tcg_temp_new_i32();
7546         read_vec_element_i32(s, tcg_elem, rn, element, msize);
7547         return tcg_elem;
7548     } else {
7549         int bits = size / 2;
7550         int shift = ctpop8(vmap) / 2;
7551         int vmap_lo = (vmap >> shift) & vmap;
7552         int vmap_hi = (vmap & ~vmap_lo);
7553         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7554 
7555         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
7556         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
7557         tcg_res = tcg_temp_new_i32();
7558 
7559         switch (fpopcode) {
7560         case 0x0c: /* fmaxnmv half-precision */
7561             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7562             break;
7563         case 0x0f: /* fmaxv half-precision */
7564             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
7565             break;
7566         case 0x1c: /* fminnmv half-precision */
7567             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7568             break;
7569         case 0x1f: /* fminv half-precision */
7570             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
7571             break;
7572         case 0x2c: /* fmaxnmv */
7573             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
7574             break;
7575         case 0x2f: /* fmaxv */
7576             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
7577             break;
7578         case 0x3c: /* fminnmv */
7579             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
7580             break;
7581         case 0x3f: /* fminv */
7582             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
7583             break;
7584         default:
7585             g_assert_not_reached();
7586         }
7587         return tcg_res;
7588     }
7589 }
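
/*
 * Example of the vmap recursion: a 4-element FMAXV starts with
 * vmap == 0b1111, which splits into 0b0011 and 0b1100, each of which
 * splits again into single elements. The result is computed as
 * max(max(e0, e1), max(e2, e3)), the pairwise tree that Reduce()
 * prescribes for correct NaN propagation.
 */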
7590 
7591 /* AdvSIMD across lanes
7592  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7593  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7594  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7595  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7596  */
7597 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
7598 {
7599     int rd = extract32(insn, 0, 5);
7600     int rn = extract32(insn, 5, 5);
7601     int size = extract32(insn, 22, 2);
7602     int opcode = extract32(insn, 12, 5);
7603     bool is_q = extract32(insn, 30, 1);
7604     bool is_u = extract32(insn, 29, 1);
7605     bool is_fp = false;
7606     bool is_min = false;
7607     int esize;
7608     int elements;
7609     int i;
7610     TCGv_i64 tcg_res, tcg_elt;
7611 
7612     switch (opcode) {
7613     case 0x1b: /* ADDV */
7614         if (is_u) {
7615             unallocated_encoding(s);
7616             return;
7617         }
7618         /* fall through */
7619     case 0x3: /* SADDLV, UADDLV */
7620     case 0xa: /* SMAXV, UMAXV */
7621     case 0x1a: /* SMINV, UMINV */
7622         if (size == 3 || (size == 2 && !is_q)) {
7623             unallocated_encoding(s);
7624             return;
7625         }
7626         break;
7627     case 0xc: /* FMAXNMV, FMINNMV */
7628     case 0xf: /* FMAXV, FMINV */
7629         /* Bit 1 of size field encodes min vs max and the actual size
7630          * depends on the encoding of the U bit. If U is not set (and FP16
7631          * is enabled) then we do half-precision float instead of single
7632          * precision.
7633          */
7634         is_min = extract32(size, 1, 1);
7635         is_fp = true;
7636         if (!is_u && dc_isar_feature(aa64_fp16, s)) {
7637             size = 1;
7638         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
7639             unallocated_encoding(s);
7640             return;
7641         } else {
7642             size = 2;
7643         }
7644         break;
7645     default:
7646         unallocated_encoding(s);
7647         return;
7648     }
7649 
7650     if (!fp_access_check(s)) {
7651         return;
7652     }
7653 
7654     esize = 8 << size;
7655     elements = (is_q ? 128 : 64) / esize;
7656 
7657     tcg_res = tcg_temp_new_i64();
7658     tcg_elt = tcg_temp_new_i64();
7659 
7660     /* These instructions operate across all lanes of a vector
7661      * to produce a single result. We can guarantee that a 64
7662      * bit intermediate is sufficient:
7663      *  + for [US]ADDLV the maximum element size is 32 bits, and
7664      *    the result type is 64 bits
7665      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7666      *    same as the element size, which is 32 bits at most
7667      * For the integer operations we can choose to work at 64
7668      * or 32 bits and truncate at the end; for simplicity
7669      * we use 64 bits always. The floating point
7670      * ops do require 32 bit intermediates, though.
7671      */
7672     if (!is_fp) {
7673         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
7674 
7675         for (i = 1; i < elements; i++) {
7676             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
7677 
7678             switch (opcode) {
7679             case 0x03: /* SADDLV / UADDLV */
7680             case 0x1b: /* ADDV */
7681                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
7682                 break;
7683             case 0x0a: /* SMAXV / UMAXV */
7684                 if (is_u) {
7685                     tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
7686                 } else {
7687                     tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
7688                 }
7689                 break;
7690             case 0x1a: /* SMINV / UMINV */
7691                 if (is_u) {
7692                     tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
7693                 } else {
7694                     tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
7695                 }
7696                 break;
7697             default:
7698                 g_assert_not_reached();
7699             }
7700 
7701         }
7702     } else {
7703         /* Floating point vector reduction ops which work across 32
7704          * bit (single) or 16 bit (half-precision) intermediates.
7705          * Note that correct NaN propagation requires that we do these
7706          * operations in exactly the order specified by the pseudocode.
7707          */
7708         TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7709         int fpopcode = opcode | is_min << 4 | is_u << 5;
7710         int vmap = (1 << elements) - 1;
7711         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
7712                                              (is_q ? 128 : 64), vmap, fpst);
7713         tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
7714     }
7715 
7716     /* Now truncate the result to the width required for the final output */
7717     if (opcode == 0x03) {
7718         /* SADDLV, UADDLV: result is 2*esize */
7719         size++;
7720     }
7721 
7722     switch (size) {
7723     case 0:
7724         tcg_gen_ext8u_i64(tcg_res, tcg_res);
7725         break;
7726     case 1:
7727         tcg_gen_ext16u_i64(tcg_res, tcg_res);
7728         break;
7729     case 2:
7730         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7731         break;
7732     case 3:
7733         break;
7734     default:
7735         g_assert_not_reached();
7736     }
7737 
7738     write_fp_dreg(s, rd, tcg_res);
7739 }
7740 
7741 /* DUP (Element, Vector)
7742  *
7743  *  31  30   29              21 20    16 15        10  9    5 4    0
7744  * +---+---+-------------------+--------+-------------+------+------+
7745  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7746  * +---+---+-------------------+--------+-------------+------+------+
7747  *
7748  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7749  */
7750 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7751                              int imm5)
7752 {
7753     int size = ctz32(imm5);
7754     int index;
7755 
7756     if (size > 3 || (size == 3 && !is_q)) {
7757         unallocated_encoding(s);
7758         return;
7759     }
7760 
7761     if (!fp_access_check(s)) {
7762         return;
7763     }
7764 
7765     index = imm5 >> (size + 1);
7766     tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7767                          vec_reg_offset(s, rn, index, size),
7768                          is_q ? 16 : 8, vec_full_reg_size(s));
7769 }
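
/*
 * Example of the imm5 encoding: imm5 == 0b00110 gives size == 1
 * (16-bit elements, from the lowest set bit) and
 * index == imm5 >> 2 == 1, i.e. DUP Vd.8h, Vn.h[1] when is_q is set.
 */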
7770 
7771 /* DUP (element, scalar)
7772  *  31                   21 20    16 15        10  9    5 4    0
7773  * +-----------------------+--------+-------------+------+------+
7774  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7775  * +-----------------------+--------+-------------+------+------+
7776  */
7777 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7778                               int imm5)
7779 {
7780     int size = ctz32(imm5);
7781     int index;
7782     TCGv_i64 tmp;
7783 
7784     if (size > 3) {
7785         unallocated_encoding(s);
7786         return;
7787     }
7788 
7789     if (!fp_access_check(s)) {
7790         return;
7791     }
7792 
7793     index = imm5 >> (size + 1);
7794 
7795     /* This instruction just extracts the specified element and
7796      * zero-extends it into the bottom of the destination register.
7797      */
7798     tmp = tcg_temp_new_i64();
7799     read_vec_element(s, tmp, rn, index, size);
7800     write_fp_dreg(s, rd, tmp);
7801 }
7802 
7803 /* DUP (General)
7804  *
7805  *  31  30   29              21 20    16 15        10  9    5 4    0
7806  * +---+---+-------------------+--------+-------------+------+------+
7807  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
7808  * +---+---+-------------------+--------+-------------+------+------+
7809  *
7810  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7811  */
7812 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7813                              int imm5)
7814 {
7815     int size = ctz32(imm5);
7816     uint32_t dofs, oprsz, maxsz;
7817 
7818     if (size > 3 || ((size == 3) && !is_q)) {
7819         unallocated_encoding(s);
7820         return;
7821     }
7822 
7823     if (!fp_access_check(s)) {
7824         return;
7825     }
7826 
7827     dofs = vec_full_reg_offset(s, rd);
7828     oprsz = is_q ? 16 : 8;
7829     maxsz = vec_full_reg_size(s);
7830 
7831     tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7832 }
7833 
7834 /* INS (Element)
7835  *
7836  *  31                   21 20    16 15  14    11  10 9    5 4    0
7837  * +-----------------------+--------+------------+---+------+------+
7838  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
7839  * +-----------------------+--------+------------+---+------+------+
7840  *
7841  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7842  * index: encoded in imm5<4:size+1>
7843  */
7844 static void handle_simd_inse(DisasContext *s, int rd, int rn,
7845                              int imm4, int imm5)
7846 {
7847     int size = ctz32(imm5);
7848     int src_index, dst_index;
7849     TCGv_i64 tmp;
7850 
7851     if (size > 3) {
7852         unallocated_encoding(s);
7853         return;
7854     }
7855 
7856     if (!fp_access_check(s)) {
7857         return;
7858     }
7859 
7860     dst_index = extract32(imm5, 1 + size, 5);
7861     src_index = extract32(imm4, size, 4);
7862 
7863     tmp = tcg_temp_new_i64();
7864 
7865     read_vec_element(s, tmp, rn, src_index, size);
7866     write_vec_element(s, tmp, rd, dst_index, size);
7867 
7868     /* INS is considered a 128-bit write for SVE. */
7869     clear_vec_high(s, true, rd);
7870 }
7871 
7872 
7873 /* INS (General)
7874  *
7875  *  31                   21 20    16 15        10  9    5 4    0
7876  * +-----------------------+--------+-------------+------+------+
7877  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
7878  * +-----------------------+--------+-------------+------+------+
7879  *
7880  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7881  * index: encoded in imm5<4:size+1>
7882  */
7883 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
7884 {
7885     int size = ctz32(imm5);
7886     int idx;
7887 
7888     if (size > 3) {
7889         unallocated_encoding(s);
7890         return;
7891     }
7892 
7893     if (!fp_access_check(s)) {
7894         return;
7895     }
7896 
7897     idx = extract32(imm5, 1 + size, 4 - size);
7898     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
7899 
7900     /* INS is considered a 128-bit write for SVE. */
7901     clear_vec_high(s, true, rd);
7902 }
7903 
7904 /*
7905  * UMOV (General)
7906  * SMOV (General)
7907  *
7908  *  31  30   29              21 20    16 15    12   10 9    5 4    0
7909  * +---+---+-------------------+--------+-------------+------+------+
7910  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
7911  * +---+---+-------------------+--------+-------------+------+------+
7912  *
7913  * U: unsigned when set
7914  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7915  */
7916 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
7917                                   int rn, int rd, int imm5)
7918 {
7919     int size = ctz32(imm5);
7920     int element;
7921     TCGv_i64 tcg_rd;
7922 
7923     /* Check for UnallocatedEncodings */
7924     if (is_signed) {
7925         if (size > 2 || (size == 2 && !is_q)) {
7926             unallocated_encoding(s);
7927             return;
7928         }
7929     } else {
7930         if (size > 3
7931             || (size < 3 && is_q)
7932             || (size == 3 && !is_q)) {
7933             unallocated_encoding(s);
7934             return;
7935         }
7936     }
7937 
7938     if (!fp_access_check(s)) {
7939         return;
7940     }
7941 
7942     element = extract32(imm5, 1 + size, 4);
7943 
7944     tcg_rd = cpu_reg(s, rd);
7945     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
7946     if (is_signed && !is_q) {
7947         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7948     }
7949 }
7950 
7951 /* AdvSIMD copy
7952  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
7953  * +---+---+----+-----------------+------+---+------+---+------+------+
7954  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7955  * +---+---+----+-----------------+------+---+------+---+------+------+
7956  */
7957 static void disas_simd_copy(DisasContext *s, uint32_t insn)
7958 {
7959     int rd = extract32(insn, 0, 5);
7960     int rn = extract32(insn, 5, 5);
7961     int imm4 = extract32(insn, 11, 4);
7962     int op = extract32(insn, 29, 1);
7963     int is_q = extract32(insn, 30, 1);
7964     int imm5 = extract32(insn, 16, 5);
7965 
7966     if (op) {
7967         if (is_q) {
7968             /* INS (element) */
7969             handle_simd_inse(s, rd, rn, imm4, imm5);
7970         } else {
7971             unallocated_encoding(s);
7972         }
7973     } else {
7974         switch (imm4) {
7975         case 0:
7976             /* DUP (element - vector) */
7977             handle_simd_dupe(s, is_q, rd, rn, imm5);
7978             break;
7979         case 1:
7980             /* DUP (general) */
7981             handle_simd_dupg(s, is_q, rd, rn, imm5);
7982             break;
7983         case 3:
7984             if (is_q) {
7985                 /* INS (general) */
7986                 handle_simd_insg(s, rd, rn, imm5);
7987             } else {
7988                 unallocated_encoding(s);
7989             }
7990             break;
7991         case 5:
7992         case 7:
7993             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7994             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
7995             break;
7996         default:
7997             unallocated_encoding(s);
7998             break;
7999         }
8000     }
8001 }
8002 
8003 /* AdvSIMD modified immediate
8004  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
8005  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
8006  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
8007  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
8008  *
8009  * There are a number of operations that can be carried out here:
8010  *   MOVI - move (shifted) imm into register
8011  *   MVNI - move inverted (shifted) imm into register
8012  *   ORR  - bitwise OR of (shifted) imm with register
8013  *   BIC  - bitwise clear of (shifted) imm with register
8014  * With ARMv8.2 we also have:
8015  *   FMOV half-precision
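 *
 * For example, MOVI Vd.4S, #0x55, LSL #8 expands abcdefgh = 0x55 to
 * 0x00005500 and replicates that value into every 32-bit lane of Vd.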
8016  */
8017 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
8018 {
8019     int rd = extract32(insn, 0, 5);
8020     int cmode = extract32(insn, 12, 4);
8021     int o2 = extract32(insn, 11, 1);
8022     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
8023     bool is_neg = extract32(insn, 29, 1);
8024     bool is_q = extract32(insn, 30, 1);
8025     uint64_t imm = 0;
8026 
8027     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
8028         /* Check for FMOV (vector, immediate) - half-precision */
8029         if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
8030             unallocated_encoding(s);
8031             return;
8032         }
8033     }
8034 
8035     if (!fp_access_check(s)) {
8036         return;
8037     }
8038 
8039     if (cmode == 15 && o2 && !is_neg) {
8040         /* FMOV (vector, immediate) - half-precision */
8041         imm = vfp_expand_imm(MO_16, abcdefgh);
8042         /* now duplicate across the lanes */
8043         imm = dup_const(MO_16, imm);
8044     } else {
8045         imm = asimd_imm_const(abcdefgh, cmode, is_neg);
8046     }
8047 
8048     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
8049         /* MOVI or MVNI, with MVNI negation handled above.  */
8050         tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
8051                              vec_full_reg_size(s), imm);
8052     } else {
8053         /* ORR or BIC, with BIC negation to AND handled above.  */
8054         if (is_neg) {
8055             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
8056         } else {
8057             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
8058         }
8059     }
8060 }
8061 
8062 /* AdvSIMD scalar copy
8063  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
8064  * +-----+----+-----------------+------+---+------+---+------+------+
8065  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
8066  * +-----+----+-----------------+------+---+------+---+------+------+
8067  */
8068 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
8069 {
8070     int rd = extract32(insn, 0, 5);
8071     int rn = extract32(insn, 5, 5);
8072     int imm4 = extract32(insn, 11, 4);
8073     int imm5 = extract32(insn, 16, 5);
8074     int op = extract32(insn, 29, 1);
8075 
8076     if (op != 0 || imm4 != 0) {
8077         unallocated_encoding(s);
8078         return;
8079     }
8080 
8081     /* DUP (element, scalar) */
8082     handle_simd_dupes(s, rd, rn, imm5);
8083 }
8084 
8085 /* AdvSIMD scalar pairwise
8086  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
8087  * +-----+---+-----------+------+-----------+--------+-----+------+------+
8088  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
8089  * +-----+---+-----------+------+-----------+--------+-----+------+------+
8090  */
8091 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
8092 {
8093     int u = extract32(insn, 29, 1);
8094     int size = extract32(insn, 22, 2);
8095     int opcode = extract32(insn, 12, 5);
8096     int rn = extract32(insn, 5, 5);
8097     int rd = extract32(insn, 0, 5);
8098     TCGv_ptr fpst;
8099 
8100     /* For some ops (the FP ones), size[1] is part of the encoding.
8101      * For ADDP strictly it is not but size[1] is always 1 for valid
8102      * encodings.
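     * e.g. ADDP folds opcode 0x1b with size = 3 into the 0x3b case
     * below.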
8103      */
8104     opcode |= (extract32(size, 1, 1) << 5);
8105 
8106     switch (opcode) {
8107     case 0x3b: /* ADDP */
8108         if (u || size != 3) {
8109             unallocated_encoding(s);
8110             return;
8111         }
8112         if (!fp_access_check(s)) {
8113             return;
8114         }
8115 
8116         fpst = NULL;
8117         break;
8118     case 0xc: /* FMAXNMP */
8119     case 0xd: /* FADDP */
8120     case 0xf: /* FMAXP */
8121     case 0x2c: /* FMINNMP */
8122     case 0x2f: /* FMINP */
        /* FP op, size[0] is 32 or 64 bit */
8124         if (!u) {
8125             if (!dc_isar_feature(aa64_fp16, s)) {
8126                 unallocated_encoding(s);
8127                 return;
8128             } else {
8129                 size = MO_16;
8130             }
8131         } else {
8132             size = extract32(size, 0, 1) ? MO_64 : MO_32;
8133         }
8134 
8135         if (!fp_access_check(s)) {
8136             return;
8137         }
8138 
8139         fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8140         break;
8141     default:
8142         unallocated_encoding(s);
8143         return;
8144     }
8145 
8146     if (size == MO_64) {
8147         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8148         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8149         TCGv_i64 tcg_res = tcg_temp_new_i64();
8150 
8151         read_vec_element(s, tcg_op1, rn, 0, MO_64);
8152         read_vec_element(s, tcg_op2, rn, 1, MO_64);
8153 
8154         switch (opcode) {
8155         case 0x3b: /* ADDP */
8156             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
8157             break;
8158         case 0xc: /* FMAXNMP */
8159             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8160             break;
8161         case 0xd: /* FADDP */
8162             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8163             break;
8164         case 0xf: /* FMAXP */
8165             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8166             break;
8167         case 0x2c: /* FMINNMP */
8168             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8169             break;
8170         case 0x2f: /* FMINP */
8171             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8172             break;
8173         default:
8174             g_assert_not_reached();
8175         }
8176 
8177         write_fp_dreg(s, rd, tcg_res);
8178     } else {
8179         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8180         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8181         TCGv_i32 tcg_res = tcg_temp_new_i32();
8182 
8183         read_vec_element_i32(s, tcg_op1, rn, 0, size);
8184         read_vec_element_i32(s, tcg_op2, rn, 1, size);
8185 
8186         if (size == MO_16) {
8187             switch (opcode) {
8188             case 0xc: /* FMAXNMP */
8189                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8190                 break;
8191             case 0xd: /* FADDP */
8192                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
8193                 break;
8194             case 0xf: /* FMAXP */
8195                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
8196                 break;
8197             case 0x2c: /* FMINNMP */
8198                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8199                 break;
8200             case 0x2f: /* FMINP */
8201                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
8202                 break;
8203             default:
8204                 g_assert_not_reached();
8205             }
8206         } else {
8207             switch (opcode) {
8208             case 0xc: /* FMAXNMP */
8209                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8210                 break;
8211             case 0xd: /* FADDP */
8212                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8213                 break;
8214             case 0xf: /* FMAXP */
8215                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8216                 break;
8217             case 0x2c: /* FMINNMP */
8218                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8219                 break;
8220             case 0x2f: /* FMINP */
8221                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8222                 break;
8223             default:
8224                 g_assert_not_reached();
8225             }
8226         }
8227 
8228         write_fp_sreg(s, rd, tcg_res);
8229     }
8230 }
8231 
8232 /*
8233  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
8234  *
 * This handles the common shift logic and is used by both the
 * vector and scalar code.
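 *
 * Rounding adds (1 << (shift - 1)) before shifting; for 64-bit
 * elements that addition can overflow, so a 128-bit intermediate
 * (tcg_src_hi:tcg_src) is used and the right shift is composed
 * from shifts of both halves.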
8237  */
8238 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
8239                                     TCGv_i64 tcg_rnd, bool accumulate,
8240                                     bool is_u, int size, int shift)
8241 {
8242     bool extended_result = false;
8243     bool round = tcg_rnd != NULL;
8244     int ext_lshift = 0;
8245     TCGv_i64 tcg_src_hi;
8246 
8247     if (round && size == 3) {
8248         extended_result = true;
8249         ext_lshift = 64 - shift;
8250         tcg_src_hi = tcg_temp_new_i64();
8251     } else if (shift == 64) {
8252         if (!accumulate && is_u) {
8253             /* result is zero */
8254             tcg_gen_movi_i64(tcg_res, 0);
8255             return;
8256         }
8257     }
8258 
8259     /* Deal with the rounding step */
8260     if (round) {
8261         if (extended_result) {
8262             TCGv_i64 tcg_zero = tcg_constant_i64(0);
8263             if (!is_u) {
8264                 /* take care of sign extending tcg_res */
8265                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
8266                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8267                                  tcg_src, tcg_src_hi,
8268                                  tcg_rnd, tcg_zero);
8269             } else {
8270                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8271                                  tcg_src, tcg_zero,
8272                                  tcg_rnd, tcg_zero);
8273             }
8274         } else {
8275             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
8276         }
8277     }
8278 
8279     /* Now do the shift right */
8280     if (round && extended_result) {
8281         /* extended case, >64 bit precision required */
8282         if (ext_lshift == 0) {
8283             /* special case, only high bits matter */
8284             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
8285         } else {
8286             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8287             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
8288             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
8289         }
8290     } else {
8291         if (is_u) {
8292             if (shift == 64) {
8293                 /* essentially shifting in 64 zeros */
8294                 tcg_gen_movi_i64(tcg_src, 0);
8295             } else {
8296                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8297             }
8298         } else {
8299             if (shift == 64) {
8300                 /* effectively extending the sign-bit */
8301                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
8302             } else {
8303                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
8304             }
8305         }
8306     }
8307 
8308     if (accumulate) {
8309         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
8310     } else {
8311         tcg_gen_mov_i64(tcg_res, tcg_src);
8312     }
8313 }
8314 
8315 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
8316 static void handle_scalar_simd_shri(DisasContext *s,
8317                                     bool is_u, int immh, int immb,
8318                                     int opcode, int rn, int rd)
8319 {
8320     const int size = 3;
8321     int immhb = immh << 3 | immb;
8322     int shift = 2 * (8 << size) - immhb;
8323     bool accumulate = false;
8324     bool round = false;
8325     bool insert = false;
8326     TCGv_i64 tcg_rn;
8327     TCGv_i64 tcg_rd;
8328     TCGv_i64 tcg_round;
8329 
8330     if (!extract32(immh, 3, 1)) {
8331         unallocated_encoding(s);
8332         return;
8333     }
8334 
8335     if (!fp_access_check(s)) {
8336         return;
8337     }
8338 
8339     switch (opcode) {
8340     case 0x02: /* SSRA / USRA (accumulate) */
8341         accumulate = true;
8342         break;
8343     case 0x04: /* SRSHR / URSHR (rounding) */
8344         round = true;
8345         break;
8346     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8347         accumulate = round = true;
8348         break;
8349     case 0x08: /* SRI */
8350         insert = true;
8351         break;
8352     }
8353 
8354     if (round) {
8355         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8356     } else {
8357         tcg_round = NULL;
8358     }
8359 
8360     tcg_rn = read_fp_dreg(s, rn);
8361     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8362 
8363     if (insert) {
        /* A shift count equal to the element size is valid but does
         * nothing; special-case it to avoid a potential shift by 64.
         */
8367         int esize = 8 << size;
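        /* SRI keeps the top 'shift' bits of Rd and inserts the
         * shifted Rn value into the low (esize - shift) bits.
         */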
8368         if (shift != esize) {
8369             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
8370             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
8371         }
8372     } else {
8373         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8374                                 accumulate, is_u, size, shift);
8375     }
8376 
8377     write_fp_dreg(s, rd, tcg_rd);
8378 }
8379 
8380 /* SHL/SLI - Scalar shift left */
8381 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
8382                                     int immh, int immb, int opcode,
8383                                     int rn, int rd)
8384 {
8385     int size = 32 - clz32(immh) - 1;
8386     int immhb = immh << 3 | immb;
8387     int shift = immhb - (8 << size);
8388     TCGv_i64 tcg_rn;
8389     TCGv_i64 tcg_rd;
8390 
8391     if (!extract32(immh, 3, 1)) {
8392         unallocated_encoding(s);
8393         return;
8394     }
8395 
8396     if (!fp_access_check(s)) {
8397         return;
8398     }
8399 
8400     tcg_rn = read_fp_dreg(s, rn);
8401     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8402 
8403     if (insert) {
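        /* SLI keeps the low 'shift' bits of Rd and inserts Rn into
         * the remaining high bits.
         */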
8404         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
8405     } else {
8406         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
8407     }
8408 
8409     write_fp_dreg(s, rd, tcg_rd);
8410 }
8411 
8412 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8413  * (signed/unsigned) narrowing */
8414 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
8415                                    bool is_u_shift, bool is_u_narrow,
8416                                    int immh, int immb, int opcode,
8417                                    int rn, int rd)
8418 {
8419     int immhb = immh << 3 | immb;
8420     int size = 32 - clz32(immh) - 1;
8421     int esize = 8 << size;
8422     int shift = (2 * esize) - immhb;
8423     int elements = is_scalar ? 1 : (64 / esize);
8424     bool round = extract32(opcode, 0, 1);
8425     MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
8426     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
8427     TCGv_i32 tcg_rd_narrowed;
8428     TCGv_i64 tcg_final;
8429 
8430     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
8431         { gen_helper_neon_narrow_sat_s8,
8432           gen_helper_neon_unarrow_sat8 },
8433         { gen_helper_neon_narrow_sat_s16,
8434           gen_helper_neon_unarrow_sat16 },
8435         { gen_helper_neon_narrow_sat_s32,
8436           gen_helper_neon_unarrow_sat32 },
8437         { NULL, NULL },
8438     };
8439     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
8440         gen_helper_neon_narrow_sat_u8,
8441         gen_helper_neon_narrow_sat_u16,
8442         gen_helper_neon_narrow_sat_u32,
8443         NULL
8444     };
8445     NeonGenNarrowEnvFn *narrowfn;
8446 
8447     int i;
8448 
8449     assert(size < 4);
8450 
8451     if (extract32(immh, 3, 1)) {
8452         unallocated_encoding(s);
8453         return;
8454     }
8455 
8456     if (!fp_access_check(s)) {
8457         return;
8458     }
8459 
8460     if (is_u_shift) {
8461         narrowfn = unsigned_narrow_fns[size];
8462     } else {
8463         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
8464     }
8465 
8466     tcg_rn = tcg_temp_new_i64();
8467     tcg_rd = tcg_temp_new_i64();
8468     tcg_rd_narrowed = tcg_temp_new_i32();
8469     tcg_final = tcg_temp_new_i64();
8470 
8471     if (round) {
8472         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8473     } else {
8474         tcg_round = NULL;
8475     }
8476 
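    /* Each pass shifts one wide element right (with optional rounding),
     * saturates it to the narrow type and deposits it into its slot of
     * the 64-bit result.
     */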
8477     for (i = 0; i < elements; i++) {
8478         read_vec_element(s, tcg_rn, rn, i, ldop);
8479         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                false, is_u_shift, size + 1, shift);
8481         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
8482         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
8483         if (i == 0) {
8484             tcg_gen_mov_i64(tcg_final, tcg_rd);
8485         } else {
8486             tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8487         }
8488     }
8489 
8490     if (!is_q) {
8491         write_vec_element(s, tcg_final, rd, 0, MO_64);
8492     } else {
8493         write_vec_element(s, tcg_final, rd, 1, MO_64);
8494     }
8495     clear_vec_high(s, is_q, rd);
8496 }
8497 
8498 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8499 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
8500                              bool src_unsigned, bool dst_unsigned,
8501                              int immh, int immb, int rn, int rd)
8502 {
8503     int immhb = immh << 3 | immb;
8504     int size = 32 - clz32(immh) - 1;
8505     int shift = immhb - (8 << size);
8506     int pass;
8507 
8508     assert(immh != 0);
8509     assert(!(scalar && is_q));
8510 
8511     if (!scalar) {
8512         if (!is_q && extract32(immh, 3, 1)) {
8513             unallocated_encoding(s);
8514             return;
8515         }
8516 
8517         /* Since we use the variable-shift helpers we must
8518          * replicate the shift count into each element of
8519          * the tcg_shift value.
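         * e.g. with size 0 a shift count of 3 becomes 0x03030303,
         * one copy per byte lane.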
8520          */
8521         switch (size) {
8522         case 0:
8523             shift |= shift << 8;
8524             /* fall through */
8525         case 1:
8526             shift |= shift << 16;
8527             break;
8528         case 2:
8529         case 3:
8530             break;
8531         default:
8532             g_assert_not_reached();
8533         }
8534     }
8535 
8536     if (!fp_access_check(s)) {
8537         return;
8538     }
8539 
8540     if (size == 3) {
8541         TCGv_i64 tcg_shift = tcg_constant_i64(shift);
8542         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
8543             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
8544             { NULL, gen_helper_neon_qshl_u64 },
8545         };
8546         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
8547         int maxpass = is_q ? 2 : 1;
8548 
8549         for (pass = 0; pass < maxpass; pass++) {
8550             TCGv_i64 tcg_op = tcg_temp_new_i64();
8551 
8552             read_vec_element(s, tcg_op, rn, pass, MO_64);
8553             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8554             write_vec_element(s, tcg_op, rd, pass, MO_64);
8555         }
8556         clear_vec_high(s, is_q, rd);
8557     } else {
8558         TCGv_i32 tcg_shift = tcg_constant_i32(shift);
8559         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
8560             {
8561                 { gen_helper_neon_qshl_s8,
8562                   gen_helper_neon_qshl_s16,
8563                   gen_helper_neon_qshl_s32 },
8564                 { gen_helper_neon_qshlu_s8,
8565                   gen_helper_neon_qshlu_s16,
8566                   gen_helper_neon_qshlu_s32 }
8567             }, {
8568                 { NULL, NULL, NULL },
8569                 { gen_helper_neon_qshl_u8,
8570                   gen_helper_neon_qshl_u16,
8571                   gen_helper_neon_qshl_u32 }
8572             }
8573         };
8574         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
8575         MemOp memop = scalar ? size : MO_32;
8576         int maxpass = scalar ? 1 : is_q ? 4 : 2;
8577 
8578         for (pass = 0; pass < maxpass; pass++) {
8579             TCGv_i32 tcg_op = tcg_temp_new_i32();
8580 
8581             read_vec_element_i32(s, tcg_op, rn, pass, memop);
8582             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8583             if (scalar) {
8584                 switch (size) {
8585                 case 0:
8586                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
8587                     break;
8588                 case 1:
8589                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
8590                     break;
8591                 case 2:
8592                     break;
8593                 default:
8594                     g_assert_not_reached();
8595                 }
8596                 write_fp_sreg(s, rd, tcg_op);
8597             } else {
8598                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8599             }
8600         }
8601 
8602         if (!scalar) {
8603             clear_vec_high(s, is_q, rd);
8604         }
8605     }
8606 }
8607 
8608 /* Common vector code for handling integer to FP conversion */
8609 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
8610                                    int elements, int is_signed,
8611                                    int fracbits, int size)
8612 {
8613     TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8614     TCGv_i32 tcg_shift = NULL;
8615 
8616     MemOp mop = size | (is_signed ? MO_SIGN : 0);
8617     int pass;
8618 
8619     if (fracbits || size == MO_64) {
8620         tcg_shift = tcg_constant_i32(fracbits);
8621     }
8622 
8623     if (size == MO_64) {
8624         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
8625         TCGv_i64 tcg_double = tcg_temp_new_i64();
8626 
8627         for (pass = 0; pass < elements; pass++) {
8628             read_vec_element(s, tcg_int64, rn, pass, mop);
8629 
8630             if (is_signed) {
8631                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
8632                                      tcg_shift, tcg_fpst);
8633             } else {
8634                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
8635                                      tcg_shift, tcg_fpst);
8636             }
8637             if (elements == 1) {
8638                 write_fp_dreg(s, rd, tcg_double);
8639             } else {
8640                 write_vec_element(s, tcg_double, rd, pass, MO_64);
8641             }
8642         }
8643     } else {
8644         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
8645         TCGv_i32 tcg_float = tcg_temp_new_i32();
8646 
8647         for (pass = 0; pass < elements; pass++) {
8648             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
8649 
8650             switch (size) {
8651             case MO_32:
8652                 if (fracbits) {
8653                     if (is_signed) {
8654                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
8655                                              tcg_shift, tcg_fpst);
8656                     } else {
8657                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
8658                                              tcg_shift, tcg_fpst);
8659                     }
8660                 } else {
8661                     if (is_signed) {
8662                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
8663                     } else {
8664                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
8665                     }
8666                 }
8667                 break;
8668             case MO_16:
8669                 if (fracbits) {
8670                     if (is_signed) {
8671                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
8672                                              tcg_shift, tcg_fpst);
8673                     } else {
8674                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
8675                                              tcg_shift, tcg_fpst);
8676                     }
8677                 } else {
8678                     if (is_signed) {
8679                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
8680                     } else {
8681                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
8682                     }
8683                 }
8684                 break;
8685             default:
8686                 g_assert_not_reached();
8687             }
8688 
8689             if (elements == 1) {
8690                 write_fp_sreg(s, rd, tcg_float);
8691             } else {
8692                 write_vec_element_i32(s, tcg_float, rd, pass, size);
8693             }
8694         }
8695     }
8696 
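    /* elements << size is the number of bytes written; only a full
     * 16-byte write counts as a 128-bit (Q) write here.
     */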
8697     clear_vec_high(s, elements << size == 16, rd);
8698 }
8699 
8700 /* UCVTF/SCVTF - Integer to FP conversion */
8701 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8702                                          bool is_q, bool is_u,
8703                                          int immh, int immb, int opcode,
8704                                          int rn, int rd)
8705 {
8706     int size, elements, fracbits;
8707     int immhb = immh << 3 | immb;
8708 
8709     if (immh & 8) {
8710         size = MO_64;
8711         if (!is_scalar && !is_q) {
8712             unallocated_encoding(s);
8713             return;
8714         }
8715     } else if (immh & 4) {
8716         size = MO_32;
8717     } else if (immh & 2) {
8718         size = MO_16;
8719         if (!dc_isar_feature(aa64_fp16, s)) {
8720             unallocated_encoding(s);
8721             return;
8722         }
8723     } else {
8724         /* immh == 0 would be a failure of the decode logic */
8725         g_assert(immh == 1);
8726         unallocated_encoding(s);
8727         return;
8728     }
8729 
8730     if (is_scalar) {
8731         elements = 1;
8732     } else {
8733         elements = (8 << is_q) >> size;
8734     }
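    /* immh:immb encodes (2 * esize) - fracbits, where 16 << size is
     * twice the element size in bits.
     */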
8735     fracbits = (16 << size) - immhb;
8736 
8737     if (!fp_access_check(s)) {
8738         return;
8739     }
8740 
8741     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8742 }
8743 
/* FCVTZS, FCVTZU - FP to fixed-point conversion */
8745 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8746                                          bool is_q, bool is_u,
8747                                          int immh, int immb, int rn, int rd)
8748 {
8749     int immhb = immh << 3 | immb;
8750     int pass, size, fracbits;
8751     TCGv_ptr tcg_fpstatus;
8752     TCGv_i32 tcg_rmode, tcg_shift;
8753 
8754     if (immh & 0x8) {
8755         size = MO_64;
8756         if (!is_scalar && !is_q) {
8757             unallocated_encoding(s);
8758             return;
8759         }
8760     } else if (immh & 0x4) {
8761         size = MO_32;
8762     } else if (immh & 0x2) {
8763         size = MO_16;
8764         if (!dc_isar_feature(aa64_fp16, s)) {
8765             unallocated_encoding(s);
8766             return;
8767         }
8768     } else {
8769         /* Should have split out AdvSIMD modified immediate earlier.  */
8770         assert(immh == 1);
8771         unallocated_encoding(s);
8772         return;
8773     }
8774 
8775     if (!fp_access_check(s)) {
8776         return;
8777     }
8778 
8779     assert(!(is_scalar && is_q));
8780 
8781     tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8782     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
8783     fracbits = (16 << size) - immhb;
8784     tcg_shift = tcg_constant_i32(fracbits);
8785 
8786     if (size == MO_64) {
8787         int maxpass = is_scalar ? 1 : 2;
8788 
8789         for (pass = 0; pass < maxpass; pass++) {
8790             TCGv_i64 tcg_op = tcg_temp_new_i64();
8791 
8792             read_vec_element(s, tcg_op, rn, pass, MO_64);
8793             if (is_u) {
8794                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8795             } else {
8796                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8797             }
8798             write_vec_element(s, tcg_op, rd, pass, MO_64);
8799         }
8800         clear_vec_high(s, is_q, rd);
8801     } else {
8802         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
8803         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
8804 
8805         switch (size) {
8806         case MO_16:
8807             if (is_u) {
8808                 fn = gen_helper_vfp_touhh;
8809             } else {
8810                 fn = gen_helper_vfp_toshh;
8811             }
8812             break;
8813         case MO_32:
8814             if (is_u) {
8815                 fn = gen_helper_vfp_touls;
8816             } else {
8817                 fn = gen_helper_vfp_tosls;
8818             }
8819             break;
8820         default:
8821             g_assert_not_reached();
8822         }
8823 
8824         for (pass = 0; pass < maxpass; pass++) {
8825             TCGv_i32 tcg_op = tcg_temp_new_i32();
8826 
8827             read_vec_element_i32(s, tcg_op, rn, pass, size);
8828             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8829             if (is_scalar) {
8830                 write_fp_sreg(s, rd, tcg_op);
8831             } else {
8832                 write_vec_element_i32(s, tcg_op, rd, pass, size);
8833             }
8834         }
8835         if (!is_scalar) {
8836             clear_vec_high(s, is_q, rd);
8837         }
8838     }
8839 
8840     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
8841 }
8842 
8843 /* AdvSIMD scalar shift by immediate
8844  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
8845  * +-----+---+-------------+------+------+--------+---+------+------+
8846  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8847  * +-----+---+-------------+------+------+--------+---+------+------+
8848  *
 * This is the scalar version, so it works on fixed-size registers.
8850  */
8851 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8852 {
8853     int rd = extract32(insn, 0, 5);
8854     int rn = extract32(insn, 5, 5);
8855     int opcode = extract32(insn, 11, 5);
8856     int immb = extract32(insn, 16, 3);
8857     int immh = extract32(insn, 19, 4);
8858     bool is_u = extract32(insn, 29, 1);
8859 
8860     if (immh == 0) {
8861         unallocated_encoding(s);
8862         return;
8863     }
8864 
8865     switch (opcode) {
8866     case 0x08: /* SRI */
8867         if (!is_u) {
8868             unallocated_encoding(s);
8869             return;
8870         }
8871         /* fall through */
8872     case 0x00: /* SSHR / USHR */
8873     case 0x02: /* SSRA / USRA */
8874     case 0x04: /* SRSHR / URSHR */
8875     case 0x06: /* SRSRA / URSRA */
8876         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8877         break;
8878     case 0x0a: /* SHL / SLI */
8879         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8880         break;
8881     case 0x1c: /* SCVTF, UCVTF */
8882         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8883                                      opcode, rn, rd);
8884         break;
8885     case 0x10: /* SQSHRUN, SQSHRUN2 */
8886     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8887         if (!is_u) {
8888             unallocated_encoding(s);
8889             return;
8890         }
8891         handle_vec_simd_sqshrn(s, true, false, false, true,
8892                                immh, immb, opcode, rn, rd);
8893         break;
8894     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
8895     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8896         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8897                                immh, immb, opcode, rn, rd);
8898         break;
8899     case 0xc: /* SQSHLU */
8900         if (!is_u) {
8901             unallocated_encoding(s);
8902             return;
8903         }
8904         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8905         break;
8906     case 0xe: /* SQSHL, UQSHL */
8907         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8908         break;
8909     case 0x1f: /* FCVTZS, FCVTZU */
8910         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8911         break;
8912     default:
8913         unallocated_encoding(s);
8914         break;
8915     }
8916 }
8917 
8918 /* AdvSIMD scalar three different
8919  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8920  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8921  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8922  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8923  */
8924 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
8925 {
8926     bool is_u = extract32(insn, 29, 1);
8927     int size = extract32(insn, 22, 2);
8928     int opcode = extract32(insn, 12, 4);
8929     int rm = extract32(insn, 16, 5);
8930     int rn = extract32(insn, 5, 5);
8931     int rd = extract32(insn, 0, 5);
8932 
8933     if (is_u) {
8934         unallocated_encoding(s);
8935         return;
8936     }
8937 
8938     switch (opcode) {
8939     case 0x9: /* SQDMLAL, SQDMLAL2 */
8940     case 0xb: /* SQDMLSL, SQDMLSL2 */
8941     case 0xd: /* SQDMULL, SQDMULL2 */
8942         if (size == 0 || size == 3) {
8943             unallocated_encoding(s);
8944             return;
8945         }
8946         break;
8947     default:
8948         unallocated_encoding(s);
8949         return;
8950     }
8951 
8952     if (!fp_access_check(s)) {
8953         return;
8954     }
8955 
8956     if (size == 2) {
8957         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8958         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8959         TCGv_i64 tcg_res = tcg_temp_new_i64();
8960 
8961         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
8962         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
8963 
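        /* The doubling in SQDMULL is done by saturating-adding the
         * 64-bit product to itself.
         */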
8964         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
8965         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
8966 
8967         switch (opcode) {
8968         case 0xd: /* SQDMULL, SQDMULL2 */
8969             break;
8970         case 0xb: /* SQDMLSL, SQDMLSL2 */
8971             tcg_gen_neg_i64(tcg_res, tcg_res);
8972             /* fall through */
8973         case 0x9: /* SQDMLAL, SQDMLAL2 */
8974             read_vec_element(s, tcg_op1, rd, 0, MO_64);
8975             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
8976                                               tcg_res, tcg_op1);
8977             break;
8978         default:
8979             g_assert_not_reached();
8980         }
8981 
8982         write_fp_dreg(s, rd, tcg_res);
8983     } else {
8984         TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
8985         TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
8986         TCGv_i64 tcg_res = tcg_temp_new_i64();
8987 
8988         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
8989         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
8990 
8991         switch (opcode) {
8992         case 0xd: /* SQDMULL, SQDMULL2 */
8993             break;
8994         case 0xb: /* SQDMLSL, SQDMLSL2 */
8995             gen_helper_neon_negl_u32(tcg_res, tcg_res);
8996             /* fall through */
8997         case 0x9: /* SQDMLAL, SQDMLAL2 */
8998         {
8999             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
9000             read_vec_element(s, tcg_op3, rd, 0, MO_32);
9001             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
9002                                               tcg_res, tcg_op3);
9003             break;
9004         }
9005         default:
9006             g_assert_not_reached();
9007         }
9008 
9009         tcg_gen_ext32u_i64(tcg_res, tcg_res);
9010         write_fp_dreg(s, rd, tcg_res);
9011     }
9012 }
9013 
9014 static void handle_3same_64(DisasContext *s, int opcode, bool u,
9015                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
9016 {
9017     /* Handle 64x64->64 opcodes which are shared between the scalar
9018      * and vector 3-same groups. We cover every opcode where size == 3
9019      * is valid in either the three-reg-same (integer, not pairwise)
9020      * or scalar-three-reg-same groups.
9021      */
9022     TCGCond cond;
9023 
9024     switch (opcode) {
9025     case 0x1: /* SQADD */
9026         if (u) {
9027             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9028         } else {
9029             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9030         }
9031         break;
9032     case 0x5: /* SQSUB */
9033         if (u) {
9034             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9035         } else {
9036             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9037         }
9038         break;
9039     case 0x6: /* CMGT, CMHI */
9040         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
9041          * We implement this using setcond (test) and then negating.
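         * setcond yields 0 or 1, so negating 1 produces all-ones.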
9042          */
9043         cond = u ? TCG_COND_GTU : TCG_COND_GT;
9044     do_cmop:
9045         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
9046         tcg_gen_neg_i64(tcg_rd, tcg_rd);
9047         break;
9048     case 0x7: /* CMGE, CMHS */
9049         cond = u ? TCG_COND_GEU : TCG_COND_GE;
9050         goto do_cmop;
9051     case 0x11: /* CMTST, CMEQ */
9052         if (u) {
9053             cond = TCG_COND_EQ;
9054             goto do_cmop;
9055         }
9056         gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
9057         break;
9058     case 0x8: /* SSHL, USHL */
9059         if (u) {
9060             gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
9061         } else {
9062             gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
9063         }
9064         break;
9065     case 0x9: /* SQSHL, UQSHL */
9066         if (u) {
9067             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9068         } else {
9069             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9070         }
9071         break;
9072     case 0xa: /* SRSHL, URSHL */
9073         if (u) {
9074             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
9075         } else {
9076             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
9077         }
9078         break;
9079     case 0xb: /* SQRSHL, UQRSHL */
9080         if (u) {
9081             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9082         } else {
9083             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9084         }
9085         break;
9086     case 0x10: /* ADD, SUB */
9087         if (u) {
9088             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
9089         } else {
9090             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
9091         }
9092         break;
9093     default:
9094         g_assert_not_reached();
9095     }
9096 }
9097 
9098 /* Handle the 3-same-operands float operations; shared by the scalar
9099  * and vector encodings. The caller must filter out any encodings
9100  * not allocated for the encoding it is dealing with.
9101  */
9102 static void handle_3same_float(DisasContext *s, int size, int elements,
9103                                int fpopcode, int rd, int rn, int rm)
9104 {
9105     int pass;
9106     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9107 
9108     for (pass = 0; pass < elements; pass++) {
9109         if (size) {
9110             /* Double */
9111             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9112             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9113             TCGv_i64 tcg_res = tcg_temp_new_i64();
9114 
9115             read_vec_element(s, tcg_op1, rn, pass, MO_64);
9116             read_vec_element(s, tcg_op2, rm, pass, MO_64);
9117 
9118             switch (fpopcode) {
9119             case 0x39: /* FMLS */
9120                 /* As usual for ARM, separate negation for fused multiply-add */
9121                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
9122                 /* fall through */
9123             case 0x19: /* FMLA */
9124                 read_vec_element(s, tcg_res, rd, pass, MO_64);
9125                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
9126                                        tcg_res, fpst);
9127                 break;
9128             case 0x18: /* FMAXNM */
9129                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
9130                 break;
9131             case 0x1a: /* FADD */
9132                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
9133                 break;
9134             case 0x1b: /* FMULX */
9135                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
9136                 break;
9137             case 0x1c: /* FCMEQ */
9138                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9139                 break;
9140             case 0x1e: /* FMAX */
9141                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
9142                 break;
9143             case 0x1f: /* FRECPS */
9144                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9145                 break;
9146             case 0x38: /* FMINNM */
9147                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
9148                 break;
9149             case 0x3a: /* FSUB */
9150                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
9151                 break;
9152             case 0x3e: /* FMIN */
9153                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
9154                 break;
9155             case 0x3f: /* FRSQRTS */
9156                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9157                 break;
9158             case 0x5b: /* FMUL */
9159                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
9160                 break;
9161             case 0x5c: /* FCMGE */
9162                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9163                 break;
9164             case 0x5d: /* FACGE */
9165                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9166                 break;
9167             case 0x5f: /* FDIV */
9168                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
9169                 break;
9170             case 0x7a: /* FABD */
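                /* FABD is computed as abs(Rn - Rm). */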
9171                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
9172                 gen_helper_vfp_absd(tcg_res, tcg_res);
9173                 break;
9174             case 0x7c: /* FCMGT */
9175                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9176                 break;
9177             case 0x7d: /* FACGT */
9178                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9179                 break;
9180             default:
9181                 g_assert_not_reached();
9182             }
9183 
9184             write_vec_element(s, tcg_res, rd, pass, MO_64);
9185         } else {
9186             /* Single */
9187             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9188             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9189             TCGv_i32 tcg_res = tcg_temp_new_i32();
9190 
9191             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9192             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9193 
9194             switch (fpopcode) {
9195             case 0x39: /* FMLS */
9196                 /* As usual for ARM, separate negation for fused multiply-add */
9197                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
9198                 /* fall through */
9199             case 0x19: /* FMLA */
9200                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9201                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
9202                                        tcg_res, fpst);
9203                 break;
9204             case 0x1a: /* FADD */
9205                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
9206                 break;
9207             case 0x1b: /* FMULX */
9208                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
9209                 break;
9210             case 0x1c: /* FCMEQ */
9211                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9212                 break;
9213             case 0x1e: /* FMAX */
9214                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
9215                 break;
9216             case 0x1f: /* FRECPS */
9217                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9218                 break;
9219             case 0x18: /* FMAXNM */
9220                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
9221                 break;
9222             case 0x38: /* FMINNM */
9223                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
9224                 break;
9225             case 0x3a: /* FSUB */
9226                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9227                 break;
9228             case 0x3e: /* FMIN */
9229                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
9230                 break;
9231             case 0x3f: /* FRSQRTS */
9232                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9233                 break;
9234             case 0x5b: /* FMUL */
9235                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
9236                 break;
9237             case 0x5c: /* FCMGE */
9238                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9239                 break;
9240             case 0x5d: /* FACGE */
9241                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9242                 break;
9243             case 0x5f: /* FDIV */
9244                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
9245                 break;
9246             case 0x7a: /* FABD */
9247                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9248                 gen_helper_vfp_abss(tcg_res, tcg_res);
9249                 break;
9250             case 0x7c: /* FCMGT */
9251                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9252                 break;
9253             case 0x7d: /* FACGT */
9254                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9255                 break;
9256             default:
9257                 g_assert_not_reached();
9258             }
9259 
9260             if (elements == 1) {
9261                 /* scalar single so clear high part */
9262                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9263 
9264                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
9265                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
9266             } else {
9267                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9268             }
9269         }
9270     }
9271 
9272     clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
9273 }
9274 
9275 /* AdvSIMD scalar three same
9276  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
9277  * +-----+---+-----------+------+---+------+--------+---+------+------+
9278  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9279  * +-----+---+-----------+------+---+------+--------+---+------+------+
9280  */
9281 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
9282 {
9283     int rd = extract32(insn, 0, 5);
9284     int rn = extract32(insn, 5, 5);
9285     int opcode = extract32(insn, 11, 5);
9286     int rm = extract32(insn, 16, 5);
9287     int size = extract32(insn, 22, 2);
9288     bool u = extract32(insn, 29, 1);
9289     TCGv_i64 tcg_rd;
9290 
9291     if (opcode >= 0x18) {
9292         /* Floating point: U, size[1] and opcode indicate operation */
9293         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
9294         switch (fpopcode) {
9295         case 0x1b: /* FMULX */
9296         case 0x1f: /* FRECPS */
9297         case 0x3f: /* FRSQRTS */
9298         case 0x5d: /* FACGE */
9299         case 0x7d: /* FACGT */
9300         case 0x1c: /* FCMEQ */
9301         case 0x5c: /* FCMGE */
9302         case 0x7c: /* FCMGT */
9303         case 0x7a: /* FABD */
9304             break;
9305         default:
9306             unallocated_encoding(s);
9307             return;
9308         }
9309 
9310         if (!fp_access_check(s)) {
9311             return;
9312         }
9313 
9314         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
9315         return;
9316     }
9317 
9318     switch (opcode) {
9319     case 0x1: /* SQADD, UQADD */
9320     case 0x5: /* SQSUB, UQSUB */
9321     case 0x9: /* SQSHL, UQSHL */
9322     case 0xb: /* SQRSHL, UQRSHL */
9323         break;
9324     case 0x8: /* SSHL, USHL */
9325     case 0xa: /* SRSHL, URSHL */
9326     case 0x6: /* CMGT, CMHI */
9327     case 0x7: /* CMGE, CMHS */
9328     case 0x11: /* CMTST, CMEQ */
9329     case 0x10: /* ADD, SUB (vector) */
9330         if (size != 3) {
9331             unallocated_encoding(s);
9332             return;
9333         }
9334         break;
9335     case 0x16: /* SQDMULH, SQRDMULH (vector) */
9336         if (size != 1 && size != 2) {
9337             unallocated_encoding(s);
9338             return;
9339         }
9340         break;
9341     default:
9342         unallocated_encoding(s);
9343         return;
9344     }
9345 
9346     if (!fp_access_check(s)) {
9347         return;
9348     }
9349 
9350     tcg_rd = tcg_temp_new_i64();
9351 
9352     if (size == 3) {
9353         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9354         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
9355 
9356         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
9357     } else {
9358         /* Do a single operation on the lowest element in the vector.
9359          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
9360          * no side effects for all these operations.
9361          * OPTME: special-purpose helpers would avoid doing some
9362          * unnecessary work in the helper for the 8 and 16 bit cases.
9363          */
9364         NeonGenTwoOpEnvFn *genenvfn;
9365         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9366         TCGv_i32 tcg_rm = tcg_temp_new_i32();
9367         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
9368 
9369         read_vec_element_i32(s, tcg_rn, rn, 0, size);
9370         read_vec_element_i32(s, tcg_rm, rm, 0, size);
9371 
9372         switch (opcode) {
9373         case 0x1: /* SQADD, UQADD */
9374         {
9375             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9376                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9377                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9378                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9379             };
9380             genenvfn = fns[size][u];
9381             break;
9382         }
9383         case 0x5: /* SQSUB, UQSUB */
9384         {
9385             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9386                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9387                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9388                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9389             };
9390             genenvfn = fns[size][u];
9391             break;
9392         }
9393         case 0x9: /* SQSHL, UQSHL */
9394         {
9395             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9396                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9397                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9398                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9399             };
9400             genenvfn = fns[size][u];
9401             break;
9402         }
9403         case 0xb: /* SQRSHL, UQRSHL */
9404         {
9405             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9406                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9407                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9408                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9409             };
9410             genenvfn = fns[size][u];
9411             break;
9412         }
9413         case 0x16: /* SQDMULH, SQRDMULH */
9414         {
9415             static NeonGenTwoOpEnvFn * const fns[2][2] = {
9416                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9417                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9418             };
9419             assert(size == 1 || size == 2);
9420             genenvfn = fns[size - 1][u];
9421             break;
9422         }
9423         default:
9424             g_assert_not_reached();
9425         }
9426 
9427         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
9428         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
9429     }
9430 
9431     write_fp_dreg(s, rd, tcg_rd);
9432 }
9433 
9434 /* AdvSIMD scalar three same FP16
9435  *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
9436  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9437  * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
9438  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9439  * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9440  * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9441  */
9442 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9443                                                   uint32_t insn)
9444 {
9445     int rd = extract32(insn, 0, 5);
9446     int rn = extract32(insn, 5, 5);
9447     int opcode = extract32(insn, 11, 3);
9448     int rm = extract32(insn, 16, 5);
9449     bool u = extract32(insn, 29, 1);
9450     bool a = extract32(insn, 23, 1);
    int fpopcode = opcode | (a << 3) | (u << 4);
9452     TCGv_ptr fpst;
9453     TCGv_i32 tcg_op1;
9454     TCGv_i32 tcg_op2;
9455     TCGv_i32 tcg_res;
9456 
9457     switch (fpopcode) {
9458     case 0x03: /* FMULX */
9459     case 0x04: /* FCMEQ (reg) */
9460     case 0x07: /* FRECPS */
9461     case 0x0f: /* FRSQRTS */
9462     case 0x14: /* FCMGE (reg) */
9463     case 0x15: /* FACGE */
9464     case 0x1a: /* FABD */
9465     case 0x1c: /* FCMGT (reg) */
9466     case 0x1d: /* FACGT */
9467         break;
9468     default:
9469         unallocated_encoding(s);
9470         return;
9471     }
9472 
    if (!dc_isar_feature(aa64_fp16, s)) {
        unallocated_encoding(s);
        return;
    }
9476 
9477     if (!fp_access_check(s)) {
9478         return;
9479     }
9480 
9481     fpst = fpstatus_ptr(FPST_FPCR_F16);
9482 
9483     tcg_op1 = read_fp_hreg(s, rn);
9484     tcg_op2 = read_fp_hreg(s, rm);
9485     tcg_res = tcg_temp_new_i32();
9486 
9487     switch (fpopcode) {
9488     case 0x03: /* FMULX */
9489         gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9490         break;
9491     case 0x04: /* FCMEQ (reg) */
9492         gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9493         break;
9494     case 0x07: /* FRECPS */
9495         gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9496         break;
9497     case 0x0f: /* FRSQRTS */
9498         gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9499         break;
9500     case 0x14: /* FCMGE (reg) */
9501         gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9502         break;
9503     case 0x15: /* FACGE */
9504         gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9505         break;
9506     case 0x1a: /* FABD */
9507         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
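             /* clearing the f16 sign bit (bit 15) turns a - b into |a - b| */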
9508         tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9509         break;
9510     case 0x1c: /* FCMGT (reg) */
9511         gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9512         break;
9513     case 0x1d: /* FACGT */
9514         gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9515         break;
9516     default:
9517         g_assert_not_reached();
9518     }
9519 
9520     write_fp_sreg(s, rd, tcg_res);
9521 }
9522 
9523 /* AdvSIMD scalar three same extra
9524  *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
9525  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9526  * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
9527  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9528  */
9529 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9530                                                    uint32_t insn)
9531 {
9532     int rd = extract32(insn, 0, 5);
9533     int rn = extract32(insn, 5, 5);
9534     int opcode = extract32(insn, 11, 4);
9535     int rm = extract32(insn, 16, 5);
9536     int size = extract32(insn, 22, 2);
9537     bool u = extract32(insn, 29, 1);
9538     TCGv_i32 ele1, ele2, ele3;
9539     TCGv_i64 res;
9540     bool feature;
9541 
9542     switch (u * 16 + opcode) {
9543     case 0x10: /* SQRDMLAH (vector) */
9544     case 0x11: /* SQRDMLSH (vector) */
9545         if (size != 1 && size != 2) {
9546             unallocated_encoding(s);
9547             return;
9548         }
9549         feature = dc_isar_feature(aa64_rdm, s);
9550         break;
9551     default:
9552         unallocated_encoding(s);
9553         return;
9554     }
9555     if (!feature) {
9556         unallocated_encoding(s);
9557         return;
9558     }
9559     if (!fp_access_check(s)) {
9560         return;
9561     }
9562 
9563     /* Do a single operation on the lowest element in the vector.
9564      * We use the standard Neon helpers and rely on 0 OP 0 == 0
9565      * with no side effects for all these operations.
9566      * OPTME: special-purpose helpers would avoid doing some
9567      * unnecessary work in the helper for the 16 bit cases.
9568      */
9569     ele1 = tcg_temp_new_i32();
9570     ele2 = tcg_temp_new_i32();
9571     ele3 = tcg_temp_new_i32();
9572 
9573     read_vec_element_i32(s, ele1, rn, 0, size);
9574     read_vec_element_i32(s, ele2, rm, 0, size);
9575     read_vec_element_i32(s, ele3, rd, 0, size);
9576 
9577     switch (opcode) {
9578     case 0x0: /* SQRDMLAH */
9579         if (size == 1) {
9580             gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
9581         } else {
9582             gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
9583         }
9584         break;
9585     case 0x1: /* SQRDMLSH */
9586         if (size == 1) {
9587             gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
9588         } else {
9589             gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
9590         }
9591         break;
9592     default:
9593         g_assert_not_reached();
9594     }
9595 
9596     res = tcg_temp_new_i64();
9597     tcg_gen_extu_i32_i64(res, ele3);
9598     write_fp_dreg(s, rd, res);
9599 }
9600 
9601 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9602                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9603                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9604 {
9605     /* Handle 64->64 opcodes which are shared between the scalar and
9606      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9607      * is valid in either group and also the double-precision fp ops.
9608      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
9609      * The caller need only provide tcg_rmode and tcg_fpstatus if the op
9610      */
9611     TCGCond cond;
9612 
9613     switch (opcode) {
9614     case 0x4: /* CLS, CLZ */
9615         if (u) {
9616             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9617         } else {
9618             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9619         }
9620         break;
9621     case 0x5: /* NOT */
9622         /* This opcode is shared with CNT and RBIT but we have earlier
9623          * enforced that size == 3 if and only if this is the NOT insn.
9624          */
9625         tcg_gen_not_i64(tcg_rd, tcg_rn);
9626         break;
9627     case 0x7: /* SQABS, SQNEG */
9628         if (u) {
9629             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
9630         } else {
9631             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
9632         }
9633         break;
9634     case 0xa: /* CMLT */
9635         /* 64 bit integer comparison against zero, result is
9636          * test ? (2^64 - 1) : 0. We implement via setcond(test),
9637          * which yields 0 or 1, and then negating that result.
9638          */
9639         cond = TCG_COND_LT;
9640     do_cmop:
9641         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
9642         tcg_gen_neg_i64(tcg_rd, tcg_rd);
9643         break;
9644     case 0x8: /* CMGT, CMGE */
9645         cond = u ? TCG_COND_GE : TCG_COND_GT;
9646         goto do_cmop;
9647     case 0x9: /* CMEQ, CMLE */
9648         cond = u ? TCG_COND_LE : TCG_COND_EQ;
9649         goto do_cmop;
9650     case 0xb: /* ABS, NEG */
9651         if (u) {
9652             tcg_gen_neg_i64(tcg_rd, tcg_rn);
9653         } else {
9654             tcg_gen_abs_i64(tcg_rd, tcg_rn);
9655         }
9656         break;
9657     case 0x2f: /* FABS */
9658         gen_helper_vfp_absd(tcg_rd, tcg_rn);
9659         break;
9660     case 0x6f: /* FNEG */
9661         gen_helper_vfp_negd(tcg_rd, tcg_rn);
9662         break;
9663     case 0x7f: /* FSQRT */
9664         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
9665         break;
9666     case 0x1a: /* FCVTNS */
9667     case 0x1b: /* FCVTMS */
9668     case 0x1c: /* FCVTAS */
9669     case 0x3a: /* FCVTPS */
9670     case 0x3b: /* FCVTZS */
9671         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9672         break;
9673     case 0x5a: /* FCVTNU */
9674     case 0x5b: /* FCVTMU */
9675     case 0x5c: /* FCVTAU */
9676     case 0x7a: /* FCVTPU */
9677     case 0x7b: /* FCVTZU */
9678         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9679         break;
9680     case 0x18: /* FRINTN */
9681     case 0x19: /* FRINTM */
9682     case 0x38: /* FRINTP */
9683     case 0x39: /* FRINTZ */
9684     case 0x58: /* FRINTA */
9685     case 0x79: /* FRINTI */
9686         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
9687         break;
9688     case 0x59: /* FRINTX */
9689         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
9690         break;
9691     case 0x1e: /* FRINT32Z */
9692     case 0x5e: /* FRINT32X */
9693         gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
9694         break;
9695     case 0x1f: /* FRINT64Z */
9696     case 0x5f: /* FRINT64X */
9697         gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
9698         break;
9699     default:
9700         g_assert_not_reached();
9701     }
9702 }
9703 
9704 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
9705                                    bool is_scalar, bool is_u, bool is_q,
9706                                    int size, int rn, int rd)
9707 {
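         /*
          * There are no LT/LE helpers: FCMLT and FCMLE are implemented
          * by swapping the operands and using the GT/GE helpers instead,
          * since a < 0 is 0 > a and a <= 0 is 0 >= a.
          */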
9708     bool is_double = (size == MO_64);
9709     TCGv_ptr fpst;
9710 
9711     if (!fp_access_check(s)) {
9712         return;
9713     }
9714 
9715     fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9716 
9717     if (is_double) {
9718         TCGv_i64 tcg_op = tcg_temp_new_i64();
9719         TCGv_i64 tcg_zero = tcg_constant_i64(0);
9720         TCGv_i64 tcg_res = tcg_temp_new_i64();
9721         NeonGenTwoDoubleOpFn *genfn;
9722         bool swap = false;
9723         int pass;
9724 
9725         switch (opcode) {
9726         case 0x2e: /* FCMLT (zero) */
9727             swap = true;
9728             /* fall through */
9729         case 0x2c: /* FCMGT (zero) */
9730             genfn = gen_helper_neon_cgt_f64;
9731             break;
9732         case 0x2d: /* FCMEQ (zero) */
9733             genfn = gen_helper_neon_ceq_f64;
9734             break;
9735         case 0x6d: /* FCMLE (zero) */
9736             swap = true;
9737             /* fall through */
9738         case 0x6c: /* FCMGE (zero) */
9739             genfn = gen_helper_neon_cge_f64;
9740             break;
9741         default:
9742             g_assert_not_reached();
9743         }
9744 
9745         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9746             read_vec_element(s, tcg_op, rn, pass, MO_64);
9747             if (swap) {
9748                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9749             } else {
9750                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9751             }
9752             write_vec_element(s, tcg_res, rd, pass, MO_64);
9753         }
9754 
9755         clear_vec_high(s, !is_scalar, rd);
9756     } else {
9757         TCGv_i32 tcg_op = tcg_temp_new_i32();
9758         TCGv_i32 tcg_zero = tcg_constant_i32(0);
9759         TCGv_i32 tcg_res = tcg_temp_new_i32();
9760         NeonGenTwoSingleOpFn *genfn;
9761         bool swap = false;
9762         int pass, maxpasses;
9763 
9764         if (size == MO_16) {
9765             switch (opcode) {
9766             case 0x2e: /* FCMLT (zero) */
9767                 swap = true;
9768                 /* fall through */
9769             case 0x2c: /* FCMGT (zero) */
9770                 genfn = gen_helper_advsimd_cgt_f16;
9771                 break;
9772             case 0x2d: /* FCMEQ (zero) */
9773                 genfn = gen_helper_advsimd_ceq_f16;
9774                 break;
9775             case 0x6d: /* FCMLE (zero) */
9776                 swap = true;
9777                 /* fall through */
9778             case 0x6c: /* FCMGE (zero) */
9779                 genfn = gen_helper_advsimd_cge_f16;
9780                 break;
9781             default:
9782                 g_assert_not_reached();
9783             }
9784         } else {
9785             switch (opcode) {
9786             case 0x2e: /* FCMLT (zero) */
9787                 swap = true;
9788                 /* fall through */
9789             case 0x2c: /* FCMGT (zero) */
9790                 genfn = gen_helper_neon_cgt_f32;
9791                 break;
9792             case 0x2d: /* FCMEQ (zero) */
9793                 genfn = gen_helper_neon_ceq_f32;
9794                 break;
9795             case 0x6d: /* FCMLE (zero) */
9796                 swap = true;
9797                 /* fall through */
9798             case 0x6c: /* FCMGE (zero) */
9799                 genfn = gen_helper_neon_cge_f32;
9800                 break;
9801             default:
9802                 g_assert_not_reached();
9803             }
9804         }
9805 
9806         if (is_scalar) {
9807             maxpasses = 1;
9808         } else {
9809             int vector_size = 8 << is_q;
9810             maxpasses = vector_size >> size;
9811         }
9812 
9813         for (pass = 0; pass < maxpasses; pass++) {
9814             read_vec_element_i32(s, tcg_op, rn, pass, size);
9815             if (swap) {
9816                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9817             } else {
9818                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9819             }
9820             if (is_scalar) {
9821                 write_fp_sreg(s, rd, tcg_res);
9822             } else {
9823                 write_vec_element_i32(s, tcg_res, rd, pass, size);
9824             }
9825         }
9826 
9827         if (!is_scalar) {
9828             clear_vec_high(s, is_q, rd);
9829         }
9830     }
9831 }
9832 
9833 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
9834                                     bool is_scalar, bool is_u, bool is_q,
9835                                     int size, int rn, int rd)
9836 {
9837     bool is_double = (size == 3);
9838     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9839 
9840     if (is_double) {
9841         TCGv_i64 tcg_op = tcg_temp_new_i64();
9842         TCGv_i64 tcg_res = tcg_temp_new_i64();
9843         int pass;
9844 
9845         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9846             read_vec_element(s, tcg_op, rn, pass, MO_64);
9847             switch (opcode) {
9848             case 0x3d: /* FRECPE */
9849                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
9850                 break;
9851             case 0x3f: /* FRECPX */
9852                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
9853                 break;
9854             case 0x7d: /* FRSQRTE */
9855                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
9856                 break;
9857             default:
9858                 g_assert_not_reached();
9859             }
9860             write_vec_element(s, tcg_res, rd, pass, MO_64);
9861         }
9862         clear_vec_high(s, !is_scalar, rd);
9863     } else {
9864         TCGv_i32 tcg_op = tcg_temp_new_i32();
9865         TCGv_i32 tcg_res = tcg_temp_new_i32();
9866         int pass, maxpasses;
9867 
9868         if (is_scalar) {
9869             maxpasses = 1;
9870         } else {
9871             maxpasses = is_q ? 4 : 2;
9872         }
9873 
9874         for (pass = 0; pass < maxpasses; pass++) {
9875             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
9876 
9877             switch (opcode) {
9878             case 0x3c: /* URECPE */
9879                 gen_helper_recpe_u32(tcg_res, tcg_op);
9880                 break;
9881             case 0x3d: /* FRECPE */
9882                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
9883                 break;
9884             case 0x3f: /* FRECPX */
9885                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
9886                 break;
9887             case 0x7d: /* FRSQRTE */
9888                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
9889                 break;
9890             default:
9891                 g_assert_not_reached();
9892             }
9893 
9894             if (is_scalar) {
9895                 write_fp_sreg(s, rd, tcg_res);
9896             } else {
9897                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9898             }
9899         }
9900         if (!is_scalar) {
9901             clear_vec_high(s, is_q, rd);
9902         }
9903     }
9904 }
9905 
9906 static void handle_2misc_narrow(DisasContext *s, bool scalar,
9907                                 int opcode, bool u, bool is_q,
9908                                 int size, int rn, int rd)
9909 {
9910     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
9911      * in the source becomes a size element in the destination).
9912      */
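         /* e.g. XTN with size == 1 reads 32-bit elements and writes 16-bit ones */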
9913     int pass;
9914     TCGv_i32 tcg_res[2];
9915     int destelt = is_q ? 2 : 0;
9916     int passes = scalar ? 1 : 2;
9917 
9918     if (scalar) {
9919         tcg_res[1] = tcg_constant_i32(0);
9920     }
9921 
9922     for (pass = 0; pass < passes; pass++) {
9923         TCGv_i64 tcg_op = tcg_temp_new_i64();
9924         NeonGenNarrowFn *genfn = NULL;
9925         NeonGenNarrowEnvFn *genenvfn = NULL;
9926 
9927         if (scalar) {
9928             read_vec_element(s, tcg_op, rn, pass, size + 1);
9929         } else {
9930             read_vec_element(s, tcg_op, rn, pass, MO_64);
9931         }
9932         tcg_res[pass] = tcg_temp_new_i32();
9933 
9934         switch (opcode) {
9935         case 0x12: /* XTN, SQXTUN */
9936         {
9937             static NeonGenNarrowFn * const xtnfns[3] = {
9938                 gen_helper_neon_narrow_u8,
9939                 gen_helper_neon_narrow_u16,
9940                 tcg_gen_extrl_i64_i32,
9941             };
9942             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
9943                 gen_helper_neon_unarrow_sat8,
9944                 gen_helper_neon_unarrow_sat16,
9945                 gen_helper_neon_unarrow_sat32,
9946             };
9947             if (u) {
9948                 genenvfn = sqxtunfns[size];
9949             } else {
9950                 genfn = xtnfns[size];
9951             }
9952             break;
9953         }
9954         case 0x14: /* SQXTN, UQXTN */
9955         {
9956             static NeonGenNarrowEnvFn * const fns[3][2] = {
9957                 { gen_helper_neon_narrow_sat_s8,
9958                   gen_helper_neon_narrow_sat_u8 },
9959                 { gen_helper_neon_narrow_sat_s16,
9960                   gen_helper_neon_narrow_sat_u16 },
9961                 { gen_helper_neon_narrow_sat_s32,
9962                   gen_helper_neon_narrow_sat_u32 },
9963             };
9964             genenvfn = fns[size][u];
9965             break;
9966         }
9967         case 0x16: /* FCVTN, FCVTN2 */
9968             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9969             if (size == 2) {
9970                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
9971             } else {
9972                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9973                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9974                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9975                 TCGv_i32 ahp = get_ahp_flag();
9976 
9977                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
9978                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9979                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9980                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9981             }
9982             break;
9983         case 0x36: /* BFCVTN, BFCVTN2 */
9984             {
9985                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9986                 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
9987             }
9988             break;
9989         case 0x56:  /* FCVTXN, FCVTXN2 */
9990             /* 64 bit to 32 bit float conversion
9991              * with von Neumann rounding (round to odd)
9992              */
9993             assert(size == 2);
9994             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
9995             break;
9996         default:
9997             g_assert_not_reached();
9998         }
9999 
10000         if (genfn) {
10001             genfn(tcg_res[pass], tcg_op);
10002         } else if (genenvfn) {
10003             genenvfn(tcg_res[pass], cpu_env, tcg_op);
10004         }
10005     }
10006 
10007     for (pass = 0; pass < 2; pass++) {
10008         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
10009     }
10010     clear_vec_high(s, is_q, rd);
10011 }
10012 
10013 /* Remaining saturating accumulating ops */
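      /*
       * USQADD adds a signed value to an unsigned destination with
       * unsigned saturation; SUQADD adds an unsigned value to a signed
       * destination with signed saturation.
       */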
10014 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
10015                                 bool is_q, int size, int rn, int rd)
10016 {
10017     bool is_double = (size == 3);
10018 
10019     if (is_double) {
10020         TCGv_i64 tcg_rn = tcg_temp_new_i64();
10021         TCGv_i64 tcg_rd = tcg_temp_new_i64();
10022         int pass;
10023 
10024         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10025             read_vec_element(s, tcg_rn, rn, pass, MO_64);
10026             read_vec_element(s, tcg_rd, rd, pass, MO_64);
10027 
10028             if (is_u) { /* USQADD */
10029                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10030             } else { /* SUQADD */
10031                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10032             }
10033             write_vec_element(s, tcg_rd, rd, pass, MO_64);
10034         }
10035         clear_vec_high(s, !is_scalar, rd);
10036     } else {
10037         TCGv_i32 tcg_rn = tcg_temp_new_i32();
10038         TCGv_i32 tcg_rd = tcg_temp_new_i32();
10039         int pass, maxpasses;
10040 
10041         if (is_scalar) {
10042             maxpasses = 1;
10043         } else {
10044             maxpasses = is_q ? 4 : 2;
10045         }
10046 
10047         for (pass = 0; pass < maxpasses; pass++) {
10048             if (is_scalar) {
10049                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
10050                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
10051             } else {
10052                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
10053                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
10054             }
10055 
10056             if (is_u) { /* USQADD */
10057                 switch (size) {
10058                 case 0:
10059                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10060                     break;
10061                 case 1:
10062                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10063                     break;
10064                 case 2:
10065                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10066                     break;
10067                 default:
10068                     g_assert_not_reached();
10069                 }
10070             } else { /* SUQADD */
10071                 switch (size) {
10072                 case 0:
10073                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10074                     break;
10075                 case 1:
10076                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10077                     break;
10078                 case 2:
10079                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
10080                     break;
10081                 default:
10082                     g_assert_not_reached();
10083                 }
10084             }
10085 
10086             if (is_scalar) {
10087                 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64);
10088             }
10089             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
10090         }
10091         clear_vec_high(s, is_q, rd);
10092     }
10093 }
10094 
10095 /* AdvSIMD scalar two reg misc
10096  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
10097  * +-----+---+-----------+------+-----------+--------+-----+------+------+
10098  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10099  * +-----+---+-----------+------+-----------+--------+-----+------+------+
10100  */
10101 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
10102 {
10103     int rd = extract32(insn, 0, 5);
10104     int rn = extract32(insn, 5, 5);
10105     int opcode = extract32(insn, 12, 5);
10106     int size = extract32(insn, 22, 2);
10107     bool u = extract32(insn, 29, 1);
10108     bool is_fcvt = false;
10109     int rmode;
10110     TCGv_i32 tcg_rmode;
10111     TCGv_ptr tcg_fpstatus;
10112 
10113     switch (opcode) {
10114     case 0x3: /* USQADD / SUQADD */
10115         if (!fp_access_check(s)) {
10116             return;
10117         }
10118         handle_2misc_satacc(s, true, u, false, size, rn, rd);
10119         return;
10120     case 0x7: /* SQABS / SQNEG */
10121         break;
10122     case 0xa: /* CMLT */
10123         if (u) {
10124             unallocated_encoding(s);
10125             return;
10126         }
10127         /* fall through */
10128     case 0x8: /* CMGT, CMGE */
10129     case 0x9: /* CMEQ, CMLE */
10130     case 0xb: /* ABS, NEG */
10131         if (size != 3) {
10132             unallocated_encoding(s);
10133             return;
10134         }
10135         break;
10136     case 0x12: /* SQXTUN */
10137         if (!u) {
10138             unallocated_encoding(s);
10139             return;
10140         }
10141         /* fall through */
10142     case 0x14: /* SQXTN, UQXTN */
10143         if (size == 3) {
10144             unallocated_encoding(s);
10145             return;
10146         }
10147         if (!fp_access_check(s)) {
10148             return;
10149         }
10150         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
10151         return;
10152     case 0xc ... 0xf:
10153     case 0x16 ... 0x1d:
10154     case 0x1f:
10155         /* Floating point: U, size[1] and opcode indicate operation;
10156          * size[0] indicates single or double precision.
10157          */
10158         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10159         size = extract32(size, 0, 1) ? 3 : 2;
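              /*
               * e.g. FCVTZU has u = 1, size[1] = 1 and opcode = 0x1b,
               * giving 0x1b | 0x20 | 0x40 = 0x7b below.
               */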
10160         switch (opcode) {
10161         case 0x2c: /* FCMGT (zero) */
10162         case 0x2d: /* FCMEQ (zero) */
10163         case 0x2e: /* FCMLT (zero) */
10164         case 0x6c: /* FCMGE (zero) */
10165         case 0x6d: /* FCMLE (zero) */
10166             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
10167             return;
10168         case 0x1d: /* SCVTF */
10169         case 0x5d: /* UCVTF */
10170         {
10171             bool is_signed = (opcode == 0x1d);
10172             if (!fp_access_check(s)) {
10173                 return;
10174             }
10175             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
10176             return;
10177         }
10178         case 0x3d: /* FRECPE */
10179         case 0x3f: /* FRECPX */
10180         case 0x7d: /* FRSQRTE */
10181             if (!fp_access_check(s)) {
10182                 return;
10183             }
10184             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
10185             return;
10186         case 0x1a: /* FCVTNS */
10187         case 0x1b: /* FCVTMS */
10188         case 0x3a: /* FCVTPS */
10189         case 0x3b: /* FCVTZS */
10190         case 0x5a: /* FCVTNU */
10191         case 0x5b: /* FCVTMU */
10192         case 0x7a: /* FCVTPU */
10193         case 0x7b: /* FCVTZU */
10194             is_fcvt = true;
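                  /* opcode bit 5 and bit 0 give the rounding mode:
                   * N -> 0 (TIEEVEN), P -> 1 (POSINF),
                   * M -> 2 (NEGINF), Z -> 3 (ZERO)
                   */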
10195             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10196             break;
10197         case 0x1c: /* FCVTAS */
10198         case 0x5c: /* FCVTAU */
10199             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
10200             is_fcvt = true;
10201             rmode = FPROUNDING_TIEAWAY;
10202             break;
10203         case 0x56: /* FCVTXN, FCVTXN2 */
10204             if (size == 2) {
10205                 unallocated_encoding(s);
10206                 return;
10207             }
10208             if (!fp_access_check(s)) {
10209                 return;
10210             }
10211             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
10212             return;
10213         default:
10214             unallocated_encoding(s);
10215             return;
10216         }
10217         break;
10218     default:
10219         unallocated_encoding(s);
10220         return;
10221     }
10222 
10223     if (!fp_access_check(s)) {
10224         return;
10225     }
10226 
10227     if (is_fcvt) {
10228         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
10229         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
10230     } else {
10231         tcg_fpstatus = NULL;
10232         tcg_rmode = NULL;
10233     }
10234 
10235     if (size == 3) {
10236         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
10237         TCGv_i64 tcg_rd = tcg_temp_new_i64();
10238 
10239         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
10240         write_fp_dreg(s, rd, tcg_rd);
10241     } else {
10242         TCGv_i32 tcg_rn = tcg_temp_new_i32();
10243         TCGv_i32 tcg_rd = tcg_temp_new_i32();
10244 
10245         read_vec_element_i32(s, tcg_rn, rn, 0, size);
10246 
10247         switch (opcode) {
10248         case 0x7: /* SQABS, SQNEG */
10249         {
10250             NeonGenOneOpEnvFn *genfn;
10251             static NeonGenOneOpEnvFn * const fns[3][2] = {
10252                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10253                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10254                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
10255             };
10256             genfn = fns[size][u];
10257             genfn(tcg_rd, cpu_env, tcg_rn);
10258             break;
10259         }
10260         case 0x1a: /* FCVTNS */
10261         case 0x1b: /* FCVTMS */
10262         case 0x1c: /* FCVTAS */
10263         case 0x3a: /* FCVTPS */
10264         case 0x3b: /* FCVTZS */
10265             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10266                                  tcg_fpstatus);
10267             break;
10268         case 0x5a: /* FCVTNU */
10269         case 0x5b: /* FCVTMU */
10270         case 0x5c: /* FCVTAU */
10271         case 0x7a: /* FCVTPU */
10272         case 0x7b: /* FCVTZU */
10273             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10274                                  tcg_fpstatus);
10275             break;
10276         default:
10277             g_assert_not_reached();
10278         }
10279 
10280         write_fp_sreg(s, rd, tcg_rd);
10281     }
10282 
10283     if (is_fcvt) {
10284         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
10285     }
10286 }
10287 
10288 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
10289 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
10290                                  int immh, int immb, int opcode, int rn, int rd)
10291 {
10292     int size = 32 - clz32(immh) - 1;
10293     int immhb = immh << 3 | immb;
10294     int shift = 2 * (8 << size) - immhb;
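          /*
           * e.g. immh = 0b01xx selects size = 2 (32-bit lanes); a right
           * shift by n is encoded as immh:immb = 64 - n, which shift
           * recovers here.
           */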
10295     GVecGen2iFn *gvec_fn;
10296 
10297     if (extract32(immh, 3, 1) && !is_q) {
10298         unallocated_encoding(s);
10299         return;
10300     }
10301     tcg_debug_assert(size <= 3);
10302 
10303     if (!fp_access_check(s)) {
10304         return;
10305     }
10306 
10307     switch (opcode) {
10308     case 0x02: /* SSRA / USRA (accumulate) */
10309         gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
10310         break;
10311 
10312     case 0x08: /* SRI */
10313         gvec_fn = gen_gvec_sri;
10314         break;
10315 
10316     case 0x00: /* SSHR / USHR */
10317         if (is_u) {
10318             if (shift == 8 << size) {
10319                 /* A shift count equal to the element size produces zero.  */
10320                 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
10321                                      is_q ? 16 : 8, vec_full_reg_size(s), 0);
10322                 return;
10323             }
10324             gvec_fn = tcg_gen_gvec_shri;
10325         } else {
10326             /* A shift count equal to the element size produces all sign bits.  */
10327             if (shift == 8 << size) {
10328                 shift -= 1;
10329             }
10330             gvec_fn = tcg_gen_gvec_sari;
10331         }
10332         break;
10333 
10334     case 0x04: /* SRSHR / URSHR (rounding) */
10335         gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
10336         break;
10337 
10338     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10339         gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
10340         break;
10341 
10342     default:
10343         g_assert_not_reached();
10344     }
10345 
10346     gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
10347 }
10348 
10349 /* SHL/SLI - Vector shift left */
10350 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10351                                  int immh, int immb, int opcode, int rn, int rd)
10352 {
10353     int size = 32 - clz32(immh) - 1;
10354     int immhb = immh << 3 | immb;
10355     int shift = immhb - (8 << size);
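          /*
           * e.g. for 32-bit lanes (immh = 0b01xx) a left shift by n is
           * encoded as immh:immb = 32 + n, which shift recovers here.
           */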
10356 
10357     /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10358     assert(size >= 0 && size <= 3);
10359 
10360     if (extract32(immh, 3, 1) && !is_q) {
10361         unallocated_encoding(s);
10362         return;
10363     }
10364 
10365     if (!fp_access_check(s)) {
10366         return;
10367     }
10368 
10369     if (insert) {
10370         gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
10371     } else {
10372         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10373     }
10374 }
10375 
10376 /* USHLL/SHLL - Vector shift left with widening */
10377 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10378                                  int immh, int immb, int opcode, int rn, int rd)
10379 {
10380     int size = 32 - clz32(immh) - 1;
10381     int immhb = immh << 3 | immb;
10382     int shift = immhb - (8 << size);
10383     int dsize = 64;
10384     int esize = 8 << size;
10385     int elements = dsize / esize;
10386     TCGv_i64 tcg_rn = tcg_temp_new_i64();
10387     TCGv_i64 tcg_rd = tcg_temp_new_i64();
10388     int i;
10389 
10390     if (size >= 3) {
10391         unallocated_encoding(s);
10392         return;
10393     }
10394 
10395     if (!fp_access_check(s)) {
10396         return;
10397     }
10398 
10399     /* For the LL variants the store is larger than the load,
10400      * so if rd == rn we would overwrite parts of our input.
10401      * We therefore load everything up front and use shifts in the main loop.
10402      */
10403     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10404 
10405     for (i = 0; i < elements; i++) {
10406         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
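              /* extend option size | (!is_u << 2): UXT{B,H,W} or SXT{B,H,W} */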
10407         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10408         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10409         write_vec_element(s, tcg_rd, rd, i, size + 1);
10410     }
10411 }
10412 
10413 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10414 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10415                                  int immh, int immb, int opcode, int rn, int rd)
10416 {
10417     int immhb = immh << 3 | immb;
10418     int size = 32 - clz32(immh) - 1;
10419     int dsize = 64;
10420     int esize = 8 << size;
10421     int elements = dsize / esize;
10422     int shift = (2 * esize) - immhb;
10423     bool round = extract32(opcode, 0, 1);
10424     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10425     TCGv_i64 tcg_round;
10426     int i;
10427 
10428     if (extract32(immh, 3, 1)) {
10429         unallocated_encoding(s);
10430         return;
10431     }
10432 
10433     if (!fp_access_check(s)) {
10434         return;
10435     }
10436 
10437     tcg_rn = tcg_temp_new_i64();
10438     tcg_rd = tcg_temp_new_i64();
10439     tcg_final = tcg_temp_new_i64();
10440     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10441 
10442     if (round) {
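              /* round by adding 1 << (shift - 1), half the discarded part */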
10443         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
10444     } else {
10445         tcg_round = NULL;
10446     }
10447 
10448     for (i = 0; i < elements; i++) {
10449         read_vec_element(s, tcg_rn, rn, i, size + 1);
10450         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10451                                 false, true, size + 1, shift);
10452 
10453         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10454     }
10455 
10456     if (!is_q) {
10457         write_vec_element(s, tcg_final, rd, 0, MO_64);
10458     } else {
10459         write_vec_element(s, tcg_final, rd, 1, MO_64);
10460     }
10461 
10462     clear_vec_high(s, is_q, rd);
10463 }
10464 
10465 
10466 /* AdvSIMD shift by immediate
10467  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10468  * +---+---+---+-------------+------+------+--------+---+------+------+
10469  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10470  * +---+---+---+-------------+------+------+--------+---+------+------+
10471  */
10472 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10473 {
10474     int rd = extract32(insn, 0, 5);
10475     int rn = extract32(insn, 5, 5);
10476     int opcode = extract32(insn, 11, 5);
10477     int immb = extract32(insn, 16, 3);
10478     int immh = extract32(insn, 19, 4);
10479     bool is_u = extract32(insn, 29, 1);
10480     bool is_q = extract32(insn, 30, 1);
10481 
10482     /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
10483     assert(immh != 0);
10484 
10485     switch (opcode) {
10486     case 0x08: /* SRI */
10487         if (!is_u) {
10488             unallocated_encoding(s);
10489             return;
10490         }
10491         /* fall through */
10492     case 0x00: /* SSHR / USHR */
10493     case 0x02: /* SSRA / USRA (accumulate) */
10494     case 0x04: /* SRSHR / URSHR (rounding) */
10495     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10496         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10497         break;
10498     case 0x0a: /* SHL / SLI */
10499         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10500         break;
10501     case 0x10: /* SHRN / SQSHRUN */
10502     case 0x11: /* RSHRN / SQRSHRUN */
10503         if (is_u) {
10504             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10505                                    opcode, rn, rd);
10506         } else {
10507             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10508         }
10509         break;
10510     case 0x12: /* SQSHRN / UQSHRN */
10511     case 0x13: /* SQRSHRN / UQRSHRN */
10512         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10513                                opcode, rn, rd);
10514         break;
10515     case 0x14: /* SSHLL / USHLL */
10516         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10517         break;
10518     case 0x1c: /* SCVTF / UCVTF */
10519         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10520                                      opcode, rn, rd);
10521         break;
10522     case 0xc: /* SQSHLU */
10523         if (!is_u) {
10524             unallocated_encoding(s);
10525             return;
10526         }
10527         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10528         break;
10529     case 0xe: /* SQSHL, UQSHL */
10530         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10531         break;
10532     case 0x1f: /* FCVTZS/ FCVTZU */
10533         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10534         return;
10535     default:
10536         unallocated_encoding(s);
10537         return;
10538     }
10539 }
10540 
10541 /* Generate code to do a "long" addition or subtraction, i.e. one done in
10542  * TCGv_i64 on vector lanes twice the width specified by size.
10543  */
10544 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10545                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10546 {
10547     static NeonGenTwo64OpFn * const fns[3][2] = {
10548         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10549         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10550         { tcg_gen_add_i64, tcg_gen_sub_i64 },
10551     };
10552     NeonGenTwo64OpFn *genfn;
10553     assert(size < 3);
10554 
10555     genfn = fns[size][is_sub];
10556     genfn(tcg_res, tcg_op1, tcg_op2);
10557 }
10558 
10559 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
10560                                 int opcode, int rd, int rn, int rm)
10561 {
10562     /* 3-reg-different widening insns: 64 x 64 -> 128 */
10563     TCGv_i64 tcg_res[2];
10564     int pass, accop;
10565 
10566     tcg_res[0] = tcg_temp_new_i64();
10567     tcg_res[1] = tcg_temp_new_i64();
10568 
10569     /* Does this op do an adding accumulate, a subtracting accumulate,
10570      * or no accumulate at all?
10571      */
10572     switch (opcode) {
10573     case 5:
10574     case 8:
10575     case 9:
10576         accop = 1;
10577         break;
10578     case 10:
10579     case 11:
10580         accop = -1;
10581         break;
10582     default:
10583         accop = 0;
10584         break;
10585     }
10586 
10587     if (accop != 0) {
10588         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
10589         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
10590     }
10591 
10592     /* size == 2 means two 32x32->64 operations; this is worth special
10593      * casing because we can generally handle it inline.
10594      */
10595     if (size == 2) {
10596         for (pass = 0; pass < 2; pass++) {
10597             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10598             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10599             TCGv_i64 tcg_passres;
10600             MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
10601 
10602             int elt = pass + is_q * 2;
10603 
10604             read_vec_element(s, tcg_op1, rn, elt, memop);
10605             read_vec_element(s, tcg_op2, rm, elt, memop);
10606 
10607             if (accop == 0) {
10608                 tcg_passres = tcg_res[pass];
10609             } else {
10610                 tcg_passres = tcg_temp_new_i64();
10611             }
10612 
10613             switch (opcode) {
10614             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10615                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
10616                 break;
10617             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10618                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
10619                 break;
10620             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10621             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10622             {
10623                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
10624                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
10625 
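                      /* absolute difference: compute both a - b and b - a,
                       * then keep whichever is non-negative
                       */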
10626                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
10627                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
10628                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
10629                                     tcg_passres,
10630                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
10631                 break;
10632             }
10633             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10634             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10635             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10636                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10637                 break;
10638             case 9: /* SQDMLAL, SQDMLAL2 */
10639             case 11: /* SQDMLSL, SQDMLSL2 */
10640             case 13: /* SQDMULL, SQDMULL2 */
10641                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
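                      /* the saturating self-add doubles the product (SQDMULL) */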
10642                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10643                                                   tcg_passres, tcg_passres);
10644                 break;
10645             default:
10646                 g_assert_not_reached();
10647             }
10648 
10649             if (opcode == 9 || opcode == 11) {
10650                 /* saturating accumulate ops */
10651                 if (accop < 0) {
10652                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10653                 }
10654                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10655                                                   tcg_res[pass], tcg_passres);
10656             } else if (accop > 0) {
10657                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10658             } else if (accop < 0) {
10659                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10660             }
10661         }
10662     } else {
10663         /* size 0 or 1, generally helper functions */
10664         for (pass = 0; pass < 2; pass++) {
10665             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10666             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10667             TCGv_i64 tcg_passres;
10668             int elt = pass + is_q * 2;
10669 
10670             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
10671             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
10672 
10673             if (accop == 0) {
10674                 tcg_passres = tcg_res[pass];
10675             } else {
10676                 tcg_passres = tcg_temp_new_i64();
10677             }
10678 
10679             switch (opcode) {
10680             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10681             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10682             {
10683                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
10684                 static NeonGenWidenFn * const widenfns[2][2] = {
10685                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10686                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10687                 };
10688                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
10689 
10690                 widenfn(tcg_op2_64, tcg_op2);
10691                 widenfn(tcg_passres, tcg_op1);
10692                 gen_neon_addl(size, (opcode == 2), tcg_passres,
10693                               tcg_passres, tcg_op2_64);
10694                 break;
10695             }
10696             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10697             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10698                 if (size == 0) {
10699                     if (is_u) {
10700                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
10701                     } else {
10702                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
10703                     }
10704                 } else {
10705                     if (is_u) {
10706                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
10707                     } else {
10708                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
10709                     }
10710                 }
10711                 break;
10712             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10713             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10714             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10715                 if (size == 0) {
10716                     if (is_u) {
10717                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
10718                     } else {
10719                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
10720                     }
10721                 } else {
10722                     if (is_u) {
10723                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
10724                     } else {
10725                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10726                     }
10727                 }
10728                 break;
10729             case 9: /* SQDMLAL, SQDMLAL2 */
10730             case 11: /* SQDMLSL, SQDMLSL2 */
10731             case 13: /* SQDMULL, SQDMULL2 */
10732                 assert(size == 1);
10733                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10734                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10735                                                   tcg_passres, tcg_passres);
10736                 break;
10737             default:
10738                 g_assert_not_reached();
10739             }
10740 
10741             if (accop != 0) {
10742                 if (opcode == 9 || opcode == 11) {
10743                     /* saturating accumulate ops */
10744                     if (accop < 0) {
10745                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10746                     }
10747                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10748                                                       tcg_res[pass],
10749                                                       tcg_passres);
10750                 } else {
10751                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
10752                                   tcg_res[pass], tcg_passres);
10753                 }
10754             }
10755         }
10756     }
10757 
10758     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
10759     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
10760 }
10761 
10762 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10763                             int opcode, int rd, int rn, int rm)
10764 {
10765     TCGv_i64 tcg_res[2];
10766     int part = is_q ? 2 : 0;
10767     int pass;
10768 
10769     for (pass = 0; pass < 2; pass++) {
10770         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10771         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10772         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10773         static NeonGenWidenFn * const widenfns[3][2] = {
10774             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10775             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10776             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10777         };
10778         NeonGenWidenFn *widenfn = widenfns[size][is_u];
10779 
10780         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10781         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10782         widenfn(tcg_op2_wide, tcg_op2);
10783         tcg_res[pass] = tcg_temp_new_i64();
10784         gen_neon_addl(size, (opcode == 3),
10785                       tcg_res[pass], tcg_op1, tcg_op2_wide);
10786     }
10787 
10788     for (pass = 0; pass < 2; pass++) {
10789         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10790     }
10791 }
10792 
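      /* Rounding narrow of the high half: add 2^31, then take bits [63:32]. */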
10793 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
10794 {
10795     tcg_gen_addi_i64(in, in, 1U << 31);
10796     tcg_gen_extrh_i64_i32(res, in);
10797 }
10798 
10799 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10800                                  int opcode, int rd, int rn, int rm)
10801 {
10802     TCGv_i32 tcg_res[2];
10803     int part = is_q ? 2 : 0;
10804     int pass;
10805 
10806     for (pass = 0; pass < 2; pass++) {
10807         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10808         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10809         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10810         static NeonGenNarrowFn * const narrowfns[3][2] = {
10811             { gen_helper_neon_narrow_high_u8,
10812               gen_helper_neon_narrow_round_high_u8 },
10813             { gen_helper_neon_narrow_high_u16,
10814               gen_helper_neon_narrow_round_high_u16 },
10815             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10816         };
10817         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10818 
10819         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10820         read_vec_element(s, tcg_op2, rm, pass, MO_64);
10821 
10822         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10823 
10824         tcg_res[pass] = tcg_temp_new_i32();
10825         gennarrow(tcg_res[pass], tcg_wideres);
10826     }
10827 
10828     for (pass = 0; pass < 2; pass++) {
10829         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10830     }
10831     clear_vec_high(s, is_q, rd);
10832 }
10833 
10834 /* AdvSIMD three different
10835  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
10836  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10837  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
10838  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10839  */
10840 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10841 {
10842     /* Instructions in this group fall into three basic classes
10843      * (in each case with the operation working on each element in
10844      * the input vectors):
10845      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10846      *     128 bit input)
10847      * (2) wide 64 x 128 -> 128
10848      * (3) narrowing 128 x 128 -> 64
10849      * Here we do initial decode, catch unallocated cases and
10850      * dispatch to separate functions for each class.
10851      */
10852     int is_q = extract32(insn, 30, 1);
10853     int is_u = extract32(insn, 29, 1);
10854     int size = extract32(insn, 22, 2);
10855     int opcode = extract32(insn, 12, 4);
10856     int rm = extract32(insn, 16, 5);
10857     int rn = extract32(insn, 5, 5);
10858     int rd = extract32(insn, 0, 5);
10859 
10860     switch (opcode) {
10861     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10862     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10863         /* 64 x 128 -> 128 */
10864         if (size == 3) {
10865             unallocated_encoding(s);
10866             return;
10867         }
10868         if (!fp_access_check(s)) {
10869             return;
10870         }
10871         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10872         break;
10873     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10874     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10875         /* 128 x 128 -> 64 */
10876         if (size == 3) {
10877             unallocated_encoding(s);
10878             return;
10879         }
10880         if (!fp_access_check(s)) {
10881             return;
10882         }
10883         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10884         break;
10885     case 14: /* PMULL, PMULL2 */
10886         if (is_u) {
10887             unallocated_encoding(s);
10888             return;
10889         }
10890         switch (size) {
10891         case 0: /* PMULL.P8 */
10892             if (!fp_access_check(s)) {
10893                 return;
10894             }
10895             /* The Q field specifies lo/hi half input for this insn.  */
10896             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10897                              gen_helper_neon_pmull_h);
10898             break;
10899 
10900         case 3: /* PMULL.P64 */
10901             if (!dc_isar_feature(aa64_pmull, s)) {
10902                 unallocated_encoding(s);
10903                 return;
10904             }
10905             if (!fp_access_check(s)) {
10906                 return;
10907             }
10908             /* The Q field specifies lo/hi half input for this insn.  */
10909             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10910                              gen_helper_gvec_pmull_q);
10911             break;
10912 
10913         default:
10914             unallocated_encoding(s);
10915             break;
10916         }
10917         return;
10918     case 9: /* SQDMLAL, SQDMLAL2 */
10919     case 11: /* SQDMLSL, SQDMLSL2 */
10920     case 13: /* SQDMULL, SQDMULL2 */
10921         if (is_u || size == 0) {
10922             unallocated_encoding(s);
10923             return;
10924         }
10925         /* fall through */
10926     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10927     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10928     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10929     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10930     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10931     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10932     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10933         /* 64 x 64 -> 128 */
10934         if (size == 3) {
10935             unallocated_encoding(s);
10936             return;
10937         }
10938         if (!fp_access_check(s)) {
10939             return;
10940         }
10941 
10942         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10943         break;
10944     default:
10945         /* opcode 15 not allocated */
10946         unallocated_encoding(s);
10947         break;
10948     }
10949 }
10950 
10951 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10952 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10953 {
10954     int rd = extract32(insn, 0, 5);
10955     int rn = extract32(insn, 5, 5);
10956     int rm = extract32(insn, 16, 5);
10957     int size = extract32(insn, 22, 2);
10958     bool is_u = extract32(insn, 29, 1);
10959     bool is_q = extract32(insn, 30, 1);
10960 
10961     if (!fp_access_check(s)) {
10962         return;
10963     }
10964 
10965     switch (size + 4 * is_u) {
10966     case 0: /* AND */
10967         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10968         return;
10969     case 1: /* BIC */
10970         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10971         return;
10972     case 2: /* ORR */
10973         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10974         return;
10975     case 3: /* ORN */
10976         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10977         return;
10978     case 4: /* EOR */
10979         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10980         return;
10981 
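    /*
     * BSL, BIT and BIF all map onto the one gvec bitsel primitive,
     * d = (t & sel) | (f & ~sel); only the operand roles differ:
     * BSL takes the selector from the existing contents of rd, while
     * BIT and BIF select on rm, inserting rn bits where the mask bit
     * is one (BIT) or zero (BIF).
     */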
10982     case 5: /* BSL bitwise select */
10983         gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
10984         return;
10985     case 6: /* BIT, bitwise insert if true */
10986         gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
10987         return;
10988     case 7: /* BIF, bitwise insert if false */
10989         gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
10990         return;
10991 
10992     default:
10993         g_assert_not_reached();
10994     }
10995 }
10996 
10997 /* Pairwise op subgroup of C3.6.16.
10998  *
10999  * This is called directly, or via disas_simd_3same_float for the float
11000  * pairwise operations, where the opcode and size are decoded differently.
11001  */
11002 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
11003                                    int size, int rn, int rm, int rd)
11004 {
11005     TCGv_ptr fpst;
11006     int pass;
11007 
11008     /* Floating point operations need fpst */
11009     if (opcode >= 0x58) {
11010         fpst = fpstatus_ptr(FPST_FPCR);
11011     } else {
11012         fpst = NULL;
11013     }
11014 
11015     if (!fp_access_check(s)) {
11016         return;
11017     }
11018 
11019     /* These operations work on the concatenated rm:rn, with each pair of
11020      * adjacent elements being operated on to produce an element in the result.
11021      */
11022     if (size == 3) {
11023         TCGv_i64 tcg_res[2];
11024 
11025         for (pass = 0; pass < 2; pass++) {
11026             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11027             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11028             int passreg = (pass == 0) ? rn : rm;
11029 
11030             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
11031             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
11032             tcg_res[pass] = tcg_temp_new_i64();
11033 
11034             switch (opcode) {
11035             case 0x17: /* ADDP */
11036                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11037                 break;
11038             case 0x58: /* FMAXNMP */
11039                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11040                 break;
11041             case 0x5a: /* FADDP */
11042                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11043                 break;
11044             case 0x5e: /* FMAXP */
11045                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11046                 break;
11047             case 0x78: /* FMINNMP */
11048                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11049                 break;
11050             case 0x7e: /* FMINP */
11051                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11052                 break;
11053             default:
11054                 g_assert_not_reached();
11055             }
11056         }
11057 
11058         for (pass = 0; pass < 2; pass++) {
11059             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11060         }
11061     } else {
11062         int maxpass = is_q ? 4 : 2;
11063         TCGv_i32 tcg_res[4];
11064 
11065         for (pass = 0; pass < maxpass; pass++) {
11066             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11067             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11068             NeonGenTwoOpFn *genfn = NULL;
11069             int passreg = pass < (maxpass / 2) ? rn : rm;
11070             int passelt = (is_q && (pass & 1)) ? 2 : 0;
11071 
11072             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
11073             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
11074             tcg_res[pass] = tcg_temp_new_i32();
11075 
11076             switch (opcode) {
11077             case 0x17: /* ADDP */
11078             {
11079                 static NeonGenTwoOpFn * const fns[3] = {
11080                     gen_helper_neon_padd_u8,
11081                     gen_helper_neon_padd_u16,
11082                     tcg_gen_add_i32,
11083                 };
11084                 genfn = fns[size];
11085                 break;
11086             }
11087             case 0x14: /* SMAXP, UMAXP */
11088             {
11089                 static NeonGenTwoOpFn * const fns[3][2] = {
11090                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
11091                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
11092                     { tcg_gen_smax_i32, tcg_gen_umax_i32 },
11093                 };
11094                 genfn = fns[size][u];
11095                 break;
11096             }
11097             case 0x15: /* SMINP, UMINP */
11098             {
11099                 static NeonGenTwoOpFn * const fns[3][2] = {
11100                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
11101                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
11102                     { tcg_gen_smin_i32, tcg_gen_umin_i32 },
11103                 };
11104                 genfn = fns[size][u];
11105                 break;
11106             }
11107             /* The FP operations are all on single floats (32 bit) */
11108             case 0x58: /* FMAXNMP */
11109                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11110                 break;
11111             case 0x5a: /* FADDP */
11112                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11113                 break;
11114             case 0x5e: /* FMAXP */
11115                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11116                 break;
11117             case 0x78: /* FMINNMP */
11118                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11119                 break;
11120             case 0x7e: /* FMINP */
11121                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11122                 break;
11123             default:
11124                 g_assert_not_reached();
11125             }
11126 
11127             /* The FP ops were emitted above; call the integer genfn now */
11128             if (genfn) {
11129                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
11130             }
11131         }
11132 
11133         for (pass = 0; pass < maxpass; pass++) {
11134             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11135         }
11136         clear_vec_high(s, is_q, rd);
11137     }
11138 }
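
/*
 * Worked example of the pass logic above: ADDP Vd.4S, Vn.4S, Vm.4S
 * yields Vd = { Vn[0]+Vn[1], Vn[2]+Vn[3], Vm[0]+Vm[1], Vm[2]+Vm[3] },
 * i.e. the low half of the result pairs up Vn and the high half pairs
 * up Vm, matching the concatenated rm:rn ordering described above.
 */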
11139 
11140 /* Floating point op subgroup of C3.6.16. */
11141 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
11142 {
11143     /* For floating point ops, the U, size[1] and opcode bits
11144      * together indicate the operation. size[0] indicates single
11145      * or double.
11146      */
11147     int fpopcode = extract32(insn, 11, 5)
11148         | (extract32(insn, 23, 1) << 5)
11149         | (extract32(insn, 29, 1) << 6);
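    /* e.g. FDIV encodes as U=1, size[1]=0, opcode=0x1f: fpopcode 0x5f */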
11150     int is_q = extract32(insn, 30, 1);
11151     int size = extract32(insn, 22, 1);
11152     int rm = extract32(insn, 16, 5);
11153     int rn = extract32(insn, 5, 5);
11154     int rd = extract32(insn, 0, 5);
11155 
11156     int datasize = is_q ? 128 : 64;
11157     int esize = 32 << size;
11158     int elements = datasize / esize;
11159 
11160     if (size == 1 && !is_q) {
11161         unallocated_encoding(s);
11162         return;
11163     }
11164 
11165     switch (fpopcode) {
11166     case 0x58: /* FMAXNMP */
11167     case 0x5a: /* FADDP */
11168     case 0x5e: /* FMAXP */
11169     case 0x78: /* FMINNMP */
11170     case 0x7e: /* FMINP */
11171         if (size && !is_q) {
11172             unallocated_encoding(s);
11173             return;
11174         }
11175         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
11176                                rn, rm, rd);
11177         return;
11178     case 0x1b: /* FMULX */
11179     case 0x1f: /* FRECPS */
11180     case 0x3f: /* FRSQRTS */
11181     case 0x5d: /* FACGE */
11182     case 0x7d: /* FACGT */
11183     case 0x19: /* FMLA */
11184     case 0x39: /* FMLS */
11185     case 0x18: /* FMAXNM */
11186     case 0x1a: /* FADD */
11187     case 0x1c: /* FCMEQ */
11188     case 0x1e: /* FMAX */
11189     case 0x38: /* FMINNM */
11190     case 0x3a: /* FSUB */
11191     case 0x3e: /* FMIN */
11192     case 0x5b: /* FMUL */
11193     case 0x5c: /* FCMGE */
11194     case 0x5f: /* FDIV */
11195     case 0x7a: /* FABD */
11196     case 0x7c: /* FCMGT */
11197         if (!fp_access_check(s)) {
11198             return;
11199         }
11200         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
11201         return;
11202 
11203     case 0x1d: /* FMLAL  */
11204     case 0x3d: /* FMLSL  */
11205     case 0x59: /* FMLAL2 */
11206     case 0x79: /* FMLSL2 */
11207         if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
11208             unallocated_encoding(s);
11209             return;
11210         }
11211         if (fp_access_check(s)) {
11212             int is_s = extract32(insn, 23, 1);
11213             int is_2 = extract32(insn, 29, 1);
11214             int data = (is_2 << 1) | is_s;
11215             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
11216                                vec_full_reg_offset(s, rn),
11217                                vec_full_reg_offset(s, rm), cpu_env,
11218                                is_q ? 16 : 8, vec_full_reg_size(s),
11219                                data, gen_helper_gvec_fmlal_a64);
11220         }
11221         return;
11222 
11223     default:
11224         unallocated_encoding(s);
11225         return;
11226     }
11227 }
11228 
11229 /* Integer op subgroup of C3.6.16. */
11230 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
11231 {
11232     int is_q = extract32(insn, 30, 1);
11233     int u = extract32(insn, 29, 1);
11234     int size = extract32(insn, 22, 2);
11235     int opcode = extract32(insn, 11, 5);
11236     int rm = extract32(insn, 16, 5);
11237     int rn = extract32(insn, 5, 5);
11238     int rd = extract32(insn, 0, 5);
11239     int pass;
11240     TCGCond cond;
11241 
11242     switch (opcode) {
11243     case 0x13: /* MUL, PMUL */
11244         if (u && size != 0) {
11245             unallocated_encoding(s);
11246             return;
11247         }
11248         /* fall through */
11249     case 0x0: /* SHADD, UHADD */
11250     case 0x2: /* SRHADD, URHADD */
11251     case 0x4: /* SHSUB, UHSUB */
11252     case 0xc: /* SMAX, UMAX */
11253     case 0xd: /* SMIN, UMIN */
11254     case 0xe: /* SABD, UABD */
11255     case 0xf: /* SABA, UABA */
11256     case 0x12: /* MLA, MLS */
11257         if (size == 3) {
11258             unallocated_encoding(s);
11259             return;
11260         }
11261         break;
11262     case 0x16: /* SQDMULH, SQRDMULH */
11263         if (size == 0 || size == 3) {
11264             unallocated_encoding(s);
11265             return;
11266         }
11267         break;
11268     default:
11269         if (size == 3 && !is_q) {
11270             unallocated_encoding(s);
11271             return;
11272         }
11273         break;
11274     }
11275 
11276     if (!fp_access_check(s)) {
11277         return;
11278     }
11279 
11280     switch (opcode) {
11281     case 0x01: /* SQADD, UQADD */
11282         if (u) {
11283             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
11284         } else {
11285             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
11286         }
11287         return;
11288     case 0x05: /* SQSUB, UQSUB */
11289         if (u) {
11290             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
11291         } else {
11292             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
11293         }
11294         return;
11295     case 0x08: /* SSHL, USHL */
11296         if (u) {
11297             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
11298         } else {
11299             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
11300         }
11301         return;
11302     case 0x0c: /* SMAX, UMAX */
11303         if (u) {
11304             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
11305         } else {
11306             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
11307         }
11308         return;
11309     case 0x0d: /* SMIN, UMIN */
11310         if (u) {
11311             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
11312         } else {
11313             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
11314         }
11315         return;
11316     case 0xe: /* SABD, UABD */
11317         if (u) {
11318             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
11319         } else {
11320             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
11321         }
11322         return;
11323     case 0xf: /* SABA, UABA */
11324         if (u) {
11325             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
11326         } else {
11327             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
11328         }
11329         return;
11330     case 0x10: /* ADD, SUB */
11331         if (u) {
11332             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
11333         } else {
11334             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
11335         }
11336         return;
11337     case 0x13: /* MUL, PMUL */
11338         if (!u) { /* MUL */
11339             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
11340         } else {  /* PMUL */
11341             gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
11342         }
11343         return;
11344     case 0x12: /* MLA, MLS */
11345         if (u) {
11346             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
11347         } else {
11348             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
11349         }
11350         return;
11351     case 0x16: /* SQDMULH, SQRDMULH */
11352         {
11353             static gen_helper_gvec_3_ptr * const fns[2][2] = {
11354                 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
11355                 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
11356             };
11357             gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
11358         }
11359         return;
11360     case 0x11:
11361         if (!u) { /* CMTST */
11362             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
11363             return;
11364         }
11365         /* else CMEQ */
11366         cond = TCG_COND_EQ;
11367         goto do_gvec_cmp;
11368     case 0x06: /* CMGT, CMHI */
11369         cond = u ? TCG_COND_GTU : TCG_COND_GT;
11370         goto do_gvec_cmp;
11371     case 0x07: /* CMGE, CMHS */
11372         cond = u ? TCG_COND_GEU : TCG_COND_GE;
11373     do_gvec_cmp:
11374         tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
11375                          vec_full_reg_offset(s, rn),
11376                          vec_full_reg_offset(s, rm),
11377                          is_q ? 16 : 8, vec_full_reg_size(s));
11378         return;
11379     }
11380 
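    /*
     * The remaining opcodes (SHADD, SRHADD, SHSUB and the SQSHL, SRSHL,
     * SQRSHL shifts, plus their unsigned forms) have no gvec expansion
     * here and are handled per element below; of these, only the shift
     * ops can reach the 64-bit-element path.
     */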
11381     if (size == 3) {
11382         assert(is_q);
11383         for (pass = 0; pass < 2; pass++) {
11384             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11385             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11386             TCGv_i64 tcg_res = tcg_temp_new_i64();
11387 
11388             read_vec_element(s, tcg_op1, rn, pass, MO_64);
11389             read_vec_element(s, tcg_op2, rm, pass, MO_64);
11390 
11391             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
11392 
11393             write_vec_element(s, tcg_res, rd, pass, MO_64);
11394         }
11395     } else {
11396         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11397             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11398             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11399             TCGv_i32 tcg_res = tcg_temp_new_i32();
11400             NeonGenTwoOpFn *genfn = NULL;
11401             NeonGenTwoOpEnvFn *genenvfn = NULL;
11402 
11403             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
11404             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
11405 
11406             switch (opcode) {
11407             case 0x0: /* SHADD, UHADD */
11408             {
11409                 static NeonGenTwoOpFn * const fns[3][2] = {
11410                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
11411                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
11412                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
11413                 };
11414                 genfn = fns[size][u];
11415                 break;
11416             }
11417             case 0x2: /* SRHADD, URHADD */
11418             {
11419                 static NeonGenTwoOpFn * const fns[3][2] = {
11420                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
11421                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
11422                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
11423                 };
11424                 genfn = fns[size][u];
11425                 break;
11426             }
11427             case 0x4: /* SHSUB, UHSUB */
11428             {
11429                 static NeonGenTwoOpFn * const fns[3][2] = {
11430                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
11431                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
11432                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
11433                 };
11434                 genfn = fns[size][u];
11435                 break;
11436             }
11437             case 0x9: /* SQSHL, UQSHL */
11438             {
11439                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11440                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
11441                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
11442                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
11443                 };
11444                 genenvfn = fns[size][u];
11445                 break;
11446             }
11447             case 0xa: /* SRSHL, URSHL */
11448             {
11449                 static NeonGenTwoOpFn * const fns[3][2] = {
11450                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
11451                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
11452                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
11453                 };
11454                 genfn = fns[size][u];
11455                 break;
11456             }
11457             case 0xb: /* SQRSHL, UQRSHL */
11458             {
11459                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11460                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
11461                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
11462                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
11463                 };
11464                 genenvfn = fns[size][u];
11465                 break;
11466             }
11467             default:
11468                 g_assert_not_reached();
11469             }
11470 
11471             if (genenvfn) {
11472                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
11473             } else {
11474                 genfn(tcg_res, tcg_op1, tcg_op2);
11475             }
11476 
11477             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11478         }
11479     }
11480     clear_vec_high(s, is_q, rd);
11481 }
11482 
11483 /* AdvSIMD three same
11484  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11485  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11486  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11487  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11488  */
11489 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11490 {
11491     int opcode = extract32(insn, 11, 5);
11492 
11493     switch (opcode) {
11494     case 0x3: /* logic ops */
11495         disas_simd_3same_logic(s, insn);
11496         break;
11497     case 0x17: /* ADDP */
11498     case 0x14: /* SMAXP, UMAXP */
11499     case 0x15: /* SMINP, UMINP */
11500     {
11501         /* Pairwise operations */
11502         int is_q = extract32(insn, 30, 1);
11503         int u = extract32(insn, 29, 1);
11504         int size = extract32(insn, 22, 2);
11505         int rm = extract32(insn, 16, 5);
11506         int rn = extract32(insn, 5, 5);
11507         int rd = extract32(insn, 0, 5);
11508         if (opcode == 0x17) {
11509             if (u || (size == 3 && !is_q)) {
11510                 unallocated_encoding(s);
11511                 return;
11512             }
11513         } else {
11514             if (size == 3) {
11515                 unallocated_encoding(s);
11516                 return;
11517             }
11518         }
11519         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11520         break;
11521     }
11522     case 0x18 ... 0x31:
11523         /* floating point ops, sz[1] and U are part of opcode */
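        /* (opcode is a 5-bit field, so this range is effectively 0x18-0x1f) */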
11524         disas_simd_3same_float(s, insn);
11525         break;
11526     default:
11527         disas_simd_3same_int(s, insn);
11528         break;
11529     }
11530 }
11531 
11532 /*
11533  * Advanced SIMD three same (ARMv8.2 FP16 variants)
11534  *
11535  *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11536  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11537  * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11538  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11539  *
11540  * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11541  * (register), FACGE, FABD, FCMGT (register) and FACGT.
11542  *
11543  */
11544 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11545 {
11546     int opcode = extract32(insn, 11, 3);
11547     int u = extract32(insn, 29, 1);
11548     int a = extract32(insn, 23, 1);
11549     int is_q = extract32(insn, 30, 1);
11550     int rm = extract32(insn, 16, 5);
11551     int rn = extract32(insn, 5, 5);
11552     int rd = extract32(insn, 0, 5);
11553     /*
11554      * For these floating point ops, the U, a and opcode bits
11555      * together indicate the operation.
11556      */
11557     int fpopcode = opcode | (a << 3) | (u << 4);
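    /* e.g. FADD is opcode 2 with a=u=0 (0x02); FADDP sets u, giving 0x12 */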
11558     int datasize = is_q ? 128 : 64;
11559     int elements = datasize / 16;
11560     bool pairwise;
11561     TCGv_ptr fpst;
11562     int pass;
11563 
11564     switch (fpopcode) {
11565     case 0x0: /* FMAXNM */
11566     case 0x1: /* FMLA */
11567     case 0x2: /* FADD */
11568     case 0x3: /* FMULX */
11569     case 0x4: /* FCMEQ */
11570     case 0x6: /* FMAX */
11571     case 0x7: /* FRECPS */
11572     case 0x8: /* FMINNM */
11573     case 0x9: /* FMLS */
11574     case 0xa: /* FSUB */
11575     case 0xe: /* FMIN */
11576     case 0xf: /* FRSQRTS */
11577     case 0x13: /* FMUL */
11578     case 0x14: /* FCMGE */
11579     case 0x15: /* FACGE */
11580     case 0x17: /* FDIV */
11581     case 0x1a: /* FABD */
11582     case 0x1c: /* FCMGT */
11583     case 0x1d: /* FACGT */
11584         pairwise = false;
11585         break;
11586     case 0x10: /* FMAXNMP */
11587     case 0x12: /* FADDP */
11588     case 0x16: /* FMAXP */
11589     case 0x18: /* FMINNMP */
11590     case 0x1e: /* FMINP */
11591         pairwise = true;
11592         break;
11593     default:
11594         unallocated_encoding(s);
11595         return;
11596     }
11597 
11598     if (!dc_isar_feature(aa64_fp16, s)) {
11599         unallocated_encoding(s);
11600         return;
11601     }
11602 
11603     if (!fp_access_check(s)) {
11604         return;
11605     }
11606 
11607     fpst = fpstatus_ptr(FPST_FPCR_F16);
11608 
11609     if (pairwise) {
11610         int maxpass = is_q ? 8 : 4;
11611         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11612         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11613         TCGv_i32 tcg_res[8];
11614 
11615         for (pass = 0; pass < maxpass; pass++) {
11616             int passreg = pass < (maxpass / 2) ? rn : rm;
11617             int passelt = (pass << 1) & (maxpass - 1);
11618 
11619             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11620             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11621             tcg_res[pass] = tcg_temp_new_i32();
11622 
11623             switch (fpopcode) {
11624             case 0x10: /* FMAXNMP */
11625                 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11626                                            fpst);
11627                 break;
11628             case 0x12: /* FADDP */
11629                 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11630                 break;
11631             case 0x16: /* FMAXP */
11632                 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11633                 break;
11634             case 0x18: /* FMINNMP */
11635                 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11636                                            fpst);
11637                 break;
11638             case 0x1e: /* FMINP */
11639                 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11640                 break;
11641             default:
11642                 g_assert_not_reached();
11643             }
11644         }
11645 
11646         for (pass = 0; pass < maxpass; pass++) {
11647             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11648         }
11649     } else {
11650         for (pass = 0; pass < elements; pass++) {
11651             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11652             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11653             TCGv_i32 tcg_res = tcg_temp_new_i32();
11654 
11655             read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11656             read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11657 
11658             switch (fpopcode) {
11659             case 0x0: /* FMAXNM */
11660                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11661                 break;
11662             case 0x1: /* FMLA */
11663                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11664                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11665                                            fpst);
11666                 break;
11667             case 0x2: /* FADD */
11668                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11669                 break;
11670             case 0x3: /* FMULX */
11671                 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11672                 break;
11673             case 0x4: /* FCMEQ */
11674                 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11675                 break;
11676             case 0x6: /* FMAX */
11677                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11678                 break;
11679             case 0x7: /* FRECPS */
11680                 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11681                 break;
11682             case 0x8: /* FMINNM */
11683                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11684                 break;
11685             case 0x9: /* FMLS */
11686                 /* As usual for ARM, separate negation for fused multiply-add */
11687                 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11688                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11689                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11690                                            fpst);
11691                 break;
11692             case 0xa: /* FSUB */
11693                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11694                 break;
11695             case 0xe: /* FMIN */
11696                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11697                 break;
11698             case 0xf: /* FRSQRTS */
11699                 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11700                 break;
11701             case 0x13: /* FMUL */
11702                 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11703                 break;
11704             case 0x14: /* FCMGE */
11705                 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11706                 break;
11707             case 0x15: /* FACGE */
11708                 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11709                 break;
11710             case 0x17: /* FDIV */
11711                 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11712                 break;
11713             case 0x1a: /* FABD */
11714                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11715                 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
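                /* i.e. FABD == fabs(a - b): mask off the f16 sign bit */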
11716                 break;
11717             case 0x1c: /* FCMGT */
11718                 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11719                 break;
11720             case 0x1d: /* FACGT */
11721                 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11722                 break;
11723             default:
11724                 g_assert_not_reached();
11725             }
11726 
11727             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11728         }
11729     }
11730 
11731     clear_vec_high(s, is_q, rd);
11732 }
11733 
11734 /* AdvSIMD three same extra
11735  *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
11736  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11737  * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
11738  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11739  */
11740 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
11741 {
11742     int rd = extract32(insn, 0, 5);
11743     int rn = extract32(insn, 5, 5);
11744     int opcode = extract32(insn, 11, 4);
11745     int rm = extract32(insn, 16, 5);
11746     int size = extract32(insn, 22, 2);
11747     bool u = extract32(insn, 29, 1);
11748     bool is_q = extract32(insn, 30, 1);
11749     bool feature;
11750     int rot;
11751 
11752     switch (u * 16 + opcode) {
11753     case 0x10: /* SQRDMLAH (vector) */
11754     case 0x11: /* SQRDMLSH (vector) */
11755         if (size != 1 && size != 2) {
11756             unallocated_encoding(s);
11757             return;
11758         }
11759         feature = dc_isar_feature(aa64_rdm, s);
11760         break;
11761     case 0x02: /* SDOT (vector) */
11762     case 0x12: /* UDOT (vector) */
11763         if (size != MO_32) {
11764             unallocated_encoding(s);
11765             return;
11766         }
11767         feature = dc_isar_feature(aa64_dp, s);
11768         break;
11769     case 0x03: /* USDOT */
11770         if (size != MO_32) {
11771             unallocated_encoding(s);
11772             return;
11773         }
11774         feature = dc_isar_feature(aa64_i8mm, s);
11775         break;
11776     case 0x04: /* SMMLA */
11777     case 0x14: /* UMMLA */
11778     case 0x05: /* USMMLA */
11779         if (!is_q || size != MO_32) {
11780             unallocated_encoding(s);
11781             return;
11782         }
11783         feature = dc_isar_feature(aa64_i8mm, s);
11784         break;
11785     case 0x18: /* FCMLA, #0 */
11786     case 0x19: /* FCMLA, #90 */
11787     case 0x1a: /* FCMLA, #180 */
11788     case 0x1b: /* FCMLA, #270 */
11789     case 0x1c: /* FCADD, #90 */
11790     case 0x1e: /* FCADD, #270 */
11791         if (size == 0
11792             || (size == 1 && !dc_isar_feature(aa64_fp16, s))
11793             || (size == 3 && !is_q)) {
11794             unallocated_encoding(s);
11795             return;
11796         }
11797         feature = dc_isar_feature(aa64_fcma, s);
11798         break;
11799     case 0x1d: /* BFMMLA */
11800         if (size != MO_16 || !is_q) {
11801             unallocated_encoding(s);
11802             return;
11803         }
11804         feature = dc_isar_feature(aa64_bf16, s);
11805         break;
11806     case 0x1f:
11807         switch (size) {
11808         case 1: /* BFDOT */
11809         case 3: /* BFMLAL{B,T} */
11810             feature = dc_isar_feature(aa64_bf16, s);
11811             break;
11812         default:
11813             unallocated_encoding(s);
11814             return;
11815         }
11816         break;
11817     default:
11818         unallocated_encoding(s);
11819         return;
11820     }
11821     if (!feature) {
11822         unallocated_encoding(s);
11823         return;
11824     }
11825     if (!fp_access_check(s)) {
11826         return;
11827     }
11828 
11829     switch (opcode) {
11830     case 0x0: /* SQRDMLAH (vector) */
11831         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
11832         return;
11833 
11834     case 0x1: /* SQRDMLSH (vector) */
11835         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
11836         return;
11837 
11838     case 0x2: /* SDOT / UDOT */
11839         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
11840                          u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
11841         return;
11842 
11843     case 0x3: /* USDOT */
11844         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
11845         return;
11846 
11847     case 0x04: /* SMMLA, UMMLA */
11848         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
11849                          u ? gen_helper_gvec_ummla_b
11850                          : gen_helper_gvec_smmla_b);
11851         return;
11852     case 0x05: /* USMMLA */
11853         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
11854         return;
11855 
11856     case 0x8: /* FCMLA, #0 */
11857     case 0x9: /* FCMLA, #90 */
11858     case 0xa: /* FCMLA, #180 */
11859     case 0xb: /* FCMLA, #270 */
11860         rot = extract32(opcode, 0, 2);
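        /* rot encodes the FCMLA rotation: 0/1/2/3 -> #0/#90/#180/#270 */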
11861         switch (size) {
11862         case 1:
11863             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
11864                               gen_helper_gvec_fcmlah);
11865             break;
11866         case 2:
11867             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11868                               gen_helper_gvec_fcmlas);
11869             break;
11870         case 3:
11871             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11872                               gen_helper_gvec_fcmlad);
11873             break;
11874         default:
11875             g_assert_not_reached();
11876         }
11877         return;
11878 
11879     case 0xc: /* FCADD, #90 */
11880     case 0xe: /* FCADD, #270 */
11881         rot = extract32(opcode, 1, 1);
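        /* here rot is 0 for FCADD #90 and 1 for FCADD #270 */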
11882         switch (size) {
11883         case 1:
11884             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11885                               gen_helper_gvec_fcaddh);
11886             break;
11887         case 2:
11888             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11889                               gen_helper_gvec_fcadds);
11890             break;
11891         case 3:
11892             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11893                               gen_helper_gvec_fcaddd);
11894             break;
11895         default:
11896             g_assert_not_reached();
11897         }
11898         return;
11899 
11900     case 0xd: /* BFMMLA */
11901         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
11902         return;
11903     case 0xf:
11904         switch (size) {
11905         case 1: /* BFDOT */
11906             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
11907             break;
11908         case 3: /* BFMLAL{B,T} */
11909             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
11910                               gen_helper_gvec_bfmlal);
11911             break;
11912         default:
11913             g_assert_not_reached();
11914         }
11915         return;
11916 
11917     default:
11918         g_assert_not_reached();
11919     }
11920 }
11921 
11922 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11923                                   int size, int rn, int rd)
11924 {
11925     /* Handle 2-reg-misc ops which are widening (so each size element
11926      * in the source becomes a 2*size element in the destination).
11927      * The only instruction like this is FCVTL.
11928      */
11929     int pass;
11930 
11931     if (size == 3) {
11932         /* 32 -> 64 bit fp conversion */
11933         TCGv_i64 tcg_res[2];
11934         int srcelt = is_q ? 2 : 0;
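        /* is_q distinguishes FCVTL (low half) from FCVTL2 (high half) */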
11935 
11936         for (pass = 0; pass < 2; pass++) {
11937             TCGv_i32 tcg_op = tcg_temp_new_i32();
11938             tcg_res[pass] = tcg_temp_new_i64();
11939 
11940             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11941             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
11942         }
11943         for (pass = 0; pass < 2; pass++) {
11944             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11945         }
11946     } else {
11947         /* 16 -> 32 bit fp conversion */
11948         int srcelt = is_q ? 4 : 0;
11949         TCGv_i32 tcg_res[4];
11950         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
11951         TCGv_i32 ahp = get_ahp_flag();
11952 
11953         for (pass = 0; pass < 4; pass++) {
11954             tcg_res[pass] = tcg_temp_new_i32();
11955 
11956             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11957             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11958                                            fpst, ahp);
11959         }
11960         for (pass = 0; pass < 4; pass++) {
11961             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11962         }
11963     }
11964 }
11965 
11966 static void handle_rev(DisasContext *s, int opcode, bool u,
11967                        bool is_q, int size, int rn, int rd)
11968 {
11969     int op = (opcode << 1) | u;
11970     int opsz = op + size;
11971     int grp_size = 3 - opsz;
11972     int dsize = is_q ? 128 : 64;
11973     int i;
11974 
11975     if (opsz >= 3) {
11976         unallocated_encoding(s);
11977         return;
11978     }
11979 
11980     if (!fp_access_check(s)) {
11981         return;
11982     }
11983 
11984     if (size == 0) {
11985         /* Special case bytes, use bswap op on each group of elements */
11986         int groups = dsize / (8 << grp_size);
11987 
11988         for (i = 0; i < groups; i++) {
11989             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11990 
11991             read_vec_element(s, tcg_tmp, rn, i, grp_size);
11992             switch (grp_size) {
11993             case MO_16:
11994                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11995                 break;
11996             case MO_32:
11997                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11998                 break;
11999             case MO_64:
12000                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
12001                 break;
12002             default:
12003                 g_assert_not_reached();
12004             }
12005             write_vec_element(s, tcg_tmp, rd, i, grp_size);
12006         }
12007         clear_vec_high(s, is_q, rd);
12008     } else {
12009         int revmask = (1 << grp_size) - 1;
12010         int esize = 8 << size;
12011         int elements = dsize / esize;
12012         TCGv_i64 tcg_rn = tcg_temp_new_i64();
12013         TCGv_i64 tcg_rd[2];
12014 
12015         for (i = 0; i < 2; i++) {
12016             tcg_rd[i] = tcg_temp_new_i64();
12017             tcg_gen_movi_i64(tcg_rd[i], 0);
12018         }
12019 
12020         for (i = 0; i < elements; i++) {
12021             int e_rev = (i & 0xf) ^ revmask;
12022             int w = (e_rev * esize) / 64;
12023             int o = (e_rev * esize) % 64;
12024 
12025             read_vec_element(s, tcg_rn, rn, i, size);
12026             tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
12027         }
12028 
12029         for (i = 0; i < 2; i++) {
12030             write_vec_element(s, tcg_rd[i], rd, i, MO_64);
12031         }
12032         clear_vec_high(s, true, rd);
12033     }
12034 }
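
/*
 * Example of the index arithmetic above: REV64 Vd.4H (opcode 0, U=0,
 * size=1) gives grp_size 2 and revmask 3, so source element i lands at
 * position i ^ 3 within its 64-bit group, turning h0..h3 into h3..h0.
 */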
12035 
12036 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
12037                                   bool is_q, int size, int rn, int rd)
12038 {
12039     /* Implement the pairwise operations from 2-misc:
12040      * SADDLP, UADDLP, SADALP, UADALP.
12041      * These all add pairs of elements in the input to produce a
12042      * double-width result element in the output (possibly accumulating).
12043      */
12044     bool accum = (opcode == 0x6);
12045     int maxpass = is_q ? 2 : 1;
12046     int pass;
12047     TCGv_i64 tcg_res[2];
12048 
12049     if (size == 2) {
12050         /* 32 + 32 -> 64 op */
12051         MemOp memop = size + (u ? 0 : MO_SIGN);
12052 
12053         for (pass = 0; pass < maxpass; pass++) {
12054             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
12055             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
12056 
12057             tcg_res[pass] = tcg_temp_new_i64();
12058 
12059             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
12060             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
12061             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
12062             if (accum) {
12063                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
12064                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
12065             }
12066         }
12067     } else {
12068         for (pass = 0; pass < maxpass; pass++) {
12069             TCGv_i64 tcg_op = tcg_temp_new_i64();
12070             NeonGenOne64OpFn *genfn;
12071             static NeonGenOne64OpFn * const fns[2][2] = {
12072                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
12073                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
12074             };
12075 
12076             genfn = fns[size][u];
12077 
12078             tcg_res[pass] = tcg_temp_new_i64();
12079 
12080             read_vec_element(s, tcg_op, rn, pass, MO_64);
12081             genfn(tcg_res[pass], tcg_op);
12082 
12083             if (accum) {
12084                 read_vec_element(s, tcg_op, rd, pass, MO_64);
12085                 if (size == 0) {
12086                     gen_helper_neon_addl_u16(tcg_res[pass],
12087                                              tcg_res[pass], tcg_op);
12088                 } else {
12089                     gen_helper_neon_addl_u32(tcg_res[pass],
12090                                              tcg_res[pass], tcg_op);
12091                 }
12092             }
12093         }
12094     }
12095     if (!is_q) {
12096         tcg_res[1] = tcg_constant_i64(0);
12097     }
12098     for (pass = 0; pass < 2; pass++) {
12099         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12100     }
12101 }
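
/*
 * e.g. SADDLP Vd.2S, Vn.4H sums adjacent signed halfword pairs into
 * 32-bit lanes, while the SADALP/UADALP forms ('accum' above) also
 * add in the previous contents of Vd.
 */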
12102 
12103 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
12104 {
12105     /* Implement SHLL and SHLL2 */
12106     int pass;
12107     int part = is_q ? 2 : 0;
12108     TCGv_i64 tcg_res[2];
12109 
12110     for (pass = 0; pass < 2; pass++) {
12111         static NeonGenWidenFn * const widenfns[3] = {
12112             gen_helper_neon_widen_u8,
12113             gen_helper_neon_widen_u16,
12114             tcg_gen_extu_i32_i64,
12115         };
12116         NeonGenWidenFn *widenfn = widenfns[size];
12117         TCGv_i32 tcg_op = tcg_temp_new_i32();
12118 
12119         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
12120         tcg_res[pass] = tcg_temp_new_i64();
12121         widenfn(tcg_res[pass], tcg_op);
12122         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
12123     }
12124 
12125     for (pass = 0; pass < 2; pass++) {
12126         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12127     }
12128 }
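
/*
 * Note that SHLL's shift amount is implied by the source element size:
 * e.g. SHLL Vd.8H, Vn.8B, #8 widens each byte and shifts left by 8.
 */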
12129 
12130 /* AdvSIMD two reg misc
12131  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
12132  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
12133  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12134  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
12135  */
12136 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
12137 {
12138     int size = extract32(insn, 22, 2);
12139     int opcode = extract32(insn, 12, 5);
12140     bool u = extract32(insn, 29, 1);
12141     bool is_q = extract32(insn, 30, 1);
12142     int rn = extract32(insn, 5, 5);
12143     int rd = extract32(insn, 0, 5);
12144     bool need_fpstatus = false;
12145     int rmode = -1;
12146     TCGv_i32 tcg_rmode;
12147     TCGv_ptr tcg_fpstatus;
12148 
12149     switch (opcode) {
12150     case 0x0: /* REV64, REV32 */
12151     case 0x1: /* REV16 */
12152         handle_rev(s, opcode, u, is_q, size, rn, rd);
12153         return;
12154     case 0x5: /* CNT, NOT, RBIT */
12155         if (u && size == 0) {
12156             /* NOT */
12157             break;
12158         } else if (u && size == 1) {
12159             /* RBIT */
12160             break;
12161         } else if (!u && size == 0) {
12162             /* CNT */
12163             break;
12164         }
12165         unallocated_encoding(s);
12166         return;
12167     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
12168     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
12169         if (size == 3) {
12170             unallocated_encoding(s);
12171             return;
12172         }
12173         if (!fp_access_check(s)) {
12174             return;
12175         }
12176 
12177         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
12178         return;
12179     case 0x4: /* CLS, CLZ */
12180         if (size == 3) {
12181             unallocated_encoding(s);
12182             return;
12183         }
12184         break;
12185     case 0x2: /* SADDLP, UADDLP */
12186     case 0x6: /* SADALP, UADALP */
12187         if (size == 3) {
12188             unallocated_encoding(s);
12189             return;
12190         }
12191         if (!fp_access_check(s)) {
12192             return;
12193         }
12194         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
12195         return;
12196     case 0x13: /* SHLL, SHLL2 */
12197         if (u == 0 || size == 3) {
12198             unallocated_encoding(s);
12199             return;
12200         }
12201         if (!fp_access_check(s)) {
12202             return;
12203         }
12204         handle_shll(s, is_q, size, rn, rd);
12205         return;
12206     case 0xa: /* CMLT */
12207         if (u == 1) {
12208             unallocated_encoding(s);
12209             return;
12210         }
12211         /* fall through */
12212     case 0x8: /* CMGT, CMGE */
12213     case 0x9: /* CMEQ, CMLE */
12214     case 0xb: /* ABS, NEG */
12215         if (size == 3 && !is_q) {
12216             unallocated_encoding(s);
12217             return;
12218         }
12219         break;
12220     case 0x3: /* SUQADD, USQADD */
12221         if (size == 3 && !is_q) {
12222             unallocated_encoding(s);
12223             return;
12224         }
12225         if (!fp_access_check(s)) {
12226             return;
12227         }
12228         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
12229         return;
12230     case 0x7: /* SQABS, SQNEG */
12231         if (size == 3 && !is_q) {
12232             unallocated_encoding(s);
12233             return;
12234         }
12235         break;
12236     case 0xc ... 0xf:
12237     case 0x16 ... 0x1f:
12238     {
12239         /* Floating point: U, size[1] and opcode indicate operation;
12240          * size[0] indicates single or double precision.
12241          */
12242         int is_double = extract32(size, 0, 1);
12243         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
12244         size = is_double ? 3 : 2;
12245         switch (opcode) {
12246         case 0x2f: /* FABS */
12247         case 0x6f: /* FNEG */
12248             if (size == 3 && !is_q) {
12249                 unallocated_encoding(s);
12250                 return;
12251             }
12252             break;
12253         case 0x1d: /* SCVTF */
12254         case 0x5d: /* UCVTF */
12255         {
12256             bool is_signed = (opcode == 0x1d);
12257             int elements = is_double ? 2 : is_q ? 4 : 2;
12258             if (is_double && !is_q) {
12259                 unallocated_encoding(s);
12260                 return;
12261             }
12262             if (!fp_access_check(s)) {
12263                 return;
12264             }
12265             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12266             return;
12267         }
12268         case 0x2c: /* FCMGT (zero) */
12269         case 0x2d: /* FCMEQ (zero) */
12270         case 0x2e: /* FCMLT (zero) */
12271         case 0x6c: /* FCMGE (zero) */
12272         case 0x6d: /* FCMLE (zero) */
12273             if (size == 3 && !is_q) {
12274                 unallocated_encoding(s);
12275                 return;
12276             }
12277             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12278             return;
12279         case 0x7f: /* FSQRT */
12280             if (size == 3 && !is_q) {
12281                 unallocated_encoding(s);
12282                 return;
12283             }
12284             break;
12285         case 0x1a: /* FCVTNS */
12286         case 0x1b: /* FCVTMS */
12287         case 0x3a: /* FCVTPS */
12288         case 0x3b: /* FCVTZS */
12289         case 0x5a: /* FCVTNU */
12290         case 0x5b: /* FCVTMU */
12291         case 0x7a: /* FCVTPU */
12292         case 0x7b: /* FCVTZU */
12293             need_fpstatus = true;
12294             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
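            /* This maps the FCVTN/P/M/Z variants to FPROUNDING_TIEEVEN,
             * POSINF, NEGINF and ZERO respectively.
             */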
12295             if (size == 3 && !is_q) {
12296                 unallocated_encoding(s);
12297                 return;
12298             }
12299             break;
12300         case 0x5c: /* FCVTAU */
12301         case 0x1c: /* FCVTAS */
12302             need_fpstatus = true;
12303             rmode = FPROUNDING_TIEAWAY;
12304             if (size == 3 && !is_q) {
12305                 unallocated_encoding(s);
12306                 return;
12307             }
12308             break;
12309         case 0x3c: /* URECPE */
12310             if (size == 3) {
12311                 unallocated_encoding(s);
12312                 return;
12313             }
12314             /* fall through */
12315         case 0x3d: /* FRECPE */
12316         case 0x7d: /* FRSQRTE */
12317             if (size == 3 && !is_q) {
12318                 unallocated_encoding(s);
12319                 return;
12320             }
12321             if (!fp_access_check(s)) {
12322                 return;
12323             }
12324             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12325             return;
12326         case 0x56: /* FCVTXN, FCVTXN2 */
12327             if (size == 2) {
12328                 unallocated_encoding(s);
12329                 return;
12330             }
12331             /* fall through */
12332         case 0x16: /* FCVTN, FCVTN2 */
12333             /* handle_2misc_narrow does a 2*size -> size operation, but these
12334              * instructions encode the source size rather than dest size.
12335              */
12336             if (!fp_access_check(s)) {
12337                 return;
12338             }
12339             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12340             return;
12341         case 0x36: /* BFCVTN, BFCVTN2 */
12342             if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
12343                 unallocated_encoding(s);
12344                 return;
12345             }
12346             if (!fp_access_check(s)) {
12347                 return;
12348             }
12349             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12350             return;
12351         case 0x17: /* FCVTL, FCVTL2 */
12352             if (!fp_access_check(s)) {
12353                 return;
12354             }
12355             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12356             return;
12357         case 0x18: /* FRINTN */
12358         case 0x19: /* FRINTM */
12359         case 0x38: /* FRINTP */
12360         case 0x39: /* FRINTZ */
12361             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12362             /* fall through */
12363         case 0x59: /* FRINTX */
12364         case 0x79: /* FRINTI */
12365             need_fpstatus = true;
12366             if (size == 3 && !is_q) {
12367                 unallocated_encoding(s);
12368                 return;
12369             }
12370             break;
12371         case 0x58: /* FRINTA */
12372             rmode = FPROUNDING_TIEAWAY;
12373             need_fpstatus = true;
12374             if (size == 3 && !is_q) {
12375                 unallocated_encoding(s);
12376                 return;
12377             }
12378             break;
12379         case 0x7c: /* URSQRTE */
12380             if (size == 3) {
12381                 unallocated_encoding(s);
12382                 return;
12383             }
12384             break;
12385         case 0x1e: /* FRINT32Z */
12386         case 0x1f: /* FRINT64Z */
12387             rmode = FPROUNDING_ZERO;
12388             /* fall through */
12389         case 0x5e: /* FRINT32X */
12390         case 0x5f: /* FRINT64X */
12391             need_fpstatus = true;
12392             if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12393                 unallocated_encoding(s);
12394                 return;
12395             }
12396             break;
12397         default:
12398             unallocated_encoding(s);
12399             return;
12400         }
12401         break;
12402     }
12403     default:
12404         unallocated_encoding(s);
12405         return;
12406     }
12407 
12408     if (!fp_access_check(s)) {
12409         return;
12410     }
12411 
12412     if (need_fpstatus || rmode >= 0) {
12413         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
12414     } else {
12415         tcg_fpstatus = NULL;
12416     }
12417     if (rmode >= 0) {
12418         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12419     } else {
12420         tcg_rmode = NULL;
12421     }
12422 
12423     switch (opcode) {
12424     case 0x5:
12425         if (u && size == 0) { /* NOT */
12426             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12427             return;
12428         }
12429         break;
12430     case 0x8: /* CMGT, CMGE */
12431         if (u) {
12432             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
12433         } else {
12434             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
12435         }
12436         return;
12437     case 0x9: /* CMEQ, CMLE */
12438         if (u) {
12439             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
12440         } else {
12441             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
12442         }
12443         return;
12444     case 0xa: /* CMLT */
12445         gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
12446         return;
12447     case 0xb:
12448         if (u) { /* ABS, NEG */
12449             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12450         } else {
12451             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12452         }
12453         return;
12454     }
12455 
12456     if (size == 3) {
12457         /* All 64-bit element operations can be shared with scalar 2misc */
12458         int pass;
12459 
12460         /* Coverity claims (size == 3 && !is_q) has been eliminated
12461          * from all paths leading to here.
12462          */
12463         tcg_debug_assert(is_q);
12464         for (pass = 0; pass < 2; pass++) {
12465             TCGv_i64 tcg_op = tcg_temp_new_i64();
12466             TCGv_i64 tcg_res = tcg_temp_new_i64();
12467 
12468             read_vec_element(s, tcg_op, rn, pass, MO_64);
12469 
12470             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12471                             tcg_rmode, tcg_fpstatus);
12472 
12473             write_vec_element(s, tcg_res, rd, pass, MO_64);
12474         }
12475     } else {
12476         int pass;
12477 
12478         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12479             TCGv_i32 tcg_op = tcg_temp_new_i32();
12480             TCGv_i32 tcg_res = tcg_temp_new_i32();
12481 
12482             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12483 
12484             if (size == 2) {
12485                 /* Special cases for 32 bit elements */
12486                 switch (opcode) {
12487                 case 0x4: /* CLS, CLZ */
12488                     if (u) {
12489                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12490                     } else {
12491                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
12492                     }
12493                     break;
12494                 case 0x7: /* SQABS, SQNEG */
12495                     if (u) {
12496                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12497                     } else {
12498                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12499                     }
12500                     break;
12501                 case 0x2f: /* FABS */
12502                     gen_helper_vfp_abss(tcg_res, tcg_op);
12503                     break;
12504                 case 0x6f: /* FNEG */
12505                     gen_helper_vfp_negs(tcg_res, tcg_op);
12506                     break;
12507                 case 0x7f: /* FSQRT */
12508                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12509                     break;
12510                 case 0x1a: /* FCVTNS */
12511                 case 0x1b: /* FCVTMS */
12512                 case 0x1c: /* FCVTAS */
12513                 case 0x3a: /* FCVTPS */
12514                 case 0x3b: /* FCVTZS */
12515                     gen_helper_vfp_tosls(tcg_res, tcg_op,
12516                                          tcg_constant_i32(0), tcg_fpstatus);
12517                     break;
12518                 case 0x5a: /* FCVTNU */
12519                 case 0x5b: /* FCVTMU */
12520                 case 0x5c: /* FCVTAU */
12521                 case 0x7a: /* FCVTPU */
12522                 case 0x7b: /* FCVTZU */
12523                     gen_helper_vfp_touls(tcg_res, tcg_op,
12524                                          tcg_constant_i32(0), tcg_fpstatus);
12525                     break;
12526                 case 0x18: /* FRINTN */
12527                 case 0x19: /* FRINTM */
12528                 case 0x38: /* FRINTP */
12529                 case 0x39: /* FRINTZ */
12530                 case 0x58: /* FRINTA */
12531                 case 0x79: /* FRINTI */
12532                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12533                     break;
12534                 case 0x59: /* FRINTX */
12535                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12536                     break;
12537                 case 0x7c: /* URSQRTE */
12538                     gen_helper_rsqrte_u32(tcg_res, tcg_op);
12539                     break;
12540                 case 0x1e: /* FRINT32Z */
12541                 case 0x5e: /* FRINT32X */
12542                     gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12543                     break;
12544                 case 0x1f: /* FRINT64Z */
12545                 case 0x5f: /* FRINT64X */
12546                     gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12547                     break;
12548                 default:
12549                     g_assert_not_reached();
12550                 }
12551             } else {
12552                 /* Use helpers for 8 and 16 bit elements */
12553                 switch (opcode) {
12554                 case 0x5: /* CNT, RBIT */
12555                     /* For these two insns size is part of the opcode specifier
12556                      * (handled earlier); they always operate on byte elements.
12557                      */
12558                     if (u) {
12559                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12560                     } else {
12561                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12562                     }
12563                     break;
12564                 case 0x7: /* SQABS, SQNEG */
12565                 {
12566                     NeonGenOneOpEnvFn *genfn;
12567                     static NeonGenOneOpEnvFn * const fns[2][2] = {
12568                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12569                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12570                     };
12571                     genfn = fns[size][u];
12572                     genfn(tcg_res, cpu_env, tcg_op);
12573                     break;
12574                 }
12575                 case 0x4: /* CLS, CLZ */
12576                     if (u) {
12577                         if (size == 0) {
12578                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
12579                         } else {
12580                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
12581                         }
12582                     } else {
12583                         if (size == 0) {
12584                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
12585                         } else {
12586                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
12587                         }
12588                     }
12589                     break;
12590                 default:
12591                     g_assert_not_reached();
12592                 }
12593             }
12594 
12595             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12596         }
12597     }
12598     clear_vec_high(s, is_q, rd);
12599 
12600     if (tcg_rmode) {
12601         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12602     }
12603 }
12604 
12605 /* AdvSIMD [scalar] two register miscellaneous (FP16)
12606  *
12607  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
12608  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12609  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12610  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12611  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 == 0x8f7e0c00
12612  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 == 0x0e780800
12613  *
12614  * This actually covers two groups where scalar access is governed by
12615  * bit 28. Several of the instructions (the float-to-integral group)
12616  * exist only in the vector form and are unallocated in the scalar
12617  * decode. Also, in the scalar decode Q is always 1.
12618  */
12619 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12620 {
12621     int fpop, opcode, a, u;
12622     int rn, rd;
12623     bool is_q;
12624     bool is_scalar;
12625     bool only_in_vector = false;
12626 
12627     int pass;
12628     TCGv_i32 tcg_rmode = NULL;
12629     TCGv_ptr tcg_fpstatus = NULL;
12630     bool need_fpst = true;
12631     int rmode = -1;
12632 
12633     if (!dc_isar_feature(aa64_fp16, s)) {
12634         unallocated_encoding(s);
12635         return;
12636     }
12637 
12638     rd = extract32(insn, 0, 5);
12639     rn = extract32(insn, 5, 5);
12640 
12641     a = extract32(insn, 23, 1);
12642     u = extract32(insn, 29, 1);
12643     is_scalar = extract32(insn, 28, 1);
12644     is_q = extract32(insn, 30, 1);
12645 
12646     opcode = extract32(insn, 12, 5);
12647     fpop = deposit32(opcode, 5, 1, a);
12648     fpop = deposit32(fpop, 6, 1, u);
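    /*
     * Worked example of the packing above: FCVTZU has opcode 0x1b with
     * a == 1 and u == 1, so fpop = 0x1b | (1 << 5) | (1 << 6) = 0x7b,
     * the value matched by "case 0x7b" below.
     */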
12649 
12650     switch (fpop) {
12651     case 0x1d: /* SCVTF */
12652     case 0x5d: /* UCVTF */
12653     {
12654         int elements;
12655 
12656         if (is_scalar) {
12657             elements = 1;
12658         } else {
12659             elements = (is_q ? 8 : 4);
12660         }
12661 
12662         if (!fp_access_check(s)) {
12663             return;
12664         }
12665         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12666         return;
12667     }
12668     break;
12669     case 0x2c: /* FCMGT (zero) */
12670     case 0x2d: /* FCMEQ (zero) */
12671     case 0x2e: /* FCMLT (zero) */
12672     case 0x6c: /* FCMGE (zero) */
12673     case 0x6d: /* FCMLE (zero) */
12674         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12675         return;
12676     case 0x3d: /* FRECPE */
12677     case 0x3f: /* FRECPX */
12678         break;
12679     case 0x18: /* FRINTN */
12680         only_in_vector = true;
12681         rmode = FPROUNDING_TIEEVEN;
12682         break;
12683     case 0x19: /* FRINTM */
12684         only_in_vector = true;
12685         rmode = FPROUNDING_NEGINF;
12686         break;
12687     case 0x38: /* FRINTP */
12688         only_in_vector = true;
12689         rmode = FPROUNDING_POSINF;
12690         break;
12691     case 0x39: /* FRINTZ */
12692         only_in_vector = true;
12693         rmode = FPROUNDING_ZERO;
12694         break;
12695     case 0x58: /* FRINTA */
12696         only_in_vector = true;
12697         rmode = FPROUNDING_TIEAWAY;
12698         break;
12699     case 0x59: /* FRINTX */
12700     case 0x79: /* FRINTI */
12701         only_in_vector = true;
12702         /* these use the current FPCR rounding mode, so rmode stays -1 */
12703         break;
12704     case 0x1a: /* FCVTNS */
12705         rmode = FPROUNDING_TIEEVEN;
12706         break;
12707     case 0x1b: /* FCVTMS */
12708         rmode = FPROUNDING_NEGINF;
12709         break;
12710     case 0x1c: /* FCVTAS */
12711         rmode = FPROUNDING_TIEAWAY;
12712         break;
12713     case 0x3a: /* FCVTPS */
12714         rmode = FPROUNDING_POSINF;
12715         break;
12716     case 0x3b: /* FCVTZS */
12717         rmode = FPROUNDING_ZERO;
12718         break;
12719     case 0x5a: /* FCVTNU */
12720         rmode = FPROUNDING_TIEEVEN;
12721         break;
12722     case 0x5b: /* FCVTMU */
12723         rmode = FPROUNDING_NEGINF;
12724         break;
12725     case 0x5c: /* FCVTAU */
12726         rmode = FPROUNDING_TIEAWAY;
12727         break;
12728     case 0x7a: /* FCVTPU */
12729         rmode = FPROUNDING_POSINF;
12730         break;
12731     case 0x7b: /* FCVTZU */
12732         rmode = FPROUNDING_ZERO;
12733         break;
12734     case 0x2f: /* FABS */
12735     case 0x6f: /* FNEG */
12736         need_fpst = false;
12737         break;
12738     case 0x7d: /* FRSQRTE */
12739     case 0x7f: /* FSQRT (vector) */
12740         break;
12741     default:
12742         unallocated_encoding(s);
12743         return;
12744     }
12745 
12746 
12747     /* Check additional constraints for the scalar encoding */
12748     if (is_scalar) {
12749         if (!is_q) {
12750             unallocated_encoding(s);
12751             return;
12752         }
12753         /* FRINTxx is only in the vector form */
12754         if (only_in_vector) {
12755             unallocated_encoding(s);
12756             return;
12757         }
12758     }
12759 
12760     if (!fp_access_check(s)) {
12761         return;
12762     }
12763 
12764     if (rmode >= 0 || need_fpst) {
12765         tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
12766     }
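    /*
     * FPST_FPCR_F16 is the half-precision float_status: broadly, it
     * honours FPCR.FZ16 rather than FPCR.FZ for flush-to-zero. FABS and
     * FNEG are pure bit operations, so they clear need_fpst above and
     * never touch it.
     */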
12767 
12768     if (rmode >= 0) {
12769         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12770     }
12771 
12772     if (is_scalar) {
12773         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12774         TCGv_i32 tcg_res = tcg_temp_new_i32();
12775 
12776         switch (fpop) {
12777         case 0x1a: /* FCVTNS */
12778         case 0x1b: /* FCVTMS */
12779         case 0x1c: /* FCVTAS */
12780         case 0x3a: /* FCVTPS */
12781         case 0x3b: /* FCVTZS */
12782             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12783             break;
12784         case 0x3d: /* FRECPE */
12785             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12786             break;
12787         case 0x3f: /* FRECPX */
12788             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12789             break;
12790         case 0x5a: /* FCVTNU */
12791         case 0x5b: /* FCVTMU */
12792         case 0x5c: /* FCVTAU */
12793         case 0x7a: /* FCVTPU */
12794         case 0x7b: /* FCVTZU */
12795             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12796             break;
12797         case 0x6f: /* FNEG */
12798             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12799             break;
12800         case 0x7d: /* FRSQRTE */
12801             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12802             break;
12803         default:
12804             g_assert_not_reached();
12805         }
12806 
12807         /* limit any sign extension going on */
12808         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12809         write_fp_sreg(s, rd, tcg_res);
12810     } else {
12811         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12812             TCGv_i32 tcg_op = tcg_temp_new_i32();
12813             TCGv_i32 tcg_res = tcg_temp_new_i32();
12814 
12815             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12816 
12817             switch (fpop) {
12818             case 0x1a: /* FCVTNS */
12819             case 0x1b: /* FCVTMS */
12820             case 0x1c: /* FCVTAS */
12821             case 0x3a: /* FCVTPS */
12822             case 0x3b: /* FCVTZS */
12823                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12824                 break;
12825             case 0x3d: /* FRECPE */
12826                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12827                 break;
12828             case 0x5a: /* FCVTNU */
12829             case 0x5b: /* FCVTMU */
12830             case 0x5c: /* FCVTAU */
12831             case 0x7a: /* FCVTPU */
12832             case 0x7b: /* FCVTZU */
12833                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12834                 break;
12835             case 0x18: /* FRINTN */
12836             case 0x19: /* FRINTM */
12837             case 0x38: /* FRINTP */
12838             case 0x39: /* FRINTZ */
12839             case 0x58: /* FRINTA */
12840             case 0x79: /* FRINTI */
12841                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12842                 break;
12843             case 0x59: /* FRINTX */
12844                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12845                 break;
12846             case 0x2f: /* FABS */
12847                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12848                 break;
12849             case 0x6f: /* FNEG */
12850                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12851                 break;
12852             case 0x7d: /* FRSQRTE */
12853                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12854                 break;
12855             case 0x7f: /* FSQRT */
12856                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12857                 break;
12858             default:
12859                 g_assert_not_reached();
12860             }
12861 
12862             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12863         }
12864 
12865         clear_vec_high(s, is_q, rd);
12866     }
12867 
12868     if (tcg_rmode) {
12869         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12870     }
12871 }
12872 
12873 /* AdvSIMD scalar x indexed element
12874  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12875  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12876  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12877  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12878  * AdvSIMD vector x indexed element
12879  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12880  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12881  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12882  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12883  */
12884 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12885 {
12886     /* This encoding has two kinds of instruction:
12887      *  normal, where we perform elt x idxelt => elt for each
12888      *     element in the vector
12889      *  long, where we perform elt x idxelt and generate a result of
12890      *     double the width of the input element
12891      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
12892      */
12893     bool is_scalar = extract32(insn, 28, 1);
12894     bool is_q = extract32(insn, 30, 1);
12895     bool u = extract32(insn, 29, 1);
12896     int size = extract32(insn, 22, 2);
12897     int l = extract32(insn, 21, 1);
12898     int m = extract32(insn, 20, 1);
12899     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12900     int rm = extract32(insn, 16, 4);
12901     int opcode = extract32(insn, 12, 4);
12902     int h = extract32(insn, 11, 1);
12903     int rn = extract32(insn, 5, 5);
12904     int rd = extract32(insn, 0, 5);
12905     bool is_long = false;
12906     int is_fp = 0;
12907     bool is_fp16 = false;
12908     int index;
12909     TCGv_ptr fpst;
12910 
12911     switch (16 * u + opcode) {
12912     case 0x08: /* MUL */
12913     case 0x10: /* MLA */
12914     case 0x14: /* MLS */
12915         if (is_scalar) {
12916             unallocated_encoding(s);
12917             return;
12918         }
12919         break;
12920     case 0x02: /* SMLAL, SMLAL2 */
12921     case 0x12: /* UMLAL, UMLAL2 */
12922     case 0x06: /* SMLSL, SMLSL2 */
12923     case 0x16: /* UMLSL, UMLSL2 */
12924     case 0x0a: /* SMULL, SMULL2 */
12925     case 0x1a: /* UMULL, UMULL2 */
12926         if (is_scalar) {
12927             unallocated_encoding(s);
12928             return;
12929         }
12930         is_long = true;
12931         break;
12932     case 0x03: /* SQDMLAL, SQDMLAL2 */
12933     case 0x07: /* SQDMLSL, SQDMLSL2 */
12934     case 0x0b: /* SQDMULL, SQDMULL2 */
12935         is_long = true;
12936         break;
12937     case 0x0c: /* SQDMULH */
12938     case 0x0d: /* SQRDMULH */
12939         break;
12940     case 0x01: /* FMLA */
12941     case 0x05: /* FMLS */
12942     case 0x09: /* FMUL */
12943     case 0x19: /* FMULX */
12944         is_fp = 1;
12945         break;
12946     case 0x1d: /* SQRDMLAH */
12947     case 0x1f: /* SQRDMLSH */
12948         if (!dc_isar_feature(aa64_rdm, s)) {
12949             unallocated_encoding(s);
12950             return;
12951         }
12952         break;
12953     case 0x0e: /* SDOT */
12954     case 0x1e: /* UDOT */
12955         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12956             unallocated_encoding(s);
12957             return;
12958         }
12959         break;
12960     case 0x0f:
12961         switch (size) {
12962         case 0: /* SUDOT */
12963         case 2: /* USDOT */
12964             if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
12965                 unallocated_encoding(s);
12966                 return;
12967             }
12968             size = MO_32;
12969             break;
12970         case 1: /* BFDOT */
12971             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12972                 unallocated_encoding(s);
12973                 return;
12974             }
12975             size = MO_32;
12976             break;
12977         case 3: /* BFMLAL{B,T} */
12978             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12979                 unallocated_encoding(s);
12980                 return;
12981             }
12982             /* don't set is_fp: the FP size checks below are wrong for BFMLAL */
12983             size = MO_16;
12984             break;
12985         default:
12986             unallocated_encoding(s);
12987             return;
12988         }
12989         break;
12990     case 0x11: /* FCMLA #0 */
12991     case 0x13: /* FCMLA #90 */
12992     case 0x15: /* FCMLA #180 */
12993     case 0x17: /* FCMLA #270 */
12994         if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
12995             unallocated_encoding(s);
12996             return;
12997         }
12998         is_fp = 2;
12999         break;
13000     case 0x00: /* FMLAL */
13001     case 0x04: /* FMLSL */
13002     case 0x18: /* FMLAL2 */
13003     case 0x1c: /* FMLSL2 */
13004         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
13005             unallocated_encoding(s);
13006             return;
13007         }
13008         size = MO_16;
13009         /* is_fp, but we pass cpu_env not fp_status.  */
13010         break;
13011     default:
13012         unallocated_encoding(s);
13013         return;
13014     }
13015 
13016     switch (is_fp) {
13017     case 1: /* normal fp */
13018         /* convert insn encoded size to MemOp size */
13019         switch (size) {
13020         case 0: /* half-precision */
13021             size = MO_16;
13022             is_fp16 = true;
13023             break;
13024         case MO_32: /* single precision */
13025         case MO_64: /* double precision */
13026             break;
13027         default:
13028             unallocated_encoding(s);
13029             return;
13030         }
13031         break;
13032 
13033     case 2: /* complex fp */
13034         /* Each indexable element is a complex pair.  */
13035         size += 1;
13036         switch (size) {
13037         case MO_32:
13038             if (h && !is_q) {
13039                 unallocated_encoding(s);
13040                 return;
13041             }
13042             is_fp16 = true;
13043             break;
13044         case MO_64:
13045             break;
13046         default:
13047             unallocated_encoding(s);
13048             return;
13049         }
13050         break;
13051 
13052     default: /* integer */
13053         switch (size) {
13054         case MO_8:
13055         case MO_64:
13056             unallocated_encoding(s);
13057             return;
13058         }
13059         break;
13060     }
13061     if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
13062         unallocated_encoding(s);
13063         return;
13064     }
13065 
13066     /* Given MemOp size, adjust register and indexing.  */
13067     switch (size) {
13068     case MO_16:
13069         index = h << 2 | l << 1 | m;
13070         break;
13071     case MO_32:
13072         index = h << 1 | l;
13073         rm |= m << 4;
13074         break;
13075     case MO_64:
13076         if (l || !is_q) {
13077             unallocated_encoding(s);
13078             return;
13079         }
13080         index = h;
13081         rm |= m << 4;
13082         break;
13083     default:
13084         g_assert_not_reached();
13085     }
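    /*
     * Worked example of the adjustment above: for MO_16 elements the
     * index is h:l:m, addressing all eight 16-bit lanes but limiting Rm
     * to V0-V15; for MO_32 and MO_64 the m bit instead becomes bit 4 of
     * the register number, so Rm can name V0-V31.
     */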
13086 
13087     if (!fp_access_check(s)) {
13088         return;
13089     }
13090 
13091     if (is_fp) {
13092         fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
13093     } else {
13094         fpst = NULL;
13095     }
13096 
13097     switch (16 * u + opcode) {
13098     case 0x0e: /* SDOT */
13099     case 0x1e: /* UDOT */
13100         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
13101                          u ? gen_helper_gvec_udot_idx_b
13102                          : gen_helper_gvec_sdot_idx_b);
13103         return;
13104     case 0x0f:
13105         switch (extract32(insn, 22, 2)) {
13106         case 0: /* SUDOT */
13107             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
13108                              gen_helper_gvec_sudot_idx_b);
13109             return;
13110         case 1: /* BFDOT */
13111             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
13112                              gen_helper_gvec_bfdot_idx);
13113             return;
13114         case 2: /* USDOT */
13115             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
13116                              gen_helper_gvec_usdot_idx_b);
13117             return;
13118         case 3: /* BFMLAL{B,T} */
13119             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
13120                               gen_helper_gvec_bfmlal_idx);
13121             return;
13122         }
13123         g_assert_not_reached();
13124     case 0x11: /* FCMLA #0 */
13125     case 0x13: /* FCMLA #90 */
13126     case 0x15: /* FCMLA #180 */
13127     case 0x17: /* FCMLA #270 */
13128         {
13129             int rot = extract32(insn, 13, 2);
13130             int data = (index << 2) | rot;
13131             tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
13132                                vec_full_reg_offset(s, rn),
13133                                vec_full_reg_offset(s, rm),
13134                                vec_full_reg_offset(s, rd), fpst,
13135                                is_q ? 16 : 8, vec_full_reg_size(s), data,
13136                                size == MO_64
13137                                ? gen_helper_gvec_fcmlas_idx
13138                                : gen_helper_gvec_fcmlah_idx);
13139         }
13140         return;
13141 
13142     case 0x00: /* FMLAL */
13143     case 0x04: /* FMLSL */
13144     case 0x18: /* FMLAL2 */
13145     case 0x1c: /* FMLSL2 */
13146         {
13147             int is_s = extract32(opcode, 2, 1);
13148             int is_2 = u;
13149             int data = (index << 2) | (is_2 << 1) | is_s;
13150             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
13151                                vec_full_reg_offset(s, rn),
13152                                vec_full_reg_offset(s, rm), cpu_env,
13153                                is_q ? 16 : 8, vec_full_reg_size(s),
13154                                data, gen_helper_gvec_fmlal_idx_a64);
13155         }
13156         return;
13157 
13158     case 0x08: /* MUL */
13159         if (!is_long && !is_scalar) {
13160             static gen_helper_gvec_3 * const fns[3] = {
13161                 gen_helper_gvec_mul_idx_h,
13162                 gen_helper_gvec_mul_idx_s,
13163                 gen_helper_gvec_mul_idx_d,
13164             };
13165             tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
13166                                vec_full_reg_offset(s, rn),
13167                                vec_full_reg_offset(s, rm),
13168                                is_q ? 16 : 8, vec_full_reg_size(s),
13169                                index, fns[size - 1]);
13170             return;
13171         }
13172         break;
13173 
13174     case 0x10: /* MLA */
13175         if (!is_long && !is_scalar) {
13176             static gen_helper_gvec_4 * const fns[3] = {
13177                 gen_helper_gvec_mla_idx_h,
13178                 gen_helper_gvec_mla_idx_s,
13179                 gen_helper_gvec_mla_idx_d,
13180             };
13181             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13182                                vec_full_reg_offset(s, rn),
13183                                vec_full_reg_offset(s, rm),
13184                                vec_full_reg_offset(s, rd),
13185                                is_q ? 16 : 8, vec_full_reg_size(s),
13186                                index, fns[size - 1]);
13187             return;
13188         }
13189         break;
13190 
13191     case 0x14: /* MLS */
13192         if (!is_long && !is_scalar) {
13193             static gen_helper_gvec_4 * const fns[3] = {
13194                 gen_helper_gvec_mls_idx_h,
13195                 gen_helper_gvec_mls_idx_s,
13196                 gen_helper_gvec_mls_idx_d,
13197             };
13198             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13199                                vec_full_reg_offset(s, rn),
13200                                vec_full_reg_offset(s, rm),
13201                                vec_full_reg_offset(s, rd),
13202                                is_q ? 16 : 8, vec_full_reg_size(s),
13203                                index, fns[size - 1]);
13204             return;
13205         }
13206         break;
13207     }
13208 
13209     if (size == 3) {
13210         TCGv_i64 tcg_idx = tcg_temp_new_i64();
13211         int pass;
13212 
13213         assert(is_fp && is_q && !is_long);
13214 
13215         read_vec_element(s, tcg_idx, rm, index, MO_64);
13216 
13217         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13218             TCGv_i64 tcg_op = tcg_temp_new_i64();
13219             TCGv_i64 tcg_res = tcg_temp_new_i64();
13220 
13221             read_vec_element(s, tcg_op, rn, pass, MO_64);
13222 
13223             switch (16 * u + opcode) {
13224             case 0x05: /* FMLS */
13225                 /* As usual for ARM, separate negation for fused multiply-add */
13226                 gen_helper_vfp_negd(tcg_op, tcg_op);
13227                 /* fall through */
13228             case 0x01: /* FMLA */
13229                 read_vec_element(s, tcg_res, rd, pass, MO_64);
13230                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
13231                 break;
13232             case 0x09: /* FMUL */
13233                 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
13234                 break;
13235             case 0x19: /* FMULX */
13236                 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
13237                 break;
13238             default:
13239                 g_assert_not_reached();
13240             }
13241 
13242             write_vec_element(s, tcg_res, rd, pass, MO_64);
13243         }
13244 
13245         clear_vec_high(s, !is_scalar, rd);
13246     } else if (!is_long) {
13247         /* 32 bit floating point, or 16 or 32 bit integer.
13248          * For the 16 bit scalar case we use the usual Neon helpers and
13249          * rely on the fact that 0 op 0 == 0 with no side effects.
13250          */
13251         TCGv_i32 tcg_idx = tcg_temp_new_i32();
13252         int pass, maxpasses;
13253 
13254         if (is_scalar) {
13255             maxpasses = 1;
13256         } else {
13257             maxpasses = is_q ? 4 : 2;
13258         }
13259 
13260         read_vec_element_i32(s, tcg_idx, rm, index, size);
13261 
13262         if (size == 1 && !is_scalar) {
13263             /* The simplest way to handle the 16x16 indexed ops is to duplicate
13264              * the index into both halves of the 32 bit tcg_idx and then use
13265              * the usual Neon helpers.
13266              */
13267             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13268         }
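        /*
         * Illustrative effect of the deposit above: if tcg_idx holds
         * 0x0000abcd, it becomes 0xabcdabcd, so the "2h" Neon helpers
         * see the same 16-bit multiplier in both halves.
         */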
13269 
13270         for (pass = 0; pass < maxpasses; pass++) {
13271             TCGv_i32 tcg_op = tcg_temp_new_i32();
13272             TCGv_i32 tcg_res = tcg_temp_new_i32();
13273 
13274             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13275 
13276             switch (16 * u + opcode) {
13277             case 0x08: /* MUL */
13278             case 0x10: /* MLA */
13279             case 0x14: /* MLS */
13280             {
13281                 static NeonGenTwoOpFn * const fns[2][2] = {
13282                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13283                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
13284                 };
13285                 NeonGenTwoOpFn *genfn;
13286                 bool is_sub = opcode == 0x4;
13287 
13288                 if (size == 1) {
13289                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13290                 } else {
13291                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13292                 }
13293                 if (opcode == 0x8) {
13294                     break;
13295                 }
13296                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13297                 genfn = fns[size - 1][is_sub];
13298                 genfn(tcg_res, tcg_op, tcg_res);
13299                 break;
13300             }
13301             case 0x05: /* FMLS */
13302             case 0x01: /* FMLA */
13303                 read_vec_element_i32(s, tcg_res, rd, pass,
13304                                      is_scalar ? size : MO_32);
13305                 switch (size) {
13306                 case 1:
13307                     if (opcode == 0x5) {
13308                         /* As usual for ARM, separate negation for fused
13309                          * multiply-add */
13310                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13311                     }
13312                     if (is_scalar) {
13313                         gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13314                                                    tcg_res, fpst);
13315                     } else {
13316                         gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13317                                                     tcg_res, fpst);
13318                     }
13319                     break;
13320                 case 2:
13321                     if (opcode == 0x5) {
13322                         /* As usual for ARM, separate negation for
13323                          * fused multiply-add */
13324                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13325                     }
13326                     gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13327                                            tcg_res, fpst);
13328                     break;
13329                 default:
13330                     g_assert_not_reached();
13331                 }
13332                 break;
13333             case 0x09: /* FMUL */
13334                 switch (size) {
13335                 case 1:
13336                     if (is_scalar) {
13337                         gen_helper_advsimd_mulh(tcg_res, tcg_op,
13338                                                 tcg_idx, fpst);
13339                     } else {
13340                         gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13341                                                  tcg_idx, fpst);
13342                     }
13343                     break;
13344                 case 2:
13345                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13346                     break;
13347                 default:
13348                     g_assert_not_reached();
13349                 }
13350                 break;
13351             case 0x19: /* FMULX */
13352                 switch (size) {
13353                 case 1:
13354                     if (is_scalar) {
13355                         gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13356                                                  tcg_idx, fpst);
13357                     } else {
13358                         gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13359                                                   tcg_idx, fpst);
13360                     }
13361                     break;
13362                 case 2:
13363                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13364                     break;
13365                 default:
13366                     g_assert_not_reached();
13367                 }
13368                 break;
13369             case 0x0c: /* SQDMULH */
13370                 if (size == 1) {
13371                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
13372                                                tcg_op, tcg_idx);
13373                 } else {
13374                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
13375                                                tcg_op, tcg_idx);
13376                 }
13377                 break;
13378             case 0x0d: /* SQRDMULH */
13379                 if (size == 1) {
13380                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
13381                                                 tcg_op, tcg_idx);
13382                 } else {
13383                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
13384                                                 tcg_op, tcg_idx);
13385                 }
13386                 break;
13387             case 0x1d: /* SQRDMLAH */
13388                 read_vec_element_i32(s, tcg_res, rd, pass,
13389                                      is_scalar ? size : MO_32);
13390                 if (size == 1) {
13391                     gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
13392                                                 tcg_op, tcg_idx, tcg_res);
13393                 } else {
13394                     gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
13395                                                 tcg_op, tcg_idx, tcg_res);
13396                 }
13397                 break;
13398             case 0x1f: /* SQRDMLSH */
13399                 read_vec_element_i32(s, tcg_res, rd, pass,
13400                                      is_scalar ? size : MO_32);
13401                 if (size == 1) {
13402                     gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
13403                                                 tcg_op, tcg_idx, tcg_res);
13404                 } else {
13405                     gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
13406                                                 tcg_op, tcg_idx, tcg_res);
13407                 }
13408                 break;
13409             default:
13410                 g_assert_not_reached();
13411             }
13412 
13413             if (is_scalar) {
13414                 write_fp_sreg(s, rd, tcg_res);
13415             } else {
13416                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13417             }
13418         }
13419 
13420         clear_vec_high(s, is_q, rd);
13421     } else {
13422         /* long ops: 16x16->32 or 32x32->64 */
13423         TCGv_i64 tcg_res[2];
13424         int pass;
13425         bool satop = extract32(opcode, 0, 1);
13426         MemOp memop = MO_32;
13427 
13428         if (satop || !u) {
13429             memop |= MO_SIGN;
13430         }
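        /*
         * From the condition above: SMLAL/SMLSL/SMULL (u == 0) load
         * signed elements, UMLAL/UMLSL/UMULL (u == 1) unsigned, and the
         * saturating-doubling ops (satop set) are signed in either case.
         */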
13431 
13432         if (size == 2) {
13433             TCGv_i64 tcg_idx = tcg_temp_new_i64();
13434 
13435             read_vec_element(s, tcg_idx, rm, index, memop);
13436 
13437             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13438                 TCGv_i64 tcg_op = tcg_temp_new_i64();
13439                 TCGv_i64 tcg_passres;
13440                 int passelt;
13441 
13442                 if (is_scalar) {
13443                     passelt = 0;
13444                 } else {
13445                     passelt = pass + (is_q * 2);
13446                 }
13447 
13448                 read_vec_element(s, tcg_op, rn, passelt, memop);
13449 
13450                 tcg_res[pass] = tcg_temp_new_i64();
13451 
13452                 if (opcode == 0xa || opcode == 0xb) {
13453                     /* Non-accumulating ops */
13454                     tcg_passres = tcg_res[pass];
13455                 } else {
13456                     tcg_passres = tcg_temp_new_i64();
13457                 }
13458 
13459                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13460 
13461                 if (satop) {
13462                     /* saturating, doubling */
13463                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
13464                                                       tcg_passres, tcg_passres);
13465                 }
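                /*
                 * The "doubling" is implemented as a saturating add of
                 * the product to itself (2*x == x + x), which lets the
                 * helper set QC when the result saturates.
                 */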
13466 
13467                 if (opcode == 0xa || opcode == 0xb) {
13468                     continue;
13469                 }
13470 
13471                 /* Accumulating op: handle accumulate step */
13472                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13473 
13474                 switch (opcode) {
13475                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13476                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13477                     break;
13478                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13479                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13480                     break;
13481                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13482                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
13483                     /* fall through */
13484                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13485                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
13486                                                       tcg_res[pass],
13487                                                       tcg_passres);
13488                     break;
13489                 default:
13490                     g_assert_not_reached();
13491                 }
13492             }
13493 
13494             clear_vec_high(s, !is_scalar, rd);
13495         } else {
13496             TCGv_i32 tcg_idx = tcg_temp_new_i32();
13497 
13498             assert(size == 1);
13499             read_vec_element_i32(s, tcg_idx, rm, index, size);
13500 
13501             if (!is_scalar) {
13502                 /* The simplest way to handle the 16x16 indexed ops is to
13503                  * duplicate the index into both halves of the 32 bit tcg_idx
13504                  * and then use the usual Neon helpers.
13505                  */
13506                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13507             }
13508 
13509             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13510                 TCGv_i32 tcg_op = tcg_temp_new_i32();
13511                 TCGv_i64 tcg_passres;
13512 
13513                 if (is_scalar) {
13514                     read_vec_element_i32(s, tcg_op, rn, pass, size);
13515                 } else {
13516                     read_vec_element_i32(s, tcg_op, rn,
13517                                          pass + (is_q * 2), MO_32);
13518                 }
13519 
13520                 tcg_res[pass] = tcg_temp_new_i64();
13521 
13522                 if (opcode == 0xa || opcode == 0xb) {
13523                     /* Non-accumulating ops */
13524                     tcg_passres = tcg_res[pass];
13525                 } else {
13526                     tcg_passres = tcg_temp_new_i64();
13527                 }
13528 
13529                 if (memop & MO_SIGN) {
13530                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13531                 } else {
13532                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13533                 }
13534                 if (satop) {
13535                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13536                                                       tcg_passres, tcg_passres);
13537                 }
13538 
13539                 if (opcode == 0xa || opcode == 0xb) {
13540                     continue;
13541                 }
13542 
13543                 /* Accumulating op: handle accumulate step */
13544                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13545 
13546                 switch (opcode) {
13547                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13548                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13549                                              tcg_passres);
13550                     break;
13551                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13552                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13553                                              tcg_passres);
13554                     break;
13555                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13556                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13557                     /* fall through */
13558                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13559                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13560                                                       tcg_res[pass],
13561                                                       tcg_passres);
13562                     break;
13563                 default:
13564                     g_assert_not_reached();
13565                 }
13566             }
13567 
13568             if (is_scalar) {
13569                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13570             }
13571         }
13572 
13573         if (is_scalar) {
13574             tcg_res[1] = tcg_constant_i64(0);
13575         }
13576 
13577         for (pass = 0; pass < 2; pass++) {
13578             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13579         }
13580     }
13581 }
13582 
13583 /* Crypto AES
13584  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13585  * +-----------------+------+-----------+--------+-----+------+------+
13586  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13587  * +-----------------+------+-----------+--------+-----+------+------+
13588  */
13589 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13590 {
13591     int size = extract32(insn, 22, 2);
13592     int opcode = extract32(insn, 12, 5);
13593     int rn = extract32(insn, 5, 5);
13594     int rd = extract32(insn, 0, 5);
13595     int decrypt;
13596     gen_helper_gvec_2 *genfn2 = NULL;
13597     gen_helper_gvec_3 *genfn3 = NULL;
13598 
13599     if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13600         unallocated_encoding(s);
13601         return;
13602     }
13603 
13604     switch (opcode) {
13605     case 0x4: /* AESE */
13606         decrypt = 0;
13607         genfn3 = gen_helper_crypto_aese;
13608         break;
13609     case 0x6: /* AESMC */
13610         decrypt = 0;
13611         genfn2 = gen_helper_crypto_aesmc;
13612         break;
13613     case 0x5: /* AESD */
13614         decrypt = 1;
13615         genfn3 = gen_helper_crypto_aese;
13616         break;
13617     case 0x7: /* AESIMC */
13618         decrypt = 1;
13619         genfn2 = gen_helper_crypto_aesmc;
13620         break;
13621     default:
13622         unallocated_encoding(s);
13623         return;
13624     }
13625 
13626     if (!fp_access_check(s)) {
13627         return;
13628     }
13629     if (genfn2) {
13630         gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2);
13631     } else {
13632         gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3);
13633     }
13634 }
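/*
 * Note on the decode above: AESD reuses the AESE helper and AESIMC the
 * AESMC helper; the decrypt flag travels to the helper as the gvec
 * "data" argument and selects the inverse transformation there.
 */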
13635 
13636 /* Crypto three-reg SHA
13637  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
13638  * +-----------------+------+---+------+---+--------+-----+------+------+
13639  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
13640  * +-----------------+------+---+------+---+--------+-----+------+------+
13641  */
13642 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13643 {
13644     int size = extract32(insn, 22, 2);
13645     int opcode = extract32(insn, 12, 3);
13646     int rm = extract32(insn, 16, 5);
13647     int rn = extract32(insn, 5, 5);
13648     int rd = extract32(insn, 0, 5);
13649     gen_helper_gvec_3 *genfn;
13650     bool feature;
13651 
13652     if (size != 0) {
13653         unallocated_encoding(s);
13654         return;
13655     }
13656 
13657     switch (opcode) {
13658     case 0: /* SHA1C */
13659         genfn = gen_helper_crypto_sha1c;
13660         feature = dc_isar_feature(aa64_sha1, s);
13661         break;
13662     case 1: /* SHA1P */
13663         genfn = gen_helper_crypto_sha1p;
13664         feature = dc_isar_feature(aa64_sha1, s);
13665         break;
13666     case 2: /* SHA1M */
13667         genfn = gen_helper_crypto_sha1m;
13668         feature = dc_isar_feature(aa64_sha1, s);
13669         break;
13670     case 3: /* SHA1SU0 */
13671         genfn = gen_helper_crypto_sha1su0;
13672         feature = dc_isar_feature(aa64_sha1, s);
13673         break;
13674     case 4: /* SHA256H */
13675         genfn = gen_helper_crypto_sha256h;
13676         feature = dc_isar_feature(aa64_sha256, s);
13677         break;
13678     case 5: /* SHA256H2 */
13679         genfn = gen_helper_crypto_sha256h2;
13680         feature = dc_isar_feature(aa64_sha256, s);
13681         break;
13682     case 6: /* SHA256SU1 */
13683         genfn = gen_helper_crypto_sha256su1;
13684         feature = dc_isar_feature(aa64_sha256, s);
13685         break;
13686     default:
13687         unallocated_encoding(s);
13688         return;
13689     }
13690 
13691     if (!feature) {
13692         unallocated_encoding(s);
13693         return;
13694     }
13695 
13696     if (!fp_access_check(s)) {
13697         return;
13698     }
13699     gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
13700 }
13701 
13702 /* Crypto two-reg SHA
13703  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13704  * +-----------------+------+-----------+--------+-----+------+------+
13705  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13706  * +-----------------+------+-----------+--------+-----+------+------+
13707  */
13708 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13709 {
13710     int size = extract32(insn, 22, 2);
13711     int opcode = extract32(insn, 12, 5);
13712     int rn = extract32(insn, 5, 5);
13713     int rd = extract32(insn, 0, 5);
13714     gen_helper_gvec_2 *genfn;
13715     bool feature;
13716 
13717     if (size != 0) {
13718         unallocated_encoding(s);
13719         return;
13720     }
13721 
13722     switch (opcode) {
13723     case 0: /* SHA1H */
13724         feature = dc_isar_feature(aa64_sha1, s);
13725         genfn = gen_helper_crypto_sha1h;
13726         break;
13727     case 1: /* SHA1SU1 */
13728         feature = dc_isar_feature(aa64_sha1, s);
13729         genfn = gen_helper_crypto_sha1su1;
13730         break;
13731     case 2: /* SHA256SU0 */
13732         feature = dc_isar_feature(aa64_sha256, s);
13733         genfn = gen_helper_crypto_sha256su0;
13734         break;
13735     default:
13736         unallocated_encoding(s);
13737         return;
13738     }
13739 
13740     if (!feature) {
13741         unallocated_encoding(s);
13742         return;
13743     }
13744 
13745     if (!fp_access_check(s)) {
13746         return;
13747     }
13748     gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
13749 }
13750 
13751 static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
13752 {
13753     tcg_gen_rotli_i64(d, m, 1);
13754     tcg_gen_xor_i64(d, d, n);
13755 }
13756 
13757 static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
13758 {
13759     tcg_gen_rotli_vec(vece, d, m, 1);
13760     tcg_gen_xor_vec(vece, d, d, n);
13761 }
13762 
13763 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
13764                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
13765 {
13766     static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
13767     static const GVecGen3 op = {
13768         .fni8 = gen_rax1_i64,
13769         .fniv = gen_rax1_vec,
13770         .opt_opc = vecop_list,
13771         .fno = gen_helper_crypto_rax1,
13772         .vece = MO_64,
13773     };
13774     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
13775 }
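/*
 * For reference, a scalar C equivalent of one 64-bit lane of RAX1
 * (illustrative only, not used by the translator):
 *
 *     uint64_t rax1_64(uint64_t n, uint64_t m)
 *     {
 *         return n ^ ((m << 1) | (m >> 63));
 *     }
 *
 * The GVecGen3 above lets TCG use an inline vector rotate where the
 * host backend supports INDEX_op_rotli_vec, falling back to the
 * gen_helper_crypto_rax1 out-of-line helper otherwise.
 */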
13776 
13777 /* Crypto three-reg SHA512
13778  *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
13779  * +-----------------------+------+---+---+-----+--------+------+------+
13780  * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
13781  * +-----------------------+------+---+---+-----+--------+------+------+
13782  */
13783 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13784 {
13785     int opcode = extract32(insn, 10, 2);
13786     int o = extract32(insn, 14, 1);
13787     int rm = extract32(insn, 16, 5);
13788     int rn = extract32(insn, 5, 5);
13789     int rd = extract32(insn, 0, 5);
13790     bool feature;
13791     gen_helper_gvec_3 *oolfn = NULL;
13792     GVecGen3Fn *gvecfn = NULL;
13793 
13794     if (o == 0) {
13795         switch (opcode) {
13796         case 0: /* SHA512H */
13797             feature = dc_isar_feature(aa64_sha512, s);
13798             oolfn = gen_helper_crypto_sha512h;
13799             break;
13800         case 1: /* SHA512H2 */
13801             feature = dc_isar_feature(aa64_sha512, s);
13802             oolfn = gen_helper_crypto_sha512h2;
13803             break;
13804         case 2: /* SHA512SU1 */
13805             feature = dc_isar_feature(aa64_sha512, s);
13806             oolfn = gen_helper_crypto_sha512su1;
13807             break;
13808         case 3: /* RAX1 */
13809             feature = dc_isar_feature(aa64_sha3, s);
13810             gvecfn = gen_gvec_rax1;
13811             break;
13812         default:
13813             g_assert_not_reached();
13814         }
13815     } else {
13816         switch (opcode) {
13817         case 0: /* SM3PARTW1 */
13818             feature = dc_isar_feature(aa64_sm3, s);
13819             oolfn = gen_helper_crypto_sm3partw1;
13820             break;
13821         case 1: /* SM3PARTW2 */
13822             feature = dc_isar_feature(aa64_sm3, s);
13823             oolfn = gen_helper_crypto_sm3partw2;
13824             break;
13825         case 2: /* SM4EKEY */
13826             feature = dc_isar_feature(aa64_sm4, s);
13827             oolfn = gen_helper_crypto_sm4ekey;
13828             break;
13829         default:
13830             unallocated_encoding(s);
13831             return;
13832         }
13833     }
13834 
13835     if (!feature) {
13836         unallocated_encoding(s);
13837         return;
13838     }
13839 
13840     if (!fp_access_check(s)) {
13841         return;
13842     }
13843 
13844     if (oolfn) {
13845         gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
13846     } else {
13847         gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
13848     }
13849 }
13850 
13851 /* Crypto two-reg SHA512
13852  *  31                                     12  11  10  9    5 4    0
13853  * +-----------------------------------------+--------+------+------+
13854  * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
13855  * +-----------------------------------------+--------+------+------+
13856  */
13857 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13858 {
13859     int opcode = extract32(insn, 10, 2);
13860     int rn = extract32(insn, 5, 5);
13861     int rd = extract32(insn, 0, 5);
13862     bool feature;
13863 
13864     switch (opcode) {
13865     case 0: /* SHA512SU0 */
13866         feature = dc_isar_feature(aa64_sha512, s);
13867         break;
13868     case 1: /* SM4E */
13869         feature = dc_isar_feature(aa64_sm4, s);
13870         break;
13871     default:
13872         unallocated_encoding(s);
13873         return;
13874     }
13875 
13876     if (!feature) {
13877         unallocated_encoding(s);
13878         return;
13879     }
13880 
13881     if (!fp_access_check(s)) {
13882         return;
13883     }
13884 
13885     switch (opcode) {
13886     case 0: /* SHA512SU0 */
13887         gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
13888         break;
13889     case 1: /* SM4E */
13890         gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
13891         break;
13892     default:
13893         g_assert_not_reached();
13894     }
13895 }
13896 
13897 /* Crypto four-register
13898  *  31               23 22 21 20  16 15  14  10 9    5 4    0
13899  * +-------------------+-----+------+---+------+------+------+
13900  * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
13901  * +-------------------+-----+------+---+------+------+------+
13902  */
13903 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13904 {
13905     int op0 = extract32(insn, 21, 2);
13906     int rm = extract32(insn, 16, 5);
13907     int ra = extract32(insn, 10, 5);
13908     int rn = extract32(insn, 5, 5);
13909     int rd = extract32(insn, 0, 5);
13910     bool feature;
13911 
13912     switch (op0) {
13913     case 0: /* EOR3 */
13914     case 1: /* BCAX */
13915         feature = dc_isar_feature(aa64_sha3, s);
13916         break;
13917     case 2: /* SM3SS1 */
13918         feature = dc_isar_feature(aa64_sm3, s);
13919         break;
13920     default:
13921         unallocated_encoding(s);
13922         return;
13923     }
13924 
13925     if (!feature) {
13926         unallocated_encoding(s);
13927         return;
13928     }
13929 
13930     if (!fp_access_check(s)) {
13931         return;
13932     }
13933 
13934     if (op0 < 2) {
13935         TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13936         int pass;
13937 
13938         tcg_op1 = tcg_temp_new_i64();
13939         tcg_op2 = tcg_temp_new_i64();
13940         tcg_op3 = tcg_temp_new_i64();
13941         tcg_res[0] = tcg_temp_new_i64();
13942         tcg_res[1] = tcg_temp_new_i64();
13943 
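        /*
         * Per 64-bit lane: EOR3 is Vd = Vn ^ Vm ^ Va, and BCAX is
         * Vd = Vn ^ (Vm & ~Va); the final XOR with Vn is shared below.
         */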
13944         for (pass = 0; pass < 2; pass++) {
13945             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13946             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13947             read_vec_element(s, tcg_op3, ra, pass, MO_64);
13948 
13949             if (op0 == 0) {
13950                 /* EOR3 */
13951                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13952             } else {
13953                 /* BCAX */
13954                 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13955             }
13956             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13957         }
13958         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13959         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13960     } else {
13961         TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13962 
13963         tcg_op1 = tcg_temp_new_i32();
13964         tcg_op2 = tcg_temp_new_i32();
13965         tcg_op3 = tcg_temp_new_i32();
13966         tcg_res = tcg_temp_new_i32();
13967         tcg_zero = tcg_constant_i32(0);
13968 
13969         read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13970         read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13971         read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13972 
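        /*
         * SM3SS1: ROL32(ROL32(Vn.S[3], 12) + Vm.S[3] + Va.S[3], 7),
         * expressed here via right-rotates by 32-12=20 and 32-7=25.
         */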
13973         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13974         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13975         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13976         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13977 
13978         write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13979         write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13980         write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13981         write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13982     }
13983 }
13984 
13985 /* Crypto XAR
13986  *  31                   21 20  16 15    10 9    5 4    0
13987  * +-----------------------+------+--------+------+------+
13988  * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13989  * +-----------------------+------+--------+------+------+
13990  */
13991 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13992 {
13993     int rm = extract32(insn, 16, 5);
13994     int imm6 = extract32(insn, 10, 6);
13995     int rn = extract32(insn, 5, 5);
13996     int rd = extract32(insn, 0, 5);
13997 
13998     if (!dc_isar_feature(aa64_sha3, s)) {
13999         unallocated_encoding(s);
14000         return;
14001     }
14002 
14003     if (!fp_access_check(s)) {
14004         return;
14005     }
14006 
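    /* XAR: in each 64-bit lane, Vd = ROR64(Vn ^ Vm, imm6).  */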
14007     gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
14008                  vec_full_reg_offset(s, rn),
14009                  vec_full_reg_offset(s, rm), imm6, 16,
14010                  vec_full_reg_size(s));
14011 }
14012 
14013 /* Crypto three-reg imm2
14014  *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
14015  * +-----------------------+------+-----+------+--------+------+------+
14016  * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
14017  * +-----------------------+------+-----+------+--------+------+------+
14018  */
14019 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
14020 {
14021     static gen_helper_gvec_3 * const fns[4] = {
14022         gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
14023         gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
14024     };
14025     int opcode = extract32(insn, 10, 2);
14026     int imm2 = extract32(insn, 12, 2);
14027     int rm = extract32(insn, 16, 5);
14028     int rn = extract32(insn, 5, 5);
14029     int rd = extract32(insn, 0, 5);
14030 
14031     if (!dc_isar_feature(aa64_sm3, s)) {
14032         unallocated_encoding(s);
14033         return;
14034     }
14035 
14036     if (!fp_access_check(s)) {
14037         return;
14038     }
14039 
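    /*
     * imm2 is passed through as the helper's data argument; the helper
     * uses it to select the source element for the SM3TT operation.
     */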
14040     gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
14041 }
14042 
14043 /* C3.6 Data processing - SIMD, inc Crypto
14044  *
14045  * As the decode gets a little complex, we use a table-based
14046  * approach for this part of the decode.
14047  */
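/*
 * lookup_disas_fn() returns the first entry for which
 * (insn & mask) == pattern, so more specific encodings must be
 * listed before more general ones.
 */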
14048 static const AArch64DecodeTable data_proc_simd[] = {
14049     /* pattern  ,  mask     ,  fn                        */
14050     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
14051     { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
14052     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
14053     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
14054     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
14055     { 0x0e000400, 0x9fe08400, disas_simd_copy },
14056     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
14057     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
14058     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
14059     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
14060     { 0x0e000000, 0xbf208c00, disas_simd_tb },
14061     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
14062     { 0x2e000000, 0xbf208400, disas_simd_ext },
14063     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
14064     { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
14065     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
14066     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
14067     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
14068     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
14069     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
14070     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
14071     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
14072     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
14073     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
14074     { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
14075     { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
14076     { 0xce000000, 0xff808000, disas_crypto_four_reg },
14077     { 0xce800000, 0xffe00000, disas_crypto_xar },
14078     { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
14079     { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
14080     { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
14081     { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
14082     { 0x00000000, 0x00000000, NULL }
14083 };
14084 
14085 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
14086 {
14087     /* Note that this is called with all non-FP cases from
14088      * table C3-6, so it must UNDEF for entries not specifically
14089      * allocated to instructions in that table.
14090      */
14091     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
14092     if (fn) {
14093         fn(s, insn);
14094     } else {
14095         unallocated_encoding(s);
14096     }
14097 }
14098 
14099 /* C3.6 Data processing - SIMD and floating point */
14100 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
14101 {
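    /*
     * Floating-point encodings have bit 28 set and bit 30 clear;
     * everything else in this space, including crypto, is SIMD.
     */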
14102     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
14103         disas_data_proc_fp(s, insn);
14104     } else {
14105         /* SIMD, including crypto */
14106         disas_data_proc_simd(s, insn);
14107     }
14108 }
14109 
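/*
 * Trivial bodies for the generated sme-fa64 decoder: OK accepts the
 * insn as valid in streaming mode, while FAIL marks it non-streaming
 * so that the SME trap check can fire (see disas_sme_fa64() use below).
 */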
14110 static bool trans_OK(DisasContext *s, arg_OK *a)
14111 {
14112     return true;
14113 }
14114 
14115 static bool trans_FAIL(DisasContext *s, arg_OK *a)
14116 {
14117     s->is_nonstreaming = true;
14118     return true;
14119 }
14120 
14121 /**
14122  * is_guarded_page:
14123  * @env: The cpu environment
14124  * @s: The DisasContext
14125  *
14126  * Return true if the page is guarded.
14127  */
14128 static bool is_guarded_page(CPUARMState *env, DisasContext *s)
14129 {
14130     uint64_t addr = s->base.pc_first;
14131 #ifdef CONFIG_USER_ONLY
14132     return page_get_flags(addr) & PAGE_BTI;
14133 #else
14134     CPUTLBEntryFull *full;
14135     void *host;
14136     int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
14137     int flags;
14138 
14139     /*
14140      * We test this immediately after reading an insn, which means
14141      * that the TLB entry must be present and valid, and thus this
14142      * access will never raise an exception.
14143      */
14144     flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
14145                               false, &host, &full, 0);
14146     assert(!(flags & TLB_INVALID_MASK));
14147 
14148     return full->guarded;
14149 #endif
14150 }
14151 
14152 /**
14153  * btype_destination_ok:
14154  * @insn: The instruction at the branch destination
14155  * @bt: SCTLR_ELx.BT
14156  * @btype: PSTATE.BTYPE; known to be non-zero on entry
14157  *
14158  * On a guarded page, there are a limited number of insns
14159  * that may be present at the branch target:
14160  *   - branch target identifiers,
14161  *   - paciasp, pacibsp,
14162  *   - the BRK insn,
14163  *   - the HLT insn.
14164  * Anything else causes a Branch Target Exception.
14165  *
14166  * Return true if the branch is compatible, false to raise BTITRAP.
14167  */
14168 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
14169 {
14170     if ((insn & 0xfffff01fu) == 0xd503201fu) {
14171         /* HINT space */
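        /* Bits [11:5] are CRm:op2, i.e. the hint immediate.  */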
14172         switch (extract32(insn, 5, 7)) {
14173         case 0b011001: /* PACIASP */
14174         case 0b011011: /* PACIBSP */
14175             /*
14176              * If SCTLR_ELx.BT, then PACI*SP are not compatible
14177              * with btype == 3.  Otherwise all btype are ok.
14178              */
14179             return !bt || btype != 3;
14180         case 0b100000: /* BTI */
14181             /* Not compatible with any btype.  */
14182             return false;
14183         case 0b100010: /* BTI c */
14184             /* Not compatible with btype == 3 */
14185             return btype != 3;
14186         case 0b100100: /* BTI j */
14187             /* Not compatible with btype == 2 */
14188             return btype != 2;
14189         case 0b100110: /* BTI jc */
14190             /* Compatible with any btype.  */
14191             return true;
14192         }
14193     } else {
14194         switch (insn & 0xffe0001fu) {
14195         case 0xd4200000u: /* BRK */
14196         case 0xd4400000u: /* HLT */
14197             /* Give priority to the breakpoint exception.  */
14198             return true;
14199         }
14200     }
14201     return false;
14202 }
14203 
14204 /* C3.1 A64 instruction index by encoding */
14205 static void disas_a64_legacy(DisasContext *s, uint32_t insn)
14206 {
14207     switch (extract32(insn, 25, 4)) {
14208     case 0x8: case 0x9: /* Data processing - immediate */
14209         disas_data_proc_imm(s, insn);
14210         break;
14211     case 0xa: case 0xb: /* Branch, exception generation and system insns */
14212         disas_b_exc_sys(s, insn);
14213         break;
14214     case 0x4:
14215     case 0x6:
14216     case 0xc:
14217     case 0xe:      /* Loads and stores */
14218         disas_ldst(s, insn);
14219         break;
14220     case 0x5:
14221     case 0xd:      /* Data processing - register */
14222         disas_data_proc_reg(s, insn);
14223         break;
14224     case 0x7:
14225     case 0xf:      /* Data processing - SIMD and floating point */
14226         disas_data_proc_simd_fp(s, insn);
14227         break;
14228     default:
14229         unallocated_encoding(s);
14230         break;
14231     }
14232 }
14233 
14234 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
14235                                           CPUState *cpu)
14236 {
14237     DisasContext *dc = container_of(dcbase, DisasContext, base);
14238     CPUARMState *env = cpu->env_ptr;
14239     ARMCPU *arm_cpu = env_archcpu(env);
14240     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
14241     int bound, core_mmu_idx;
14242 
14243     dc->isar = &arm_cpu->isar;
14244     dc->condjmp = 0;
14245     dc->pc_save = dc->base.pc_first;
14246     dc->aarch64 = true;
14247     dc->thumb = false;
14248     dc->sctlr_b = 0;
14249     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
14250     dc->condexec_mask = 0;
14251     dc->condexec_cond = 0;
14252     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
14253     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
14254     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
14255     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
14256     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
14257     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
14258 #if !defined(CONFIG_USER_ONLY)
14259     dc->user = (dc->current_el == 0);
14260 #endif
14261     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
14262     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
14263     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
14264     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
14265     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
14266     dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET);
14267     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
14268     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
14269     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
14270     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
14271     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
14272     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
14273     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
14274     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
14275     dc->ata = EX_TBFLAG_A64(tb_flags, ATA);
14276     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
14277     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
14278     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
14279     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
14280     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
14281     dc->vec_len = 0;
14282     dc->vec_stride = 0;
14283     dc->cp_regs = arm_cpu->cp_regs;
14284     dc->features = env->features;
14285     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
14286 
14287 #ifdef CONFIG_USER_ONLY
14288     /* In sve_probe_page, we assume TBI is enabled. */
14289     tcg_debug_assert(dc->tbid & 1);
14290 #endif
14291 
14292     /* Single step state. The code-generation logic here is:
14293      *  SS_ACTIVE == 0:
14294      *   generate code with no special handling for single-stepping (except
14295      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
14296      *   this happens anyway because those changes are all system register or
14297      *   PSTATE writes).
14298      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
14299      *   emit code for one insn
14300      *   emit code to clear PSTATE.SS
14301      *   emit code to generate software step exception for completed step
14302      *   end TB (as usual for having generated an exception)
14303      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
14304      *   emit code to generate a software step exception
14305      *   end the TB
14306      */
14307     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
14308     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
14309     dc->is_ldex = false;
14310 
14311     /* Bound the number of insns to execute to those left on the page.  */
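    /*
     * TARGET_PAGE_MASK is sign-extended, so (pc_first | TARGET_PAGE_MASK)
     * equals (page offset - page size); its negation is the number of
     * bytes left on the page, and dividing by 4 yields whole insns.
     */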
14312     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
14313 
14314     /* If architectural single step active, limit to 1.  */
14315     if (dc->ss_active) {
14316         bound = 1;
14317     }
14318     dc->base.max_insns = MIN(dc->base.max_insns, bound);
14319 }
14320 
14321 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
14322 {
14323 }
14324 
14325 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
14326 {
14327     DisasContext *dc = container_of(dcbase, DisasContext, base);
14328     target_ulong pc_arg = dc->base.pc_next;
14329 
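    /*
     * With CF_PCREL the translated code may run at different virtual
     * addresses (with the same page offset), so record only the
     * within-page offset here.
     */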
14330     if (tb_cflags(dcbase->tb) & CF_PCREL) {
14331         pc_arg &= ~TARGET_PAGE_MASK;
14332     }
14333     tcg_gen_insn_start(pc_arg, 0, 0);
14334     dc->insn_start = tcg_last_op();
14335 }
14336 
14337 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
14338 {
14339     DisasContext *s = container_of(dcbase, DisasContext, base);
14340     CPUARMState *env = cpu->env_ptr;
14341     uint64_t pc = s->base.pc_next;
14342     uint32_t insn;
14343 
14344     /* Singlestep exceptions have the highest priority. */
14345     if (s->ss_active && !s->pstate_ss) {
14346         /* Singlestep state is Active-pending.
14347          * If we're in this state at the start of a TB then either
14348          *  a) we just took an exception to an EL which is being debugged
14349          *     and this is the first insn in the exception handler
14350          *  b) debug exceptions were masked and we just unmasked them
14351          *     without changing EL (eg by clearing PSTATE.D)
14352          * In either case we're going to take a swstep exception in the
14353          * "did not step an insn" case, and so the syndrome ISV and EX
14354          * bits should be zero.
14355          */
14356         assert(s->base.num_insns == 1);
14357         gen_swstep_exception(s, 0, 0);
14358         s->base.is_jmp = DISAS_NORETURN;
14359         s->base.pc_next = pc + 4;
14360         return;
14361     }
14362 
14363     if (pc & 3) {
14364         /*
14365          * PC alignment fault.  This has priority over the instruction abort
14366          * that we would receive from a translation fault via arm_ldl_code.
14367          * This should only be possible after an indirect branch, at the
14368          * start of the TB.
14369          */
14370         assert(s->base.num_insns == 1);
14371         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
14372         s->base.is_jmp = DISAS_NORETURN;
14373         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
14374         return;
14375     }
14376 
14377     s->pc_curr = pc;
14378     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
14379     s->insn = insn;
14380     s->base.pc_next = pc + 4;
14381 
14382     s->fp_access_checked = false;
14383     s->sve_access_checked = false;
14384 
14385     if (s->pstate_il) {
14386         /*
14387          * Illegal execution state. This has priority over BTI
14388          * exceptions, but comes after instruction abort exceptions.
14389          */
14390         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
14391         return;
14392     }
14393 
14394     if (dc_isar_feature(aa64_bti, s)) {
14395         if (s->base.num_insns == 1) {
14396             /*
14397              * At the first insn of the TB, compute s->guarded_page.
14398              * We delayed computing this until successfully reading
14399              * the first insn of the TB, above.  This (mostly) ensures
14400              * that the softmmu tlb entry has been populated, and the
14401              * page table GP bit is available.
14402              *
14403              * Note that we need to compute this even if btype == 0,
14404              * because this value is used for BR instructions later
14405              * where ENV is not available.
14406              */
14407             s->guarded_page = is_guarded_page(env, s);
14408 
14409             /* First insn can have btype set to non-zero.  */
14410             tcg_debug_assert(s->btype >= 0);
14411 
14412             /*
14413              * Note that the Branch Target Exception has fairly high
14414              * priority -- below debugging exceptions but above almost
14415              * everything else.  This allows us to handle it now
14416              * instead of waiting until the insn is otherwise decoded.
14417              */
14418             if (s->btype != 0
14419                 && s->guarded_page
14420                 && !btype_destination_ok(insn, s->bt, s->btype)) {
14421                 gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
14422                 return;
14423             }
14424         } else {
14425             /* Not the first insn: btype must be 0.  */
14426             tcg_debug_assert(s->btype == 0);
14427         }
14428     }
14429 
14430     s->is_nonstreaming = false;
14431     if (s->sme_trap_nonstreaming) {
14432         disas_sme_fa64(s, insn);
14433     }
14434 
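    /*
     * Try the decodetree-generated A64, SME and SVE decoders first;
     * insns not yet converted fall through to the legacy hand-written
     * decoder.
     */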
14435     if (!disas_a64(s, insn) &&
14436         !disas_sme(s, insn) &&
14437         !disas_sve(s, insn)) {
14438         disas_a64_legacy(s, insn);
14439     }
14440 
14441     /*
14442      * After execution of most insns, btype is reset to 0.
14443      * Note that we set btype == -1 when the insn sets btype.
14444      */
14445     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
14446         reset_btype(s);
14447     }
14448 }
14449 
14450 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
14451 {
14452     DisasContext *dc = container_of(dcbase, DisasContext, base);
14453 
14454     if (unlikely(dc->ss_active)) {
14455         /* Note that this means single-stepping a WFI doesn't halt the CPU.
14456          * For conditional branch insns this is harmless unreachable code, as
14457          * gen_goto_tb() has already handled emitting the debug exception
14458          * (and thus a tb-jump is not possible when singlestepping).
14459          */
14460         switch (dc->base.is_jmp) {
14461         default:
14462             gen_a64_update_pc(dc, 4);
14463             /* fall through */
14464         case DISAS_EXIT:
14465         case DISAS_JUMP:
14466             gen_step_complete_exception(dc);
14467             break;
14468         case DISAS_NORETURN:
14469             break;
14470         }
14471     } else {
14472         switch (dc->base.is_jmp) {
14473         case DISAS_NEXT:
14474         case DISAS_TOO_MANY:
14475             gen_goto_tb(dc, 1, 4);
14476             break;
14477         default:
14478         case DISAS_UPDATE_EXIT:
14479             gen_a64_update_pc(dc, 4);
14480             /* fall through */
14481         case DISAS_EXIT:
14482             tcg_gen_exit_tb(NULL, 0);
14483             break;
14484         case DISAS_UPDATE_NOCHAIN:
14485             gen_a64_update_pc(dc, 4);
14486             /* fall through */
14487         case DISAS_JUMP:
14488             tcg_gen_lookup_and_goto_ptr();
14489             break;
14490         case DISAS_NORETURN:
14491         case DISAS_SWI:
14492             break;
14493         case DISAS_WFE:
14494             gen_a64_update_pc(dc, 4);
14495             gen_helper_wfe(cpu_env);
14496             break;
14497         case DISAS_YIELD:
14498             gen_a64_update_pc(dc, 4);
14499             gen_helper_yield(cpu_env);
14500             break;
14501         case DISAS_WFI:
14502             /*
14503              * This is a special case because we don't want to just halt
14504              * the CPU if we're trying to debug across a WFI.
14505              */
14506             gen_a64_update_pc(dc, 4);
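            /* The constant 4 is the insn length, for the syndrome if
             * the WFI should trap.
             */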
14507             gen_helper_wfi(cpu_env, tcg_constant_i32(4));
14508             /*
14509              * The helper doesn't necessarily throw an exception, but we
14510              * must go back to the main loop to check for interrupts anyway.
14511              */
14512             tcg_gen_exit_tb(NULL, 0);
14513             break;
14514         }
14515     }
14516 }
14517 
14518 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
14519                                  CPUState *cpu, FILE *logfile)
14520 {
14521     DisasContext *dc = container_of(dcbase, DisasContext, base);
14522 
14523     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
14524     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
14525 }
14526 
14527 const TranslatorOps aarch64_translator_ops = {
14528     .init_disas_context = aarch64_tr_init_disas_context,
14529     .tb_start           = aarch64_tr_tb_start,
14530     .insn_start         = aarch64_tr_insn_start,
14531     .translate_insn     = aarch64_tr_translate_insn,
14532     .tb_stop            = aarch64_tr_tb_stop,
14533     .disas_log          = aarch64_tr_disas_log,
14534 };
14535