/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
#include "disas/disas.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"
#include "cpregs.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* Table-based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (e.g. SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
 */
static int get_a64_user_mem_index(DisasContext *s)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), cpu_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}
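
/*
 * Worked example for the CF_PCREL path above (illustrative values, not
 * taken from the source): at translation time only the difference
 * between addresses is known.  If the last gen_a64_update_pc() left
 * pc_save == 0x1000 and we are now at pc_curr == 0x100c with diff == 8,
 * the addi adds (0x100c - 0x1000) + 8 = 0x14 to cpu_pc, which by
 * construction holds pc_save at runtime, giving dest == 0x1014.
 */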

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}
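
/*
 * Illustrative walk-through of the two-range case above (assumed
 * values): with tbi == 1 (TBI0 set, TBI1 clear), the sextract leaves
 * bits [63:56] of dst equal to bit 55 of src.  For an address with
 * bit 55 == 0 the AND with src keeps that zeroed top byte, i.e. the
 * tag is ignored; for bit 55 == 1 the AND restores the original top
 * byte, so the tag is preserved, as TBI1 is clear for that range.
 */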

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register.  But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}
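
/*
 * For illustration: MAKE_64BIT_MASK(56, 4) is 0x0f00000000000000, so
 * the andi above clears bits [59:56], the allocation-tag nibble of an
 * MTE-tagged address, while leaving bits [63:60] of the top byte and
 * the address bits [55:0] untouched.
 */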

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(cpu_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access.  This probes a single
 * address, the exact one specified.  The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      int log2_size, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int log2_size)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int size)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}
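
/*
 * Why the sign-extension matters (illustrative): for GE, arm_test_cc
 * produces a signed >= 0 test on a 32-bit value.  tcg_gen_ext_i32_i64
 * replicates bit 31 into the high half, so the same signed comparison
 * on the 64-bit copy gives an identical answer; a zero-extension would
 * make every widened value non-negative and break the GE/LT cases.
 */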

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(cpu_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments.  For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path.  A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In the instruction encoding, register 31 can refer to ZR (zero
 * register) or the SP (stack pointer) depending on context. In QEMU's
 * case we map SP to cpu_X[31] and ZR accesses to a temporary which can
 * be discarded. This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (i.e.
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}
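
/*
 * Design note: a gvec operation with oprsz < maxsz zeroes the bytes
 * between oprsz and maxsz, so the self-move above is a no-op on the
 * live 64 or 128 bits but clears everything beyond them, including any
 * SVE high bits when vec_full_reg_size() is larger than 16 bytes.
 */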

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, cpu_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper.  */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + qc operation using an out-of-line helper.  */
static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
                            int rm, gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), qc_ptr,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
}

/* Expand a 4-operand operation using an out-of-line helper.  */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
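
/*
 * Sketch of the trick above: extr splits the 64-bit result into
 * ZF = bits [31:0] and NF = bits [63:32], so bit 31 of NF is the
 * 64-bit sign bit as required; after ZF |= NF, ZF is zero exactly
 * when both halves, and hence the whole result, are zero.
 */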

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 result, flag, tmp;
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    tcg_gen_movi_i64(tmp, 0);
    tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    gen_set_NZ64(result);

    tcg_gen_xor_i64(flag, result, t0);
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_andc_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);

    tcg_gen_mov_i64(dest, result);
}
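
/*
 * The V computation above is the usual formula
 *   V = (result ^ t0) & ~(t0 ^ t1)
 * i.e. signed overflow iff the operands have the same sign and the
 * result's sign differs; extrh then moves bit 63 into bit 31 of
 * cpu_VF.  Illustrative case: t0 = t1 = 0x4000000000000000 yields
 * result 0x8000000000000000 with V set and C (the add2 carry) clear.
 */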

static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp = tcg_temp_new_i32();

    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_add64_CC(dest, t0, t1);
    } else {
        gen_add32_CC(dest, t0, t1);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 64 bit arithmetic */
    TCGv_i64 result, flag, tmp;

    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tcg_gen_sub_i64(result, t0, t1);

    gen_set_NZ64(result);

    tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    tcg_gen_xor_i64(flag, result, t0);
    tmp = tcg_temp_new_i64();
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_and_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);
    tcg_gen_mov_i64(dest, result);
}

static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 32 bit arithmetic */
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp;

    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_sub64_CC(dest, t0, t1);
    } else {
        gen_sub32_CC(dest, t0, t1);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    MemOp mop = finalize_memop_asimd(s, size);

    tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));

    if (size < MO_128) {
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i64 tmphi = tcg_temp_new_i64();
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);

        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;
    MemOp mop = finalize_memop_asimd(s, size);

    if (size < MO_128) {
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);

        tmphi = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (e.g. for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = true;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        assert(dc_isar_feature(aa64_sme, s));
        if (!sme_sm_enabled_check(s)) {
            goto fail_exit;
        }
    } else if (s->sve_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        goto fail_exit;
    }
    s->sve_access_checked = true;
    return fp_access_check(s);

 fail_exit:
    /* Assert that we only raise one exception per instruction. */
    assert(!s->sve_access_checked);
    s->sve_access_checked = true;
    return false;
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority.  This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        s->fp_access_checked = true;
        return sme_access_check(s);
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}
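
/*
 * Illustrative decode (assumed operand values): for an extended
 * register form such as LDR x0, [x1, w2, UXTW #2], option is 0b010
 * (extsize 2, unsigned) and shift is 2, so the code above performs
 * tcg_gen_ext32u_i64 followed by a left shift by 2.  Option 0b111
 * (SXTX) degenerates to a plain move plus the optional shift.
 */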

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is intended
 * to be used when the relevant bits for decode are too awkwardly
 * placed and switch/if based logic would be confusing and deeply
 * nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0).
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}
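
/*
 * Minimal usage sketch (hypothetical table and handler names, not ones
 * defined in this file):
 *
 *     static const AArch64DecodeTable example_table[] = {
 *         { 0x0e200400, 0x9f200c00, disas_example_group },
 *         { 0x00000000, 0x00000000, NULL }   (empty mask terminates)
 *     };
 *     AArch64DecodeFn *fn = lookup_disas_fn(example_table, insn);
 *     if (fn) {
 *         fn(s, insn);
 *     }
 */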

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

static bool trans_B(DisasContext *s, arg_i *a)
{
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_BL(DisasContext *s, arg_i *a)
{
    gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_CBZ(DisasContext *s, arg_cbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_TBZ(DisasContext *s, arg_tbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
{
    reset_btype(s);
    if (a->cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(a->cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, a->imm);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, a->imm);
    }
    return true;
}

static void set_btype_for_br(DisasContext *s, int rn)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BR to {x16,x17} or !guard -> 1, else 3.  */
        set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
    }
}

static void set_btype_for_blr(DisasContext *s)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BLR sets BTYPE to 2, regardless of source guarded page.  */
        set_btype(s, 2);
    }
}

static bool trans_BR(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    set_btype_for_br(s, a->rn);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLR(DisasContext *s, arg_r *a)
{
    TCGv_i64 dst = cpu_reg(s, a->rn);
    TCGv_i64 lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RET(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
                                   TCGv_i64 modifier, bool use_key_a)
{
    TCGv_i64 truedst;
    /*
     * Return the branch target for a BRAA/RETA/etc, which is either
     * just the destination dst, or that value with the pauth check
     * done and the code removed from the high bits.
     */
    if (!s->pauth_active) {
        return dst;
    }

    truedst = tcg_temp_new_i64();
    if (use_key_a) {
        gen_helper_autia(truedst, cpu_env, dst, modifier);
    } else {
        gen_helper_autib(truedst, cpu_env, dst, modifier);
    }
    return truedst;
}

static bool trans_BRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    gen_a64_set_pc(s, dst);
    set_btype_for_br(s, a->rn);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst, lr;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
    gen_a64_set_pc(s, dst);
    set_btype_for_br(s, a->rn);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst, lr;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
    lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_ERET(DisasContext *s, arg_ERET *a)
{
    TCGv_i64 dst;

    if (s->current_el == 0) {
        return false;
    }
    if (s->fgt_eret) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, 0, 2);
        return true;
    }
    dst = tcg_temp_new_i64();
    tcg_gen_ld_i64(dst, cpu_env,
                   offsetof(CPUARMState, elr_el[s->current_el]));

    translator_io_start(&s->base);

    gen_helper_exception_return(cpu_env, dst);
    /* Must exit loop to check un-masked IRQs */
    s->base.is_jmp = DISAS_EXIT;
    return true;
}

static bool trans_ERETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    if (s->current_el == 0) {
        return false;
    }
    /* The FGT trap takes precedence over an auth trap. */
    if (s->fgt_eret) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, a->m ? 3 : 2, 2);
        return true;
    }
    dst = tcg_temp_new_i64();
    tcg_gen_ld_i64(dst, cpu_env,
                   offsetof(CPUARMState, elr_el[s->current_el]));

    dst = auth_branch_target(s, dst, cpu_X[31], !a->m);

    translator_io_start(&s->base);

    gen_helper_exception_return(cpu_env, dst);
    /* Must exit loop to check un-masked IRQs */
    s->base.is_jmp = DISAS_EXIT;
    return true;
}

/* HINT instruction group, including various allocated HINTs */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

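    /*
     * Example encodings, per the selector = crm << 3 | op2 construction
     * above: NOP has crm 0b0000 and op2 0b000, giving selector 0b00000,
     * while PACIASP has crm 0b0011 and op2 0b001, giving 0b11001.
     */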
1582     switch (selector) {
1583     case 0b00000: /* NOP */
1584         break;
1585     case 0b00011: /* WFI */
1586         s->base.is_jmp = DISAS_WFI;
1587         break;
1588     case 0b00001: /* YIELD */
1589         /* When running in MTTCG we don't generate jumps to the yield and
1590          * WFE helpers as it won't affect the scheduling of other vCPUs.
1591          * If we wanted to more completely model WFE/SEV so we don't busy
1592          * spin unnecessarily we would need to do something more involved.
1593          */
1594         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1595             s->base.is_jmp = DISAS_YIELD;
1596         }
1597         break;
1598     case 0b00010: /* WFE */
1599         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1600             s->base.is_jmp = DISAS_WFE;
1601         }
1602         break;
1603     case 0b00100: /* SEV */
1604     case 0b00101: /* SEVL */
1605     case 0b00110: /* DGH */
1606         /* we treat all as NOP at least for now */
1607         break;
1608     case 0b00111: /* XPACLRI */
1609         if (s->pauth_active) {
1610             gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
1611         }
1612         break;
1613     case 0b01000: /* PACIA1716 */
1614         if (s->pauth_active) {
1615             gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1616         }
1617         break;
1618     case 0b01010: /* PACIB1716 */
1619         if (s->pauth_active) {
1620             gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1621         }
1622         break;
1623     case 0b01100: /* AUTIA1716 */
1624         if (s->pauth_active) {
1625             gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1626         }
1627         break;
1628     case 0b01110: /* AUTIB1716 */
1629         if (s->pauth_active) {
1630             gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1631         }
1632         break;
1633     case 0b10000: /* ESB */
1634         /* Without RAS, we must implement this as NOP. */
1635         if (dc_isar_feature(aa64_ras, s)) {
1636             /*
1637              * QEMU does not have a source of physical SErrors,
1638              * so we are only concerned with virtual SErrors.
1639              * The pseudocode in the ARM ARM for this case is
1640              *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
1641              *      AArch64.vESBOperation();
1642              * Most of the condition can be evaluated at translation time.
1643              * Test for EL2 present, and defer test for SEL2 to runtime.
1644              */
1645             if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
1646                 gen_helper_vesb(cpu_env);
1647             }
1648         }
1649         break;
1650     case 0b11000: /* PACIAZ */
1651         if (s->pauth_active) {
1652             gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
1653                              tcg_constant_i64(0));
1654         }
1655         break;
1656     case 0b11001: /* PACIASP */
1657         if (s->pauth_active) {
1658             gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1659         }
1660         break;
1661     case 0b11010: /* PACIBZ */
1662         if (s->pauth_active) {
1663             gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
1664                              tcg_constant_i64(0));
1665         }
1666         break;
1667     case 0b11011: /* PACIBSP */
1668         if (s->pauth_active) {
1669             gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1670         }
1671         break;
1672     case 0b11100: /* AUTIAZ */
1673         if (s->pauth_active) {
1674             gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
1675                              tcg_constant_i64(0));
1676         }
1677         break;
1678     case 0b11101: /* AUTIASP */
1679         if (s->pauth_active) {
1680             gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1681         }
1682         break;
1683     case 0b11110: /* AUTIBZ */
1684         if (s->pauth_active) {
1685             gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
1686                              tcg_constant_i64(0));
1687         }
1688         break;
1689     case 0b11111: /* AUTIBSP */
1690         if (s->pauth_active) {
1691             gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1692         }
1693         break;
1694     default:
1695         /* default specified as NOP equivalent */
1696         break;
1697     }
1698 }
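/*
 * Worked example of the selector encoding above: the HINT immediate is
 * CRm:op2, i.e. selector == (crm << 3) | op2.  NOP (HINT #0) has
 * crm == 0, op2 == 0 -> 0b00000, while PACIASP (HINT #25) has crm == 3,
 * op2 == 1 -> 0b11001, landing on the gen_helper_pacia(..., cpu_X[31])
 * case.  Unallocated selectors deliberately fall through to the NOP
 * default, since the architecture requires unallocated hints to execute
 * as NOPs.
 */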
1699 
1700 static void gen_clrex(DisasContext *s, uint32_t insn)
1701 {
1702     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1703 }
1704 
1705 /* CLREX, DSB, DMB, ISB */
1706 static void handle_sync(DisasContext *s, uint32_t insn,
1707                         unsigned int op1, unsigned int op2, unsigned int crm)
1708 {
1709     TCGBar bar;
1710 
1711     if (op1 != 3) {
1712         unallocated_encoding(s);
1713         return;
1714     }
1715 
1716     switch (op2) {
1717     case 2: /* CLREX */
1718         gen_clrex(s, insn);
1719         return;
1720     case 4: /* DSB */
1721     case 5: /* DMB */
1722         switch (crm & 3) {
1723         case 1: /* MBReqTypes_Reads */
1724             bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1725             break;
1726         case 2: /* MBReqTypes_Writes */
1727             bar = TCG_BAR_SC | TCG_MO_ST_ST;
1728             break;
1729         default: /* MBReqTypes_All */
1730             bar = TCG_BAR_SC | TCG_MO_ALL;
1731             break;
1732         }
1733         tcg_gen_mb(bar);
1734         return;
1735     case 6: /* ISB */
1736         /* We need to break the TB after this insn so that
1737          * self-modifying code is executed correctly and any
1738          * pending interrupts are taken immediately.
1739          */
1740         reset_btype(s);
1741         gen_goto_tb(s, 0, 4);
1742         return;
1743 
1744     case 7: /* SB */
1745         if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
1746             goto do_unallocated;
1747         }
1748         /*
1749          * TODO: There is no speculation barrier opcode for TCG;
1750          * MB and end the TB instead.
1751          */
1752         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1753         gen_goto_tb(s, 0, 4);
1754         return;
1755 
1756     default:
1757     do_unallocated:
1758         unallocated_encoding(s);
1759         return;
1760     }
1761 }
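/*
 * As an illustration of the barrier mapping above: the DMB/DSB option is
 * CRm, whose low two bits select the access types.  DMB ISHLD
 * (CRm == 0b1001) gives crm & 3 == 1 -> MBReqTypes_Reads, a load-load
 * plus load-store barrier; DMB ISHST (CRm == 0b1010) gives crm & 3 == 2
 * -> MBReqTypes_Writes, a store-store barrier; DMB ISH (CRm == 0b1011)
 * gives 3 -> MBReqTypes_All.  The shareability domain in CRm<3:2> is
 * ignored here, since all TCG barriers act system-wide.
 */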
1762 
1763 static void gen_xaflag(void)
1764 {
1765     TCGv_i32 z = tcg_temp_new_i32();
1766 
1767     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1768 
1769     /*
1770      * (!C & !Z) << 31
1771      * (!(C | Z)) << 31
1772      * ~((C | Z) << 31)
1773      * ~-(C | Z)
1774      * (C | Z) - 1
1775      */
1776     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1777     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1778 
1779     /* !(Z & C) */
1780     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1781     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1782 
1783     /* (!C & Z) << 31 -> -(Z & ~C) */
1784     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1785     tcg_gen_neg_i32(cpu_VF, cpu_VF);
1786 
1787     /* C | Z */
1788     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1789 }
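/*
 * A quick check of the identity used above, over the four 1-bit (C, Z)
 * combinations: (C | Z) - 1 computed in 32 bits is 0xffffffff (sign bit
 * set, so N == 1) only when C == 0 and Z == 0, and 0 otherwise -- which
 * matches (!C & !Z) << 31 in the sign bit, the only bit of NF that the
 * N flag reads.
 */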
1790 
1791 static void gen_axflag(void)
1792 {
1793     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
1794     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
1795 
1796     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1797     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1798 
1799     tcg_gen_movi_i32(cpu_NF, 0);
1800     tcg_gen_movi_i32(cpu_VF, 0);
1801 }
1802 
1803 /* MSR (immediate) - move immediate to processor state field */
1804 static void handle_msr_i(DisasContext *s, uint32_t insn,
1805                          unsigned int op1, unsigned int op2, unsigned int crm)
1806 {
1807     int op = op1 << 3 | op2;
1808 
1809     /* End the TB by default, chaining is ok.  */
1810     s->base.is_jmp = DISAS_TOO_MANY;
1811 
1812     switch (op) {
1813     case 0x00: /* CFINV */
1814         if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
1815             goto do_unallocated;
1816         }
1817         tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1818         s->base.is_jmp = DISAS_NEXT;
1819         break;
1820 
1821     case 0x01: /* XAFlag */
1822         if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1823             goto do_unallocated;
1824         }
1825         gen_xaflag();
1826         s->base.is_jmp = DISAS_NEXT;
1827         break;
1828 
1829     case 0x02: /* AXFlag */
1830         if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1831             goto do_unallocated;
1832         }
1833         gen_axflag();
1834         s->base.is_jmp = DISAS_NEXT;
1835         break;
1836 
1837     case 0x03: /* UAO */
1838         if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
1839             goto do_unallocated;
1840         }
1841         if (crm & 1) {
1842             set_pstate_bits(PSTATE_UAO);
1843         } else {
1844             clear_pstate_bits(PSTATE_UAO);
1845         }
1846         gen_rebuild_hflags(s);
1847         break;
1848 
1849     case 0x04: /* PAN */
1850         if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
1851             goto do_unallocated;
1852         }
1853         if (crm & 1) {
1854             set_pstate_bits(PSTATE_PAN);
1855         } else {
1856             clear_pstate_bits(PSTATE_PAN);
1857         }
1858         gen_rebuild_hflags(s);
1859         break;
1860 
1861     case 0x05: /* SPSel */
1862         if (s->current_el == 0) {
1863             goto do_unallocated;
1864         }
1865         gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(crm & PSTATE_SP));
1866         break;
1867 
1868     case 0x19: /* SSBS */
1869         if (!dc_isar_feature(aa64_ssbs, s)) {
1870             goto do_unallocated;
1871         }
1872         if (crm & 1) {
1873             set_pstate_bits(PSTATE_SSBS);
1874         } else {
1875             clear_pstate_bits(PSTATE_SSBS);
1876         }
1877         /* Don't need to rebuild hflags since SSBS is a nop */
1878         break;
1879 
1880     case 0x1a: /* DIT */
1881         if (!dc_isar_feature(aa64_dit, s)) {
1882             goto do_unallocated;
1883         }
1884         if (crm & 1) {
1885             set_pstate_bits(PSTATE_DIT);
1886         } else {
1887             clear_pstate_bits(PSTATE_DIT);
1888         }
1889         /* There's no need to rebuild hflags because DIT is a nop */
1890         break;
1891 
1892     case 0x1e: /* DAIFSet */
1893         gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(crm));
1894         break;
1895 
1896     case 0x1f: /* DAIFClear */
1897         gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(crm));
1898         /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
1899         s->base.is_jmp = DISAS_UPDATE_EXIT;
1900         break;
1901 
1902     case 0x1c: /* TCO */
1903         if (dc_isar_feature(aa64_mte, s)) {
1904             /* Full MTE is enabled -- set the TCO bit as directed. */
1905             if (crm & 1) {
1906                 set_pstate_bits(PSTATE_TCO);
1907             } else {
1908                 clear_pstate_bits(PSTATE_TCO);
1909             }
1910             gen_rebuild_hflags(s);
1911             /* Many factors, including TCO, go into MTE_ACTIVE. */
1912             s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
1913         } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
1914             /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
1915             s->base.is_jmp = DISAS_NEXT;
1916         } else {
1917             goto do_unallocated;
1918         }
1919         break;
1920 
1921     case 0x1b: /* SVCR* */
1922         if (!dc_isar_feature(aa64_sme, s) || crm < 2 || crm > 7) {
1923             goto do_unallocated;
1924         }
1925         if (sme_access_check(s)) {
1926             int old = s->pstate_sm | (s->pstate_za << 1);
1927             int new = (crm & 1) * 3;
1928             int msk = (crm >> 1) & 3;
1929 
1930             if ((old ^ new) & msk) {
1931                 /* At least one bit changes. */
1932                 gen_helper_set_svcr(cpu_env, tcg_constant_i32(new),
1933                                     tcg_constant_i32(msk));
1934             } else {
1935                 s->base.is_jmp = DISAS_NEXT;
1936             }
1937         }
1938         break;
1939 
1940     default:
1941     do_unallocated:
1942         unallocated_encoding(s);
1943         return;
1944     }
1945 }
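/*
 * Worked example for the SVCR case above: MSR SVCRSM, #1 (SMSTART SM)
 * has CRm == 0b0011, so msk == 1 (SM only) and new == 3; only the SM
 * bit is compared and set.  MSR SVCRSMZA, #0 (SMSTOP) has CRm == 0b0110,
 * so msk == 3 and new == 0, clearing both SM and ZA.  When no selected
 * bit actually changes, the helper call is skipped and the TB continues
 * with DISAS_NEXT.
 */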
1946 
1947 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1948 {
1949     TCGv_i32 tmp = tcg_temp_new_i32();
1950     TCGv_i32 nzcv = tcg_temp_new_i32();
1951 
1952     /* build bit 31, N */
1953     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1954     /* build bit 30, Z */
1955     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1956     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1957     /* build bit 29, C */
1958     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1959     /* build bit 28, V */
1960     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1961     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1962     /* generate result */
1963     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1964 }
1965 
1966 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1967 {
1968     TCGv_i32 nzcv = tcg_temp_new_i32();
1969 
1970     /* take NZCV from R[t] */
1971     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1972 
1973     /* bit 31, N */
1974     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1975     /* bit 30, Z */
1976     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1977     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1978     /* bit 29, C */
1979     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1980     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1981     /* bit 28, V */
1982     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1983     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1984 }
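/*
 * Example of the flag representation used by the two functions above:
 * QEMU keeps N in cpu_NF bit 31, Z as "cpu_ZF == 0", C in cpu_CF bit 0,
 * and V in cpu_VF bit 31.  Writing 0x60000000 (Z and C set) via
 * gen_set_nzcv() therefore yields cpu_ZF == 0 and cpu_CF == 1, and
 * clears bit 31 of both cpu_NF and cpu_VF; gen_get_nzcv() reassembles
 * the same 0x60000000 from those variables.
 */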
1985 
1986 static void gen_sysreg_undef(DisasContext *s, bool isread,
1987                              uint8_t op0, uint8_t op1, uint8_t op2,
1988                              uint8_t crn, uint8_t crm, uint8_t rt)
1989 {
1990     /*
1991      * Generate code to emit an UNDEF with correct syndrome
1992      * information for a failed system register access.
1993      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
1994      * but if FEAT_IDST is implemented then read accesses to registers
1995      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
1996      * syndrome.
1997      */
1998     uint32_t syndrome;
1999 
2000     if (isread && dc_isar_feature(aa64_ids, s) &&
2001         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2002         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2003     } else {
2004         syndrome = syn_uncategorized();
2005     }
2006     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2007 }
2008 
2009 /* MRS - move from system register
2010  * MSR (register) - move to system register
2011  * SYS
2012  * SYSL
2013  * These are all essentially the same insn in 'read' and 'write'
2014  * versions, with varying op0 fields.
2015  */
2016 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
2017                        unsigned int op0, unsigned int op1, unsigned int op2,
2018                        unsigned int crn, unsigned int crm, unsigned int rt)
2019 {
2020     uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2021                                       crn, crm, op0, op1, op2);
2022     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2023     bool need_exit_tb = false;
2024     TCGv_ptr tcg_ri = NULL;
2025     TCGv_i64 tcg_rt;
2026 
2027     if (!ri) {
2028         /* Unknown register; this might be a guest error or a QEMU
2029          * unimplemented feature.
2030          */
2031         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2032                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2033                       isread ? "read" : "write", op0, op1, crn, crm, op2);
2034         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2035         return;
2036     }
2037 
2038     /* Check access permissions */
2039     if (!cp_access_ok(s->current_el, ri, isread)) {
2040         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2041         return;
2042     }
2043 
2044     if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2045         /* Emit code to perform further access permissions checks at
2046          * runtime; this may result in an exception.
2047          */
2048         uint32_t syndrome;
2049 
2050         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2051         gen_a64_update_pc(s, 0);
2052         tcg_ri = tcg_temp_new_ptr();
2053         gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
2054                                        tcg_constant_i32(key),
2055                                        tcg_constant_i32(syndrome),
2056                                        tcg_constant_i32(isread));
2057     } else if (ri->type & ARM_CP_RAISES_EXC) {
2058         /*
2059          * The readfn or writefn might raise an exception;
2060          * synchronize the CPU state in case it does.
2061          */
2062         gen_a64_update_pc(s, 0);
2063     }
2064 
2065     /* Handle special cases first */
2066     switch (ri->type & ARM_CP_SPECIAL_MASK) {
2067     case 0:
2068         break;
2069     case ARM_CP_NOP:
2070         return;
2071     case ARM_CP_NZCV:
2072         tcg_rt = cpu_reg(s, rt);
2073         if (isread) {
2074             gen_get_nzcv(tcg_rt);
2075         } else {
2076             gen_set_nzcv(tcg_rt);
2077         }
2078         return;
2079     case ARM_CP_CURRENTEL:
2080         /* Reads as the current EL value from pstate, which is
2081          * guaranteed to be constant by the TB flags.
2082          */
2083         tcg_rt = cpu_reg(s, rt);
2084         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
2085         return;
2086     case ARM_CP_DC_ZVA:
2087         /* Writes clear the aligned block of memory which rt points into. */
2088         if (s->mte_active[0]) {
2089             int desc = 0;
2090 
2091             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2092             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2093             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2094 
2095             tcg_rt = tcg_temp_new_i64();
2096             gen_helper_mte_check_zva(tcg_rt, cpu_env,
2097                                      tcg_constant_i32(desc), cpu_reg(s, rt));
2098         } else {
2099             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2100         }
2101         gen_helper_dc_zva(cpu_env, tcg_rt);
2102         return;
2103     case ARM_CP_DC_GVA:
2104         {
2105             TCGv_i64 clean_addr, tag;
2106 
2107             /*
2108              * DC_GVA, like DC_ZVA, requires that we supply the original
2109              * pointer for an invalid page.  Probe that address first.
2110              */
2111             tcg_rt = cpu_reg(s, rt);
2112             clean_addr = clean_data_tbi(s, tcg_rt);
2113             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2114 
2115             if (s->ata) {
2116                 /* Extract the tag from the register to match STZGM.  */
2117                 tag = tcg_temp_new_i64();
2118                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2119                 gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
2120             }
2121         }
2122         return;
2123     case ARM_CP_DC_GZVA:
2124         {
2125             TCGv_i64 clean_addr, tag;
2126 
2127             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2128             tcg_rt = cpu_reg(s, rt);
2129             clean_addr = clean_data_tbi(s, tcg_rt);
2130             gen_helper_dc_zva(cpu_env, clean_addr);
2131 
2132             if (s->ata) {
2133                 /* Extract the tag from the register to match STZGM.  */
2134                 tag = tcg_temp_new_i64();
2135                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2136                 gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
2137             }
2138         }
2139         return;
2140     default:
2141         g_assert_not_reached();
2142     }
2143     if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2144         return;
2145     } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2146         return;
2147     } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2148         return;
2149     }
2150 
2151     if (ri->type & ARM_CP_IO) {
2152         /* I/O operations must end the TB here (whether read or write) */
2153         need_exit_tb = translator_io_start(&s->base);
2154     }
2155 
2156     tcg_rt = cpu_reg(s, rt);
2157 
2158     if (isread) {
2159         if (ri->type & ARM_CP_CONST) {
2160             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2161         } else if (ri->readfn) {
2162             if (!tcg_ri) {
2163                 tcg_ri = gen_lookup_cp_reg(key);
2164             }
2165             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tcg_ri);
2166         } else {
2167             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
2168         }
2169     } else {
2170         if (ri->type & ARM_CP_CONST) {
2171             /* If not forbidden by access permissions, treat as WI */
2172             return;
2173         } else if (ri->writefn) {
2174             if (!tcg_ri) {
2175                 tcg_ri = gen_lookup_cp_reg(key);
2176             }
2177             gen_helper_set_cp_reg64(cpu_env, tcg_ri, tcg_rt);
2178         } else {
2179             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
2180         }
2181     }
2182 
2183     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2184         /*
2185          * A write to any coprocessor register that ends a TB
2186          * must rebuild the hflags for the next TB.
2187          */
2188         gen_rebuild_hflags(s);
2189         /*
2190          * We default to ending the TB on a coprocessor register write,
2191          * but allow this to be suppressed by the register definition
2192          * (usually only necessary to work around guest bugs).
2193          */
2194         need_exit_tb = true;
2195     }
2196     if (need_exit_tb) {
2197         s->base.is_jmp = DISAS_UPDATE_EXIT;
2198     }
2199 }
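/*
 * Concrete example of the flow above: MRS x0, CNTVCT_EL0 encodes as
 * op0 == 3, op1 == 3, CRn == 14, CRm == 0, op2 == 2.  The reginfo for
 * that key is marked ARM_CP_IO (it reads the virtual counter), so
 * translator_io_start() is called and, when icount is active, the TB
 * ends with DISAS_UPDATE_EXIT after the read.
 */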
2200 
2201 /* System
2202  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
2203  * +---------------------+---+-----+-----+-------+-------+-----+------+
2204  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
2205  * +---------------------+---+-----+-----+-------+-------+-----+------+
2206  */
2207 static void disas_system(DisasContext *s, uint32_t insn)
2208 {
2209     unsigned int l, op0, op1, crn, crm, op2, rt;
2210     l = extract32(insn, 21, 1);
2211     op0 = extract32(insn, 19, 2);
2212     op1 = extract32(insn, 16, 3);
2213     crn = extract32(insn, 12, 4);
2214     crm = extract32(insn, 8, 4);
2215     op2 = extract32(insn, 5, 3);
2216     rt = extract32(insn, 0, 5);
2217 
2218     if (op0 == 0) {
2219         if (l || rt != 31) {
2220             unallocated_encoding(s);
2221             return;
2222         }
2223         switch (crn) {
2224         case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
2225             handle_hint(s, insn, op1, op2, crm);
2226             break;
2227         case 3: /* CLREX, DSB, DMB, ISB */
2228             handle_sync(s, insn, op1, op2, crm);
2229             break;
2230         case 4: /* MSR (immediate) */
2231             handle_msr_i(s, insn, op1, op2, crm);
2232             break;
2233         default:
2234             unallocated_encoding(s);
2235             break;
2236         }
2237         return;
2238     }
2239     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
2240 }
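/*
 * Worked decode of one system insn word: NOP is 0xd503201f, so L == 0,
 * op0 == 0, op1 == 3, CRn == 2, CRm == 0, op2 == 0 and Rt == 31.  With
 * op0 == 0 and CRn == 2 it is routed to handle_hint() with selector 0.
 */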
2241 
2242 /* Exception generation
2243  *
2244  *  31             24 23 21 20                     5 4   2 1  0
2245  * +-----------------+-----+------------------------+-----+----+
2246  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
2247  * +-----------------+-----+------------------------+-----+----+
2248  */
2249 static void disas_exc(DisasContext *s, uint32_t insn)
2250 {
2251     int opc = extract32(insn, 21, 3);
2252     int op2_ll = extract32(insn, 0, 5);
2253     int imm16 = extract32(insn, 5, 16);
2254     uint32_t syndrome;
2255 
2256     switch (opc) {
2257     case 0:
2258         /* For SVC, HVC and SMC we advance the single-step state
2259          * machine before taking the exception. This is architecturally
2260          * mandated, to ensure that single-stepping a system call
2261          * instruction works properly.
2262          */
2263         switch (op2_ll) {
2264         case 1:                                                     /* SVC */
2265             syndrome = syn_aa64_svc(imm16);
2266             if (s->fgt_svc) {
2267                 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2268                 break;
2269             }
2270             gen_ss_advance(s);
2271             gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2272             break;
2273         case 2:                                                     /* HVC */
2274             if (s->current_el == 0) {
2275                 unallocated_encoding(s);
2276                 break;
2277             }
2278             /* The pre-HVC helper handles cases where HVC is trapped
2279              * as an undefined insn by runtime configuration.
2280              */
2281             gen_a64_update_pc(s, 0);
2282             gen_helper_pre_hvc(cpu_env);
2283             gen_ss_advance(s);
2284             gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(imm16), 2);
2285             break;
2286         case 3:                                                     /* SMC */
2287             if (s->current_el == 0) {
2288                 unallocated_encoding(s);
2289                 break;
2290             }
2291             gen_a64_update_pc(s, 0);
2292             gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(imm16)));
2293             gen_ss_advance(s);
2294             gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(imm16), 3);
2295             break;
2296         default:
2297             unallocated_encoding(s);
2298             break;
2299         }
2300         break;
2301     case 1:
2302         if (op2_ll != 0) {
2303             unallocated_encoding(s);
2304             break;
2305         }
2306         /* BRK */
2307         gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
2308         break;
2309     case 2:
2310         if (op2_ll != 0) {
2311             unallocated_encoding(s);
2312             break;
2313         }
2314         /* HLT. This has two purposes.
2315          * Architecturally, it is an external halting debug instruction.
2316          * Since QEMU doesn't implement external debug, we treat it as
2317          * required when halting debug is disabled: it will UNDEF.
2318          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2319          */
2320         if (semihosting_enabled(s->current_el == 0) && imm16 == 0xf000) {
2321             gen_exception_internal_insn(s, EXCP_SEMIHOST);
2322         } else {
2323             unallocated_encoding(s);
2324         }
2325         break;
2326     case 5:
2327         if (op2_ll < 1 || op2_ll > 3) {
2328             unallocated_encoding(s);
2329             break;
2330         }
2331         /* DCPS1, DCPS2, DCPS3 */
2332         unallocated_encoding(s);
2333         break;
2334     default:
2335         unallocated_encoding(s);
2336         break;
2337     }
2338 }
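/*
 * Example for the semihosting special case above: "HLT 0xf000" assembles
 * to 0xd45e0000 (opc == 2, imm16 == 0xf000, op2_ll == 0).  With
 * semihosting enabled it raises EXCP_SEMIHOST; otherwise it UNDEFs like
 * any other HLT, because external halting debug is not implemented.
 */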
2339 
2340 /* Branches, exception generating and system instructions */
2341 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2342 {
2343     switch (extract32(insn, 25, 7)) {
2344     case 0x6a: /* Exception generation / System */
2345         if (insn & (1 << 24)) {
2346             if (extract32(insn, 22, 2) == 0) {
2347                 disas_system(s, insn);
2348             } else {
2349                 unallocated_encoding(s);
2350             }
2351         } else {
2352             disas_exc(s, insn);
2353         }
2354         break;
2355     default:
2356         unallocated_encoding(s);
2357         break;
2358     }
2359 }
2360 
2361 /*
2362  * Load/Store exclusive instructions are implemented by remembering
2363  * the value/address loaded, and seeing if these are the same
2364  * when the store is performed. This is not actually the architecturally
2365  * mandated semantics, but it works for typical guest code sequences
2366  * and avoids having to monitor regular stores.
2367  *
2368  * The store exclusive uses the atomic cmpxchg primitives to avoid
2369  * races in multi-threaded linux-user and when MTTCG softmmu is
2370  * enabled.
2371  */
2372 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
2373                                TCGv_i64 addr, int size, bool is_pair)
2374 {
2375     int idx = get_mem_index(s);
2376     MemOp memop;
2377 
2378     g_assert(size <= 3);
2379     if (is_pair) {
2380         g_assert(size >= 2);
2381         if (size == 2) {
2382             /* The pair must be single-copy atomic for the doubleword.  */
2383             memop = finalize_memop(s, MO_64 | MO_ALIGN);
2384             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2385             if (s->be_data == MO_LE) {
2386                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2387                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2388             } else {
2389                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2390                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2391             }
2392         } else {
2393             /*
2394              * The pair must be single-copy atomic for *each* doubleword, not
2395              * the entire quadword; however, it must be quadword aligned.
2396              * Expose the complete load to tcg, for ease of tlb lookup,
2397              * but indicate that only 8-byte atomicity is required.
2398              */
2399             TCGv_i128 t16 = tcg_temp_new_i128();
2400 
2401             memop = finalize_memop_atom(s, MO_128 | MO_ALIGN_16,
2402                                         MO_ATOM_IFALIGN_PAIR);
2403             tcg_gen_qemu_ld_i128(t16, addr, idx, memop);
2404 
2405             if (s->be_data == MO_LE) {
2406                 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2407                                       cpu_exclusive_high, t16);
2408             } else {
2409                 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2410                                       cpu_exclusive_val, t16);
2411             }
2412             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2413             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2414         }
2415     } else {
2416         memop = finalize_memop(s, size | MO_ALIGN);
2417         tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2418         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2419     }
2420     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
2421 }
2422 
2423 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2424                                 TCGv_i64 addr, int size, int is_pair)
2425 {
2426     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2427      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2428      *     [addr] = {Rt};
2429      *     if (is_pair) {
2430      *         [addr + datasize] = {Rt2};
2431      *     }
2432      *     {Rd} = 0;
2433      * } else {
2434      *     {Rd} = 1;
2435      * }
2436      * env->exclusive_addr = -1;
2437      */
2438     TCGLabel *fail_label = gen_new_label();
2439     TCGLabel *done_label = gen_new_label();
2440     TCGv_i64 tmp;
2441 
2442     tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
2443 
2444     tmp = tcg_temp_new_i64();
2445     if (is_pair) {
2446         if (size == 2) {
2447             if (s->be_data == MO_LE) {
2448                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2449             } else {
2450                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2451             }
2452             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2453                                        cpu_exclusive_val, tmp,
2454                                        get_mem_index(s),
2455                                        MO_64 | MO_ALIGN | s->be_data);
2456             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2457         } else {
2458             TCGv_i128 t16 = tcg_temp_new_i128();
2459             TCGv_i128 c16 = tcg_temp_new_i128();
2460             TCGv_i64 a, b;
2461 
2462             if (s->be_data == MO_LE) {
2463                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2464                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2465                                         cpu_exclusive_high);
2466             } else {
2467                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2468                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2469                                         cpu_exclusive_val);
2470             }
2471 
2472             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
2473                                         get_mem_index(s),
2474                                         MO_128 | MO_ALIGN | s->be_data);
2475 
2476             a = tcg_temp_new_i64();
2477             b = tcg_temp_new_i64();
2478             if (s->be_data == MO_LE) {
2479                 tcg_gen_extr_i128_i64(a, b, t16);
2480             } else {
2481                 tcg_gen_extr_i128_i64(b, a, t16);
2482             }
2483 
2484             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
2485             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
2486             tcg_gen_or_i64(tmp, a, b);
2487 
2488             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
2489         }
2490     } else {
2491         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2492                                    cpu_reg(s, rt), get_mem_index(s),
2493                                    size | MO_ALIGN | s->be_data);
2494         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2495     }
2496     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2497     tcg_gen_br(done_label);
2498 
2499     gen_set_label(fail_label);
2500     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2501     gen_set_label(done_label);
2502     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2503 }
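/*
 * Taken together, the two functions above implement a typical guest
 * sequence such as a spinlock acquire (illustrative sketch, with w3
 * preset to 1; not code from this file):
 *
 *   retry:
 *     ldaxr   w0, [x1]          // gen_load_exclusive remembers addr/value
 *     cbnz    w0, retry
 *     stxr    w2, w3, [x1]      // gen_store_exclusive cmpxchgs against the
 *     cbnz    w2, retry         //   remembered value; w2 == 0 on success
 *
 * Because only the value is compared, the cmpxchg can succeed even if
 * another CPU wrote the same value back in between (an ABA case), which
 * is the "not architecturally mandated" approximation noted above.
 */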
2504 
2505 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2506                                  int rn, int size)
2507 {
2508     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2509     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2510     int memidx = get_mem_index(s);
2511     TCGv_i64 clean_addr;
2512 
2513     if (rn == 31) {
2514         gen_check_sp_alignment(s);
2515     }
2516     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size);
2517     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
2518                                size | MO_ALIGN | s->be_data);
2519 }
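/*
 * For example, CAS w0, w1, [x2] compares the word at [x2] with w0 and,
 * if equal, stores w1; either way the value loaded from memory ends up
 * back in w0, which is exactly what the cmpxchg above does by writing
 * its result into tcg_rs.
 */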
2520 
2521 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2522                                       int rn, int size)
2523 {
2524     TCGv_i64 s1 = cpu_reg(s, rs);
2525     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2526     TCGv_i64 t1 = cpu_reg(s, rt);
2527     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2528     TCGv_i64 clean_addr;
2529     int memidx = get_mem_index(s);
2530 
2531     if (rn == 31) {
2532         gen_check_sp_alignment(s);
2533     }
2534 
2535     /* This is a single atomic access, despite the "pair". */
2536     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1);
2537 
2538     if (size == 2) {
2539         TCGv_i64 cmp = tcg_temp_new_i64();
2540         TCGv_i64 val = tcg_temp_new_i64();
2541 
2542         if (s->be_data == MO_LE) {
2543             tcg_gen_concat32_i64(val, t1, t2);
2544             tcg_gen_concat32_i64(cmp, s1, s2);
2545         } else {
2546             tcg_gen_concat32_i64(val, t2, t1);
2547             tcg_gen_concat32_i64(cmp, s2, s1);
2548         }
2549 
2550         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
2551                                    MO_64 | MO_ALIGN | s->be_data);
2552 
2553         if (s->be_data == MO_LE) {
2554             tcg_gen_extr32_i64(s1, s2, cmp);
2555         } else {
2556             tcg_gen_extr32_i64(s2, s1, cmp);
2557         }
2558     } else {
2559         TCGv_i128 cmp = tcg_temp_new_i128();
2560         TCGv_i128 val = tcg_temp_new_i128();
2561 
2562         if (s->be_data == MO_LE) {
2563             tcg_gen_concat_i64_i128(val, t1, t2);
2564             tcg_gen_concat_i64_i128(cmp, s1, s2);
2565         } else {
2566             tcg_gen_concat_i64_i128(val, t2, t1);
2567             tcg_gen_concat_i64_i128(cmp, s2, s1);
2568         }
2569 
2570         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx,
2571                                     MO_128 | MO_ALIGN | s->be_data);
2572 
2573         if (s->be_data == MO_LE) {
2574             tcg_gen_extr_i128_i64(s1, s2, cmp);
2575         } else {
2576             tcg_gen_extr_i128_i64(s2, s1, cmp);
2577         }
2578     }
2579 }
2580 
2581 /* Compute the Sixty-Four bit (SF) register size. This logic is derived
2582  * from the ARMv8 specs for LDR (Shared decode for all encodings).
2583  */
2584 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
2585 {
2586     int opc0 = extract32(opc, 0, 1);
2587     int regsize;
2588 
2589     if (is_signed) {
2590         regsize = opc0 ? 32 : 64;
2591     } else {
2592         regsize = size == 3 ? 64 : 32;
2593     }
2594     return regsize == 64;
2595 }
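/*
 * Examples: LDRB w0, [x1] has size == 0 and opc == 01 (unsigned load),
 * so regsize == 32 and SF is false; LDRSW x0, [x1] has size == 2 and
 * opc == 10, so it is signed with opc<0> == 0, giving regsize == 64 and
 * SF true.
 */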
2596 
2597 /* Load/store exclusive
2598  *
2599  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2600  * +-----+-------------+----+---+----+------+----+-------+------+------+
2601  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2602  * +-----+-------------+----+---+----+------+----+-------+------+------+
2603  *
2604  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2605  *   L: 0 -> store, 1 -> load
2606  *  o2: 0 -> exclusive, 1 -> not
2607  *  o1: 0 -> single register, 1 -> register pair
2608  *  o0: 1 -> load-acquire/store-release, 0 -> not
2609  */
2610 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2611 {
2612     int rt = extract32(insn, 0, 5);
2613     int rn = extract32(insn, 5, 5);
2614     int rt2 = extract32(insn, 10, 5);
2615     int rs = extract32(insn, 16, 5);
2616     int is_lasr = extract32(insn, 15, 1);
2617     int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2618     int size = extract32(insn, 30, 2);
2619     TCGv_i64 clean_addr;
2620 
2621     switch (o2_L_o1_o0) {
2622     case 0x0: /* STXR */
2623     case 0x1: /* STLXR */
2624         if (rn == 31) {
2625             gen_check_sp_alignment(s);
2626         }
2627         if (is_lasr) {
2628             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2629         }
2630         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2631                                     true, rn != 31, size);
2632         gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
2633         return;
2634 
2635     case 0x4: /* LDXR */
2636     case 0x5: /* LDAXR */
2637         if (rn == 31) {
2638             gen_check_sp_alignment(s);
2639         }
2640         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2641                                     false, rn != 31, size);
2642         s->is_ldex = true;
2643         gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
2644         if (is_lasr) {
2645             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2646         }
2647         return;
2648 
2649     case 0x8: /* STLLR */
2650         if (!dc_isar_feature(aa64_lor, s)) {
2651             break;
2652         }
2653         /* StoreLORelease is the same as Store-Release for QEMU.  */
2654         /* fall through */
2655     case 0x9: /* STLR */
2656         /* Generate ISS for non-exclusive accesses including LASR.  */
2657         if (rn == 31) {
2658             gen_check_sp_alignment(s);
2659         }
2660         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2661         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2662                                     true, rn != 31, size);
2663         /* TODO: ARMv8.4-LSE SCTLR.nAA */
2664         do_gpr_st(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, true, rt,
2665                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2666         return;
2667 
2668     case 0xc: /* LDLAR */
2669         if (!dc_isar_feature(aa64_lor, s)) {
2670             break;
2671         }
2672         /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2673         /* fall through */
2674     case 0xd: /* LDAR */
2675         /* Generate ISS for non-exclusive accesses including LASR.  */
2676         if (rn == 31) {
2677             gen_check_sp_alignment(s);
2678         }
2679         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2680                                     false, rn != 31, size);
2681         /* TODO: ARMv8.4-LSE SCTLR.nAA */
2682         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, false, true,
2683                   rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2684         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2685         return;
2686 
2687     case 0x2: case 0x3: /* CASP / STXP */
2688         if (size & 2) { /* STXP / STLXP */
2689             if (rn == 31) {
2690                 gen_check_sp_alignment(s);
2691             }
2692             if (is_lasr) {
2693                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2694             }
2695             clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2696                                         true, rn != 31, size);
2697             gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
2698             return;
2699         }
2700         if (rt2 == 31
2701             && ((rt | rs) & 1) == 0
2702             && dc_isar_feature(aa64_atomics, s)) {
2703             /* CASP / CASPL */
2704             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2705             return;
2706         }
2707         break;
2708 
2709     case 0x6: case 0x7: /* CASPA / LDXP */
2710         if (size & 2) { /* LDXP / LDAXP */
2711             if (rn == 31) {
2712                 gen_check_sp_alignment(s);
2713             }
2714             clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2715                                         false, rn != 31, size);
2716             s->is_ldex = true;
2717             gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
2718             if (is_lasr) {
2719                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2720             }
2721             return;
2722         }
2723         if (rt2 == 31
2724             && ((rt | rs) & 1) == 0
2725             && dc_isar_feature(aa64_atomics, s)) {
2726             /* CASPA / CASPAL */
2727             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2728             return;
2729         }
2730         break;
2731 
2732     case 0xa: /* CAS */
2733     case 0xb: /* CASL */
2734     case 0xe: /* CASA */
2735     case 0xf: /* CASAL */
2736         if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
2737             gen_compare_and_swap(s, rs, rt, rn, size);
2738             return;
2739         }
2740         break;
2741     }
2742     unallocated_encoding(s);
2743 }
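/*
 * Worked decode of the o2_L_o1_o0 selector above: LDAXR has o2 == 0,
 * L == 1, o1 == 0, o0 == 1, so extract32(insn, 21, 3) == 0b010 and the
 * selector is 0b010 * 2 | 1 == 0x5.  STLXP has o2 == 0, L == 0, o1 == 1,
 * o0 == 1 -> 0b001 * 2 | 1 == 0x3, with size<1> set, so it takes the
 * "STXP / STLXP" branch of case 0x3.
 */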
2744 
2745 /*
2746  * Load register (literal)
2747  *
2748  *  31 30 29   27  26 25 24 23                5 4     0
2749  * +-----+-------+---+-----+-------------------+-------+
2750  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2751  * +-----+-------+---+-----+-------------------+-------+
2752  *
2753  * V: 1 -> vector (simd/fp)
2754  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2755  *                   10 -> 32 bit signed, 11 -> prefetch
2756  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2757  */
2758 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2759 {
2760     int rt = extract32(insn, 0, 5);
2761     int64_t imm = sextract32(insn, 5, 19) << 2;
2762     bool is_vector = extract32(insn, 26, 1);
2763     int opc = extract32(insn, 30, 2);
2764     bool is_signed = false;
2765     int size = 2;
2766     TCGv_i64 tcg_rt, clean_addr;
2767 
2768     if (is_vector) {
2769         if (opc == 3) {
2770             unallocated_encoding(s);
2771             return;
2772         }
2773         size = 2 + opc;
2774         if (!fp_access_check(s)) {
2775             return;
2776         }
2777     } else {
2778         if (opc == 3) {
2779             /* PRFM (literal) : prefetch */
2780             return;
2781         }
2782         size = 2 + extract32(opc, 0, 1);
2783         is_signed = extract32(opc, 1, 1);
2784     }
2785 
2786     tcg_rt = cpu_reg(s, rt);
2787 
2788     clean_addr = tcg_temp_new_i64();
2789     gen_pc_plus_diff(s, clean_addr, imm);
2790     if (is_vector) {
2791         do_fp_ld(s, rt, clean_addr, size);
2792     } else {
2793         /* Only unsigned 32bit loads target 32bit registers.  */
2794         bool iss_sf = opc != 0;
2795 
2796         do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
2797                   false, true, rt, iss_sf, false);
2798     }
2799 }
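/*
 * The literal offset above is imm19 sign-extended and scaled by 4,
 * giving a +/-1MiB range.  For instance, LDR x0, .+8 encodes
 * imm19 == 2 and opc == 01 (64-bit), so size == 3 and a plain 64-bit
 * load is generated from pc + 8.
 */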
2800 
2801 /*
2802  * LDNP (Load Pair - non-temporal hint)
2803  * LDP (Load Pair - non vector)
2804  * LDPSW (Load Pair Signed Word - non vector)
2805  * STNP (Store Pair - non-temporal hint)
2806  * STP (Store Pair - non vector)
2807  * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2808  * LDP (Load Pair of SIMD&FP)
2809  * STNP (Store Pair of SIMD&FP - non-temporal hint)
2810  * STP (Store Pair of SIMD&FP)
2811  *
2812  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2813  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2814  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2815  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2816  *
2817  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2818  *      LDPSW/STGP               01
2819  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2820  *   V: 0 -> GPR, 1 -> Vector
2821  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2822  *      10 -> signed offset, 11 -> pre-index
2823  *   L: 0 -> Store 1 -> Load
2824  *
2825  * Rt, Rt2 = GPR or SIMD registers to be stored
2826  * Rn = general purpose register containing address
2827  * imm7 = signed offset (multiple of 4 or 8 depending on size)
2828  */
2829 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2830 {
2831     int rt = extract32(insn, 0, 5);
2832     int rn = extract32(insn, 5, 5);
2833     int rt2 = extract32(insn, 10, 5);
2834     uint64_t offset = sextract64(insn, 15, 7);
2835     int index = extract32(insn, 23, 2);
2836     bool is_vector = extract32(insn, 26, 1);
2837     bool is_load = extract32(insn, 22, 1);
2838     int opc = extract32(insn, 30, 2);
2839 
2840     bool is_signed = false;
2841     bool postindex = false;
2842     bool wback = false;
2843     bool set_tag = false;
2844 
2845     TCGv_i64 clean_addr, dirty_addr;
2846 
2847     int size;
2848 
2849     if (opc == 3) {
2850         unallocated_encoding(s);
2851         return;
2852     }
2853 
2854     if (is_vector) {
2855         size = 2 + opc;
2856     } else if (opc == 1 && !is_load) {
2857         /* STGP */
2858         if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) {
2859             unallocated_encoding(s);
2860             return;
2861         }
2862         size = 3;
2863         set_tag = true;
2864     } else {
2865         size = 2 + extract32(opc, 1, 1);
2866         is_signed = extract32(opc, 0, 1);
2867         if (!is_load && is_signed) {
2868             unallocated_encoding(s);
2869             return;
2870         }
2871     }
2872 
2873     switch (index) {
2874     case 1: /* post-index */
2875         postindex = true;
2876         wback = true;
2877         break;
2878     case 0:
2879         /* Signed offset with "non-temporal" hint. Since we don't emulate
2880          * caches, we don't care about hints to the cache system about
2881          * data access patterns, and we handle this identically to a plain
2882          * signed offset.
2883          */
2884         if (is_signed) {
2885             /* There is no non-temporal-hint version of LDPSW */
2886             unallocated_encoding(s);
2887             return;
2888         }
2889         postindex = false;
2890         break;
2891     case 2: /* signed offset, rn not updated */
2892         postindex = false;
2893         break;
2894     case 3: /* pre-index */
2895         postindex = false;
2896         wback = true;
2897         break;
2898     }
2899 
2900     if (is_vector && !fp_access_check(s)) {
2901         return;
2902     }
2903 
2904     offset <<= (set_tag ? LOG2_TAG_GRANULE : size);
2905 
2906     if (rn == 31) {
2907         gen_check_sp_alignment(s);
2908     }
2909 
2910     dirty_addr = read_cpu_reg_sp(s, rn, 1);
2911     if (!postindex) {
2912         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2913     }
2914 
2915     if (set_tag) {
2916         if (!s->ata) {
2917             /*
2918              * TODO: We could rely on the stores below, at least for
2919              * system mode, if we arrange to add MO_ALIGN_16.
2920              */
2921             gen_helper_stg_stub(cpu_env, dirty_addr);
2922         } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2923             gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
2924         } else {
2925             gen_helper_stg(cpu_env, dirty_addr, dirty_addr);
2926         }
2927     }
2928 
2929     clean_addr = gen_mte_checkN(s, dirty_addr, !is_load,
2930                                 (wback || rn != 31) && !set_tag, 2 << size);
2931 
2932     if (is_vector) {
2933         if (is_load) {
2934             do_fp_ld(s, rt, clean_addr, size);
2935         } else {
2936             do_fp_st(s, rt, clean_addr, size);
2937         }
2938         tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2939         if (is_load) {
2940             do_fp_ld(s, rt2, clean_addr, size);
2941         } else {
2942             do_fp_st(s, rt2, clean_addr, size);
2943         }
2944     } else {
2945         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2946         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2947 
2948         if (is_load) {
2949             TCGv_i64 tmp = tcg_temp_new_i64();
2950 
2951             /* Do not modify tcg_rt before recognizing any exception
2952              * from the second load.
2953              */
2954             do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN,
2955                       false, false, 0, false, false);
2956             tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2957             do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN,
2958                       false, false, 0, false, false);
2959 
2960             tcg_gen_mov_i64(tcg_rt, tmp);
2961         } else {
2962             do_gpr_st(s, tcg_rt, clean_addr, size,
2963                       false, 0, false, false);
2964             tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2965             do_gpr_st(s, tcg_rt2, clean_addr, size,
2966                       false, 0, false, false);
2967         }
2968     }
2969 
2970     if (wback) {
2971         if (postindex) {
2972             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2973         }
2974         tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
2975     }
2976 }
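/*
 * Offset-scaling example for the above: STP x0, x1, [sp, #-16]! is
 * pre-index (index == 3, wback, !postindex) with imm7 == -2; opc == 10
 * gives size == 3, so offset == -2 << 3 == -16 is added to the dirty
 * address before the MTE check and written back to SP afterwards.
 */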
2977 
2978 /*
2979  * Load/store (immediate post-indexed)
2980  * Load/store (immediate pre-indexed)
2981  * Load/store (unscaled immediate)
2982  *
2983  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2984  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2985  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2986  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2987  *
2988  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2989  *       10 -> unprivileged
2990  * V = 0 -> non-vector
2991  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2992  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2993  */
2994 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2995                                 int opc,
2996                                 int size,
2997                                 int rt,
2998                                 bool is_vector)
2999 {
3000     int rn = extract32(insn, 5, 5);
3001     int imm9 = sextract32(insn, 12, 9);
3002     int idx = extract32(insn, 10, 2);
3003     bool is_signed = false;
3004     bool is_store = false;
3005     bool is_extended = false;
3006     bool is_unpriv = (idx == 2);
3007     bool iss_valid;
3008     bool post_index;
3009     bool writeback;
3010     int memidx;
3011 
3012     TCGv_i64 clean_addr, dirty_addr;
3013 
3014     if (is_vector) {
3015         size |= (opc & 2) << 1;
3016         if (size > 4 || is_unpriv) {
3017             unallocated_encoding(s);
3018             return;
3019         }
3020         is_store = ((opc & 1) == 0);
3021         if (!fp_access_check(s)) {
3022             return;
3023         }
3024     } else {
3025         if (size == 3 && opc == 2) {
3026             /* PRFM - prefetch */
3027             if (idx != 0) {
3028                 unallocated_encoding(s);
3029                 return;
3030             }
3031             return;
3032         }
3033         if (opc == 3 && size > 1) {
3034             unallocated_encoding(s);
3035             return;
3036         }
3037         is_store = (opc == 0);
3038         is_signed = extract32(opc, 1, 1);
3039         is_extended = (size < 3) && extract32(opc, 0, 1);
3040     }
3041 
3042     switch (idx) {
3043     case 0:
3044     case 2:
3045         post_index = false;
3046         writeback = false;
3047         break;
3048     case 1:
3049         post_index = true;
3050         writeback = true;
3051         break;
3052     case 3:
3053         post_index = false;
3054         writeback = true;
3055         break;
3056     default:
3057         g_assert_not_reached();
3058     }
3059 
3060     iss_valid = !is_vector && !writeback;
3061 
3062     if (rn == 31) {
3063         gen_check_sp_alignment(s);
3064     }
3065 
3066     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3067     if (!post_index) {
3068         tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3069     }
3070 
3071     memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
3072     clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store,
3073                                        writeback || rn != 31,
3074                                        size, is_unpriv, memidx);
3075 
3076     if (is_vector) {
3077         if (is_store) {
3078             do_fp_st(s, rt, clean_addr, size);
3079         } else {
3080             do_fp_ld(s, rt, clean_addr, size);
3081         }
3082     } else {
3083         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3084         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3085 
3086         if (is_store) {
3087             do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
3088                              iss_valid, rt, iss_sf, false);
3089         } else {
3090             do_gpr_ld_memidx(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
3091                              is_extended, memidx,
3092                              iss_valid, rt, iss_sf, false);
3093         }
3094     }
3095 
3096     if (writeback) {
3097         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3098         if (post_index) {
3099             tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3100         }
3101         tcg_gen_mov_i64(tcg_rn, dirty_addr);
3102     }
3103 }
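/*
 * Two examples of the idx field above: LDR x0, [x1], #8 is post-indexed
 * (idx == 1), so the load uses the unmodified address and x1 is bumped
 * by 8 afterwards; LDTR x0, [x1] is the unprivileged form (idx == 2),
 * which keeps the address as-is but swaps in get_a64_user_mem_index()
 * so the access is performed with the unprivileged (EL0) permissions.
 */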
3104 
3105 /*
3106  * Load/store (register offset)
3107  *
3108  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
3109  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3110  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
3111  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3112  *
3113  * For non-vector:
3114  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3115  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3116  * For vector:
3117  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3118  *   opc<0>: 0 -> store, 1 -> load
3119  * V: 1 -> vector/simd
3120  * opt: extend encoding (see DecodeRegExtend)
3121  * S: if S=1 then scale (essentially index by sizeof(size))
3122  * Rt: register to transfer into/out of
3123  * Rn: address register or SP for base
3124  * Rm: offset register or ZR for offset
3125  */
3126 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
3127                                    int opc,
3128                                    int size,
3129                                    int rt,
3130                                    bool is_vector)
3131 {
3132     int rn = extract32(insn, 5, 5);
3133     int shift = extract32(insn, 12, 1);
3134     int rm = extract32(insn, 16, 5);
3135     int opt = extract32(insn, 13, 3);
3136     bool is_signed = false;
3137     bool is_store = false;
3138     bool is_extended = false;
3139 
3140     TCGv_i64 tcg_rm, clean_addr, dirty_addr;
3141 
3142     if (extract32(opt, 1, 1) == 0) {
3143         unallocated_encoding(s);
3144         return;
3145     }
3146 
3147     if (is_vector) {
3148         size |= (opc & 2) << 1;
3149         if (size > 4) {
3150             unallocated_encoding(s);
3151             return;
3152         }
3153         is_store = !extract32(opc, 0, 1);
3154         if (!fp_access_check(s)) {
3155             return;
3156         }
3157     } else {
3158         if (size == 3 && opc == 2) {
3159             /* PRFM - prefetch */
3160             return;
3161         }
3162         if (opc == 3 && size > 1) {
3163             unallocated_encoding(s);
3164             return;
3165         }
3166         is_store = (opc == 0);
3167         is_signed = extract32(opc, 1, 1);
3168         is_extended = (size < 3) && extract32(opc, 0, 1);
3169     }
3170 
3171     if (rn == 31) {
3172         gen_check_sp_alignment(s);
3173     }
3174     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3175 
3176     tcg_rm = read_cpu_reg(s, rm, 1);
3177     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
3178 
3179     tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
3180     clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size);
3181 
3182     if (is_vector) {
3183         if (is_store) {
3184             do_fp_st(s, rt, clean_addr, size);
3185         } else {
3186             do_fp_ld(s, rt, clean_addr, size);
3187         }
3188     } else {
3189         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3190         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3191         if (is_store) {
3192             do_gpr_st(s, tcg_rt, clean_addr, size,
3193                       true, rt, iss_sf, false);
3194         } else {
3195             do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
3196                       is_extended, true, rt, iss_sf, false);
3197         }
3198     }
3199 }
3200 
3201 /*
3202  * Load/store (unsigned immediate)
3203  *
3204  * 31 30 29   27  26 25 24 23 22 21        10 9     5
3205  * +----+-------+---+-----+-----+------------+-------+------+
3206  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
3207  * +----+-------+---+-----+-----+------------+-------+------+
3208  *
3209  * For non-vector:
3210  *   size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
3211  *   opc: 00 -> store, 01 -> unsigned load,
 *        10 -> signed load to 64 bit, 11 -> signed load to 32 bit
3212  * For vector:
3213  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3214  *   opc<0>: 0 -> store, 1 -> load
3215  * Rn: base address register (inc SP)
3216  * Rt: target register
3217  */
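/*
 * The immediate is scaled by the access size: for example
 * "LDR X0, [X1, #16]" encodes size=11, opc=01 and imm12=2, since
 * offset = imm12 << size = 2 << 3 = 16.
 */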
3218 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
3219                                         int opc,
3220                                         int size,
3221                                         int rt,
3222                                         bool is_vector)
3223 {
3224     int rn = extract32(insn, 5, 5);
3225     unsigned int imm12 = extract32(insn, 10, 12);
3226     unsigned int offset;
3227 
3228     TCGv_i64 clean_addr, dirty_addr;
3229 
3230     bool is_store;
3231     bool is_signed = false;
3232     bool is_extended = false;
3233 
3234     if (is_vector) {
3235         size |= (opc & 2) << 1;
3236         if (size > 4) {
3237             unallocated_encoding(s);
3238             return;
3239         }
3240         is_store = !extract32(opc, 0, 1);
3241         if (!fp_access_check(s)) {
3242             return;
3243         }
3244     } else {
3245         if (size == 3 && opc == 2) {
3246             /* PRFM - prefetch */
3247             return;
3248         }
3249         if (opc == 3 && size > 1) {
3250             unallocated_encoding(s);
3251             return;
3252         }
3253         is_store = (opc == 0);
3254         is_signed = extract32(opc, 1, 1);
3255         is_extended = (size < 3) && extract32(opc, 0, 1);
3256     }
3257 
3258     if (rn == 31) {
3259         gen_check_sp_alignment(s);
3260     }
3261     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3262     offset = imm12 << size;
3263     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3264     clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size);
3265 
3266     if (is_vector) {
3267         if (is_store) {
3268             do_fp_st(s, rt, clean_addr, size);
3269         } else {
3270             do_fp_ld(s, rt, clean_addr, size);
3271         }
3272     } else {
3273         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3274         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3275         if (is_store) {
3276             do_gpr_st(s, tcg_rt, clean_addr, size,
3277                       true, rt, iss_sf, false);
3278         } else {
3279             do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
3280                       is_extended, true, rt, iss_sf, false);
3281         }
3282     }
3283 }
3284 
3285 /* Atomic memory operations
3286  *
3287  *  31  30      27  26    24    22  21   16   15    12    10    5     0
3288  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3289  * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn |  Rt |
3290  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3291  *
3292  * Rt: the result register
3293  * Rn: base address or SP
3294  * Rs: the source register for the operation
3295  * V: vector flag (always 0 as of v8.3)
3296  * A: acquire flag
3297  * R: release flag
3298  */
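/*
 * For example, "LDADD W1, W0, [X2]" encodes size=10, A=0, R=0, Rs=1,
 * o3=0, opc=000, Rn=2, Rt=0: W0 receives the old word at [X2], and
 * [X2] receives that word plus W1.
 */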
3299 static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
3300                               int size, int rt, bool is_vector)
3301 {
3302     int rs = extract32(insn, 16, 5);
3303     int rn = extract32(insn, 5, 5);
3304     int o3_opc = extract32(insn, 12, 4);
3305     bool r = extract32(insn, 22, 1);
3306     bool a = extract32(insn, 23, 1);
3307     TCGv_i64 tcg_rs, tcg_rt, clean_addr;
3308     AtomicThreeOpFn *fn = NULL;
3309     MemOp mop = s->be_data | size | MO_ALIGN;
3310 
3311     if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
3312         unallocated_encoding(s);
3313         return;
3314     }
3315     switch (o3_opc) {
3316     case 000: /* LDADD */
3317         fn = tcg_gen_atomic_fetch_add_i64;
3318         break;
3319     case 001: /* LDCLR */
3320         fn = tcg_gen_atomic_fetch_and_i64;
3321         break;
3322     case 002: /* LDEOR */
3323         fn = tcg_gen_atomic_fetch_xor_i64;
3324         break;
3325     case 003: /* LDSET */
3326         fn = tcg_gen_atomic_fetch_or_i64;
3327         break;
3328     case 004: /* LDSMAX */
3329         fn = tcg_gen_atomic_fetch_smax_i64;
3330         mop |= MO_SIGN;
3331         break;
3332     case 005: /* LDSMIN */
3333         fn = tcg_gen_atomic_fetch_smin_i64;
3334         mop |= MO_SIGN;
3335         break;
3336     case 006: /* LDUMAX */
3337         fn = tcg_gen_atomic_fetch_umax_i64;
3338         break;
3339     case 007: /* LDUMIN */
3340         fn = tcg_gen_atomic_fetch_umin_i64;
3341         break;
3342     case 010: /* SWP */
3343         fn = tcg_gen_atomic_xchg_i64;
3344         break;
3345     case 014: /* LDAPR, LDAPRH, LDAPRB */
3346         if (!dc_isar_feature(aa64_rcpc_8_3, s) ||
3347             rs != 31 || a != 1 || r != 0) {
3348             unallocated_encoding(s);
3349             return;
3350         }
3351         break;
3352     default:
3353         unallocated_encoding(s);
3354         return;
3355     }
3356 
3357     if (rn == 31) {
3358         gen_check_sp_alignment(s);
3359     }
3360     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size);
3361 
3362     if (o3_opc == 014) {
3363         /*
3364          * LDAPR* are a special case because they are a simple load, not a
3365          * fetch-and-do-something op.
3366          * The architectural consistency requirements here are weaker than
3367          * full load-acquire (we only need "load-acquire processor consistent"),
3368          * but we choose to implement them as full LDAQ.
3369          */
3370         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false,
3371                   true, rt, disas_ldst_compute_iss_sf(size, false, 0), true);
3372         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3373         return;
3374     }
3375 
3376     tcg_rs = read_cpu_reg(s, rs, true);
3377     tcg_rt = cpu_reg(s, rt);
3378 
3379     if (o3_opc == 1) { /* LDCLR */
3380         tcg_gen_not_i64(tcg_rs, tcg_rs);
3381     }
3382 
3383     /* The tcg atomic primitives are all full barriers.  Therefore we
3384      * can ignore the Acquire and Release bits of this instruction.
3385      */
3386     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3387 
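    /*
     * For LDSMAX/LDSMIN the fetch result above was sign-extended per mop;
     * architecturally Rt receives the loaded value zero-extended from the
     * access size.
     */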
3388     if ((mop & MO_SIGN) && size != MO_64) {
3389         tcg_gen_extract_i64(tcg_rt, tcg_rt, 0, 8 << size);
3390     }
3391 }
3392 
3393 /*
3394  * PAC memory operations
3395  *
3396  *  31  30      27  26    24    22  21       12  11  10    5     0
3397  * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3398  * | size | 1 1 1 | V | 0 0 | M S | 1 |  imm9  | W | 1 | Rn |  Rt |
3399  * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3400  *
3401  * Rt: the result register
3402  * Rn: base address or SP
3403  * V: vector flag (always 0 as of v8.3)
3404  * M: clear for key DA, set for key DB
3405  * W: pre-indexing flag
3406  * S: sign for imm9.
3407  */
3408 static void disas_ldst_pac(DisasContext *s, uint32_t insn,
3409                            int size, int rt, bool is_vector)
3410 {
3411     int rn = extract32(insn, 5, 5);
3412     bool is_wback = extract32(insn, 11, 1);
3413     bool use_key_a = !extract32(insn, 23, 1);
3414     int offset;
3415     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3416 
3417     if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
3418         unallocated_encoding(s);
3419         return;
3420     }
3421 
3422     if (rn == 31) {
3423         gen_check_sp_alignment(s);
3424     }
3425     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3426 
3427     if (s->pauth_active) {
3428         if (use_key_a) {
3429             gen_helper_autda(dirty_addr, cpu_env, dirty_addr,
3430                              tcg_constant_i64(0));
3431         } else {
3432             gen_helper_autdb(dirty_addr, cpu_env, dirty_addr,
3433                              tcg_constant_i64(0));
3434         }
3435     }
3436 
3437     /* Form the 10-bit signed, scaled offset.  */
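    /*
     * size is always 3 here, so the offset spans -4096..+4088 in
     * multiples of 8; e.g. S=1 with imm9 all-ones yields -8.
     */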
3438     offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
3439     offset = sextract32(offset << size, 0, 10 + size);
3440     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3441 
3442     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3443     clean_addr = gen_mte_check1(s, dirty_addr, false,
3444                                 is_wback || rn != 31, size);
3445 
3446     tcg_rt = cpu_reg(s, rt);
3447     do_gpr_ld(s, tcg_rt, clean_addr, size,
3448               /* extend */ false, /* iss_valid */ !is_wback,
3449               /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
3450 
3451     if (is_wback) {
3452         tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3453     }
3454 }
3455 
3456 /*
3457  * LDAPR/STLR (unscaled immediate)
3458  *
3459  *  31  30            24    22  21       12    10    5     0
3460  * +------+-------------+-----+---+--------+-----+----+-----+
3461  * | size | 0 1 1 0 0 1 | opc | 0 |  imm9  | 0 0 | Rn |  Rt |
3462  * +------+-------------+-----+---+--------+-----+----+-----+
3463  *
3464  * Rt: source or destination register
3465  * Rn: base register
3466  * imm9: unscaled immediate offset
3467  * opc: 00: STLUR*, 01/10/11: various LDAPUR*
3468  * size: size of load/store
3469  */
3470 static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn)
3471 {
3472     int rt = extract32(insn, 0, 5);
3473     int rn = extract32(insn, 5, 5);
3474     int offset = sextract32(insn, 12, 9);
3475     int opc = extract32(insn, 22, 2);
3476     int size = extract32(insn, 30, 2);
3477     TCGv_i64 clean_addr, dirty_addr;
3478     bool is_store = false;
3479     bool extend = false;
3480     bool iss_sf;
3481     MemOp mop;
3482 
3483     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3484         unallocated_encoding(s);
3485         return;
3486     }
3487 
3488     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3489     mop = size | MO_ALIGN;
3490 
3491     switch (opc) {
3492     case 0: /* STLURB */
3493         is_store = true;
3494         break;
3495     case 1: /* LDAPUR* */
3496         break;
3497     case 2: /* LDAPURS* 64-bit variant */
3498         if (size == 3) {
3499             unallocated_encoding(s);
3500             return;
3501         }
3502         mop |= MO_SIGN;
3503         break;
3504     case 3: /* LDAPURS* 32-bit variant */
3505         if (size > 1) {
3506             unallocated_encoding(s);
3507             return;
3508         }
3509         mop |= MO_SIGN;
3510         extend = true; /* zero-extend 32->64 after signed load */
3511         break;
3512     default:
3513         g_assert_not_reached();
3514     }
3515 
3516     iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc);
3517 
3518     if (rn == 31) {
3519         gen_check_sp_alignment(s);
3520     }
3521 
3522     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3523     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3524     clean_addr = clean_data_tbi(s, dirty_addr);
3525 
3526     if (is_store) {
3527         /* Store-Release semantics */
3528         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3529         do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true);
3530     } else {
3531         /*
3532          * Load-AcquirePC semantics; we implement as the slightly more
3533          * restrictive Load-Acquire.
3534          */
3535         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop,
3536                   extend, true, rt, iss_sf, true);
3537         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3538     }
3539 }
3540 
3541 /* Load/store register (all forms) */
3542 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
3543 {
3544     int rt = extract32(insn, 0, 5);
3545     int opc = extract32(insn, 22, 2);
3546     bool is_vector = extract32(insn, 26, 1);
3547     int size = extract32(insn, 30, 2);
3548 
3549     switch (extract32(insn, 24, 2)) {
3550     case 0:
3551         if (extract32(insn, 21, 1) == 0) {
3552             /* Load/store register (unscaled immediate)
3553              * Load/store immediate pre/post-indexed
3554              * Load/store register unprivileged
3555              */
3556             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
3557             return;
3558         }
3559         switch (extract32(insn, 10, 2)) {
3560         case 0:
3561             disas_ldst_atomic(s, insn, size, rt, is_vector);
3562             return;
3563         case 2:
3564             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
3565             return;
3566         default:
3567             disas_ldst_pac(s, insn, size, rt, is_vector);
3568             return;
3569         }
3570         break;
3571     case 1:
3572         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
3573         return;
3574     }
3575     unallocated_encoding(s);
3576 }
3577 
3578 /* AdvSIMD load/store multiple structures
3579  *
3580  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
3581  * +---+---+---------------+---+-------------+--------+------+------+------+
3582  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
3583  * +---+---+---------------+---+-------------+--------+------+------+------+
3584  *
3585  * AdvSIMD load/store multiple structures (post-indexed)
3586  *
3587  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
3588  * +---+---+---------------+---+---+---------+--------+------+------+------+
3589  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
3590  * +---+---+---------------+---+---+---------+--------+------+------+------+
3591  *
3592  * Rt: first (or only) SIMD&FP register to be transferred
3593  * Rn: base address or SP
3594  * Rm (post-index only): post-index register (when !31) or size dependent #imm
3595  */
3596 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
3597 {
3598     int rt = extract32(insn, 0, 5);
3599     int rn = extract32(insn, 5, 5);
3600     int rm = extract32(insn, 16, 5);
3601     int size = extract32(insn, 10, 2);
3602     int opcode = extract32(insn, 12, 4);
3603     bool is_store = !extract32(insn, 22, 1);
3604     bool is_postidx = extract32(insn, 23, 1);
3605     bool is_q = extract32(insn, 30, 1);
3606     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3607     MemOp endian, align, mop;
3608 
3609     int total;    /* total bytes */
3610     int elements; /* elements per vector */
3611     int rpt;    /* num iterations */
3612     int selem;  /* structure elements */
3613     int r;
3614 
3615     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
3616         unallocated_encoding(s);
3617         return;
3618     }
3619 
3620     if (!is_postidx && rm != 0) {
3621         unallocated_encoding(s);
3622         return;
3623     }
3624 
3625     /* From the shared decode logic */
3626     switch (opcode) {
3627     case 0x0:
3628         rpt = 1;
3629         selem = 4;
3630         break;
3631     case 0x2:
3632         rpt = 4;
3633         selem = 1;
3634         break;
3635     case 0x4:
3636         rpt = 1;
3637         selem = 3;
3638         break;
3639     case 0x6:
3640         rpt = 3;
3641         selem = 1;
3642         break;
3643     case 0x7:
3644         rpt = 1;
3645         selem = 1;
3646         break;
3647     case 0x8:
3648         rpt = 1;
3649         selem = 2;
3650         break;
3651     case 0xa:
3652         rpt = 2;
3653         selem = 1;
3654         break;
3655     default:
3656         unallocated_encoding(s);
3657         return;
3658     }
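    /*
     * Thus opcode 0x0 is LD4/ST4, 0x4 is LD3/ST3, 0x8 is LD2/ST2,
     * 0x7 is single-register LD1/ST1, and 0x2, 0x6 and 0xa are the
     * four-, three- and two-register forms of LD1/ST1.
     */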
3659 
3660     if (size == 3 && !is_q && selem != 1) {
3661         /* reserved */
3662         unallocated_encoding(s);
3663         return;
3664     }
3665 
3666     if (!fp_access_check(s)) {
3667         return;
3668     }
3669 
3670     if (rn == 31) {
3671         gen_check_sp_alignment(s);
3672     }
3673 
3674     /* For our purposes, bytes are always little-endian.  */
3675     endian = s->be_data;
3676     if (size == 0) {
3677         endian = MO_LE;
3678     }
3679 
3680     total = rpt * selem * (is_q ? 16 : 8);
3681     tcg_rn = cpu_reg_sp(s, rn);
3682 
3683     /*
3684      * Issue the MTE check vs the logical repeat count, before we
3685      * promote consecutive little-endian elements below.
3686      */
3687     clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31,
3688                                 total);
3689 
3690     /*
3691      * Consecutive little-endian elements from a single register
3692      * can be promoted to a larger little-endian operation.
3693      */
3694     align = MO_ALIGN;
3695     if (selem == 1 && endian == MO_LE) {
3696         align = pow2_align(size);
3697         size = 3;
3698     }
3699     if (!s->align_mem) {
3700         align = 0;
3701     }
3702     mop = endian | size | align;
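    /*
     * For example, a little-endian LD1 {v0.16b}, [x0] (selem=1, size=0)
     * has been promoted to size=3 and is performed as two 8-byte loads.
     */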
3703 
3704     elements = (is_q ? 16 : 8) >> size;
3705     tcg_ebytes = tcg_constant_i64(1 << size);
3706     for (r = 0; r < rpt; r++) {
3707         int e;
3708         for (e = 0; e < elements; e++) {
3709             int xs;
3710             for (xs = 0; xs < selem; xs++) {
3711                 int tt = (rt + r + xs) % 32;
3712                 if (is_store) {
3713                     do_vec_st(s, tt, e, clean_addr, mop);
3714                 } else {
3715                     do_vec_ld(s, tt, e, clean_addr, mop);
3716                 }
3717                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3718             }
3719         }
3720     }
3721 
3722     if (!is_store) {
3723         /* For non-quad operations, setting a slice of the low
3724          * 64 bits of the register clears the high 64 bits (in
3725          * the ARM ARM pseudocode this is implicit in the fact
3726          * that 'rval' is a 64 bit wide variable).
3727          * For quad operations, we might still need to zero the
3728          * high bits of SVE.
3729          */
3730         for (r = 0; r < rpt * selem; r++) {
3731             int tt = (rt + r) % 32;
3732             clear_vec_high(s, is_q, tt);
3733         }
3734     }
3735 
3736     if (is_postidx) {
3737         if (rm == 31) {
3738             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3739         } else {
3740             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3741         }
3742     }
3743 }
3744 
3745 /* AdvSIMD load/store single structure
3746  *
3747  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3748  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3749  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
3750  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3751  *
3752  * AdvSIMD load/store single structure (post-indexed)
3753  *
3754  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3755  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3756  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
3757  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3758  *
3759  * Rt: first (or only) SIMD&FP register to be transferred
3760  * Rn: base address or SP
3761  * Rm (post-index only): post-index register (when !31) or size dependent #imm
3762  * index = encoded in Q:S:size dependent on size
3763  *
3764  * lane_size = encoded in R, opc
3765  * transfer width = encoded in opc, S, size
3766  */
3767 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
3768 {
3769     int rt = extract32(insn, 0, 5);
3770     int rn = extract32(insn, 5, 5);
3771     int rm = extract32(insn, 16, 5);
3772     int size = extract32(insn, 10, 2);
3773     int S = extract32(insn, 12, 1);
3774     int opc = extract32(insn, 13, 3);
3775     int R = extract32(insn, 21, 1);
3776     int is_load = extract32(insn, 22, 1);
3777     int is_postidx = extract32(insn, 23, 1);
3778     int is_q = extract32(insn, 30, 1);
3779 
3780     int scale = extract32(opc, 1, 2);
3781     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
3782     bool replicate = false;
3783     int index = is_q << 3 | S << 2 | size;
3784     int xs, total;
3785     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3786     MemOp mop;
3787 
3788     if (extract32(insn, 31, 1)) {
3789         unallocated_encoding(s);
3790         return;
3791     }
3792     if (!is_postidx && rm != 0) {
3793         unallocated_encoding(s);
3794         return;
3795     }
3796 
3797     switch (scale) {
3798     case 3:
3799         if (!is_load || S) {
3800             unallocated_encoding(s);
3801             return;
3802         }
3803         scale = size;
3804         replicate = true;
3805         break;
3806     case 0:
3807         break;
3808     case 1:
3809         if (extract32(size, 0, 1)) {
3810             unallocated_encoding(s);
3811             return;
3812         }
3813         index >>= 1;
3814         break;
3815     case 2:
3816         if (extract32(size, 1, 1)) {
3817             unallocated_encoding(s);
3818             return;
3819         }
3820         if (!extract32(size, 0, 1)) {
3821             index >>= 2;
3822         } else {
3823             if (S) {
3824                 unallocated_encoding(s);
3825                 return;
3826             }
3827             index >>= 3;
3828             scale = 3;
3829         }
3830         break;
3831     default:
3832         g_assert_not_reached();
3833     }
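    /*
     * For example, "LD1 {v2.s}[3], [x0]" has opc=100 (scale=2), size=00,
     * S=1, Q=1: index = Q:S = 3, a single 32-bit element.
     */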
3834 
3835     if (!fp_access_check(s)) {
3836         return;
3837     }
3838 
3839     if (rn == 31) {
3840         gen_check_sp_alignment(s);
3841     }
3842 
3843     total = selem << scale;
3844     tcg_rn = cpu_reg_sp(s, rn);
3845 
3846     clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31,
3847                                 total);
3848     mop = finalize_memop(s, scale);
3849 
3850     tcg_ebytes = tcg_constant_i64(1 << scale);
3851     for (xs = 0; xs < selem; xs++) {
3852         if (replicate) {
3853             /* Load and replicate to all elements */
3854             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3855 
3856             tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
3857             tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
3858                                  (is_q + 1) * 8, vec_full_reg_size(s),
3859                                  tcg_tmp);
3860         } else {
3861             /* Load/store one element per register */
3862             if (is_load) {
3863                 do_vec_ld(s, rt, index, clean_addr, mop);
3864             } else {
3865                 do_vec_st(s, rt, index, clean_addr, mop);
3866             }
3867         }
3868         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3869         rt = (rt + 1) % 32;
3870     }
3871 
3872     if (is_postidx) {
3873         if (rm == 31) {
3874             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3875         } else {
3876             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3877         }
3878     }
3879 }
3880 
3881 /*
3882  * Load/Store memory tags
3883  *
3884  *  31 30 29         24     22  21     12    10      5      0
3885  * +-----+-------------+-----+---+------+-----+------+------+
3886  * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 |  Rn  |  Rt  |
3887  * +-----+-------------+-----+---+------+-----+------+------+
3888  */
3889 static void disas_ldst_tag(DisasContext *s, uint32_t insn)
3890 {
3891     int rt = extract32(insn, 0, 5);
3892     int rn = extract32(insn, 5, 5);
3893     uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
3894     int op2 = extract32(insn, 10, 2);
3895     int op1 = extract32(insn, 22, 2);
3896     bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
3897     int index = 0;
3898     TCGv_i64 addr, clean_addr, tcg_rt;
3899 
3900     /* We checked insn bits [29:24,21] in the caller.  */
3901     if (extract32(insn, 30, 2) != 3) {
3902         goto do_unallocated;
3903     }
3904 
3905     /*
3906      * @index is a tri-state variable which has 3 states:
3907      * < 0 : post-index, writeback
3908      * = 0 : signed offset
3909      * > 0 : pre-index, writeback
3910      */
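    /*
     * op2 == 0 selects the LDG and bulk-tag (STZGM/STGM/LDGM) forms;
     * otherwise index = op2 - 2, giving -1 (post-index), 0 (signed
     * offset) or +1 (pre-index).
     */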
3911     switch (op1) {
3912     case 0:
3913         if (op2 != 0) {
3914             /* STG */
3915             index = op2 - 2;
3916         } else {
3917             /* STZGM */
3918             if (s->current_el == 0 || offset != 0) {
3919                 goto do_unallocated;
3920             }
3921             is_mult = is_zero = true;
3922         }
3923         break;
3924     case 1:
3925         if (op2 != 0) {
3926             /* STZG */
3927             is_zero = true;
3928             index = op2 - 2;
3929         } else {
3930             /* LDG */
3931             is_load = true;
3932         }
3933         break;
3934     case 2:
3935         if (op2 != 0) {
3936             /* ST2G */
3937             is_pair = true;
3938             index = op2 - 2;
3939         } else {
3940             /* STGM */
3941             if (s->current_el == 0 || offset != 0) {
3942                 goto do_unallocated;
3943             }
3944             is_mult = true;
3945         }
3946         break;
3947     case 3:
3948         if (op2 != 0) {
3949             /* STZ2G */
3950             is_pair = is_zero = true;
3951             index = op2 - 2;
3952         } else {
3953             /* LDGM */
3954             if (s->current_el == 0 || offset != 0) {
3955                 goto do_unallocated;
3956             }
3957             is_mult = is_load = true;
3958         }
3959         break;
3960 
3961     default:
3962     do_unallocated:
3963         unallocated_encoding(s);
3964         return;
3965     }
3966 
3967     if (is_mult
3968         ? !dc_isar_feature(aa64_mte, s)
3969         : !dc_isar_feature(aa64_mte_insn_reg, s)) {
3970         goto do_unallocated;
3971     }
3972 
3973     if (rn == 31) {
3974         gen_check_sp_alignment(s);
3975     }
3976 
3977     addr = read_cpu_reg_sp(s, rn, true);
3978     if (index >= 0) {
3979         /* pre-index or signed offset */
3980         tcg_gen_addi_i64(addr, addr, offset);
3981     }
3982 
3983     if (is_mult) {
3984         tcg_rt = cpu_reg(s, rt);
3985 
3986         if (is_zero) {
3987             int size = 4 << s->dcz_blocksize;
3988 
3989             if (s->ata) {
3990                 gen_helper_stzgm_tags(cpu_env, addr, tcg_rt);
3991             }
3992             /*
3993              * The non-tags portion of STZGM is mostly like DC_ZVA,
3994              * except the alignment happens before the access.
3995              */
3996             clean_addr = clean_data_tbi(s, addr);
3997             tcg_gen_andi_i64(clean_addr, clean_addr, -size);
3998             gen_helper_dc_zva(cpu_env, clean_addr);
3999         } else if (s->ata) {
4000             if (is_load) {
4001                 gen_helper_ldgm(tcg_rt, cpu_env, addr);
4002             } else {
4003                 gen_helper_stgm(cpu_env, addr, tcg_rt);
4004             }
4005         } else {
4006             MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
4007             int size = 4 << GMID_EL1_BS;
4008 
4009             clean_addr = clean_data_tbi(s, addr);
4010             tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4011             gen_probe_access(s, clean_addr, acc, size);
4012 
4013             if (is_load) {
4014                 /* The result tags are zeros.  */
4015                 tcg_gen_movi_i64(tcg_rt, 0);
4016             }
4017         }
4018         return;
4019     }
4020 
4021     if (is_load) {
4022         tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4023         tcg_rt = cpu_reg(s, rt);
4024         if (s->ata) {
4025             gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt);
4026         } else {
4027             clean_addr = clean_data_tbi(s, addr);
4028             gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4029             gen_address_with_allocation_tag0(tcg_rt, addr);
4030         }
4031     } else {
4032         tcg_rt = cpu_reg_sp(s, rt);
4033         if (!s->ata) {
4034             /*
4035              * For STG and ST2G, we need to check alignment and probe memory.
4036              * TODO: For STZG and STZ2G, we could rely on the stores below,
4037              * at least for system mode; user-only won't enforce alignment.
4038              */
4039             if (is_pair) {
4040                 gen_helper_st2g_stub(cpu_env, addr);
4041             } else {
4042                 gen_helper_stg_stub(cpu_env, addr);
4043             }
4044         } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4045             if (is_pair) {
4046                 gen_helper_st2g_parallel(cpu_env, addr, tcg_rt);
4047             } else {
4048                 gen_helper_stg_parallel(cpu_env, addr, tcg_rt);
4049             }
4050         } else {
4051             if (is_pair) {
4052                 gen_helper_st2g(cpu_env, addr, tcg_rt);
4053             } else {
4054                 gen_helper_stg(cpu_env, addr, tcg_rt);
4055             }
4056         }
4057     }
4058 
4059     if (is_zero) {
4060         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4061         TCGv_i64 zero64 = tcg_constant_i64(0);
4062         TCGv_i128 zero128 = tcg_temp_new_i128();
4063         int mem_index = get_mem_index(s);
4064         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4065 
4066         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4067 
4068         /* This is 1 or 2 atomic 16-byte operations. */
4069         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4070         if (is_pair) {
4071             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4072             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4073         }
4074     }
4075 
4076     if (index != 0) {
4077         /* pre-index or post-index */
4078         if (index < 0) {
4079             /* post-index */
4080             tcg_gen_addi_i64(addr, addr, offset);
4081         }
4082         tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr);
4083     }
4084 }
4085 
4086 /* Loads and stores */
4087 static void disas_ldst(DisasContext *s, uint32_t insn)
4088 {
4089     switch (extract32(insn, 24, 6)) {
4090     case 0x08: /* Load/store exclusive */
4091         disas_ldst_excl(s, insn);
4092         break;
4093     case 0x18: case 0x1c: /* Load register (literal) */
4094         disas_ld_lit(s, insn);
4095         break;
4096     case 0x28: case 0x29:
4097     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
4098         disas_ldst_pair(s, insn);
4099         break;
4100     case 0x38: case 0x39:
4101     case 0x3c: case 0x3d: /* Load/store register (all forms) */
4102         disas_ldst_reg(s, insn);
4103         break;
4104     case 0x0c: /* AdvSIMD load/store multiple structures */
4105         disas_ldst_multiple_struct(s, insn);
4106         break;
4107     case 0x0d: /* AdvSIMD load/store single structure */
4108         disas_ldst_single_struct(s, insn);
4109         break;
4110     case 0x19:
4111         if (extract32(insn, 21, 1) != 0) {
4112             disas_ldst_tag(s, insn);
4113         } else if (extract32(insn, 10, 2) == 0) {
4114             disas_ldst_ldapr_stlr(s, insn);
4115         } else {
4116             unallocated_encoding(s);
4117         }
4118         break;
4119     default:
4120         unallocated_encoding(s);
4121         break;
4122     }
4123 }
4124 
4125 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4126 
4127 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4128                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4129 {
4130     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4131     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4132     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4133 
4134     fn(tcg_rd, tcg_rn, tcg_imm);
4135     if (!a->sf) {
4136         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4137     }
4138     return true;
4139 }
4140 
4141 /*
4142  * PC-rel. addressing
4143  */
4144 
4145 static bool trans_ADR(DisasContext *s, arg_ri *a)
4146 {
4147     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4148     return true;
4149 }
4150 
4151 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4152 {
4153     int64_t offset = (int64_t)a->imm << 12;
4154 
4155     /* The page offset is ok for CF_PCREL. */
4156     offset -= s->pc_curr & 0xfff;
4157     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4158     return true;
4159 }
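/*
 * For example, an ADRP at pc 0x40001234 with imm=1 computes
 * (0x40001234 & ~0xfff) + 0x1000 = 0x40002000: the page-aligned
 * pc plus the page-scaled immediate.
 */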
4160 
4161 /*
4162  * Add/subtract (immediate)
4163  */
4164 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4165 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4166 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4167 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4168 
4169 /*
4170  * Add/subtract (immediate, with tags)
4171  */
4172 
4173 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4174                                       bool sub_op)
4175 {
4176     TCGv_i64 tcg_rn, tcg_rd;
4177     int imm;
4178 
4179     imm = a->uimm6 << LOG2_TAG_GRANULE;
4180     if (sub_op) {
4181         imm = -imm;
4182     }
4183 
4184     tcg_rn = cpu_reg_sp(s, a->rn);
4185     tcg_rd = cpu_reg_sp(s, a->rd);
4186 
4187     if (s->ata) {
4188         gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn,
4189                            tcg_constant_i32(imm),
4190                            tcg_constant_i32(a->uimm4));
4191     } else {
4192         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4193         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4194     }
4195     return true;
4196 }
4197 
4198 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4199 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4200 
4201 /* The input should be a value in the bottom e bits (with higher
4202  * bits zero); returns that value replicated into every element
4203  * of size e in a 64 bit integer.
4204  */
4205 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4206 {
4207     assert(e != 0);
4208     while (e < 64) {
4209         mask |= mask << e;
4210         e *= 2;
4211     }
4212     return mask;
4213 }
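/* For example, bitfield_replicate(0x3, 8) returns 0x0303030303030303. */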
4214 
4215 /*
4216  * Logical (immediate)
4217  */
4218 
4219 /*
4220  * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4221  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4222  * value (ie should cause a guest UNDEF exception), and true if they are
4223  * valid, in which case the decoded bit pattern is written to result.
4224  */
4225 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4226                             unsigned int imms, unsigned int immr)
4227 {
4228     uint64_t mask;
4229     unsigned e, levels, s, r;
4230     int len;
4231 
4232     assert(immn < 2 && imms < 64 && immr < 64);
4233 
4234     /* The bit patterns we create here are 64 bit patterns which
4235      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4236      * 64 bits each. Each element contains the same value: a run
4237      * of between 1 and e-1 non-zero bits, rotated within the
4238      * element by between 0 and e-1 bits.
4239      *
4240      * The element size and run length are encoded into immn (1 bit)
4241      * and imms (6 bits) as follows:
4242      * 64 bit elements: immn = 1, imms = <length of run - 1>
4243      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4244      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4245      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4246      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4247      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4248      * Notice that immn = 0, imms = 11111x is the only combination
4249      * not covered by one of the above options; this is reserved.
4250      * Further, <length of run - 1> all-ones is a reserved pattern.
4251      *
4252      * In all cases the rotation is by immr % e (and immr is 6 bits).
4253      */
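    /*
     * Worked example: immn=0, imms=000011, immr=000001 gives
     * len = 31 - clz32(0b111100) = 5, so e = 32, s = 3, r = 1:
     * four set bits rotated right by one within each 32-bit element,
     * i.e. 0x80000007 replicated into 0x8000000780000007.
     */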
4254 
4255     /* First determine the element size */
4256     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4257     if (len < 1) {
4258         /* This is the immn == 0, imms == 11111x case */
4259         return false;
4260     }
4261     e = 1 << len;
4262 
4263     levels = e - 1;
4264     s = imms & levels;
4265     r = immr & levels;
4266 
4267     if (s == levels) {
4268         /* <length of run - 1> mustn't be all-ones. */
4269         return false;
4270     }
4271 
4272     /* Create the value of one element: s+1 set bits rotated
4273      * by r within the element (which is e bits wide)...
4274      */
4275     mask = MAKE_64BIT_MASK(0, s + 1);
4276     if (r) {
4277         mask = (mask >> r) | (mask << (e - r));
4278         mask &= MAKE_64BIT_MASK(0, e);
4279     }
4280     /* ...then replicate the element over the whole 64 bit value */
4281     mask = bitfield_replicate(mask, e);
4282     *result = mask;
4283     return true;
4284 }
4285 
4286 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4287                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4288 {
4289     TCGv_i64 tcg_rd, tcg_rn;
4290     uint64_t imm;
4291 
4292     /* Some immediate field values are reserved. */
4293     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4294                                 extract32(a->dbm, 0, 6),
4295                                 extract32(a->dbm, 6, 6))) {
4296         return false;
4297     }
4298     if (!a->sf) {
4299         imm &= 0xffffffffull;
4300     }
4301 
4302     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4303     tcg_rn = cpu_reg(s, a->rn);
4304 
4305     fn(tcg_rd, tcg_rn, imm);
4306     if (set_cc) {
4307         gen_logic_CC(a->sf, tcg_rd);
4308     }
4309     if (!a->sf) {
4310         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4311     }
4312     return true;
4313 }
4314 
4315 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4316 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4317 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4318 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4319 
4320 /*
4321  * Move wide (immediate)
4322  */
4323 
4324 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4325 {
4326     int pos = a->hw << 4;
4327     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4328     return true;
4329 }
4330 
4331 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4332 {
4333     int pos = a->hw << 4;
4334     uint64_t imm = a->imm;
4335 
4336     imm = ~(imm << pos);
4337     if (!a->sf) {
4338         imm = (uint32_t)imm;
4339     }
4340     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4341     return true;
4342 }
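/* For example, "MOVN W0, #1" (hw=0, sf=0) writes ~1 = 0xfffffffe to W0. */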
4343 
4344 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4345 {
4346     int pos = a->hw << 4;
4347     TCGv_i64 tcg_rd, tcg_im;
4348 
4349     tcg_rd = cpu_reg(s, a->rd);
4350     tcg_im = tcg_constant_i64(a->imm);
4351     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4352     if (!a->sf) {
4353         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4354     }
4355     return true;
4356 }
4357 
4358 /*
4359  * Bitfield
4360  */
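/*
 * SBFM, UBFM and BFM below also implement their aliases: the
 * immediate forms of ASR/LSR/LSL, SBFX/SBFIZ/UBFX/UBFIZ/BFI/BFXIL,
 * and SXTB/SXTH/SXTW/UXTB/UXTH.
 */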
4361 
4362 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4363 {
4364     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4365     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4366     unsigned int bitsize = a->sf ? 64 : 32;
4367     unsigned int ri = a->immr;
4368     unsigned int si = a->imms;
4369     unsigned int pos, len;
4370 
4371     if (si >= ri) {
4372         /* Wd<s-r:0> = Wn<s:r> */
4373         len = (si - ri) + 1;
4374         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4375         if (!a->sf) {
4376             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4377         }
4378     } else {
4379         /* Wd<32+s-r,32-r> = Wn<s:0> */
4380         len = si + 1;
4381         pos = (bitsize - ri) & (bitsize - 1);
4382 
4383         if (len < ri) {
4384             /*
4385              * Sign extend the destination field from len to fill the
4386              * balance of the word.  Let the deposit below insert all
4387              * of those sign bits.
4388              */
4389             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4390             len = ri;
4391         }
4392 
4393         /*
4394          * We start with zero, and we haven't modified any bits outside
4395          * bitsize, therefore no final zero-extension is needed for !sf.
4396          */
4397         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4398     }
4399     return true;
4400 }
4401 
4402 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4403 {
4404     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4405     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4406     unsigned int bitsize = a->sf ? 64 : 32;
4407     unsigned int ri = a->immr;
4408     unsigned int si = a->imms;
4409     unsigned int pos, len;
4410 
4414     if (si >= ri) {
4415         /* Wd<s-r:0> = Wn<s:r> */
4416         len = (si - ri) + 1;
4417         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4418     } else {
4419         /* Wd<32+s-r,32-r> = Wn<s:0> */
4420         len = si + 1;
4421         pos = (bitsize - ri) & (bitsize - 1);
4422         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4423     }
4424     return true;
4425 }
4426 
4427 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4428 {
4429     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4430     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4431     unsigned int bitsize = a->sf ? 64 : 32;
4432     unsigned int ri = a->immr;
4433     unsigned int si = a->imms;
4434     unsigned int pos, len;
4435 
4439     if (si >= ri) {
4440         /* Wd<s-r:0> = Wn<s:r> */
4441         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4442         len = (si - ri) + 1;
4443         pos = 0;
4444     } else {
4445         /* Wd<32+s-r,32-r> = Wn<s:0> */
4446         len = si + 1;
4447         pos = (bitsize - ri) & (bitsize - 1);
4448     }
4449 
4450     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4451     if (!a->sf) {
4452         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4453     }
4454     return true;
4455 }
4456 
4457 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4458 {
4459     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4460 
4461     tcg_rd = cpu_reg(s, a->rd);
4462 
4463     if (unlikely(a->imm == 0)) {
4464         /*
4465          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4466          * so an extract from bit 0 is a special case.
4467          */
4468         if (a->sf) {
4469             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4470         } else {
4471             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4472         }
4473     } else {
4474         tcg_rm = cpu_reg(s, a->rm);
4475         tcg_rn = cpu_reg(s, a->rn);
4476 
4477         if (a->sf) {
4478             /* Specialization to ROR happens in EXTRACT2.  */
4479             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4480         } else {
4481             TCGv_i32 t0 = tcg_temp_new_i32();
4482 
4483             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4484             if (a->rm == a->rn) {
4485                 tcg_gen_rotri_i32(t0, t0, a->imm);
4486             } else {
4487                 TCGv_i32 t1 = tcg_temp_new_i32();
4488                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4489                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4490             }
4491             tcg_gen_extu_i32_i64(tcg_rd, t0);
4492         }
4493     }
4494     return true;
4495 }
4496 
4497 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
4498  * Note that it is the caller's responsibility to ensure that the
4499  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4500  * mandated semantics for out of range shifts.
4501  */
4502 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4503                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4504 {
4505     switch (shift_type) {
4506     case A64_SHIFT_TYPE_LSL:
4507         tcg_gen_shl_i64(dst, src, shift_amount);
4508         break;
4509     case A64_SHIFT_TYPE_LSR:
4510         tcg_gen_shr_i64(dst, src, shift_amount);
4511         break;
4512     case A64_SHIFT_TYPE_ASR:
4513         if (!sf) {
4514             tcg_gen_ext32s_i64(dst, src);
4515         }
4516         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4517         break;
4518     case A64_SHIFT_TYPE_ROR:
4519         if (sf) {
4520             tcg_gen_rotr_i64(dst, src, shift_amount);
4521         } else {
4522             TCGv_i32 t0, t1;
4523             t0 = tcg_temp_new_i32();
4524             t1 = tcg_temp_new_i32();
4525             tcg_gen_extrl_i64_i32(t0, src);
4526             tcg_gen_extrl_i64_i32(t1, shift_amount);
4527             tcg_gen_rotr_i32(t0, t0, t1);
4528             tcg_gen_extu_i32_i64(dst, t0);
4529         }
4530         break;
4531     default:
4532         g_assert_not_reached(); /* all shift types should be handled */
4533         break;
4534     }
4535 
4536     if (!sf) { /* zero extend final result */
4537         tcg_gen_ext32u_i64(dst, dst);
4538     }
4539 }
4540 
4541 /* Shift a TCGv src by immediate, put result in dst.
4542  * The shift amount must be in range (this should always be true as the
4543  * relevant instructions will UNDEF on bad shift immediates).
4544  */
4545 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4546                           enum a64_shift_type shift_type, unsigned int shift_i)
4547 {
4548     assert(shift_i < (sf ? 64 : 32));
4549 
4550     if (shift_i == 0) {
4551         tcg_gen_mov_i64(dst, src);
4552     } else {
4553         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
4554     }
4555 }
4556 
4557 /* Logical (shifted register)
4558  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4559  * +----+-----+-----------+-------+---+------+--------+------+------+
4560  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4561  * +----+-----+-----------+-------+---+------+--------+------+------+
4562  */
4563 static void disas_logic_reg(DisasContext *s, uint32_t insn)
4564 {
4565     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4566     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4567 
4568     sf = extract32(insn, 31, 1);
4569     opc = extract32(insn, 29, 2);
4570     shift_type = extract32(insn, 22, 2);
4571     invert = extract32(insn, 21, 1);
4572     rm = extract32(insn, 16, 5);
4573     shift_amount = extract32(insn, 10, 6);
4574     rn = extract32(insn, 5, 5);
4575     rd = extract32(insn, 0, 5);
4576 
4577     if (!sf && (shift_amount & (1 << 5))) {
4578         unallocated_encoding(s);
4579         return;
4580     }
4581 
4582     tcg_rd = cpu_reg(s, rd);
4583 
4584     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4585         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
4586          * register-register MOV and MVN, so it is worth special casing.
4587          */
4588         tcg_rm = cpu_reg(s, rm);
4589         if (invert) {
4590             tcg_gen_not_i64(tcg_rd, tcg_rm);
4591             if (!sf) {
4592                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4593             }
4594         } else {
4595             if (sf) {
4596                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
4597             } else {
4598                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4599             }
4600         }
4601         return;
4602     }
4603 
4604     tcg_rm = read_cpu_reg(s, rm, sf);
4605 
4606     if (shift_amount) {
4607         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4608     }
4609 
4610     tcg_rn = cpu_reg(s, rn);
4611 
4612     switch (opc | (invert << 2)) {
4613     case 0: /* AND */
4614     case 3: /* ANDS */
4615         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4616         break;
4617     case 1: /* ORR */
4618         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4619         break;
4620     case 2: /* EOR */
4621         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4622         break;
4623     case 4: /* BIC */
4624     case 7: /* BICS */
4625         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4626         break;
4627     case 5: /* ORN */
4628         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4629         break;
4630     case 6: /* EON */
4631         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4632         break;
4633     default:
4634         g_assert_not_reached();
4635         break;
4636     }
4637 
4638     if (!sf) {
4639         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4640     }
4641 
4642     if (opc == 3) {
4643         gen_logic_CC(sf, tcg_rd);
4644     }
4645 }
4646 
4647 /*
4648  * Add/subtract (extended register)
4649  *
4650  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4651  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4652  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4653  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4654  *
4655  *  sf: 0 -> 32bit, 1 -> 64bit
4656  *  op: 0 -> add  , 1 -> sub
4657  *   S: 1 -> set flags
4658  * opt: 00
4659  * option: extension type (see DecodeRegExtend)
4660  * imm3: optional shift to Rm
4661  *
4662  * Rd = Rn + LSL(extend(Rm), amount)
4663  */
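/*
 * For example, "ADD X0, SP, W1, UXTW #2" encodes option=010 (UXTW)
 * and imm3=2: X0 = SP + (ZeroExtend(W1) << 2).
 */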
4664 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4665 {
4666     int rd = extract32(insn, 0, 5);
4667     int rn = extract32(insn, 5, 5);
4668     int imm3 = extract32(insn, 10, 3);
4669     int option = extract32(insn, 13, 3);
4670     int rm = extract32(insn, 16, 5);
4671     int opt = extract32(insn, 22, 2);
4672     bool setflags = extract32(insn, 29, 1);
4673     bool sub_op = extract32(insn, 30, 1);
4674     bool sf = extract32(insn, 31, 1);
4675 
4676     TCGv_i64 tcg_rm, tcg_rn; /* temps */
4677     TCGv_i64 tcg_rd;
4678     TCGv_i64 tcg_result;
4679 
4680     if (imm3 > 4 || opt != 0) {
4681         unallocated_encoding(s);
4682         return;
4683     }
4684 
4685     /* non-flag setting ops may use SP */
4686     if (!setflags) {
4687         tcg_rd = cpu_reg_sp(s, rd);
4688     } else {
4689         tcg_rd = cpu_reg(s, rd);
4690     }
4691     tcg_rn = read_cpu_reg_sp(s, rn, sf);
4692 
4693     tcg_rm = read_cpu_reg(s, rm, sf);
4694     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4695 
4696     tcg_result = tcg_temp_new_i64();
4697 
4698     if (!setflags) {
4699         if (sub_op) {
4700             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4701         } else {
4702             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4703         }
4704     } else {
4705         if (sub_op) {
4706             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4707         } else {
4708             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4709         }
4710     }
4711 
4712     if (sf) {
4713         tcg_gen_mov_i64(tcg_rd, tcg_result);
4714     } else {
4715         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4716     }
4717 }
4718 
4719 /*
4720  * Add/subtract (shifted register)
4721  *
4722  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4723  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4724  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4725  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4726  *
4727  *    sf: 0 -> 32bit, 1 -> 64bit
4728  *    op: 0 -> add  , 1 -> sub
4729  *     S: 1 -> set flags
4730  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4731  *  imm6: Shift amount to apply to Rm before the add/sub
4732  */
4733 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4734 {
4735     int rd = extract32(insn, 0, 5);
4736     int rn = extract32(insn, 5, 5);
4737     int imm6 = extract32(insn, 10, 6);
4738     int rm = extract32(insn, 16, 5);
4739     int shift_type = extract32(insn, 22, 2);
4740     bool setflags = extract32(insn, 29, 1);
4741     bool sub_op = extract32(insn, 30, 1);
4742     bool sf = extract32(insn, 31, 1);
4743 
4744     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4745     TCGv_i64 tcg_rn, tcg_rm;
4746     TCGv_i64 tcg_result;
4747 
4748     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4749         unallocated_encoding(s);
4750         return;
4751     }
4752 
4753     tcg_rn = read_cpu_reg(s, rn, sf);
4754     tcg_rm = read_cpu_reg(s, rm, sf);
4755 
4756     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4757 
4758     tcg_result = tcg_temp_new_i64();
4759 
4760     if (!setflags) {
4761         if (sub_op) {
4762             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4763         } else {
4764             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4765         }
4766     } else {
4767         if (sub_op) {
4768             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4769         } else {
4770             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4771         }
4772     }
4773 
4774     if (sf) {
4775         tcg_gen_mov_i64(tcg_rd, tcg_result);
4776     } else {
4777         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4778     }
4779 }
4780 
4781 /* Data-processing (3 source)
4782  *
4783  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
4784  *  +--+------+-----------+------+------+----+------+------+------+
4785  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4786  *  +--+------+-----------+------+------+----+------+------+------+
4787  */
4788 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4789 {
4790     int rd = extract32(insn, 0, 5);
4791     int rn = extract32(insn, 5, 5);
4792     int ra = extract32(insn, 10, 5);
4793     int rm = extract32(insn, 16, 5);
4794     int op_id = (extract32(insn, 29, 3) << 4) |
4795         (extract32(insn, 21, 3) << 1) |
4796         extract32(insn, 15, 1);
4797     bool sf = extract32(insn, 31, 1);
4798     bool is_sub = extract32(op_id, 0, 1);
4799     bool is_high = extract32(op_id, 2, 1);
4800     bool is_signed = false;
4801     TCGv_i64 tcg_op1;
4802     TCGv_i64 tcg_op2;
4803     TCGv_i64 tcg_tmp;
4804 
4805     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
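    /* For example SMADDL has sf=1, op54=00, op31=001, o0=0: op_id 0x42. */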
4806     switch (op_id) {
4807     case 0x42: /* SMADDL */
4808     case 0x43: /* SMSUBL */
4809     case 0x44: /* SMULH */
4810         is_signed = true;
4811         break;
4812     case 0x0: /* MADD (32bit) */
4813     case 0x1: /* MSUB (32bit) */
4814     case 0x40: /* MADD (64bit) */
4815     case 0x41: /* MSUB (64bit) */
4816     case 0x4a: /* UMADDL */
4817     case 0x4b: /* UMSUBL */
4818     case 0x4c: /* UMULH */
4819         break;
4820     default:
4821         unallocated_encoding(s);
4822         return;
4823     }
4824 
4825     if (is_high) {
4826         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4827         TCGv_i64 tcg_rd = cpu_reg(s, rd);
4828         TCGv_i64 tcg_rn = cpu_reg(s, rn);
4829         TCGv_i64 tcg_rm = cpu_reg(s, rm);
4830 
4831         if (is_signed) {
4832             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4833         } else {
4834             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4835         }
4836         return;
4837     }
4838 
4839     tcg_op1 = tcg_temp_new_i64();
4840     tcg_op2 = tcg_temp_new_i64();
4841     tcg_tmp = tcg_temp_new_i64();
4842 
4843     if (op_id < 0x42) {
4844         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4845         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4846     } else {
4847         if (is_signed) {
4848             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4849             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4850         } else {
4851             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4852             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4853         }
4854     }
4855 
4856     if (ra == 31 && !is_sub) {
4857         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4858         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4859     } else {
4860         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4861         if (is_sub) {
4862             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4863         } else {
4864             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4865         }
4866     }
4867 
4868     if (!sf) {
4869         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4870     }
4871 }
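
/*
 * Illustrative sketch (not QEMU code) of the op_id packing used above:
 * sf:op54:op31:o0 folded into a single switch key.  For SMULH X0, X1, X2
 * (encoding 0x9b427c20): sf=1, op54=00, op31=010, o0=0, which packs
 * to 0x44, matching the case label above.
 */
static inline uint32_t sketch_dp3src_op_id(uint32_t insn)
{
    return (extract32(insn, 29, 3) << 4)    /* sf:op54 -> bits [6:4] */
         | (extract32(insn, 21, 3) << 1)    /* op31    -> bits [3:1] */
         | extract32(insn, 15, 1);          /* o0      -> bit 0      */
}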
4872 
4873 /* Add/subtract (with carry)
4874  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
4875  * +--+--+--+------------------------+------+-------------+------+-----+
4876  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
4877  * +--+--+--+------------------------+------+-------------+------+-----+
4878  */
4879 
4880 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4881 {
4882     unsigned int sf, op, setflags, rm, rn, rd;
4883     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4884 
4885     sf = extract32(insn, 31, 1);
4886     op = extract32(insn, 30, 1);
4887     setflags = extract32(insn, 29, 1);
4888     rm = extract32(insn, 16, 5);
4889     rn = extract32(insn, 5, 5);
4890     rd = extract32(insn, 0, 5);
4891 
4892     tcg_rd = cpu_reg(s, rd);
4893     tcg_rn = cpu_reg(s, rn);
4894 
4895     if (op) {
4896         tcg_y = tcg_temp_new_i64();
4897         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4898     } else {
4899         tcg_y = cpu_reg(s, rm);
4900     }
4901 
4902     if (setflags) {
4903         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4904     } else {
4905         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4906     }
4907 }
4908 
4909 /*
4910  * Rotate right into flags
4911  *  31 30 29                21       15          10      5  4      0
4912  * +--+--+--+-----------------+--------+-----------+------+--+------+
4913  * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
4914  * +--+--+--+-----------------+--------+-----------+------+--+------+
4915  */
4916 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
4917 {
4918     int mask = extract32(insn, 0, 4);
4919     int o2 = extract32(insn, 4, 1);
4920     int rn = extract32(insn, 5, 5);
4921     int imm6 = extract32(insn, 15, 6);
4922     int sf_op_s = extract32(insn, 29, 3);
4923     TCGv_i64 tcg_rn;
4924     TCGv_i32 nzcv;
4925 
4926     if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
4927         unallocated_encoding(s);
4928         return;
4929     }
4930 
4931     tcg_rn = read_cpu_reg(s, rn, 1);
4932     tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
4933 
4934     nzcv = tcg_temp_new_i32();
4935     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
4936 
4937     if (mask & 8) { /* N */
4938         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
4939     }
4940     if (mask & 4) { /* Z */
4941         tcg_gen_not_i32(cpu_ZF, nzcv);
4942         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
4943     }
4944     if (mask & 2) { /* C */
4945         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
4946     }
4947     if (mask & 1) { /* V */
4948         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
4949     }
4950 }
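
/*
 * Host-side sketch (illustrative only) of the flag mapping generated
 * above.  QEMU keeps the AArch64 flags as: N = NF<31>, Z = (ZF == 0),
 * C = CF, V = VF<31>.  RMIF rotates Xn right by imm6 and moves bits
 * 3..0 of the result (N,Z,C,V) into whichever flags the mask selects.
 */
static inline void sketch_rmif(uint64_t xn, int imm6, int mask,
                               uint32_t *nf, uint32_t *zf,
                               uint32_t *cf, uint32_t *vf)
{
    uint32_t nzcv = (uint32_t)(imm6 ? (xn >> imm6) | (xn << (64 - imm6))
                                    : xn);

    if (mask & 8) {
        *nf = nzcv << 28;            /* bit 3 -> sign bit of NF */
    }
    if (mask & 4) {
        *zf = ~nzcv & 4;             /* ZF == 0 iff bit 2 was set */
    }
    if (mask & 2) {
        *cf = (nzcv >> 1) & 1;       /* bit 1 -> CF directly */
    }
    if (mask & 1) {
        *vf = nzcv << 31;            /* bit 0 -> sign bit of VF */
    }
}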
4951 
4952 /*
4953  * Evaluate into flags
4954  *  31 30 29                21        15   14        10      5  4      0
4955  * +--+--+--+-----------------+---------+----+---------+------+--+------+
4956  * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
4957  * +--+--+--+-----------------+---------+----+---------+------+--+------+
4958  */
4959 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
4960 {
4961     int o3_mask = extract32(insn, 0, 5);
4962     int rn = extract32(insn, 5, 5);
4963     int o2 = extract32(insn, 15, 6);
4964     int sz = extract32(insn, 14, 1);
4965     int sf_op_s = extract32(insn, 29, 3);
4966     TCGv_i32 tmp;
4967     int shift;
4968 
4969     if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
4970         !dc_isar_feature(aa64_condm_4, s)) {
4971         unallocated_encoding(s);
4972         return;
4973     }
4974     shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
4975 
4976     tmp = tcg_temp_new_i32();
4977     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
4978     tcg_gen_shli_i32(cpu_NF, tmp, shift);
4979     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
4980     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
4981     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
4982 }
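
/*
 * Minimal sketch (not QEMU code) of the SETF8 arithmetic above; SETF16
 * is identical with shift = 16.  With shift = 24: NF gets the 8-bit
 * sign in bit 31, ZF is zero iff the low byte is zero, and VF<31>
 * becomes reg<8> ^ reg<7>, i.e. whether truncation to 8 bits
 * overflowed.  C is left unchanged by the instruction.
 */
static inline void sketch_setf8(uint32_t reg,
                                uint32_t *nf, uint32_t *zf, uint32_t *vf)
{
    *nf = reg << 24;
    *zf = *nf;                       /* Z = ((reg << 24) == 0) */
    *vf = (reg << 23) ^ *nf;         /* V = reg<8> ^ reg<7> */
}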
4983 
4984 /* Conditional compare (immediate / register)
4985  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
4986  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4987  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
4988  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4989  *        [1]                             y                [0]       [0]
4990  */
4991 static void disas_cc(DisasContext *s, uint32_t insn)
4992 {
4993     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
4994     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
4995     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
4996     DisasCompare c;
4997 
4998     if (!extract32(insn, 29, 1)) {
4999         unallocated_encoding(s);
5000         return;
5001     }
5002     if (insn & (1 << 10 | 1 << 4)) {
5003         unallocated_encoding(s);
5004         return;
5005     }
5006     sf = extract32(insn, 31, 1);
5007     op = extract32(insn, 30, 1);
5008     is_imm = extract32(insn, 11, 1);
5009     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
5010     cond = extract32(insn, 12, 4);
5011     rn = extract32(insn, 5, 5);
5012     nzcv = extract32(insn, 0, 4);
5013 
5014     /* Set T0 = !COND.  */
5015     tcg_t0 = tcg_temp_new_i32();
5016     arm_test_cc(&c, cond);
5017     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
5018 
5019     /* Load the arguments for the new comparison.  */
5020     if (is_imm) {
5021         tcg_y = tcg_temp_new_i64();
5022         tcg_gen_movi_i64(tcg_y, y);
5023     } else {
5024         tcg_y = cpu_reg(s, y);
5025     }
5026     tcg_rn = cpu_reg(s, rn);
5027 
5028     /* Set the flags for the new comparison.  */
5029     tcg_tmp = tcg_temp_new_i64();
5030     if (op) {
5031         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5032     } else {
5033         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5034     }
5035 
5036     /* If COND was false, force the flags to #nzcv.  Compute two masks
5037      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
5038      * For tcg hosts that support ANDC, we can make do with just T1.
5039      * In either case, allow the tcg optimizer to delete any unused mask.
5040      */
5041     tcg_t1 = tcg_temp_new_i32();
5042     tcg_t2 = tcg_temp_new_i32();
5043     tcg_gen_neg_i32(tcg_t1, tcg_t0);
5044     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
5045 
5046     if (nzcv & 8) { /* N */
5047         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
5048     } else {
5049         if (TCG_TARGET_HAS_andc_i32) {
5050             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
5051         } else {
5052             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
5053         }
5054     }
5055     if (nzcv & 4) { /* Z */
5056         if (TCG_TARGET_HAS_andc_i32) {
5057             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
5058         } else {
5059             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
5060         }
5061     } else {
5062         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
5063     }
5064     if (nzcv & 2) { /* C */
5065         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
5066     } else {
5067         if (TCG_TARGET_HAS_andc_i32) {
5068             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
5069         } else {
5070             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
5071         }
5072     }
5073     if (nzcv & 1) { /* V */
5074         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
5075     } else {
5076         if (TCG_TARGET_HAS_andc_i32) {
5077             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
5078         } else {
5079             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
5080         }
5081     }
5082 }
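
/*
 * Branch-free sketch (illustrative only) of the masking trick described
 * above: with t0 = !cond, t1 = -t0 = (cond ? 0 : -1) and
 * t2 = t0 - 1 = (cond ? -1 : 0), each flag can be forced to a constant
 * when the condition failed, without any branch.
 */
static inline uint32_t sketch_ccmp_force_set(uint32_t flag, bool cond)
{
    uint32_t t1 = -(uint32_t)!cond;  /* cond ? 0x00000000 : 0xffffffff */

    return flag | t1;                /* unchanged if cond, all-ones if not */
}

static inline uint32_t sketch_ccmp_force_clear(uint32_t flag, bool cond)
{
    uint32_t t2 = (uint32_t)!cond - 1;  /* cond ? 0xffffffff : 0 */

    return flag & t2;                /* unchanged if cond, zero if not */
}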
5083 
5084 /* Conditional select
5085  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
5086  * +----+----+---+-----------------+------+------+-----+------+------+
5087  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
5088  * +----+----+---+-----------------+------+------+-----+------+------+
5089  */
5090 static void disas_cond_select(DisasContext *s, uint32_t insn)
5091 {
5092     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
5093     TCGv_i64 tcg_rd, zero;
5094     DisasCompare64 c;
5095 
5096     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
5097         /* S == 1 or op2<1> == 1 */
5098         unallocated_encoding(s);
5099         return;
5100     }
5101     sf = extract32(insn, 31, 1);
5102     else_inv = extract32(insn, 30, 1);
5103     rm = extract32(insn, 16, 5);
5104     cond = extract32(insn, 12, 4);
5105     else_inc = extract32(insn, 10, 1);
5106     rn = extract32(insn, 5, 5);
5107     rd = extract32(insn, 0, 5);
5108 
5109     tcg_rd = cpu_reg(s, rd);
5110 
5111     a64_test_cc(&c, cond);
5112     zero = tcg_constant_i64(0);
5113 
5114     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
5115         /* CSET & CSETM.  */
5116         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
5117         if (else_inv) {
5118             tcg_gen_neg_i64(tcg_rd, tcg_rd);
5119         }
5120     } else {
5121         TCGv_i64 t_true = cpu_reg(s, rn);
5122         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
5123         if (else_inv && else_inc) {
5124             tcg_gen_neg_i64(t_false, t_false);
5125         } else if (else_inv) {
5126             tcg_gen_not_i64(t_false, t_false);
5127         } else if (else_inc) {
5128             tcg_gen_addi_i64(t_false, t_false, 1);
5129         }
5130         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
5131     }
5132 
5133     if (!sf) {
5134         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5135     }
5136 }
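
/*
 * Plain-C sketch (not QEMU code) of the conditional-select family
 * chosen by the else_inv/else_inc bits above: 00 CSEL, 01 CSINC,
 * 10 CSINV, 11 CSNEG.  The "else" operand is transformed first, then
 * a movcond-style select picks a side.
 */
static inline uint64_t sketch_csel(bool cond, uint64_t t_true,
                                   uint64_t t_false,
                                   bool else_inv, bool else_inc)
{
    if (else_inv && else_inc) {
        t_false = -t_false;          /* CSNEG */
    } else if (else_inv) {
        t_false = ~t_false;          /* CSINV */
    } else if (else_inc) {
        t_false = t_false + 1;       /* CSINC */
    }
    return cond ? t_true : t_false;
}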
5137 
5138 static void handle_clz(DisasContext *s, unsigned int sf,
5139                        unsigned int rn, unsigned int rd)
5140 {
5141     TCGv_i64 tcg_rd, tcg_rn;
5142     tcg_rd = cpu_reg(s, rd);
5143     tcg_rn = cpu_reg(s, rn);
5144 
5145     if (sf) {
5146         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
5147     } else {
5148         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5149         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5150         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
5151         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5152     }
5153 }
5154 
5155 static void handle_cls(DisasContext *s, unsigned int sf,
5156                        unsigned int rn, unsigned int rd)
5157 {
5158     TCGv_i64 tcg_rd, tcg_rn;
5159     tcg_rd = cpu_reg(s, rd);
5160     tcg_rn = cpu_reg(s, rn);
5161 
5162     if (sf) {
5163         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
5164     } else {
5165         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5166         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5167         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
5168         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5169     }
5170 }
5171 
5172 static void handle_rbit(DisasContext *s, unsigned int sf,
5173                         unsigned int rn, unsigned int rd)
5174 {
5175     TCGv_i64 tcg_rd, tcg_rn;
5176     tcg_rd = cpu_reg(s, rd);
5177     tcg_rn = cpu_reg(s, rn);
5178 
5179     if (sf) {
5180         gen_helper_rbit64(tcg_rd, tcg_rn);
5181     } else {
5182         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5183         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5184         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
5185         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5186     }
5187 }
5188 
5189 /* REV with sf==1, opcode==3 ("REV64") */
5190 static void handle_rev64(DisasContext *s, unsigned int sf,
5191                          unsigned int rn, unsigned int rd)
5192 {
5193     if (!sf) {
5194         unallocated_encoding(s);
5195         return;
5196     }
5197     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
5198 }
5199 
5200 /* REV (sf==0, opcode==2);
5201  * REV32 (sf==1, opcode==2).
5202  */
5203 static void handle_rev32(DisasContext *s, unsigned int sf,
5204                          unsigned int rn, unsigned int rd)
5205 {
5206     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5207     TCGv_i64 tcg_rn = cpu_reg(s, rn);
5208 
5209     if (sf) {
5210         tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
5211         tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
5212     } else {
5213         tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
5214     }
5215 }
5216 
5217 /* REV16 (opcode==1) */
5218 static void handle_rev16(DisasContext *s, unsigned int sf,
5219                          unsigned int rn, unsigned int rd)
5220 {
5221     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5222     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5223     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5224     TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
5225 
5226     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
5227     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
5228     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
5229     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
5230     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
5231 }
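
/*
 * Standalone sketch (illustrative only) of the REV16 mask-and-shift
 * trick above: the even bytes move up and the odd bytes move down,
 * swapping the two bytes of every halfword.  For example
 * 0x1122334455667788 becomes 0x2211443366558877.
 */
static inline uint64_t sketch_rev16(uint64_t x)
{
    const uint64_t mask = 0x00ff00ff00ff00ffull;

    return ((x & mask) << 8) | ((x >> 8) & mask);
}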
5232 
5233 /* Data-processing (1 source)
5234  *   31  30  29  28             21 20     16 15    10 9    5 4    0
5235  * +----+---+---+-----------------+---------+--------+------+------+
5236  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
5237  * +----+---+---+-----------------+---------+--------+------+------+
5238  */
5239 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
5240 {
5241     unsigned int sf, opcode, opcode2, rn, rd;
5242     TCGv_i64 tcg_rd;
5243 
5244     if (extract32(insn, 29, 1)) {
5245         unallocated_encoding(s);
5246         return;
5247     }
5248 
5249     sf = extract32(insn, 31, 1);
5250     opcode = extract32(insn, 10, 6);
5251     opcode2 = extract32(insn, 16, 5);
5252     rn = extract32(insn, 5, 5);
5253     rd = extract32(insn, 0, 5);
5254 
5255 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
5256 
5257     switch (MAP(sf, opcode2, opcode)) {
5258     case MAP(0, 0x00, 0x00): /* RBIT */
5259     case MAP(1, 0x00, 0x00):
5260         handle_rbit(s, sf, rn, rd);
5261         break;
5262     case MAP(0, 0x00, 0x01): /* REV16 */
5263     case MAP(1, 0x00, 0x01):
5264         handle_rev16(s, sf, rn, rd);
5265         break;
5266     case MAP(0, 0x00, 0x02): /* REV/REV32 */
5267     case MAP(1, 0x00, 0x02):
5268         handle_rev32(s, sf, rn, rd);
5269         break;
5270     case MAP(1, 0x00, 0x03): /* REV64 */
5271         handle_rev64(s, sf, rn, rd);
5272         break;
5273     case MAP(0, 0x00, 0x04): /* CLZ */
5274     case MAP(1, 0x00, 0x04):
5275         handle_clz(s, sf, rn, rd);
5276         break;
5277     case MAP(0, 0x00, 0x05): /* CLS */
5278     case MAP(1, 0x00, 0x05):
5279         handle_cls(s, sf, rn, rd);
5280         break;
5281     case MAP(1, 0x01, 0x00): /* PACIA */
5282         if (s->pauth_active) {
5283             tcg_rd = cpu_reg(s, rd);
5284             gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5285         } else if (!dc_isar_feature(aa64_pauth, s)) {
5286             goto do_unallocated;
5287         }
5288         break;
5289     case MAP(1, 0x01, 0x01): /* PACIB */
5290         if (s->pauth_active) {
5291             tcg_rd = cpu_reg(s, rd);
5292             gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5293         } else if (!dc_isar_feature(aa64_pauth, s)) {
5294             goto do_unallocated;
5295         }
5296         break;
5297     case MAP(1, 0x01, 0x02): /* PACDA */
5298         if (s->pauth_active) {
5299             tcg_rd = cpu_reg(s, rd);
5300             gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5301         } else if (!dc_isar_feature(aa64_pauth, s)) {
5302             goto do_unallocated;
5303         }
5304         break;
5305     case MAP(1, 0x01, 0x03): /* PACDB */
5306         if (s->pauth_active) {
5307             tcg_rd = cpu_reg(s, rd);
5308             gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5309         } else if (!dc_isar_feature(aa64_pauth, s)) {
5310             goto do_unallocated;
5311         }
5312         break;
5313     case MAP(1, 0x01, 0x04): /* AUTIA */
5314         if (s->pauth_active) {
5315             tcg_rd = cpu_reg(s, rd);
5316             gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5317         } else if (!dc_isar_feature(aa64_pauth, s)) {
5318             goto do_unallocated;
5319         }
5320         break;
5321     case MAP(1, 0x01, 0x05): /* AUTIB */
5322         if (s->pauth_active) {
5323             tcg_rd = cpu_reg(s, rd);
5324             gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5325         } else if (!dc_isar_feature(aa64_pauth, s)) {
5326             goto do_unallocated;
5327         }
5328         break;
5329     case MAP(1, 0x01, 0x06): /* AUTDA */
5330         if (s->pauth_active) {
5331             tcg_rd = cpu_reg(s, rd);
5332             gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5333         } else if (!dc_isar_feature(aa64_pauth, s)) {
5334             goto do_unallocated;
5335         }
5336         break;
5337     case MAP(1, 0x01, 0x07): /* AUTDB */
5338         if (s->pauth_active) {
5339             tcg_rd = cpu_reg(s, rd);
5340             gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5341         } else if (!dc_isar_feature(aa64_pauth, s)) {
5342             goto do_unallocated;
5343         }
5344         break;
5345     case MAP(1, 0x01, 0x08): /* PACIZA */
5346         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5347             goto do_unallocated;
5348         } else if (s->pauth_active) {
5349             tcg_rd = cpu_reg(s, rd);
5350             gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5351         }
5352         break;
5353     case MAP(1, 0x01, 0x09): /* PACIZB */
5354         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5355             goto do_unallocated;
5356         } else if (s->pauth_active) {
5357             tcg_rd = cpu_reg(s, rd);
5358             gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5359         }
5360         break;
5361     case MAP(1, 0x01, 0x0a): /* PACDZA */
5362         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5363             goto do_unallocated;
5364         } else if (s->pauth_active) {
5365             tcg_rd = cpu_reg(s, rd);
5366             gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5367         }
5368         break;
5369     case MAP(1, 0x01, 0x0b): /* PACDZB */
5370         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5371             goto do_unallocated;
5372         } else if (s->pauth_active) {
5373             tcg_rd = cpu_reg(s, rd);
5374             gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5375         }
5376         break;
5377     case MAP(1, 0x01, 0x0c): /* AUTIZA */
5378         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5379             goto do_unallocated;
5380         } else if (s->pauth_active) {
5381             tcg_rd = cpu_reg(s, rd);
5382             gen_helper_autia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5383         }
5384         break;
5385     case MAP(1, 0x01, 0x0d): /* AUTIZB */
5386         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5387             goto do_unallocated;
5388         } else if (s->pauth_active) {
5389             tcg_rd = cpu_reg(s, rd);
5390             gen_helper_autib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5391         }
5392         break;
5393     case MAP(1, 0x01, 0x0e): /* AUTDZA */
5394         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5395             goto do_unallocated;
5396         } else if (s->pauth_active) {
5397             tcg_rd = cpu_reg(s, rd);
5398             gen_helper_autda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5399         }
5400         break;
5401     case MAP(1, 0x01, 0x0f): /* AUTDZB */
5402         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5403             goto do_unallocated;
5404         } else if (s->pauth_active) {
5405             tcg_rd = cpu_reg(s, rd);
5406             gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5407         }
5408         break;
5409     case MAP(1, 0x01, 0x10): /* XPACI */
5410         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5411             goto do_unallocated;
5412         } else if (s->pauth_active) {
5413             tcg_rd = cpu_reg(s, rd);
5414             gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
5415         }
5416         break;
5417     case MAP(1, 0x01, 0x11): /* XPACD */
5418         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5419             goto do_unallocated;
5420         } else if (s->pauth_active) {
5421             tcg_rd = cpu_reg(s, rd);
5422             gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
5423         }
5424         break;
5425     default:
5426     do_unallocated:
5427         unallocated_encoding(s);
5428         break;
5429     }
5430 
5431 #undef MAP
5432 }
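
/*
 * Illustrative check (not QEMU code) of the MAP() key above: sf lands
 * in bit 0, opcode in bits [6:1], opcode2 in bits [11:7].  For example
 * XPACI (sf=1, opcode2=0x01, opcode=0x10) maps to
 * 1 | (0x10 << 1) | (0x01 << 7) = 0xa1.
 */
static inline unsigned sketch_dp1src_map(unsigned sf, unsigned o2,
                                         unsigned o1)
{
    return sf | (o1 << 1) | (o2 << 7);
}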
5433 
5434 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5435                        unsigned int rm, unsigned int rn, unsigned int rd)
5436 {
5437     TCGv_i64 tcg_n, tcg_m, tcg_rd;
5438     tcg_rd = cpu_reg(s, rd);
5439 
5440     if (!sf && is_signed) {
5441         tcg_n = tcg_temp_new_i64();
5442         tcg_m = tcg_temp_new_i64();
5443         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5444         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5445     } else {
5446         tcg_n = read_cpu_reg(s, rn, sf);
5447         tcg_m = read_cpu_reg(s, rm, sf);
5448     }
5449 
5450     if (is_signed) {
5451         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5452     } else {
5453         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5454     }
5455 
5456     if (!sf) { /* zero extend final result */
5457         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5458     }
5459 }
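
/*
 * Host-side sketch (illustrative only) of the AArch64 division
 * semantics the helpers above implement: division by zero yields 0
 * rather than trapping, and INT64_MIN / -1 wraps instead of raising
 * an exception.
 */
static inline int64_t sketch_a64_sdiv64(int64_t n, int64_t m)
{
    if (m == 0) {
        return 0;                    /* SDIV/UDIV by zero -> 0 */
    }
    if (n == INT64_MIN && m == -1) {
        return INT64_MIN;            /* overflow wraps, no trap */
    }
    return n / m;
}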
5460 
5461 /* LSLV, LSRV, ASRV, RORV */
5462 static void handle_shift_reg(DisasContext *s,
5463                              enum a64_shift_type shift_type, unsigned int sf,
5464                              unsigned int rm, unsigned int rn, unsigned int rd)
5465 {
5466     TCGv_i64 tcg_shift = tcg_temp_new_i64();
5467     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5468     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5469 
5470     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5471     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5472 }
5473 
5474 /* CRC32[BHWX], CRC32C[BHWX] */
5475 static void handle_crc32(DisasContext *s,
5476                          unsigned int sf, unsigned int sz, bool crc32c,
5477                          unsigned int rm, unsigned int rn, unsigned int rd)
5478 {
5479     TCGv_i64 tcg_acc, tcg_val;
5480     TCGv_i32 tcg_bytes;
5481 
5482     if (!dc_isar_feature(aa64_crc32, s)
5483         || (sf == 1 && sz != 3)
5484         || (sf == 0 && sz == 3)) {
5485         unallocated_encoding(s);
5486         return;
5487     }
5488 
5489     if (sz == 3) {
5490         tcg_val = cpu_reg(s, rm);
5491     } else {
5492         uint64_t mask;
5493         switch (sz) {
5494         case 0:
5495             mask = 0xFF;
5496             break;
5497         case 1:
5498             mask = 0xFFFF;
5499             break;
5500         case 2:
5501             mask = 0xFFFFFFFF;
5502             break;
5503         default:
5504             g_assert_not_reached();
5505         }
5506         tcg_val = tcg_temp_new_i64();
5507         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5508     }
5509 
5510     tcg_acc = cpu_reg(s, rn);
5511     tcg_bytes = tcg_constant_i32(1 << sz);
5512 
5513     if (crc32c) {
5514         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5515     } else {
5516         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5517     }
5518 }
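
/*
 * The switch above builds the operand mask case by case; equivalently,
 * for sz in {0,1,2} the mask is 1 << sz bytes of ones.  A closed-form
 * sketch (illustrative only, not QEMU code):
 */
static inline uint64_t sketch_crc32_operand_mask(int sz)
{
    /* sz: 0 -> 0xff, 1 -> 0xffff, 2 -> 0xffffffff; sz == 3 uses all 64 */
    return (1ull << (8 << sz)) - 1;
}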
5519 
5520 /* Data-processing (2 source)
5521  *   31   30  29 28             21 20  16 15    10 9    5 4    0
5522  * +----+---+---+-----------------+------+--------+------+------+
5523  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
5524  * +----+---+---+-----------------+------+--------+------+------+
5525  */
5526 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5527 {
5528     unsigned int sf, rm, opcode, rn, rd, setflag;
5529     sf = extract32(insn, 31, 1);
5530     setflag = extract32(insn, 29, 1);
5531     rm = extract32(insn, 16, 5);
5532     opcode = extract32(insn, 10, 6);
5533     rn = extract32(insn, 5, 5);
5534     rd = extract32(insn, 0, 5);
5535 
5536     if (setflag && opcode != 0) {
5537         unallocated_encoding(s);
5538         return;
5539     }
5540 
5541     switch (opcode) {
5542     case 0: /* SUBP(S) */
5543         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5544             goto do_unallocated;
5545         } else {
5546             TCGv_i64 tcg_n, tcg_m, tcg_d;
5547 
5548             tcg_n = read_cpu_reg_sp(s, rn, true);
5549             tcg_m = read_cpu_reg_sp(s, rm, true);
5550             tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
5551             tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
5552             tcg_d = cpu_reg(s, rd);
5553 
5554             if (setflag) {
5555                 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
5556             } else {
5557                 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
5558             }
5559         }
5560         break;
5561     case 2: /* UDIV */
5562         handle_div(s, false, sf, rm, rn, rd);
5563         break;
5564     case 3: /* SDIV */
5565         handle_div(s, true, sf, rm, rn, rd);
5566         break;
5567     case 4: /* IRG */
5568         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5569             goto do_unallocated;
5570         }
5571         if (s->ata) {
5572             gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
5573                            cpu_reg_sp(s, rn), cpu_reg(s, rm));
5574         } else {
5575             gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
5576                                              cpu_reg_sp(s, rn));
5577         }
5578         break;
5579     case 5: /* GMI */
5580         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5581             goto do_unallocated;
5582         } else {
5583             TCGv_i64 t = tcg_temp_new_i64();
5584 
5585             tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
5586             tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
5587             tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
5588         }
5589         break;
5590     case 8: /* LSLV */
5591         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5592         break;
5593     case 9: /* LSRV */
5594         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5595         break;
5596     case 10: /* ASRV */
5597         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5598         break;
5599     case 11: /* RORV */
5600         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5601         break;
5602     case 12: /* PACGA */
5603         if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5604             goto do_unallocated;
5605         }
5606         gen_helper_pacga(cpu_reg(s, rd), cpu_env,
5607                          cpu_reg(s, rn), cpu_reg_sp(s, rm));
5608         break;
5609     case 16:
5610     case 17:
5611     case 18:
5612     case 19:
5613     case 20:
5614     case 21:
5615     case 22:
5616     case 23: /* CRC32 */
5617     {
5618         int sz = extract32(opcode, 0, 2);
5619         bool crc32c = extract32(opcode, 2, 1);
5620         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5621         break;
5622     }
5623     default:
5624     do_unallocated:
5625         unallocated_encoding(s);
5626         break;
5627     }
5628 }
5629 
5630 /*
5631  * Data processing - register
5632  *  31  30 29  28      25    21  20  16      10         0
5633  * +--+---+--+---+-------+-----+-------+-------+---------+
5634  * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
5635  * +--+---+--+---+-------+-----+-------+-------+---------+
5636  */
5637 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5638 {
5639     int op0 = extract32(insn, 30, 1);
5640     int op1 = extract32(insn, 28, 1);
5641     int op2 = extract32(insn, 21, 4);
5642     int op3 = extract32(insn, 10, 6);
5643 
5644     if (!op1) {
5645         if (op2 & 8) {
5646             if (op2 & 1) {
5647                 /* Add/sub (extended register) */
5648                 disas_add_sub_ext_reg(s, insn);
5649             } else {
5650                 /* Add/sub (shifted register) */
5651                 disas_add_sub_reg(s, insn);
5652             }
5653         } else {
5654             /* Logical (shifted register) */
5655             disas_logic_reg(s, insn);
5656         }
5657         return;
5658     }
5659 
5660     switch (op2) {
5661     case 0x0:
5662         switch (op3) {
5663         case 0x00: /* Add/subtract (with carry) */
5664             disas_adc_sbc(s, insn);
5665             break;
5666 
5667         case 0x01: /* Rotate right into flags */
5668         case 0x21:
5669             disas_rotate_right_into_flags(s, insn);
5670             break;
5671 
5672         case 0x02: /* Evaluate into flags */
5673         case 0x12:
5674         case 0x22:
5675         case 0x32:
5676             disas_evaluate_into_flags(s, insn);
5677             break;
5678 
5679         default:
5680             goto do_unallocated;
5681         }
5682         break;
5683 
5684     case 0x2: /* Conditional compare */
5685         disas_cc(s, insn); /* both imm and reg forms */
5686         break;
5687 
5688     case 0x4: /* Conditional select */
5689         disas_cond_select(s, insn);
5690         break;
5691 
5692     case 0x6: /* Data-processing */
5693         if (op0) {    /* (1 source) */
5694             disas_data_proc_1src(s, insn);
5695         } else {      /* (2 source) */
5696             disas_data_proc_2src(s, insn);
5697         }
5698         break;
5699     case 0x8 ... 0xf: /* (3 source) */
5700         disas_data_proc_3src(s, insn);
5701         break;
5702 
5703     default:
5704     do_unallocated:
5705         unallocated_encoding(s);
5706         break;
5707     }
5708 }
5709 
5710 static void handle_fp_compare(DisasContext *s, int size,
5711                               unsigned int rn, unsigned int rm,
5712                               bool cmp_with_zero, bool signal_all_nans)
5713 {
5714     TCGv_i64 tcg_flags = tcg_temp_new_i64();
5715     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
5716 
5717     if (size == MO_64) {
5718         TCGv_i64 tcg_vn, tcg_vm;
5719 
5720         tcg_vn = read_fp_dreg(s, rn);
5721         if (cmp_with_zero) {
5722             tcg_vm = tcg_constant_i64(0);
5723         } else {
5724             tcg_vm = read_fp_dreg(s, rm);
5725         }
5726         if (signal_all_nans) {
5727             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5728         } else {
5729             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5730         }
5731     } else {
5732         TCGv_i32 tcg_vn = tcg_temp_new_i32();
5733         TCGv_i32 tcg_vm = tcg_temp_new_i32();
5734 
5735         read_vec_element_i32(s, tcg_vn, rn, 0, size);
5736         if (cmp_with_zero) {
5737             tcg_gen_movi_i32(tcg_vm, 0);
5738         } else {
5739             read_vec_element_i32(s, tcg_vm, rm, 0, size);
5740         }
5741 
5742         switch (size) {
5743         case MO_32:
5744             if (signal_all_nans) {
5745                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5746             } else {
5747                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5748             }
5749             break;
5750         case MO_16:
5751             if (signal_all_nans) {
5752                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5753             } else {
5754                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5755             }
5756             break;
5757         default:
5758             g_assert_not_reached();
5759         }
5760     }
5761 
5762     gen_set_nzcv(tcg_flags);
5763 }
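
/*
 * Sketch (illustrative only) of the NZCV encoding the VFP compare
 * helpers return in bits 31..28, consumed by gen_set_nzcv().  This is
 * the standard AArch64 FCMP result mapping, shown here assuming
 * softfloat's float_relation_* values for the input.
 */
static inline uint32_t sketch_fcmp_nzcv(int relation)
{
    /* relation: -1 less, 0 equal, 1 greater, 2 unordered (assumed) */
    switch (relation) {
    case -1:
        return 0x8u << 28;           /* N--- : less than */
    case 0:
        return 0x6u << 28;           /* -ZC- : equal */
    case 1:
        return 0x2u << 28;           /* --C- : greater than */
    default:
        return 0x3u << 28;           /* --CV : unordered */
    }
}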
5764 
5765 /* Floating point compare
5766  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
5767  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5768  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
5769  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5770  */
5771 static void disas_fp_compare(DisasContext *s, uint32_t insn)
5772 {
5773     unsigned int mos, type, rm, op, rn, opc, op2r;
5774     int size;
5775 
5776     mos = extract32(insn, 29, 3);
5777     type = extract32(insn, 22, 2);
5778     rm = extract32(insn, 16, 5);
5779     op = extract32(insn, 14, 2);
5780     rn = extract32(insn, 5, 5);
5781     opc = extract32(insn, 3, 2);
5782     op2r = extract32(insn, 0, 3);
5783 
5784     if (mos || op || op2r) {
5785         unallocated_encoding(s);
5786         return;
5787     }
5788 
5789     switch (type) {
5790     case 0:
5791         size = MO_32;
5792         break;
5793     case 1:
5794         size = MO_64;
5795         break;
5796     case 3:
5797         size = MO_16;
5798         if (dc_isar_feature(aa64_fp16, s)) {
5799             break;
5800         }
5801         /* fallthru */
5802     default:
5803         unallocated_encoding(s);
5804         return;
5805     }
5806 
5807     if (!fp_access_check(s)) {
5808         return;
5809     }
5810 
5811     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
5812 }
5813 
5814 /* Floating point conditional compare
5815  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
5816  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5817  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
5818  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5819  */
5820 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
5821 {
5822     unsigned int mos, type, rm, cond, rn, op, nzcv;
5823     TCGLabel *label_continue = NULL;
5824     int size;
5825 
5826     mos = extract32(insn, 29, 3);
5827     type = extract32(insn, 22, 2);
5828     rm = extract32(insn, 16, 5);
5829     cond = extract32(insn, 12, 4);
5830     rn = extract32(insn, 5, 5);
5831     op = extract32(insn, 4, 1);
5832     nzcv = extract32(insn, 0, 4);
5833 
5834     if (mos) {
5835         unallocated_encoding(s);
5836         return;
5837     }
5838 
5839     switch (type) {
5840     case 0:
5841         size = MO_32;
5842         break;
5843     case 1:
5844         size = MO_64;
5845         break;
5846     case 3:
5847         size = MO_16;
5848         if (dc_isar_feature(aa64_fp16, s)) {
5849             break;
5850         }
5851         /* fallthru */
5852     default:
5853         unallocated_encoding(s);
5854         return;
5855     }
5856 
5857     if (!fp_access_check(s)) {
5858         return;
5859     }
5860 
5861     if (cond < 0x0e) { /* not always */
5862         TCGLabel *label_match = gen_new_label();
5863         label_continue = gen_new_label();
5864         arm_gen_test_cc(cond, label_match);
5865         /* nomatch: */
5866         gen_set_nzcv(tcg_constant_i64(nzcv << 28));
5867         tcg_gen_br(label_continue);
5868         gen_set_label(label_match);
5869     }
5870 
5871     handle_fp_compare(s, size, rn, rm, false, op);
5872 
5873     if (cond < 0x0e) {
5874         gen_set_label(label_continue);
5875     }
5876 }
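
/*
 * Control-flow sketch (not QEMU code) of FCCMP as generated above: if
 * the condition holds, the real compare runs and sets NZCV; otherwise
 * the immediate nzcv field is installed directly, shifted into bits
 * 31..28 as gen_set_nzcv() expects.
 */
static inline uint32_t sketch_fccmp(bool cond_holds, uint32_t real_nzcv,
                                    uint32_t imm_nzcv)
{
    /* real_nzcv is assumed to already sit in bits 31..28 */
    return cond_holds ? real_nzcv : imm_nzcv << 28;
}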
5877 
5878 /* Floating point conditional select
5879  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
5880  * +---+---+---+-----------+------+---+------+------+-----+------+------+
5881  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
5882  * +---+---+---+-----------+------+---+------+------+-----+------+------+
5883  */
5884 static void disas_fp_csel(DisasContext *s, uint32_t insn)
5885 {
5886     unsigned int mos, type, rm, cond, rn, rd;
5887     TCGv_i64 t_true, t_false;
5888     DisasCompare64 c;
5889     MemOp sz;
5890 
5891     mos = extract32(insn, 29, 3);
5892     type = extract32(insn, 22, 2);
5893     rm = extract32(insn, 16, 5);
5894     cond = extract32(insn, 12, 4);
5895     rn = extract32(insn, 5, 5);
5896     rd = extract32(insn, 0, 5);
5897 
5898     if (mos) {
5899         unallocated_encoding(s);
5900         return;
5901     }
5902 
5903     switch (type) {
5904     case 0:
5905         sz = MO_32;
5906         break;
5907     case 1:
5908         sz = MO_64;
5909         break;
5910     case 3:
5911         sz = MO_16;
5912         if (dc_isar_feature(aa64_fp16, s)) {
5913             break;
5914         }
5915         /* fallthru */
5916     default:
5917         unallocated_encoding(s);
5918         return;
5919     }
5920 
5921     if (!fp_access_check(s)) {
5922         return;
5923     }
5924 
5925     /* Zero extend sreg & hreg inputs to 64 bits now.  */
5926     t_true = tcg_temp_new_i64();
5927     t_false = tcg_temp_new_i64();
5928     read_vec_element(s, t_true, rn, 0, sz);
5929     read_vec_element(s, t_false, rm, 0, sz);
5930 
5931     a64_test_cc(&c, cond);
5932     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
5933                         t_true, t_false);
5934 
5935     /* Note that sregs & hregs write back zeros to the high bits,
5936        and we've already done the zero-extension.  */
5937     write_fp_dreg(s, rd, t_true);
5938 }
5939 
5940 /* Floating-point data-processing (1 source) - half precision */
5941 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
5942 {
5943     TCGv_ptr fpst = NULL;
5944     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
5945     TCGv_i32 tcg_res = tcg_temp_new_i32();
5946 
5947     switch (opcode) {
5948     case 0x0: /* FMOV */
5949         tcg_gen_mov_i32(tcg_res, tcg_op);
5950         break;
5951     case 0x1: /* FABS */
5952         tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
5953         break;
5954     case 0x2: /* FNEG */
5955         tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
5956         break;
5957     case 0x3: /* FSQRT */
5958         fpst = fpstatus_ptr(FPST_FPCR_F16);
5959         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
5960         break;
5961     case 0x8: /* FRINTN */
5962     case 0x9: /* FRINTP */
5963     case 0xa: /* FRINTM */
5964     case 0xb: /* FRINTZ */
5965     case 0xc: /* FRINTA */
5966     {
5967         TCGv_i32 tcg_rmode;
5968 
5969         fpst = fpstatus_ptr(FPST_FPCR_F16);
5970         tcg_rmode = gen_set_rmode(opcode & 7, fpst);
5971         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5972         gen_restore_rmode(tcg_rmode, fpst);
5973         break;
5974     }
5975     case 0xe: /* FRINTX */
5976         fpst = fpstatus_ptr(FPST_FPCR_F16);
5977         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
5978         break;
5979     case 0xf: /* FRINTI */
5980         fpst = fpstatus_ptr(FPST_FPCR_F16);
5981         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5982         break;
5983     default:
5984         g_assert_not_reached();
5985     }
5986 
5987     write_fp_sreg(s, rd, tcg_res);
5988 }
5989 
5990 /* Floating-point data-processing (1 source) - single precision */
5991 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
5992 {
5993     void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
5994     TCGv_i32 tcg_op, tcg_res;
5995     TCGv_ptr fpst;
5996     int rmode = -1;
5997 
5998     tcg_op = read_fp_sreg(s, rn);
5999     tcg_res = tcg_temp_new_i32();
6000 
6001     switch (opcode) {
6002     case 0x0: /* FMOV */
6003         tcg_gen_mov_i32(tcg_res, tcg_op);
6004         goto done;
6005     case 0x1: /* FABS */
6006         gen_helper_vfp_abss(tcg_res, tcg_op);
6007         goto done;
6008     case 0x2: /* FNEG */
6009         gen_helper_vfp_negs(tcg_res, tcg_op);
6010         goto done;
6011     case 0x3: /* FSQRT */
6012         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
6013         goto done;
6014     case 0x6: /* BFCVT */
6015         gen_fpst = gen_helper_bfcvt;
6016         break;
6017     case 0x8: /* FRINTN */
6018     case 0x9: /* FRINTP */
6019     case 0xa: /* FRINTM */
6020     case 0xb: /* FRINTZ */
6021     case 0xc: /* FRINTA */
6022         rmode = opcode & 7;
6023         gen_fpst = gen_helper_rints;
6024         break;
6025     case 0xe: /* FRINTX */
6026         gen_fpst = gen_helper_rints_exact;
6027         break;
6028     case 0xf: /* FRINTI */
6029         gen_fpst = gen_helper_rints;
6030         break;
6031     case 0x10: /* FRINT32Z */
6032         rmode = FPROUNDING_ZERO;
6033         gen_fpst = gen_helper_frint32_s;
6034         break;
6035     case 0x11: /* FRINT32X */
6036         gen_fpst = gen_helper_frint32_s;
6037         break;
6038     case 0x12: /* FRINT64Z */
6039         rmode = FPROUNDING_ZERO;
6040         gen_fpst = gen_helper_frint64_s;
6041         break;
6042     case 0x13: /* FRINT64X */
6043         gen_fpst = gen_helper_frint64_s;
6044         break;
6045     default:
6046         g_assert_not_reached();
6047     }
6048 
6049     fpst = fpstatus_ptr(FPST_FPCR);
6050     if (rmode >= 0) {
6051         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6052         gen_fpst(tcg_res, tcg_op, fpst);
6053         gen_restore_rmode(tcg_rmode, fpst);
6054     } else {
6055         gen_fpst(tcg_res, tcg_op, fpst);
6056     }
6057 
6058  done:
6059     write_fp_sreg(s, rd, tcg_res);
6060 }
6061 
6062 /* Floating-point data-processing (1 source) - double precision */
6063 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
6064 {
6065     void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
6066     TCGv_i64 tcg_op, tcg_res;
6067     TCGv_ptr fpst;
6068     int rmode = -1;
6069 
6070     switch (opcode) {
6071     case 0x0: /* FMOV */
6072         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
6073         return;
6074     }
6075 
6076     tcg_op = read_fp_dreg(s, rn);
6077     tcg_res = tcg_temp_new_i64();
6078 
6079     switch (opcode) {
6080     case 0x1: /* FABS */
6081         gen_helper_vfp_absd(tcg_res, tcg_op);
6082         goto done;
6083     case 0x2: /* FNEG */
6084         gen_helper_vfp_negd(tcg_res, tcg_op);
6085         goto done;
6086     case 0x3: /* FSQRT */
6087         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
6088         goto done;
6089     case 0x8: /* FRINTN */
6090     case 0x9: /* FRINTP */
6091     case 0xa: /* FRINTM */
6092     case 0xb: /* FRINTZ */
6093     case 0xc: /* FRINTA */
6094         rmode = opcode & 7;
6095         gen_fpst = gen_helper_rintd;
6096         break;
6097     case 0xe: /* FRINTX */
6098         gen_fpst = gen_helper_rintd_exact;
6099         break;
6100     case 0xf: /* FRINTI */
6101         gen_fpst = gen_helper_rintd;
6102         break;
6103     case 0x10: /* FRINT32Z */
6104         rmode = FPROUNDING_ZERO;
6105         gen_fpst = gen_helper_frint32_d;
6106         break;
6107     case 0x11: /* FRINT32X */
6108         gen_fpst = gen_helper_frint32_d;
6109         break;
6110     case 0x12: /* FRINT64Z */
6111         rmode = FPROUNDING_ZERO;
6112         gen_fpst = gen_helper_frint64_d;
6113         break;
6114     case 0x13: /* FRINT64X */
6115         gen_fpst = gen_helper_frint64_d;
6116         break;
6117     default:
6118         g_assert_not_reached();
6119     }
6120 
6121     fpst = fpstatus_ptr(FPST_FPCR);
6122     if (rmode >= 0) {
6123         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6124         gen_fpst(tcg_res, tcg_op, fpst);
6125         gen_restore_rmode(tcg_rmode, fpst);
6126     } else {
6127         gen_fpst(tcg_res, tcg_op, fpst);
6128     }
6129 
6130  done:
6131     write_fp_dreg(s, rd, tcg_res);
6132 }
6133 
6134 static void handle_fp_fcvt(DisasContext *s, int opcode,
6135                            int rd, int rn, int dtype, int ntype)
6136 {
6137     switch (ntype) {
6138     case 0x0:
6139     {
6140         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6141         if (dtype == 1) {
6142             /* Single to double */
6143             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6144             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
6145             write_fp_dreg(s, rd, tcg_rd);
6146         } else {
6147             /* Single to half */
6148             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6149             TCGv_i32 ahp = get_ahp_flag();
6150             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6151 
6152             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6153             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6154             write_fp_sreg(s, rd, tcg_rd);
6155         }
6156         break;
6157     }
6158     case 0x1:
6159     {
6160         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
6161         TCGv_i32 tcg_rd = tcg_temp_new_i32();
6162         if (dtype == 0) {
6163             /* Double to single */
6164             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
6165         } else {
6166             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6167             TCGv_i32 ahp = get_ahp_flag();
6168             /* Double to half */
6169             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6170             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6171         }
6172         write_fp_sreg(s, rd, tcg_rd);
6173         break;
6174     }
6175     case 0x3:
6176     {
6177         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6178         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
6179         TCGv_i32 tcg_ahp = get_ahp_flag();
6180         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
6181         if (dtype == 0) {
6182             /* Half to single */
6183             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6184             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6185             write_fp_sreg(s, rd, tcg_rd);
6186         } else {
6187             /* Half to double */
6188             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6189             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6190             write_fp_dreg(s, rd, tcg_rd);
6191         }
6192         break;
6193     }
6194     default:
6195         g_assert_not_reached();
6196     }
6197 }
6198 
6199 /* Floating point data-processing (1 source)
6200  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
6201  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6202  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
6203  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6204  */
6205 static void disas_fp_1src(DisasContext *s, uint32_t insn)
6206 {
6207     int mos = extract32(insn, 29, 3);
6208     int type = extract32(insn, 22, 2);
6209     int opcode = extract32(insn, 15, 6);
6210     int rn = extract32(insn, 5, 5);
6211     int rd = extract32(insn, 0, 5);
6212 
6213     if (mos) {
6214         goto do_unallocated;
6215     }
6216 
6217     switch (opcode) {
6218     case 0x4: case 0x5: case 0x7:
6219     {
6220         /* FCVT between half, single and double precision */
6221         int dtype = extract32(opcode, 0, 2);
6222         if (type == 2 || dtype == type) {
6223             goto do_unallocated;
6224         }
6225         if (!fp_access_check(s)) {
6226             return;
6227         }
6228 
6229         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
6230         break;
6231     }
6232 
6233     case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
6234         if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
6235             goto do_unallocated;
6236         }
6237         /* fall through */
6238     case 0x0 ... 0x3:
6239     case 0x8 ... 0xc:
6240     case 0xe ... 0xf:
6241         /* 32-to-32 and 64-to-64 ops */
6242         switch (type) {
6243         case 0:
6244             if (!fp_access_check(s)) {
6245                 return;
6246             }
6247             handle_fp_1src_single(s, opcode, rd, rn);
6248             break;
6249         case 1:
6250             if (!fp_access_check(s)) {
6251                 return;
6252             }
6253             handle_fp_1src_double(s, opcode, rd, rn);
6254             break;
6255         case 3:
6256             if (!dc_isar_feature(aa64_fp16, s)) {
6257                 goto do_unallocated;
6258             }
6259 
6260             if (!fp_access_check(s)) {
6261                 return;
6262             }
6263             handle_fp_1src_half(s, opcode, rd, rn);
6264             break;
6265         default:
6266             goto do_unallocated;
6267         }
6268         break;
6269 
6270     case 0x6:
6271         switch (type) {
6272         case 1: /* BFCVT */
6273             if (!dc_isar_feature(aa64_bf16, s)) {
6274                 goto do_unallocated;
6275             }
6276             if (!fp_access_check(s)) {
6277                 return;
6278             }
6279             handle_fp_1src_single(s, opcode, rd, rn);
6280             break;
6281         default:
6282             goto do_unallocated;
6283         }
6284         break;
6285 
6286     default:
6287     do_unallocated:
6288         unallocated_encoding(s);
6289         break;
6290     }
6291 }
6292 
6293 /* Floating-point data-processing (2 source) - single precision */
6294 static void handle_fp_2src_single(DisasContext *s, int opcode,
6295                                   int rd, int rn, int rm)
6296 {
6297     TCGv_i32 tcg_op1;
6298     TCGv_i32 tcg_op2;
6299     TCGv_i32 tcg_res;
6300     TCGv_ptr fpst;
6301 
6302     tcg_res = tcg_temp_new_i32();
6303     fpst = fpstatus_ptr(FPST_FPCR);
6304     tcg_op1 = read_fp_sreg(s, rn);
6305     tcg_op2 = read_fp_sreg(s, rm);
6306 
6307     switch (opcode) {
6308     case 0x0: /* FMUL */
6309         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6310         break;
6311     case 0x1: /* FDIV */
6312         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
6313         break;
6314     case 0x2: /* FADD */
6315         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6316         break;
6317     case 0x3: /* FSUB */
6318         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
6319         break;
6320     case 0x4: /* FMAX */
6321         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6322         break;
6323     case 0x5: /* FMIN */
6324         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6325         break;
6326     case 0x6: /* FMAXNM */
6327         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6328         break;
6329     case 0x7: /* FMINNM */
6330         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6331         break;
6332     case 0x8: /* FNMUL */
6333         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6334         gen_helper_vfp_negs(tcg_res, tcg_res);
6335         break;
6336     }
6337 
6338     write_fp_sreg(s, rd, tcg_res);
6339 }
6340 
6341 /* Floating-point data-processing (2 source) - double precision */
6342 static void handle_fp_2src_double(DisasContext *s, int opcode,
6343                                   int rd, int rn, int rm)
6344 {
6345     TCGv_i64 tcg_op1;
6346     TCGv_i64 tcg_op2;
6347     TCGv_i64 tcg_res;
6348     TCGv_ptr fpst;
6349 
6350     tcg_res = tcg_temp_new_i64();
6351     fpst = fpstatus_ptr(FPST_FPCR);
6352     tcg_op1 = read_fp_dreg(s, rn);
6353     tcg_op2 = read_fp_dreg(s, rm);
6354 
6355     switch (opcode) {
6356     case 0x0: /* FMUL */
6357         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6358         break;
6359     case 0x1: /* FDIV */
6360         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
6361         break;
6362     case 0x2: /* FADD */
6363         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6364         break;
6365     case 0x3: /* FSUB */
6366         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
6367         break;
6368     case 0x4: /* FMAX */
6369         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6370         break;
6371     case 0x5: /* FMIN */
6372         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6373         break;
6374     case 0x6: /* FMAXNM */
6375         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6376         break;
6377     case 0x7: /* FMINNM */
6378         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6379         break;
6380     case 0x8: /* FNMUL */
6381         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6382         gen_helper_vfp_negd(tcg_res, tcg_res);
6383         break;
6384     }
6385 
6386     write_fp_dreg(s, rd, tcg_res);
6387 }
6388 
6389 /* Floating-point data-processing (2 source) - half precision */
6390 static void handle_fp_2src_half(DisasContext *s, int opcode,
6391                                 int rd, int rn, int rm)
6392 {
6393     TCGv_i32 tcg_op1;
6394     TCGv_i32 tcg_op2;
6395     TCGv_i32 tcg_res;
6396     TCGv_ptr fpst;
6397 
6398     tcg_res = tcg_temp_new_i32();
6399     fpst = fpstatus_ptr(FPST_FPCR_F16);
6400     tcg_op1 = read_fp_hreg(s, rn);
6401     tcg_op2 = read_fp_hreg(s, rm);
6402 
6403     switch (opcode) {
6404     case 0x0: /* FMUL */
6405         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6406         break;
6407     case 0x1: /* FDIV */
6408         gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
6409         break;
6410     case 0x2: /* FADD */
6411         gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6412         break;
6413     case 0x3: /* FSUB */
6414         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6415         break;
6416     case 0x4: /* FMAX */
6417         gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6418         break;
6419     case 0x5: /* FMIN */
6420         gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6421         break;
6422     case 0x6: /* FMAXNM */
6423         gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6424         break;
6425     case 0x7: /* FMINNM */
6426         gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6427         break;
6428     case 0x8: /* FNMUL */
6429         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6430         tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6431         break;
6432     default:
6433         g_assert_not_reached();
6434     }
6435 
6436     write_fp_sreg(s, rd, tcg_res);
6437 }
6438 
6439 /* Floating point data-processing (2 source)
6440  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6441  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6442  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
6443  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6444  */
6445 static void disas_fp_2src(DisasContext *s, uint32_t insn)
6446 {
6447     int mos = extract32(insn, 29, 3);
6448     int type = extract32(insn, 22, 2);
6449     int rd = extract32(insn, 0, 5);
6450     int rn = extract32(insn, 5, 5);
6451     int rm = extract32(insn, 16, 5);
6452     int opcode = extract32(insn, 12, 4);
6453 
6454     if (opcode > 8 || mos) {
6455         unallocated_encoding(s);
6456         return;
6457     }
6458 
6459     switch (type) {
6460     case 0:
6461         if (!fp_access_check(s)) {
6462             return;
6463         }
6464         handle_fp_2src_single(s, opcode, rd, rn, rm);
6465         break;
6466     case 1:
6467         if (!fp_access_check(s)) {
6468             return;
6469         }
6470         handle_fp_2src_double(s, opcode, rd, rn, rm);
6471         break;
6472     case 3:
6473         if (!dc_isar_feature(aa64_fp16, s)) {
6474             unallocated_encoding(s);
6475             return;
6476         }
6477         if (!fp_access_check(s)) {
6478             return;
6479         }
6480         handle_fp_2src_half(s, opcode, rd, rn, rm);
6481         break;
6482     default:
6483         unallocated_encoding(s);
6484     }
6485 }
6486 
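/*
 * For reference: the bit-layout diagrams in this file map directly
 * onto extract32() calls: extract32(insn, start, len) yields
 * insn<start+len-1:start>.  A minimal plain-C sketch of the same
 * operation for len < 32 (QEMU's real extract32, in
 * include/qemu/bitops.h, also handles len == 32):
 */
static inline uint32_t extract32_sketch(uint32_t value, int start, int len)
{
    return (value >> start) & ((1u << len) - 1);
}
/* e.g. type = extract32(insn, 22, 2) pulls bits [23:22] of the diagram. */
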
6487 /* Floating-point data-processing (3 source) - single precision */
6488 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6489                                   int rd, int rn, int rm, int ra)
6490 {
6491     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6492     TCGv_i32 tcg_res = tcg_temp_new_i32();
6493     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6494 
6495     tcg_op1 = read_fp_sreg(s, rn);
6496     tcg_op2 = read_fp_sreg(s, rm);
6497     tcg_op3 = read_fp_sreg(s, ra);
6498 
6499     /* These are fused multiply-add, and must be done as one
6500      * floating point operation with no rounding between the
6501      * multiplication and addition steps.
6502      * NB that doing the negations here as separate steps is
6503      * correct: an input NaN should come out with its sign bit
6504      * flipped if it is a negated input.
6505      */
6506     if (o1 == true) {
6507         gen_helper_vfp_negs(tcg_op3, tcg_op3);
6508     }
6509 
6510     if (o0 != o1) {
6511         gen_helper_vfp_negs(tcg_op1, tcg_op1);
6512     }
6513 
6514     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6515 
6516     write_fp_sreg(s, rd, tcg_res);
6517 }
6518 
6519 /* Floating-point data-processing (3 source) - double precision */
6520 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6521                                   int rd, int rn, int rm, int ra)
6522 {
6523     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6524     TCGv_i64 tcg_res = tcg_temp_new_i64();
6525     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6526 
6527     tcg_op1 = read_fp_dreg(s, rn);
6528     tcg_op2 = read_fp_dreg(s, rm);
6529     tcg_op3 = read_fp_dreg(s, ra);
6530 
6531     /* These are fused multiply-add, and must be done as one
6532      * floating point operation with no rounding between the
6533      * multiplication and addition steps.
6534      * NB that doing the negations here as separate steps is
6535      * correct: an input NaN should come out with its sign bit
6536      * flipped if it is a negated input.
6537      */
6538     if (o1 == true) {
6539         gen_helper_vfp_negd(tcg_op3, tcg_op3);
6540     }
6541 
6542     if (o0 != o1) {
6543         gen_helper_vfp_negd(tcg_op1, tcg_op1);
6544     }
6545 
6546     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6547 
6548     write_fp_dreg(s, rd, tcg_res);
6549 }
6550 
6551 /* Floating-point data-processing (3 source) - half precision */
6552 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6553                                 int rd, int rn, int rm, int ra)
6554 {
6555     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6556     TCGv_i32 tcg_res = tcg_temp_new_i32();
6557     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
6558 
6559     tcg_op1 = read_fp_hreg(s, rn);
6560     tcg_op2 = read_fp_hreg(s, rm);
6561     tcg_op3 = read_fp_hreg(s, ra);
6562 
6563     /* These are fused multiply-add, and must be done as one
6564      * floating point operation with no rounding between the
6565      * multiplication and addition steps.
6566      * NB that doing the negations here as separate steps is
6567      * correct: an input NaN should come out with its sign bit
6568      * flipped if it is a negated input.
6569      */
6570     if (o1 == true) {
6571         tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6572     }
6573 
6574     if (o0 != o1) {
6575         tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6576     }
6577 
6578     gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6579 
6580     write_fp_sreg(s, rd, tcg_res);
6581 }
6582 
6583 /* Floating point data-processing (3 source)
6584  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
6585  * +---+---+---+-----------+------+----+------+----+------+------+------+
6586  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6587  * +---+---+---+-----------+------+----+------+----+------+------+------+
6588  */
6589 static void disas_fp_3src(DisasContext *s, uint32_t insn)
6590 {
6591     int mos = extract32(insn, 29, 3);
6592     int type = extract32(insn, 22, 2);
6593     int rd = extract32(insn, 0, 5);
6594     int rn = extract32(insn, 5, 5);
6595     int ra = extract32(insn, 10, 5);
6596     int rm = extract32(insn, 16, 5);
6597     bool o0 = extract32(insn, 15, 1);
6598     bool o1 = extract32(insn, 21, 1);
6599 
6600     if (mos) {
6601         unallocated_encoding(s);
6602         return;
6603     }
6604 
6605     switch (type) {
6606     case 0:
6607         if (!fp_access_check(s)) {
6608             return;
6609         }
6610         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6611         break;
6612     case 1:
6613         if (!fp_access_check(s)) {
6614             return;
6615         }
6616         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6617         break;
6618     case 3:
6619         if (!dc_isar_feature(aa64_fp16, s)) {
6620             unallocated_encoding(s);
6621             return;
6622         }
6623         if (!fp_access_check(s)) {
6624             return;
6625         }
6626         handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6627         break;
6628     default:
6629         unallocated_encoding(s);
6630     }
6631 }
6632 
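/*
 * For reference: the "no rounding between the multiplication and
 * addition steps" requirement in the 3-source handlers above is what
 * distinguishes a fused multiply-add from separate mul and add.  A
 * standalone demonstration using C99 fma() (assumes <math.h> and
 * <float.h>; illustrative only):
 */
static double fused_vs_separate_sketch(void)
{
    double a = 1.0 + DBL_EPSILON;
    double b = 1.0 - DBL_EPSILON;
    /* a * b == 1 - DBL_EPSILON^2, which rounds to exactly 1.0 in
     * double precision, so the separate mul-then-add yields 0.0... */
    double separate = a * b - 1.0;
    /* ...while the fused form keeps the full-width product and yields
     * the exact residue -DBL_EPSILON^2 (a power of two, representable). */
    double fused = fma(a, b, -1.0);
    return fused - separate;    /* nonzero: the two disagree */
}
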
6633 /* Floating point immediate
6634  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
6635  * +---+---+---+-----------+------+---+------------+-------+------+------+
6636  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
6637  * +---+---+---+-----------+------+---+------------+-------+------+------+
6638  */
6639 static void disas_fp_imm(DisasContext *s, uint32_t insn)
6640 {
6641     int rd = extract32(insn, 0, 5);
6642     int imm5 = extract32(insn, 5, 5);
6643     int imm8 = extract32(insn, 13, 8);
6644     int type = extract32(insn, 22, 2);
6645     int mos = extract32(insn, 29, 3);
6646     uint64_t imm;
6647     MemOp sz;
6648 
6649     if (mos || imm5) {
6650         unallocated_encoding(s);
6651         return;
6652     }
6653 
6654     switch (type) {
6655     case 0:
6656         sz = MO_32;
6657         break;
6658     case 1:
6659         sz = MO_64;
6660         break;
6661     case 3:
6662         sz = MO_16;
6663         if (dc_isar_feature(aa64_fp16, s)) {
6664             break;
6665         }
6666         /* fallthru */
6667     default:
6668         unallocated_encoding(s);
6669         return;
6670     }
6671 
6672     if (!fp_access_check(s)) {
6673         return;
6674     }
6675 
6676     imm = vfp_expand_imm(sz, imm8);
6677     write_fp_dreg(s, rd, tcg_constant_i64(imm));
6678 }
6679 
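/*
 * For reference: vfp_expand_imm() decodes the 8-bit "abcdefgh"
 * immediate per the ARM ARM VFPExpandImm() pseudocode.  A sketch of
 * the double-precision layout (assumes <stdint.h>; the real helper
 * also covers the 16- and 32-bit formats):
 *   imm64<63>    = a          (sign)
 *   imm64<62>    = NOT(b)
 *   imm64<61:54> = Replicate(b, 8)
 *   imm64<53:48> = cdefgh
 *   imm64<47:0>  = Zeros(48)
 */
static uint64_t vfp_expand_imm64_sketch(uint8_t imm8)
{
    uint64_t a = (imm8 >> 7) & 1;
    uint64_t b = (imm8 >> 6) & 1;
    uint64_t cdefgh = imm8 & 0x3f;

    return (a << 63) | ((b ^ 1) << 62) | (b ? 0xffull << 54 : 0)
           | (cdefgh << 48);
}
/* Encodable values are +/-(16..31)/16 * 2^(-3..4); imm8 = 0x70 -> 1.0. */
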
6680 /* Handle floating point <=> fixed point conversions. Note that we can
6681  * also deal with fp <=> integer conversions as a special case (scale == 64)
6682  * OPTME: consider handling that special case specially or at least skipping
6683  * the call to scalbn in the helpers for zero shifts.
6684  */
6685 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
6686                            bool itof, int rmode, int scale, int sf, int type)
6687 {
6688     bool is_signed = !(opcode & 1);
6689     TCGv_ptr tcg_fpstatus;
6690     TCGv_i32 tcg_shift, tcg_single;
6691     TCGv_i64 tcg_double;
6692 
6693     tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
6694 
6695     tcg_shift = tcg_constant_i32(64 - scale);
6696 
6697     if (itof) {
6698         TCGv_i64 tcg_int = cpu_reg(s, rn);
6699         if (!sf) {
6700             TCGv_i64 tcg_extend = tcg_temp_new_i64();
6701 
6702             if (is_signed) {
6703                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
6704             } else {
6705                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
6706             }
6707 
6708             tcg_int = tcg_extend;
6709         }
6710 
6711         switch (type) {
6712         case 1: /* float64 */
6713             tcg_double = tcg_temp_new_i64();
6714             if (is_signed) {
6715                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6716                                      tcg_shift, tcg_fpstatus);
6717             } else {
6718                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6719                                      tcg_shift, tcg_fpstatus);
6720             }
6721             write_fp_dreg(s, rd, tcg_double);
6722             break;
6723 
6724         case 0: /* float32 */
6725             tcg_single = tcg_temp_new_i32();
6726             if (is_signed) {
6727                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6728                                      tcg_shift, tcg_fpstatus);
6729             } else {
6730                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6731                                      tcg_shift, tcg_fpstatus);
6732             }
6733             write_fp_sreg(s, rd, tcg_single);
6734             break;
6735 
6736         case 3: /* float16 */
6737             tcg_single = tcg_temp_new_i32();
6738             if (is_signed) {
6739                 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
6740                                      tcg_shift, tcg_fpstatus);
6741             } else {
6742                 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
6743                                      tcg_shift, tcg_fpstatus);
6744             }
6745             write_fp_sreg(s, rd, tcg_single);
6746             break;
6747 
6748         default:
6749             g_assert_not_reached();
6750         }
6751     } else {
6752         TCGv_i64 tcg_int = cpu_reg(s, rd);
6753         TCGv_i32 tcg_rmode;
6754 
6755         if (extract32(opcode, 2, 1)) {
6756             /* There are too many rounding modes to all fit into rmode,
6757              * so FCVTA[US] is a special case.
6758              */
6759             rmode = FPROUNDING_TIEAWAY;
6760         }
6761 
6762         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
6763 
6764         switch (type) {
6765         case 1: /* float64 */
6766             tcg_double = read_fp_dreg(s, rn);
6767             if (is_signed) {
6768                 if (!sf) {
6769                     gen_helper_vfp_tosld(tcg_int, tcg_double,
6770                                          tcg_shift, tcg_fpstatus);
6771                 } else {
6772                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
6773                                          tcg_shift, tcg_fpstatus);
6774                 }
6775             } else {
6776                 if (!sf) {
6777                     gen_helper_vfp_tould(tcg_int, tcg_double,
6778                                          tcg_shift, tcg_fpstatus);
6779                 } else {
6780                     gen_helper_vfp_touqd(tcg_int, tcg_double,
6781                                          tcg_shift, tcg_fpstatus);
6782                 }
6783             }
6784             if (!sf) {
6785                 tcg_gen_ext32u_i64(tcg_int, tcg_int);
6786             }
6787             break;
6788 
6789         case 0: /* float32 */
6790             tcg_single = read_fp_sreg(s, rn);
6791             if (sf) {
6792                 if (is_signed) {
6793                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
6794                                          tcg_shift, tcg_fpstatus);
6795                 } else {
6796                     gen_helper_vfp_touqs(tcg_int, tcg_single,
6797                                          tcg_shift, tcg_fpstatus);
6798                 }
6799             } else {
6800                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6801                 if (is_signed) {
6802                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
6803                                          tcg_shift, tcg_fpstatus);
6804                 } else {
6805                     gen_helper_vfp_touls(tcg_dest, tcg_single,
6806                                          tcg_shift, tcg_fpstatus);
6807                 }
6808                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6809             }
6810             break;
6811 
6812         case 3: /* float16 */
6813             tcg_single = read_fp_sreg(s, rn);
6814             if (sf) {
6815                 if (is_signed) {
6816                     gen_helper_vfp_tosqh(tcg_int, tcg_single,
6817                                          tcg_shift, tcg_fpstatus);
6818                 } else {
6819                     gen_helper_vfp_touqh(tcg_int, tcg_single,
6820                                          tcg_shift, tcg_fpstatus);
6821                 }
6822             } else {
6823                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6824                 if (is_signed) {
6825                     gen_helper_vfp_toslh(tcg_dest, tcg_single,
6826                                          tcg_shift, tcg_fpstatus);
6827                 } else {
6828                     gen_helper_vfp_toulh(tcg_dest, tcg_single,
6829                                          tcg_shift, tcg_fpstatus);
6830                 }
6831                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6832             }
6833             break;
6834 
6835         default:
6836             g_assert_not_reached();
6837         }
6838 
6839         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
6840     }
6841 }
6842 
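/*
 * For reference: for the fixed-point forms handle_fpfpcvt() passes
 * tcg_shift = 64 - scale to the helpers, i.e. the number of fraction
 * bits (fbits).  Conceptually, SCVTF with fbits fraction bits treats
 * the integer as fixed-point and scales by 2^-fbits.  A rough model
 * that ignores the FPCR rounding mode (assumes <stdint.h> and
 * <math.h>; the real helpers go through softfloat):
 */
static double scvtf_fixed_sketch(int64_t x, int fbits)
{
    return ldexp((double)x, -fbits);    /* x * 2^-fbits */
}
/* scale == 64 gives fbits == 0: the plain int->fp case noted above. */
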
6843 /* Floating point <-> fixed point conversions
6844  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
6845  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6846  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
6847  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6848  */
6849 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
6850 {
6851     int rd = extract32(insn, 0, 5);
6852     int rn = extract32(insn, 5, 5);
6853     int scale = extract32(insn, 10, 6);
6854     int opcode = extract32(insn, 16, 3);
6855     int rmode = extract32(insn, 19, 2);
6856     int type = extract32(insn, 22, 2);
6857     bool sbit = extract32(insn, 29, 1);
6858     bool sf = extract32(insn, 31, 1);
6859     bool itof;
6860 
6861     if (sbit || (!sf && scale < 32)) {
6862         unallocated_encoding(s);
6863         return;
6864     }
6865 
6866     switch (type) {
6867     case 0: /* float32 */
6868     case 1: /* float64 */
6869         break;
6870     case 3: /* float16 */
6871         if (dc_isar_feature(aa64_fp16, s)) {
6872             break;
6873         }
6874         /* fallthru */
6875     default:
6876         unallocated_encoding(s);
6877         return;
6878     }
6879 
6880     switch ((rmode << 3) | opcode) {
6881     case 0x2: /* SCVTF */
6882     case 0x3: /* UCVTF */
6883         itof = true;
6884         break;
6885     case 0x18: /* FCVTZS */
6886     case 0x19: /* FCVTZU */
6887         itof = false;
6888         break;
6889     default:
6890         unallocated_encoding(s);
6891         return;
6892     }
6893 
6894     if (!fp_access_check(s)) {
6895         return;
6896     }
6897 
6898     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
6899 }
6900 
6901 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
6902 {
6903     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
6904      * without conversion.
6905      */
6906 
6907     if (itof) {
6908         TCGv_i64 tcg_rn = cpu_reg(s, rn);
6909         TCGv_i64 tmp;
6910 
6911         switch (type) {
6912         case 0:
6913             /* 32 bit */
6914             tmp = tcg_temp_new_i64();
6915             tcg_gen_ext32u_i64(tmp, tcg_rn);
6916             write_fp_dreg(s, rd, tmp);
6917             break;
6918         case 1:
6919             /* 64 bit */
6920             write_fp_dreg(s, rd, tcg_rn);
6921             break;
6922         case 2:
6923             /* 64 bit to top half. */
6924             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
6925             clear_vec_high(s, true, rd);
6926             break;
6927         case 3:
6928             /* 16 bit */
6929             tmp = tcg_temp_new_i64();
6930             tcg_gen_ext16u_i64(tmp, tcg_rn);
6931             write_fp_dreg(s, rd, tmp);
6932             break;
6933         default:
6934             g_assert_not_reached();
6935         }
6936     } else {
6937         TCGv_i64 tcg_rd = cpu_reg(s, rd);
6938 
6939         switch (type) {
6940         case 0:
6941             /* 32 bit */
6942             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
6943             break;
6944         case 1:
6945             /* 64 bit */
6946             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
6947             break;
6948         case 2:
6949             /* 64 bits from top half */
6950             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
6951             break;
6952         case 3:
6953             /* 16 bit */
6954             tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
6955             break;
6956         default:
6957             g_assert_not_reached();
6958         }
6959     }
6960 }
6961 
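/*
 * For reference: handle_fmov() moves raw bit patterns between the
 * general and FP register files with no numeric conversion.  The
 * C-level analogue is type punning via memcpy rather than a value
 * cast.  A sketch (assumes <stdint.h> and <string.h>; illustrative
 * only):
 */
static uint32_t fmov_bits_sketch(float f)
{
    uint32_t bits;
    memcpy(&bits, &f, sizeof(bits));    /* copy the bits unchanged */
    return bits;
}
/* A cast like (uint32_t)f would convert the value; FMOV must not. */
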
6962 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
6963 {
6964     TCGv_i64 t = read_fp_dreg(s, rn);
6965     TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
6966 
6967     gen_helper_fjcvtzs(t, t, fpstatus);
6968 
6969     tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
6970     tcg_gen_extrh_i64_i32(cpu_ZF, t);
6971     tcg_gen_movi_i32(cpu_CF, 0);
6972     tcg_gen_movi_i32(cpu_NF, 0);
6973     tcg_gen_movi_i32(cpu_VF, 0);
6974 }
6975 
6976 /* Floating point <-> integer conversions
6977  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
6978  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6979  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
6980  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6981  */
6982 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
6983 {
6984     int rd = extract32(insn, 0, 5);
6985     int rn = extract32(insn, 5, 5);
6986     int opcode = extract32(insn, 16, 3);
6987     int rmode = extract32(insn, 19, 2);
6988     int type = extract32(insn, 22, 2);
6989     bool sbit = extract32(insn, 29, 1);
6990     bool sf = extract32(insn, 31, 1);
6991     bool itof = false;
6992 
6993     if (sbit) {
6994         goto do_unallocated;
6995     }
6996 
6997     switch (opcode) {
6998     case 2: /* SCVTF */
6999     case 3: /* UCVTF */
7000         itof = true;
7001         /* fallthru */
7002     case 4: /* FCVTAS */
7003     case 5: /* FCVTAU */
7004         if (rmode != 0) {
7005             goto do_unallocated;
7006         }
7007         /* fallthru */
7008     case 0: /* FCVT[NPMZ]S */
7009     case 1: /* FCVT[NPMZ]U */
7010         switch (type) {
7011         case 0: /* float32 */
7012         case 1: /* float64 */
7013             break;
7014         case 3: /* float16 */
7015             if (!dc_isar_feature(aa64_fp16, s)) {
7016                 goto do_unallocated;
7017             }
7018             break;
7019         default:
7020             goto do_unallocated;
7021         }
7022         if (!fp_access_check(s)) {
7023             return;
7024         }
7025         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
7026         break;
7027 
7028     default:
7029         switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
7030         case 0b01100110: /* FMOV half <-> 32-bit int */
7031         case 0b01100111:
7032         case 0b11100110: /* FMOV half <-> 64-bit int */
7033         case 0b11100111:
7034             if (!dc_isar_feature(aa64_fp16, s)) {
7035                 goto do_unallocated;
7036             }
7037             /* fallthru */
7038         case 0b00000110: /* FMOV 32-bit */
7039         case 0b00000111:
7040         case 0b10100110: /* FMOV 64-bit */
7041         case 0b10100111:
7042         case 0b11001110: /* FMOV top half of 128-bit */
7043         case 0b11001111:
7044             if (!fp_access_check(s)) {
7045                 return;
7046             }
7047             itof = opcode & 1;
7048             handle_fmov(s, rd, rn, type, itof);
7049             break;
7050 
7051         case 0b00111110: /* FJCVTZS */
7052             if (!dc_isar_feature(aa64_jscvt, s)) {
7053                 goto do_unallocated;
7054             } else if (fp_access_check(s)) {
7055                 handle_fjcvtzs(s, rd, rn);
7056             }
7057             break;
7058 
7059         default:
7060         do_unallocated:
7061             unallocated_encoding(s);
7062             return;
7063         }
7064         break;
7065     }
7066 }
7067 
7068 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
7069  *   31  30  29 28     25 24                          0
7070  * +---+---+---+---------+-----------------------------+
7071  * |   | 0 |   | 1 1 1 1 |                             |
7072  * +---+---+---+---------+-----------------------------+
7073  */
7074 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
7075 {
7076     if (extract32(insn, 24, 1)) {
7077         /* Floating point data-processing (3 source) */
7078         disas_fp_3src(s, insn);
7079     } else if (extract32(insn, 21, 1) == 0) {
7080         /* Floating point to fixed point conversions */
7081         disas_fp_fixed_conv(s, insn);
7082     } else {
7083         switch (extract32(insn, 10, 2)) {
7084         case 1:
7085             /* Floating point conditional compare */
7086             disas_fp_ccomp(s, insn);
7087             break;
7088         case 2:
7089             /* Floating point data-processing (2 source) */
7090             disas_fp_2src(s, insn);
7091             break;
7092         case 3:
7093             /* Floating point conditional select */
7094             disas_fp_csel(s, insn);
7095             break;
7096         case 0:
7097             switch (ctz32(extract32(insn, 12, 4))) {
7098             case 0: /* [15:12] == xxx1 */
7099                 /* Floating point immediate */
7100                 disas_fp_imm(s, insn);
7101                 break;
7102             case 1: /* [15:12] == xx10 */
7103                 /* Floating point compare */
7104                 disas_fp_compare(s, insn);
7105                 break;
7106             case 2: /* [15:12] == x100 */
7107                 /* Floating point data-processing (1 source) */
7108                 disas_fp_1src(s, insn);
7109                 break;
7110             case 3: /* [15:12] == 1000 */
7111                 unallocated_encoding(s);
7112                 break;
7113             default: /* [15:12] == 0000 */
7114                 /* Floating point <-> integer conversions */
7115                 disas_fp_int_conv(s, insn);
7116                 break;
7117             }
7118             break;
7119         }
7120     }
7121 }
7122 
7123 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
7124                      int pos)
7125 {
7126     /* Extract 64 bits from the middle of two concatenated 64 bit
7127      * vector register slices left:right. The extracted bits start
7128      * at 'pos' bits into the right (least significant) side.
7129      * We return the result in tcg_right, and guarantee not to
7130      * trash tcg_left.
7131      */
7132     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7133     assert(pos > 0 && pos < 64);
7134 
7135     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
7136     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
7137     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
7138 }
7139 
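/*
 * For reference: do_ext64() is a 64-bit funnel shift: it concatenates
 * left:right and takes the 64 bits starting 'pos' bits up from the
 * bottom.  A plain-C mirror of the same three TCG ops, valid for
 * 0 < pos < 64 (assumes <stdint.h>; illustrative only):
 */
static uint64_t ext64_sketch(uint64_t left, uint64_t right, int pos)
{
    return (right >> pos) | (left << (64 - pos));
}
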
7140 /* EXT
7141  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
7142  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7143  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
7144  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7145  */
7146 static void disas_simd_ext(DisasContext *s, uint32_t insn)
7147 {
7148     int is_q = extract32(insn, 30, 1);
7149     int op2 = extract32(insn, 22, 2);
7150     int imm4 = extract32(insn, 11, 4);
7151     int rm = extract32(insn, 16, 5);
7152     int rn = extract32(insn, 5, 5);
7153     int rd = extract32(insn, 0, 5);
7154     int pos = imm4 << 3;
7155     TCGv_i64 tcg_resl, tcg_resh;
7156 
7157     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
7158         unallocated_encoding(s);
7159         return;
7160     }
7161 
7162     if (!fp_access_check(s)) {
7163         return;
7164     }
7165 
7166     tcg_resh = tcg_temp_new_i64();
7167     tcg_resl = tcg_temp_new_i64();
7168 
7169     /* Vd gets bits starting at pos bits into Vm:Vn. This is
7170      * either extracting 128 bits from a 128:128 concatenation, or
7171      * extracting 64 bits from a 64:64 concatenation.
7172      */
7173     if (!is_q) {
7174         read_vec_element(s, tcg_resl, rn, 0, MO_64);
7175         if (pos != 0) {
7176             read_vec_element(s, tcg_resh, rm, 0, MO_64);
7177             do_ext64(s, tcg_resh, tcg_resl, pos);
7178         }
7179     } else {
7180         TCGv_i64 tcg_hh;
7181         typedef struct {
7182             int reg;
7183             int elt;
7184         } EltPosns;
7185         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
7186         EltPosns *elt = eltposns;
7187 
7188         if (pos >= 64) {
7189             elt++;
7190             pos -= 64;
7191         }
7192 
7193         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
7194         elt++;
7195         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
7196         elt++;
7197         if (pos != 0) {
7198             do_ext64(s, tcg_resh, tcg_resl, pos);
7199             tcg_hh = tcg_temp_new_i64();
7200             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
7201             do_ext64(s, tcg_hh, tcg_resh, pos);
7202         }
7203     }
7204 
7205     write_vec_element(s, tcg_resl, rd, 0, MO_64);
7206     if (is_q) {
7207         write_vec_element(s, tcg_resh, rd, 1, MO_64);
7208     }
7209     clear_vec_high(s, is_q, rd);
7210 }
7211 
7212 /* TBL/TBX
7213  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
7214  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7215  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
7216  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7217  */
7218 static void disas_simd_tb(DisasContext *s, uint32_t insn)
7219 {
7220     int op2 = extract32(insn, 22, 2);
7221     int is_q = extract32(insn, 30, 1);
7222     int rm = extract32(insn, 16, 5);
7223     int rn = extract32(insn, 5, 5);
7224     int rd = extract32(insn, 0, 5);
7225     int is_tbx = extract32(insn, 12, 1);
7226     int len = (extract32(insn, 13, 2) + 1) * 16;
7227 
7228     if (op2 != 0) {
7229         unallocated_encoding(s);
7230         return;
7231     }
7232 
7233     if (!fp_access_check(s)) {
7234         return;
7235     }
7236 
7237     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
7238                        vec_full_reg_offset(s, rm), cpu_env,
7239                        is_q ? 16 : 8, vec_full_reg_size(s),
7240                        (len << 6) | (is_tbx << 5) | rn,
7241                        gen_helper_simd_tblx);
7242 }
7243 
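/*
 * For reference: the gvec call above defers TBL/TBX to a helper; the
 * architectural behaviour is a bytewise table lookup in which an
 * out-of-range index yields 0 for TBL but leaves the destination
 * byte unchanged for TBX.  A scalar model (assumes <stdint.h> and
 * <stdbool.h>; the real helper is vectorised):
 */
static void tblx_sketch(uint8_t *dst, const uint8_t *table, int table_len,
                        const uint8_t *indices, int n, bool is_tbx)
{
    for (int i = 0; i < n; i++) {
        if (indices[i] < table_len) {
            dst[i] = table[indices[i]];    /* index in range: look up */
        } else if (!is_tbx) {
            dst[i] = 0;                    /* TBL zeroes, TBX preserves */
        }
    }
}
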
7244 /* ZIP/UZP/TRN
7245  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
7246  * +---+---+-------------+------+---+------+---+-----+-----+------+------+
7247  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
7248  * +---+---+-------------+------+---+------+---+-----+-----+------+------+
7249  */
7250 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
7251 {
7252     int rd = extract32(insn, 0, 5);
7253     int rn = extract32(insn, 5, 5);
7254     int rm = extract32(insn, 16, 5);
7255     int size = extract32(insn, 22, 2);
7256     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
7257      * bit 2 indicates 1 vs 2 variant of the insn.
7258      */
7259     int opcode = extract32(insn, 12, 2);
7260     bool part = extract32(insn, 14, 1);
7261     bool is_q = extract32(insn, 30, 1);
7262     int esize = 8 << size;
7263     int i;
7264     int datasize = is_q ? 128 : 64;
7265     int elements = datasize / esize;
7266     TCGv_i64 tcg_res[2], tcg_ele;
7267 
7268     if (opcode == 0 || (size == 3 && !is_q)) {
7269         unallocated_encoding(s);
7270         return;
7271     }
7272 
7273     if (!fp_access_check(s)) {
7274         return;
7275     }
7276 
7277     tcg_res[0] = tcg_temp_new_i64();
7278     tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
7279     tcg_ele = tcg_temp_new_i64();
7280 
7281     for (i = 0; i < elements; i++) {
7282         int o, w;
7283 
7284         switch (opcode) {
7285         case 1: /* UZP1/2 */
7286         {
7287             int midpoint = elements / 2;
7288             if (i < midpoint) {
7289                 read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
7290             } else {
7291                 read_vec_element(s, tcg_ele, rm,
7292                                  2 * (i - midpoint) + part, size);
7293             }
7294             break;
7295         }
7296         case 2: /* TRN1/2 */
7297             if (i & 1) {
7298                 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
7299             } else {
7300                 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
7301             }
7302             break;
7303         case 3: /* ZIP1/2 */
7304         {
7305             int base = part * elements / 2;
7306             if (i & 1) {
7307                 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
7308             } else {
7309                 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
7310             }
7311             break;
7312         }
7313         default:
7314             g_assert_not_reached();
7315         }
7316 
7317         w = (i * esize) / 64;
7318         o = (i * esize) % 64;
7319         if (o == 0) {
7320             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
7321         } else {
7322             tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
7323             tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
7324         }
7325     }
7326 
7327     for (i = 0; i <= is_q; ++i) {
7328         write_vec_element(s, tcg_res[i], rd, i, MO_64);
7329     }
7330     clear_vec_high(s, is_q, rd);
7331 }
7332 
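/*
 * For reference: the three shuffles above differ only in how the
 * source element index is computed.  Taking ZIP as the example,
 * 'part' selects the low (ZIP1) or high (ZIP2) halves of Vn and Vm,
 * whose elements are then interleaved.  A scalar model of the
 * selection loop (assumes <stdint.h>; illustrative only):
 */
static void zip_sketch(uint64_t *dst, const uint64_t *n, const uint64_t *m,
                       int elements, int part)
{
    int base = part * elements / 2;

    for (int i = 0; i < elements; i++) {
        /* odd result lanes come from Vm, even lanes from Vn */
        dst[i] = (i & 1) ? m[base + (i >> 1)] : n[base + (i >> 1)];
    }
}
/* UZP and TRN swap in the other two index expressions from the switch. */
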
7333 /*
7334  * do_reduction_op helper
7335  *
7336  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7337  * important for correct NaN propagation that we do these
7338  * operations in exactly the order specified by the pseudocode.
7339  *
7340  * This is a recursive function; TCG temps should be freed by the
7341  * calling function once it is done with the values.
7342  */
7343 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
7344                                 int esize, int size, int vmap, TCGv_ptr fpst)
7345 {
7346     if (esize == size) {
7347         int element;
7348         MemOp msize = esize == 16 ? MO_16 : MO_32;
7349         TCGv_i32 tcg_elem;
7350 
7351         /* We should have one register left here */
7352         assert(ctpop8(vmap) == 1);
7353         element = ctz32(vmap);
7354         assert(element < 8);
7355 
7356         tcg_elem = tcg_temp_new_i32();
7357         read_vec_element_i32(s, tcg_elem, rn, element, msize);
7358         return tcg_elem;
7359     } else {
7360         int bits = size / 2;
7361         int shift = ctpop8(vmap) / 2;
7362         int vmap_lo = (vmap >> shift) & vmap;
7363         int vmap_hi = (vmap & ~vmap_lo);
7364         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7365 
7366         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
7367         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
7368         tcg_res = tcg_temp_new_i32();
7369 
7370         switch (fpopcode) {
7371         case 0x0c: /* fmaxnmv half-precision */
7372             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7373             break;
7374         case 0x0f: /* fmaxv half-precision */
7375             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
7376             break;
7377         case 0x1c: /* fminnmv half-precision */
7378             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7379             break;
7380         case 0x1f: /* fminv half-precision */
7381             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
7382             break;
7383         case 0x2c: /* fmaxnmv */
7384             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
7385             break;
7386         case 0x2f: /* fmaxv */
7387             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
7388             break;
7389         case 0x3c: /* fminnmv */
7390             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
7391             break;
7392         case 0x3f: /* fminv */
7393             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
7394             break;
7395         default:
7396             g_assert_not_reached();
7397         }
7398         return tcg_res;
7399     }
7400 }
7401 
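/*
 * For reference: do_reduction_op() halves the lane bitmap vmap at
 * each level so lanes combine pairwise in exactly the tree order of
 * the ARM ARM Reduce() pseudocode, which is what makes NaN
 * propagation come out right.  A scalar model over an array, for n a
 * power of two (assumes <stddef.h>; illustrative only):
 */
static float reduce_sketch(const float *lanes, size_t n,
                           float (*op)(float, float))
{
    if (n == 1) {
        return lanes[0];
    }
    /* combine the low-half and high-half reductions, low half first */
    return op(reduce_sketch(lanes, n / 2, op),
              reduce_sketch(lanes + n / 2, n / 2, op));
}
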
7402 /* AdvSIMD across lanes
7403  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7404  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7405  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7406  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7407  */
7408 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
7409 {
7410     int rd = extract32(insn, 0, 5);
7411     int rn = extract32(insn, 5, 5);
7412     int size = extract32(insn, 22, 2);
7413     int opcode = extract32(insn, 12, 5);
7414     bool is_q = extract32(insn, 30, 1);
7415     bool is_u = extract32(insn, 29, 1);
7416     bool is_fp = false;
7417     bool is_min = false;
7418     int esize;
7419     int elements;
7420     int i;
7421     TCGv_i64 tcg_res, tcg_elt;
7422 
7423     switch (opcode) {
7424     case 0x1b: /* ADDV */
7425         if (is_u) {
7426             unallocated_encoding(s);
7427             return;
7428         }
7429         /* fall through */
7430     case 0x3: /* SADDLV, UADDLV */
7431     case 0xa: /* SMAXV, UMAXV */
7432     case 0x1a: /* SMINV, UMINV */
7433         if (size == 3 || (size == 2 && !is_q)) {
7434             unallocated_encoding(s);
7435             return;
7436         }
7437         break;
7438     case 0xc: /* FMAXNMV, FMINNMV */
7439     case 0xf: /* FMAXV, FMINV */
7440         /* Bit 1 of the size field encodes min vs max, and the actual size
7441          * depends on the encoding of the U bit. If U is not set (and FP16
7442          * is enabled) then we do half-precision float instead of single
7443          * precision.
7444          */
7445         is_min = extract32(size, 1, 1);
7446         is_fp = true;
7447         if (!is_u && dc_isar_feature(aa64_fp16, s)) {
7448             size = 1;
7449         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
7450             unallocated_encoding(s);
7451             return;
7452         } else {
7453             size = 2;
7454         }
7455         break;
7456     default:
7457         unallocated_encoding(s);
7458         return;
7459     }
7460 
7461     if (!fp_access_check(s)) {
7462         return;
7463     }
7464 
7465     esize = 8 << size;
7466     elements = (is_q ? 128 : 64) / esize;
7467 
7468     tcg_res = tcg_temp_new_i64();
7469     tcg_elt = tcg_temp_new_i64();
7470 
7471     /* These instructions operate across all lanes of a vector
7472      * to produce a single result. We can guarantee that a 64
7473      * bit intermediate is sufficient:
7474      *  + for [US]ADDLV the maximum element size is 32 bits, and
7475      *    the result type is 64 bits
7476      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7477      *    same as the element size, which is 32 bits at most
7478      * For the integer operations we can choose to work at 64
7479      * or 32 bits and truncate at the end; for simplicity
7480      * we use 64 bits always. The floating point
7481      * ops do require 32 bit intermediates, though.
7482      */
7483     if (!is_fp) {
7484         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
7485 
7486         for (i = 1; i < elements; i++) {
7487             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
7488 
7489             switch (opcode) {
7490             case 0x03: /* SADDLV / UADDLV */
7491             case 0x1b: /* ADDV */
7492                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
7493                 break;
7494             case 0x0a: /* SMAXV / UMAXV */
7495                 if (is_u) {
7496                     tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
7497                 } else {
7498                     tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
7499                 }
7500                 break;
7501             case 0x1a: /* SMINV / UMINV */
7502                 if (is_u) {
7503                     tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
7504                 } else {
7505                     tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
7506                 }
7507                 break;
7508             default:
7509                 g_assert_not_reached();
7510             }
7511 
7512         }
7513     } else {
7514         /* Floating point vector reduction ops which work across 32
7515          * bit (single) or 16 bit (half-precision) intermediates.
7516          * Note that correct NaN propagation requires that we do these
7517          * operations in exactly the order specified by the pseudocode.
7518          */
7519         TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7520         int fpopcode = opcode | is_min << 4 | is_u << 5;
7521         int vmap = (1 << elements) - 1;
7522         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
7523                                              (is_q ? 128 : 64), vmap, fpst);
7524         tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
7525     }
7526 
7527     /* Now truncate the result to the width required for the final output */
7528     if (opcode == 0x03) {
7529         /* SADDLV, UADDLV: result is 2*esize */
7530         size++;
7531     }
7532 
7533     switch (size) {
7534     case 0:
7535         tcg_gen_ext8u_i64(tcg_res, tcg_res);
7536         break;
7537     case 1:
7538         tcg_gen_ext16u_i64(tcg_res, tcg_res);
7539         break;
7540     case 2:
7541         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7542         break;
7543     case 3:
7544         break;
7545     default:
7546         g_assert_not_reached();
7547     }
7548 
7549     write_fp_dreg(s, rd, tcg_res);
7550 }
7551 
7552 /* DUP (Element, Vector)
7553  *
7554  *  31  30   29              21 20    16 15        10  9    5 4    0
7555  * +---+---+-------------------+--------+-------------+------+------+
7556  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7557  * +---+---+-------------------+--------+-------------+------+------+
7558  *
7559  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7560  */
7561 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7562                              int imm5)
7563 {
7564     int size = ctz32(imm5);
7565     int index;
7566 
7567     if (size > 3 || (size == 3 && !is_q)) {
7568         unallocated_encoding(s);
7569         return;
7570     }
7571 
7572     if (!fp_access_check(s)) {
7573         return;
7574     }
7575 
7576     index = imm5 >> (size + 1);
7577     tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7578                          vec_reg_offset(s, rn, index, size),
7579                          is_q ? 16 : 8, vec_full_reg_size(s));
7580 }
7581 
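/*
 * For reference: the imm5 decode above implements the ARM ARM
 * LowestSetBit() scheme shared by DUP/INS/SMOV/UMOV: the position of
 * the lowest set bit selects the element size, and the bits above it
 * select the element index.  A sketch using the GCC/Clang builtin in
 * place of QEMU's ctz32 (assumes <stdint.h>; note QEMU's ctz32(0)
 * returns 32, which the size > 3 checks reject, while the builtin
 * requires a nonzero argument):
 */
static void imm5_decode_sketch(uint32_t imm5, int *size, int *index)
{
    *size = __builtin_ctz(imm5);      /* 0 = B, 1 = H, 2 = S, 3 = D */
    *index = imm5 >> (*size + 1);     /* remaining bits pick the lane */
}
/* e.g. imm5 = 0b01010 -> size 1 (16-bit lanes), index 2. */
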
7582 /* DUP (element, scalar)
7583  *  31                   21 20    16 15        10  9    5 4    0
7584  * +-----------------------+--------+-------------+------+------+
7585  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7586  * +-----------------------+--------+-------------+------+------+
7587  */
7588 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7589                               int imm5)
7590 {
7591     int size = ctz32(imm5);
7592     int index;
7593     TCGv_i64 tmp;
7594 
7595     if (size > 3) {
7596         unallocated_encoding(s);
7597         return;
7598     }
7599 
7600     if (!fp_access_check(s)) {
7601         return;
7602     }
7603 
7604     index = imm5 >> (size + 1);
7605 
7606     /* This instruction just extracts the specified element and
7607      * zero-extends it into the bottom of the destination register.
7608      */
7609     tmp = tcg_temp_new_i64();
7610     read_vec_element(s, tmp, rn, index, size);
7611     write_fp_dreg(s, rd, tmp);
7612 }
7613 
7614 /* DUP (General)
7615  *
7616  *  31  30   29              21 20    16 15        10  9    5 4    0
7617  * +---+---+-------------------+--------+-------------+------+------+
7618  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
7619  * +---+---+-------------------+--------+-------------+------+------+
7620  *
7621  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7622  */
7623 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7624                              int imm5)
7625 {
7626     int size = ctz32(imm5);
7627     uint32_t dofs, oprsz, maxsz;
7628 
7629     if (size > 3 || ((size == 3) && !is_q)) {
7630         unallocated_encoding(s);
7631         return;
7632     }
7633 
7634     if (!fp_access_check(s)) {
7635         return;
7636     }
7637 
7638     dofs = vec_full_reg_offset(s, rd);
7639     oprsz = is_q ? 16 : 8;
7640     maxsz = vec_full_reg_size(s);
7641 
7642     tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7643 }
7644 
7645 /* INS (Element)
7646  *
7647  *  31                   21 20    16 15  14    11  10 9    5 4    0
7648  * +-----------------------+--------+------------+---+------+------+
7649  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
7650  * +-----------------------+--------+------------+---+------+------+
7651  *
7652  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7653  * index: encoded in imm5<4:size+1>
7654  */
7655 static void handle_simd_inse(DisasContext *s, int rd, int rn,
7656                              int imm4, int imm5)
7657 {
7658     int size = ctz32(imm5);
7659     int src_index, dst_index;
7660     TCGv_i64 tmp;
7661 
7662     if (size > 3) {
7663         unallocated_encoding(s);
7664         return;
7665     }
7666 
7667     if (!fp_access_check(s)) {
7668         return;
7669     }
7670 
7671     dst_index = extract32(imm5, 1+size, 5);
7672     src_index = extract32(imm4, size, 4);
7673 
7674     tmp = tcg_temp_new_i64();
7675 
7676     read_vec_element(s, tmp, rn, src_index, size);
7677     write_vec_element(s, tmp, rd, dst_index, size);
7678 
7679     /* INS is considered a 128-bit write for SVE. */
7680     clear_vec_high(s, true, rd);
7681 }
7682 
7683 
7684 /* INS (General)
7685  *
7686  *  31                   21 20    16 15        10  9    5 4    0
7687  * +-----------------------+--------+-------------+------+------+
7688  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
7689  * +-----------------------+--------+-------------+------+------+
7690  *
7691  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7692  * index: encoded in imm5<4:size+1>
7693  */
7694 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
7695 {
7696     int size = ctz32(imm5);
7697     int idx;
7698 
7699     if (size > 3) {
7700         unallocated_encoding(s);
7701         return;
7702     }
7703 
7704     if (!fp_access_check(s)) {
7705         return;
7706     }
7707 
7708     idx = extract32(imm5, 1 + size, 4 - size);
7709     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
7710 
7711     /* INS is considered a 128-bit write for SVE. */
7712     clear_vec_high(s, true, rd);
7713 }
7714 
7715 /*
7716  * UMOV (General)
7717  * SMOV (General)
7718  *
7719  *  31  30   29              21 20    16 15    12   10 9    5 4    0
7720  * +---+---+-------------------+--------+-------------+------+------+
7721  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
7722  * +---+---+-------------------+--------+-------------+------+------+
7723  *
7724  * U: unsigned when set
7725  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7726  */
7727 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
7728                                   int rn, int rd, int imm5)
7729 {
7730     int size = ctz32(imm5);
7731     int element;
7732     TCGv_i64 tcg_rd;
7733 
7734     /* Check for UnallocatedEncodings */
7735     if (is_signed) {
7736         if (size > 2 || (size == 2 && !is_q)) {
7737             unallocated_encoding(s);
7738             return;
7739         }
7740     } else {
7741         if (size > 3
7742             || (size < 3 && is_q)
7743             || (size == 3 && !is_q)) {
7744             unallocated_encoding(s);
7745             return;
7746         }
7747     }
7748 
7749     if (!fp_access_check(s)) {
7750         return;
7751     }
7752 
7753     element = extract32(imm5, 1+size, 4);
7754 
7755     tcg_rd = cpu_reg(s, rd);
7756     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
7757     if (is_signed && !is_q) {
7758         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7759     }
7760 }
7761 
7762 /* AdvSIMD copy
7763  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
7764  * +---+---+----+-----------------+------+---+------+---+------+------+
7765  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7766  * +---+---+----+-----------------+------+---+------+---+------+------+
7767  */
7768 static void disas_simd_copy(DisasContext *s, uint32_t insn)
7769 {
7770     int rd = extract32(insn, 0, 5);
7771     int rn = extract32(insn, 5, 5);
7772     int imm4 = extract32(insn, 11, 4);
7773     int op = extract32(insn, 29, 1);
7774     int is_q = extract32(insn, 30, 1);
7775     int imm5 = extract32(insn, 16, 5);
7776 
7777     if (op) {
7778         if (is_q) {
7779             /* INS (element) */
7780             handle_simd_inse(s, rd, rn, imm4, imm5);
7781         } else {
7782             unallocated_encoding(s);
7783         }
7784     } else {
7785         switch (imm4) {
7786         case 0:
7787             /* DUP (element - vector) */
7788             handle_simd_dupe(s, is_q, rd, rn, imm5);
7789             break;
7790         case 1:
7791             /* DUP (general) */
7792             handle_simd_dupg(s, is_q, rd, rn, imm5);
7793             break;
7794         case 3:
7795             if (is_q) {
7796                 /* INS (general) */
7797                 handle_simd_insg(s, rd, rn, imm5);
7798             } else {
7799                 unallocated_encoding(s);
7800             }
7801             break;
7802         case 5:
7803         case 7:
7804             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7805             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
7806             break;
7807         default:
7808             unallocated_encoding(s);
7809             break;
7810         }
7811     }
7812 }
7813 
7814 /* AdvSIMD modified immediate
7815  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
7816  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7817  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
7818  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7819  *
7820  * There are a number of operations that can be carried out here:
7821  *   MOVI - move (shifted) imm into register
7822  *   MVNI - move inverted (shifted) imm into register
7823  *   ORR  - bitwise OR of (shifted) imm with register
7824  *   BIC  - bitwise clear of (shifted) imm with register
7825  * With ARMv8.2 we also have:
7826  *   FMOV half-precision
7827  */
7828 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
7829 {
7830     int rd = extract32(insn, 0, 5);
7831     int cmode = extract32(insn, 12, 4);
7832     int o2 = extract32(insn, 11, 1);
7833     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
7834     bool is_neg = extract32(insn, 29, 1);
7835     bool is_q = extract32(insn, 30, 1);
7836     uint64_t imm = 0;
7837 
7838     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
7839         /* Check for FMOV (vector, immediate) - half-precision */
7840         if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
7841             unallocated_encoding(s);
7842             return;
7843         }
7844     }
7845 
7846     if (!fp_access_check(s)) {
7847         return;
7848     }
7849 
7850     if (cmode == 15 && o2 && !is_neg) {
7851         /* FMOV (vector, immediate) - half-precision */
7852         imm = vfp_expand_imm(MO_16, abcdefgh);
7853         /* now duplicate across the lanes */
7854         imm = dup_const(MO_16, imm);
7855     } else {
7856         imm = asimd_imm_const(abcdefgh, cmode, is_neg);
7857     }
7858 
7859     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
7860         /* MOVI or MVNI, with MVNI negation handled above.  */
7861         tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
7862                              vec_full_reg_size(s), imm);
7863     } else {
7864         /* ORR or BIC, with BIC negation to AND handled above.  */
7865         if (is_neg) {
7866             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
7867         } else {
7868             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
7869         }
7870     }
7871 }
7872 
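/*
 * For reference: the FMOV (vector, immediate) path above expands the
 * half-precision constant and then replicates it across the 64-bit
 * lane pattern with dup_const(MO_16, imm).  Replicating a 16-bit
 * value is a single multiply by a comb constant.  A sketch (assumes
 * <stdint.h>; QEMU's dup_const also handles 8- and 32-bit lanes):
 */
static uint64_t dup_const16_sketch(uint16_t x)
{
    return (uint64_t)x * 0x0001000100010001ull;
}
/* e.g. 0x3c00 (fp16 1.0) -> 0x3c003c003c003c00. */
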
7873 /* AdvSIMD scalar copy
7874  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
7875  * +-----+----+-----------------+------+---+------+---+------+------+
7876  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7877  * +-----+----+-----------------+------+---+------+---+------+------+
7878  */
7879 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
7880 {
7881     int rd = extract32(insn, 0, 5);
7882     int rn = extract32(insn, 5, 5);
7883     int imm4 = extract32(insn, 11, 4);
7884     int imm5 = extract32(insn, 16, 5);
7885     int op = extract32(insn, 29, 1);
7886 
7887     if (op != 0 || imm4 != 0) {
7888         unallocated_encoding(s);
7889         return;
7890     }
7891 
7892     /* DUP (element, scalar) */
7893     handle_simd_dupes(s, rd, rn, imm5);
7894 }
7895 
7896 /* AdvSIMD scalar pairwise
7897  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7898  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7899  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7900  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7901  */
7902 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
7903 {
7904     int u = extract32(insn, 29, 1);
7905     int size = extract32(insn, 22, 2);
7906     int opcode = extract32(insn, 12, 5);
7907     int rn = extract32(insn, 5, 5);
7908     int rd = extract32(insn, 0, 5);
7909     TCGv_ptr fpst;
7910 
7911     /* For some ops (the FP ones), size[1] is part of the encoding.
7912      * For ADDP strictly it is not, but size[1] is always 1 for valid
7913      * encodings.
7914      */
7915     opcode |= (extract32(size, 1, 1) << 5);
7916 
7917     switch (opcode) {
7918     case 0x3b: /* ADDP */
7919         if (u || size != 3) {
7920             unallocated_encoding(s);
7921             return;
7922         }
7923         if (!fp_access_check(s)) {
7924             return;
7925         }
7926 
7927         fpst = NULL;
7928         break;
7929     case 0xc: /* FMAXNMP */
7930     case 0xd: /* FADDP */
7931     case 0xf: /* FMAXP */
7932     case 0x2c: /* FMINNMP */
7933     case 0x2f: /* FMINP */
7934         /* FP op, size[0] selects 32 vs 64 bit */
7935         if (!u) {
7936             if (!dc_isar_feature(aa64_fp16, s)) {
7937                 unallocated_encoding(s);
7938                 return;
7939             } else {
7940                 size = MO_16;
7941             }
7942         } else {
7943             size = extract32(size, 0, 1) ? MO_64 : MO_32;
7944         }
7945 
7946         if (!fp_access_check(s)) {
7947             return;
7948         }
7949 
7950         fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7951         break;
7952     default:
7953         unallocated_encoding(s);
7954         return;
7955     }
7956 
7957     if (size == MO_64) {
7958         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7959         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7960         TCGv_i64 tcg_res = tcg_temp_new_i64();
7961 
7962         read_vec_element(s, tcg_op1, rn, 0, MO_64);
7963         read_vec_element(s, tcg_op2, rn, 1, MO_64);
7964 
7965         switch (opcode) {
7966         case 0x3b: /* ADDP */
7967             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
7968             break;
7969         case 0xc: /* FMAXNMP */
7970             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7971             break;
7972         case 0xd: /* FADDP */
7973             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7974             break;
7975         case 0xf: /* FMAXP */
7976             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7977             break;
7978         case 0x2c: /* FMINNMP */
7979             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7980             break;
7981         case 0x2f: /* FMINP */
7982             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7983             break;
7984         default:
7985             g_assert_not_reached();
7986         }
7987 
7988         write_fp_dreg(s, rd, tcg_res);
7989     } else {
7990         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7991         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7992         TCGv_i32 tcg_res = tcg_temp_new_i32();
7993 
7994         read_vec_element_i32(s, tcg_op1, rn, 0, size);
7995         read_vec_element_i32(s, tcg_op2, rn, 1, size);
7996 
7997         if (size == MO_16) {
7998             switch (opcode) {
7999             case 0xc: /* FMAXNMP */
8000                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8001                 break;
8002             case 0xd: /* FADDP */
8003                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
8004                 break;
8005             case 0xf: /* FMAXP */
8006                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
8007                 break;
8008             case 0x2c: /* FMINNMP */
8009                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8010                 break;
8011             case 0x2f: /* FMINP */
8012                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
8013                 break;
8014             default:
8015                 g_assert_not_reached();
8016             }
8017         } else {
8018             switch (opcode) {
8019             case 0xc: /* FMAXNMP */
8020                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8021                 break;
8022             case 0xd: /* FADDP */
8023                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8024                 break;
8025             case 0xf: /* FMAXP */
8026                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8027                 break;
8028             case 0x2c: /* FMINNMP */
8029                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8030                 break;
8031             case 0x2f: /* FMINP */
8032                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8033                 break;
8034             default:
8035                 g_assert_not_reached();
8036             }
8037         }
8038 
8039         write_fp_sreg(s, rd, tcg_res);
8040     }
8041 }
8042 
8043 /*
8044  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
8045  *
8046  * This code handles the common shifting and is used by both
8047  * the vector and scalar code.
8048  */
8049 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
8050                                     TCGv_i64 tcg_rnd, bool accumulate,
8051                                     bool is_u, int size, int shift)
8052 {
8053     bool extended_result = false;
8054     bool round = tcg_rnd != NULL;
8055     int ext_lshift = 0;
8056     TCGv_i64 tcg_src_hi;
8057 
8058     if (round && size == 3) {
8059         extended_result = true;
8060         ext_lshift = 64 - shift;
8061         tcg_src_hi = tcg_temp_new_i64();
8062     } else if (shift == 64) {
8063         if (!accumulate && is_u) {
8064             /* result is zero */
8065             tcg_gen_movi_i64(tcg_res, 0);
8066             return;
8067         }
8068     }
8069 
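         /*
          * Rounding adds 1 << (shift - 1) before shifting: e.g. a 64-bit
          * SRSHR by 8 computes (src + 0x80) >> 8. For 64-bit elements
          * that addition can carry out of 64 bits, so the extended path
          * keeps the high 64 bits of the sum in tcg_src_hi and
          * reassembles the result from both halves after the shift.
          */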
8070     /* Deal with the rounding step */
8071     if (round) {
8072         if (extended_result) {
8073             TCGv_i64 tcg_zero = tcg_constant_i64(0);
8074             if (!is_u) {
8075                 /* take care of sign extending tcg_res */
8076                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
8077                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8078                                  tcg_src, tcg_src_hi,
8079                                  tcg_rnd, tcg_zero);
8080             } else {
8081                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8082                                  tcg_src, tcg_zero,
8083                                  tcg_rnd, tcg_zero);
8084             }
8085         } else {
8086             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
8087         }
8088     }
8089 
8090     /* Now do the shift right */
8091     if (round && extended_result) {
8092         /* extended case, >64 bit precision required */
8093         if (ext_lshift == 0) {
8094             /* special case, only high bits matter */
8095             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
8096         } else {
8097             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8098             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
8099             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
8100         }
8101     } else {
8102         if (is_u) {
8103             if (shift == 64) {
8104                 /* essentially shifting in 64 zeros */
8105                 tcg_gen_movi_i64(tcg_src, 0);
8106             } else {
8107                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8108             }
8109         } else {
8110             if (shift == 64) {
8111                 /* effectively extending the sign-bit */
8112                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
8113             } else {
8114                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
8115             }
8116         }
8117     }
8118 
8119     if (accumulate) {
8120         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
8121     } else {
8122         tcg_gen_mov_i64(tcg_res, tcg_src);
8123     }
8124 }
8125 
8126 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
8127 static void handle_scalar_simd_shri(DisasContext *s,
8128                                     bool is_u, int immh, int immb,
8129                                     int opcode, int rn, int rd)
8130 {
8131     const int size = 3;
8132     int immhb = immh << 3 | immb;
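         /* With immh[3] set, esize is 64, so shift = 128 - immhb lies in [1, 64]. */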
8133     int shift = 2 * (8 << size) - immhb;
8134     bool accumulate = false;
8135     bool round = false;
8136     bool insert = false;
8137     TCGv_i64 tcg_rn;
8138     TCGv_i64 tcg_rd;
8139     TCGv_i64 tcg_round;
8140 
8141     if (!extract32(immh, 3, 1)) {
8142         unallocated_encoding(s);
8143         return;
8144     }
8145 
8146     if (!fp_access_check(s)) {
8147         return;
8148     }
8149 
8150     switch (opcode) {
8151     case 0x02: /* SSRA / USRA (accumulate) */
8152         accumulate = true;
8153         break;
8154     case 0x04: /* SRSHR / URSHR (rounding) */
8155         round = true;
8156         break;
8157     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8158         accumulate = round = true;
8159         break;
8160     case 0x08: /* SRI */
8161         insert = true;
8162         break;
8163     }
8164 
8165     if (round) {
8166         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8167     } else {
8168         tcg_round = NULL;
8169     }
8170 
8171     tcg_rn = read_fp_dreg(s, rn);
8172     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8173 
8174     if (insert) {
8175         /* shift count same as element size is valid but does nothing;
8176          * special case to avoid potential shift by 64.
8177          */
8178         int esize = 8 << size;
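             /* For shift < esize, Rn >> shift supplies the low
              * esize - shift bits of the result and the deposit below
              * preserves the top shift bits of Rd.
              */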
8179         if (shift != esize) {
8180             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
8181             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
8182         }
8183     } else {
8184         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8185                                 accumulate, is_u, size, shift);
8186     }
8187 
8188     write_fp_dreg(s, rd, tcg_rd);
8189 }
8190 
8191 /* SHL/SLI - Scalar shift left */
8192 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
8193                                     int immh, int immb, int opcode,
8194                                     int rn, int rd)
8195 {
8196     int size = 32 - clz32(immh) - 1;
8197     int immhb = immh << 3 | immb;
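         /* immh:immb encodes esize + shift, so shift = immhb - esize is in [0, esize - 1]. */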
8198     int shift = immhb - (8 << size);
8199     TCGv_i64 tcg_rn;
8200     TCGv_i64 tcg_rd;
8201 
8202     if (!extract32(immh, 3, 1)) {
8203         unallocated_encoding(s);
8204         return;
8205     }
8206 
8207     if (!fp_access_check(s)) {
8208         return;
8209     }
8210 
8211     tcg_rn = read_fp_dreg(s, rn);
8212     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8213 
8214     if (insert) {
8215         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
8216     } else {
8217         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
8218     }
8219 
8220     write_fp_dreg(s, rd, tcg_rd);
8221 }
8222 
8223 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8224  * (signed/unsigned) narrowing */
8225 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
8226                                    bool is_u_shift, bool is_u_narrow,
8227                                    int immh, int immb, int opcode,
8228                                    int rn, int rd)
8229 {
8230     int immhb = immh << 3 | immb;
8231     int size = 32 - clz32(immh) - 1;
8232     int esize = 8 << size;
8233     int shift = (2 * esize) - immhb;
8234     int elements = is_scalar ? 1 : (64 / esize);
8235     bool round = extract32(opcode, 0, 1);
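         /* Source elements are read at double width (size + 1), since the
          * shift narrows from 2 * esize down to esize.
          */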
8236     MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
8237     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
8238     TCGv_i32 tcg_rd_narrowed;
8239     TCGv_i64 tcg_final;
8240 
8241     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
8242         { gen_helper_neon_narrow_sat_s8,
8243           gen_helper_neon_unarrow_sat8 },
8244         { gen_helper_neon_narrow_sat_s16,
8245           gen_helper_neon_unarrow_sat16 },
8246         { gen_helper_neon_narrow_sat_s32,
8247           gen_helper_neon_unarrow_sat32 },
8248         { NULL, NULL },
8249     };
8250     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
8251         gen_helper_neon_narrow_sat_u8,
8252         gen_helper_neon_narrow_sat_u16,
8253         gen_helper_neon_narrow_sat_u32,
8254         NULL
8255     };
8256     NeonGenNarrowEnvFn *narrowfn;
8257 
8258     int i;
8259 
8260     assert(size < 4);
8261 
8262     if (extract32(immh, 3, 1)) {
8263         unallocated_encoding(s);
8264         return;
8265     }
8266 
8267     if (!fp_access_check(s)) {
8268         return;
8269     }
8270 
8271     if (is_u_shift) {
8272         narrowfn = unsigned_narrow_fns[size];
8273     } else {
8274         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
8275     }
8276 
8277     tcg_rn = tcg_temp_new_i64();
8278     tcg_rd = tcg_temp_new_i64();
8279     tcg_rd_narrowed = tcg_temp_new_i32();
8280     tcg_final = tcg_temp_new_i64();
8281 
8282     if (round) {
8283         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8284     } else {
8285         tcg_round = NULL;
8286     }
8287 
8288     for (i = 0; i < elements; i++) {
8289         read_vec_element(s, tcg_rn, rn, i, ldop);
8290         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8291                                 false, is_u_shift, size+1, shift);
8292         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
8293         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
8294         if (i == 0) {
8295             tcg_gen_mov_i64(tcg_final, tcg_rd);
8296         } else {
8297             tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8298         }
8299     }
8300 
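         /* The narrowed 64-bit result is written to the low half of Rd,
          * or for the "2" (is_q) variants to the high half, leaving the
          * low half intact.
          */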
8301     if (!is_q) {
8302         write_vec_element(s, tcg_final, rd, 0, MO_64);
8303     } else {
8304         write_vec_element(s, tcg_final, rd, 1, MO_64);
8305     }
8306     clear_vec_high(s, is_q, rd);
8307 }
8308 
8309 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8310 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
8311                              bool src_unsigned, bool dst_unsigned,
8312                              int immh, int immb, int rn, int rd)
8313 {
8314     int immhb = immh << 3 | immb;
8315     int size = 32 - clz32(immh) - 1;
8316     int shift = immhb - (8 << size);
8317     int pass;
8318 
8319     assert(immh != 0);
8320     assert(!(scalar && is_q));
8321 
8322     if (!scalar) {
8323         if (!is_q && extract32(immh, 3, 1)) {
8324             unallocated_encoding(s);
8325             return;
8326         }
8327 
8328         /* Since we use the variable-shift helpers we must
8329          * replicate the shift count into each element of
8330          * the tcg_shift value.
8331          */
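             /* e.g. for size 0 (bytes), a shift of 5 becomes 0x05050505 */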
8332         switch (size) {
8333         case 0:
8334             shift |= shift << 8;
8335             /* fall through */
8336         case 1:
8337             shift |= shift << 16;
8338             break;
8339         case 2:
8340         case 3:
8341             break;
8342         default:
8343             g_assert_not_reached();
8344         }
8345     }
8346 
8347     if (!fp_access_check(s)) {
8348         return;
8349     }
8350 
8351     if (size == 3) {
8352         TCGv_i64 tcg_shift = tcg_constant_i64(shift);
8353         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
8354             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
8355             { NULL, gen_helper_neon_qshl_u64 },
8356         };
8357         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
8358         int maxpass = is_q ? 2 : 1;
8359 
8360         for (pass = 0; pass < maxpass; pass++) {
8361             TCGv_i64 tcg_op = tcg_temp_new_i64();
8362 
8363             read_vec_element(s, tcg_op, rn, pass, MO_64);
8364             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8365             write_vec_element(s, tcg_op, rd, pass, MO_64);
8366         }
8367         clear_vec_high(s, is_q, rd);
8368     } else {
8369         TCGv_i32 tcg_shift = tcg_constant_i32(shift);
8370         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
8371             {
8372                 { gen_helper_neon_qshl_s8,
8373                   gen_helper_neon_qshl_s16,
8374                   gen_helper_neon_qshl_s32 },
8375                 { gen_helper_neon_qshlu_s8,
8376                   gen_helper_neon_qshlu_s16,
8377                   gen_helper_neon_qshlu_s32 }
8378             }, {
8379                 { NULL, NULL, NULL },
8380                 { gen_helper_neon_qshl_u8,
8381                   gen_helper_neon_qshl_u16,
8382                   gen_helper_neon_qshl_u32 }
8383             }
8384         };
8385         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
8386         MemOp memop = scalar ? size : MO_32;
8387         int maxpass = scalar ? 1 : is_q ? 4 : 2;
8388 
8389         for (pass = 0; pass < maxpass; pass++) {
8390             TCGv_i32 tcg_op = tcg_temp_new_i32();
8391 
8392             read_vec_element_i32(s, tcg_op, rn, pass, memop);
8393             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8394             if (scalar) {
8395                 switch (size) {
8396                 case 0:
8397                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
8398                     break;
8399                 case 1:
8400                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
8401                     break;
8402                 case 2:
8403                     break;
8404                 default:
8405                     g_assert_not_reached();
8406                 }
8407                 write_fp_sreg(s, rd, tcg_op);
8408             } else {
8409                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8410             }
8411         }
8412 
8413         if (!scalar) {
8414             clear_vec_high(s, is_q, rd);
8415         }
8416     }
8417 }
8418 
8419 /* Common vector code for handling integer to FP conversion */
8420 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
8421                                    int elements, int is_signed,
8422                                    int fracbits, int size)
8423 {
8424     TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8425     TCGv_i32 tcg_shift = NULL;
8426 
8427     MemOp mop = size | (is_signed ? MO_SIGN : 0);
8428     int pass;
8429 
8430     if (fracbits || size == MO_64) {
8431         tcg_shift = tcg_constant_i32(fracbits);
8432     }
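     /* A non-zero fracbits makes this a fixed-point conversion: the
      * scaling helpers divide the converted value by 2^fracbits.
      */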
8433 
8434     if (size == MO_64) {
8435         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
8436         TCGv_i64 tcg_double = tcg_temp_new_i64();
8437 
8438         for (pass = 0; pass < elements; pass++) {
8439             read_vec_element(s, tcg_int64, rn, pass, mop);
8440 
8441             if (is_signed) {
8442                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
8443                                      tcg_shift, tcg_fpst);
8444             } else {
8445                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
8446                                      tcg_shift, tcg_fpst);
8447             }
8448             if (elements == 1) {
8449                 write_fp_dreg(s, rd, tcg_double);
8450             } else {
8451                 write_vec_element(s, tcg_double, rd, pass, MO_64);
8452             }
8453         }
8454     } else {
8455         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
8456         TCGv_i32 tcg_float = tcg_temp_new_i32();
8457 
8458         for (pass = 0; pass < elements; pass++) {
8459             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
8460 
8461             switch (size) {
8462             case MO_32:
8463                 if (fracbits) {
8464                     if (is_signed) {
8465                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
8466                                              tcg_shift, tcg_fpst);
8467                     } else {
8468                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
8469                                              tcg_shift, tcg_fpst);
8470                     }
8471                 } else {
8472                     if (is_signed) {
8473                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
8474                     } else {
8475                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
8476                     }
8477                 }
8478                 break;
8479             case MO_16:
8480                 if (fracbits) {
8481                     if (is_signed) {
8482                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
8483                                              tcg_shift, tcg_fpst);
8484                     } else {
8485                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
8486                                              tcg_shift, tcg_fpst);
8487                     }
8488                 } else {
8489                     if (is_signed) {
8490                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
8491                     } else {
8492                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
8493                     }
8494                 }
8495                 break;
8496             default:
8497                 g_assert_not_reached();
8498             }
8499 
8500             if (elements == 1) {
8501                 write_fp_sreg(s, rd, tcg_float);
8502             } else {
8503                 write_vec_element_i32(s, tcg_float, rd, pass, size);
8504             }
8505         }
8506     }
8507 
8508     clear_vec_high(s, elements << size == 16, rd);
8509 }
8510 
8511 /* UCVTF/SCVTF - Integer to FP conversion */
8512 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8513                                          bool is_q, bool is_u,
8514                                          int immh, int immb, int opcode,
8515                                          int rn, int rd)
8516 {
8517     int size, elements, fracbits;
8518     int immhb = immh << 3 | immb;
8519 
8520     if (immh & 8) {
8521         size = MO_64;
8522         if (!is_scalar && !is_q) {
8523             unallocated_encoding(s);
8524             return;
8525         }
8526     } else if (immh & 4) {
8527         size = MO_32;
8528     } else if (immh & 2) {
8529         size = MO_16;
8530         if (!dc_isar_feature(aa64_fp16, s)) {
8531             unallocated_encoding(s);
8532             return;
8533         }
8534     } else {
8535         /* immh == 0 would be a failure of the decode logic */
8536         g_assert(immh == 1);
8537         unallocated_encoding(s);
8538         return;
8539     }
8540 
8541     if (is_scalar) {
8542         elements = 1;
8543     } else {
8544         elements = (8 << is_q) >> size;
8545     }
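     /* immh:immb encodes 2 * esize - fracbits, so with esize = 8 << size
      * the fraction width is fracbits = (16 << size) - immhb.
      */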
8546     fracbits = (16 << size) - immhb;
8547 
8548     if (!fp_access_check(s)) {
8549         return;
8550     }
8551 
8552     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8553 }
8554 
8555 /* FCVTZS, FCVTZU - FP to fixed-point conversion */
8556 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8557                                          bool is_q, bool is_u,
8558                                          int immh, int immb, int rn, int rd)
8559 {
8560     int immhb = immh << 3 | immb;
8561     int pass, size, fracbits;
8562     TCGv_ptr tcg_fpstatus;
8563     TCGv_i32 tcg_rmode, tcg_shift;
8564 
8565     if (immh & 0x8) {
8566         size = MO_64;
8567         if (!is_scalar && !is_q) {
8568             unallocated_encoding(s);
8569             return;
8570         }
8571     } else if (immh & 0x4) {
8572         size = MO_32;
8573     } else if (immh & 0x2) {
8574         size = MO_16;
8575         if (!dc_isar_feature(aa64_fp16, s)) {
8576             unallocated_encoding(s);
8577             return;
8578         }
8579     } else {
8580         /* Should have split out AdvSIMD modified immediate earlier.  */
8581         assert(immh == 1);
8582         unallocated_encoding(s);
8583         return;
8584     }
8585 
8586     if (!fp_access_check(s)) {
8587         return;
8588     }
8589 
8590     assert(!(is_scalar && is_q));
8591 
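     /* FCVTZ[SU] always truncates, so force round-to-zero here and
      * restore the previous rounding mode at the end.
      */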
8592     tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8593     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
8594     fracbits = (16 << size) - immhb;
8595     tcg_shift = tcg_constant_i32(fracbits);
8596 
8597     if (size == MO_64) {
8598         int maxpass = is_scalar ? 1 : 2;
8599 
8600         for (pass = 0; pass < maxpass; pass++) {
8601             TCGv_i64 tcg_op = tcg_temp_new_i64();
8602 
8603             read_vec_element(s, tcg_op, rn, pass, MO_64);
8604             if (is_u) {
8605                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8606             } else {
8607                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8608             }
8609             write_vec_element(s, tcg_op, rd, pass, MO_64);
8610         }
8611         clear_vec_high(s, is_q, rd);
8612     } else {
8613         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
8614         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
8615 
8616         switch (size) {
8617         case MO_16:
8618             if (is_u) {
8619                 fn = gen_helper_vfp_touhh;
8620             } else {
8621                 fn = gen_helper_vfp_toshh;
8622             }
8623             break;
8624         case MO_32:
8625             if (is_u) {
8626                 fn = gen_helper_vfp_touls;
8627             } else {
8628                 fn = gen_helper_vfp_tosls;
8629             }
8630             break;
8631         default:
8632             g_assert_not_reached();
8633         }
8634 
8635         for (pass = 0; pass < maxpass; pass++) {
8636             TCGv_i32 tcg_op = tcg_temp_new_i32();
8637 
8638             read_vec_element_i32(s, tcg_op, rn, pass, size);
8639             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8640             if (is_scalar) {
8641                 write_fp_sreg(s, rd, tcg_op);
8642             } else {
8643                 write_vec_element_i32(s, tcg_op, rd, pass, size);
8644             }
8645         }
8646         if (!is_scalar) {
8647             clear_vec_high(s, is_q, rd);
8648         }
8649     }
8650 
8651     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
8652 }
8653 
8654 /* AdvSIMD scalar shift by immediate
8655  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
8656  * +-----+---+-------------+------+------+--------+---+------+------+
8657  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8658  * +-----+---+-------------+------+------+--------+---+------+------+
8659  *
8660  * This is the scalar version, so it operates on fixed-size registers.
8661  */
8662 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8663 {
8664     int rd = extract32(insn, 0, 5);
8665     int rn = extract32(insn, 5, 5);
8666     int opcode = extract32(insn, 11, 5);
8667     int immb = extract32(insn, 16, 3);
8668     int immh = extract32(insn, 19, 4);
8669     bool is_u = extract32(insn, 29, 1);
8670 
8671     if (immh == 0) {
8672         unallocated_encoding(s);
8673         return;
8674     }
8675 
8676     switch (opcode) {
8677     case 0x08: /* SRI */
8678         if (!is_u) {
8679             unallocated_encoding(s);
8680             return;
8681         }
8682         /* fall through */
8683     case 0x00: /* SSHR / USHR */
8684     case 0x02: /* SSRA / USRA */
8685     case 0x04: /* SRSHR / URSHR */
8686     case 0x06: /* SRSRA / URSRA */
8687         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8688         break;
8689     case 0x0a: /* SHL / SLI */
8690         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8691         break;
8692     case 0x1c: /* SCVTF, UCVTF */
8693         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8694                                      opcode, rn, rd);
8695         break;
8696     case 0x10: /* SQSHRUN, SQSHRUN2 */
8697     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8698         if (!is_u) {
8699             unallocated_encoding(s);
8700             return;
8701         }
8702         handle_vec_simd_sqshrn(s, true, false, false, true,
8703                                immh, immb, opcode, rn, rd);
8704         break;
8705     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN, UQSHRN2 */
8706     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8707         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8708                                immh, immb, opcode, rn, rd);
8709         break;
8710     case 0xc: /* SQSHLU */
8711         if (!is_u) {
8712             unallocated_encoding(s);
8713             return;
8714         }
8715         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8716         break;
8717     case 0xe: /* SQSHL, UQSHL */
8718         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8719         break;
8720     case 0x1f: /* FCVTZS, FCVTZU */
8721         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8722         break;
8723     default:
8724         unallocated_encoding(s);
8725         break;
8726     }
8727 }
8728 
8729 /* AdvSIMD scalar three different
8730  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8731  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8732  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8733  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8734  */
8735 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
8736 {
8737     bool is_u = extract32(insn, 29, 1);
8738     int size = extract32(insn, 22, 2);
8739     int opcode = extract32(insn, 12, 4);
8740     int rm = extract32(insn, 16, 5);
8741     int rn = extract32(insn, 5, 5);
8742     int rd = extract32(insn, 0, 5);
8743 
8744     if (is_u) {
8745         unallocated_encoding(s);
8746         return;
8747     }
8748 
8749     switch (opcode) {
8750     case 0x9: /* SQDMLAL, SQDMLAL2 */
8751     case 0xb: /* SQDMLSL, SQDMLSL2 */
8752     case 0xd: /* SQDMULL, SQDMULL2 */
8753         if (size == 0 || size == 3) {
8754             unallocated_encoding(s);
8755             return;
8756         }
8757         break;
8758     default:
8759         unallocated_encoding(s);
8760         return;
8761     }
8762 
8763     if (!fp_access_check(s)) {
8764         return;
8765     }
8766 
8767     if (size == 2) {
8768         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8769         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8770         TCGv_i64 tcg_res = tcg_temp_new_i64();
8771 
8772         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
8773         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
8774 
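             /* The doubling in SQDMULL is done by saturating-adding the
              * product to itself.
              */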
8775         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
8776         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
8777 
8778         switch (opcode) {
8779         case 0xd: /* SQDMULL, SQDMULL2 */
8780             break;
8781         case 0xb: /* SQDMLSL, SQDMLSL2 */
8782             tcg_gen_neg_i64(tcg_res, tcg_res);
8783             /* fall through */
8784         case 0x9: /* SQDMLAL, SQDMLAL2 */
8785             read_vec_element(s, tcg_op1, rd, 0, MO_64);
8786             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
8787                                               tcg_res, tcg_op1);
8788             break;
8789         default:
8790             g_assert_not_reached();
8791         }
8792 
8793         write_fp_dreg(s, rd, tcg_res);
8794     } else {
8795         TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
8796         TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
8797         TCGv_i64 tcg_res = tcg_temp_new_i64();
8798 
8799         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
8800         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
8801 
8802         switch (opcode) {
8803         case 0xd: /* SQDMULL, SQDMULL2 */
8804             break;
8805         case 0xb: /* SQDMLSL, SQDMLSL2 */
8806             gen_helper_neon_negl_u32(tcg_res, tcg_res);
8807             /* fall through */
8808         case 0x9: /* SQDMLAL, SQDMLAL2 */
8809         {
8810             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
8811             read_vec_element(s, tcg_op3, rd, 0, MO_32);
8812             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
8813                                               tcg_res, tcg_op3);
8814             break;
8815         }
8816         default:
8817             g_assert_not_reached();
8818         }
8819 
8820         tcg_gen_ext32u_i64(tcg_res, tcg_res);
8821         write_fp_dreg(s, rd, tcg_res);
8822     }
8823 }
8824 
8825 static void handle_3same_64(DisasContext *s, int opcode, bool u,
8826                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
8827 {
8828     /* Handle 64x64->64 opcodes which are shared between the scalar
8829      * and vector 3-same groups. We cover every opcode where size == 3
8830      * is valid in either the three-reg-same (integer, not pairwise)
8831      * or scalar-three-reg-same groups.
8832      */
8833     TCGCond cond;
8834 
8835     switch (opcode) {
8836     case 0x1: /* SQADD */
8837         if (u) {
8838             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8839         } else {
8840             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8841         }
8842         break;
8843     case 0x5: /* SQSUB */
8844         if (u) {
8845             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8846         } else {
8847             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8848         }
8849         break;
8850     case 0x6: /* CMGT, CMHI */
8851         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
8852          * We implement this using setcond (test) and then negating.
8853          */
8854         cond = u ? TCG_COND_GTU : TCG_COND_GT;
8855     do_cmop:
8856         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8857         tcg_gen_neg_i64(tcg_rd, tcg_rd);
8858         break;
8859     case 0x7: /* CMGE, CMHS */
8860         cond = u ? TCG_COND_GEU : TCG_COND_GE;
8861         goto do_cmop;
8862     case 0x11: /* CMTST, CMEQ */
8863         if (u) {
8864             cond = TCG_COND_EQ;
8865             goto do_cmop;
8866         }
8867         gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8868         break;
8869     case 0x8: /* SSHL, USHL */
8870         if (u) {
8871             gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
8872         } else {
8873             gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
8874         }
8875         break;
8876     case 0x9: /* SQSHL, UQSHL */
8877         if (u) {
8878             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8879         } else {
8880             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8881         }
8882         break;
8883     case 0xa: /* SRSHL, URSHL */
8884         if (u) {
8885             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8886         } else {
8887             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8888         }
8889         break;
8890     case 0xb: /* SQRSHL, UQRSHL */
8891         if (u) {
8892             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8893         } else {
8894             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8895         }
8896         break;
8897     case 0x10: /* ADD, SUB */
8898         if (u) {
8899             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8900         } else {
8901             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8902         }
8903         break;
8904     default:
8905         g_assert_not_reached();
8906     }
8907 }
8908 
8909 /* Handle the 3-same-operands float operations; shared by the scalar
8910  * and vector encodings. The caller must filter out any encodings
8911  * not allocated for the encoding it is dealing with.
8912  */
8913 static void handle_3same_float(DisasContext *s, int size, int elements,
8914                                int fpopcode, int rd, int rn, int rm)
8915 {
8916     int pass;
8917     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
8918 
8919     for (pass = 0; pass < elements; pass++) {
8920         if (size) {
8921             /* Double */
8922             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8923             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8924             TCGv_i64 tcg_res = tcg_temp_new_i64();
8925 
8926             read_vec_element(s, tcg_op1, rn, pass, MO_64);
8927             read_vec_element(s, tcg_op2, rm, pass, MO_64);
8928 
8929             switch (fpopcode) {
8930             case 0x39: /* FMLS */
8931                 /* As usual for ARM, separate negation for fused multiply-add */
8932                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
8933                 /* fall through */
8934             case 0x19: /* FMLA */
8935                 read_vec_element(s, tcg_res, rd, pass, MO_64);
8936                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
8937                                        tcg_res, fpst);
8938                 break;
8939             case 0x18: /* FMAXNM */
8940                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8941                 break;
8942             case 0x1a: /* FADD */
8943                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8944                 break;
8945             case 0x1b: /* FMULX */
8946                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
8947                 break;
8948             case 0x1c: /* FCMEQ */
8949                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8950                 break;
8951             case 0x1e: /* FMAX */
8952                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8953                 break;
8954             case 0x1f: /* FRECPS */
8955                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8956                 break;
8957             case 0x38: /* FMINNM */
8958                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8959                 break;
8960             case 0x3a: /* FSUB */
8961                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8962                 break;
8963             case 0x3e: /* FMIN */
8964                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8965                 break;
8966             case 0x3f: /* FRSQRTS */
8967                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8968                 break;
8969             case 0x5b: /* FMUL */
8970                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
8971                 break;
8972             case 0x5c: /* FCMGE */
8973                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8974                 break;
8975             case 0x5d: /* FACGE */
8976                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8977                 break;
8978             case 0x5f: /* FDIV */
8979                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
8980                 break;
8981             case 0x7a: /* FABD */
8982                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8983                 gen_helper_vfp_absd(tcg_res, tcg_res);
8984                 break;
8985             case 0x7c: /* FCMGT */
8986                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8987                 break;
8988             case 0x7d: /* FACGT */
8989                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8990                 break;
8991             default:
8992                 g_assert_not_reached();
8993             }
8994 
8995             write_vec_element(s, tcg_res, rd, pass, MO_64);
8996         } else {
8997             /* Single */
8998             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8999             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9000             TCGv_i32 tcg_res = tcg_temp_new_i32();
9001 
9002             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9003             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9004 
9005             switch (fpopcode) {
9006             case 0x39: /* FMLS */
9007                 /* As usual for ARM, separate negation for fused multiply-add */
9008                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
9009                 /* fall through */
9010             case 0x19: /* FMLA */
9011                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9012                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
9013                                        tcg_res, fpst);
9014                 break;
9015             case 0x1a: /* FADD */
9016                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
9017                 break;
9018             case 0x1b: /* FMULX */
9019                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
9020                 break;
9021             case 0x1c: /* FCMEQ */
9022                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9023                 break;
9024             case 0x1e: /* FMAX */
9025                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
9026                 break;
9027             case 0x1f: /* FRECPS */
9028                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9029                 break;
9030             case 0x18: /* FMAXNM */
9031                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
9032                 break;
9033             case 0x38: /* FMINNM */
9034                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
9035                 break;
9036             case 0x3a: /* FSUB */
9037                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9038                 break;
9039             case 0x3e: /* FMIN */
9040                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
9041                 break;
9042             case 0x3f: /* FRSQRTS */
9043                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9044                 break;
9045             case 0x5b: /* FMUL */
9046                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
9047                 break;
9048             case 0x5c: /* FCMGE */
9049                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9050                 break;
9051             case 0x5d: /* FACGE */
9052                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9053                 break;
9054             case 0x5f: /* FDIV */
9055                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
9056                 break;
9057             case 0x7a: /* FABD */
9058                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9059                 gen_helper_vfp_abss(tcg_res, tcg_res);
9060                 break;
9061             case 0x7c: /* FCMGT */
9062                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9063                 break;
9064             case 0x7d: /* FACGT */
9065                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9066                 break;
9067             default:
9068                 g_assert_not_reached();
9069             }
9070 
9071             if (elements == 1) {
9072                 /* scalar single so clear high part */
9073                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9074 
9075                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
9076                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
9077             } else {
9078                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9079             }
9080         }
9081     }
9082 
9083     clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
9084 }
9085 
9086 /* AdvSIMD scalar three same
9087  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
9088  * +-----+---+-----------+------+---+------+--------+---+------+------+
9089  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9090  * +-----+---+-----------+------+---+------+--------+---+------+------+
9091  */
9092 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
9093 {
9094     int rd = extract32(insn, 0, 5);
9095     int rn = extract32(insn, 5, 5);
9096     int opcode = extract32(insn, 11, 5);
9097     int rm = extract32(insn, 16, 5);
9098     int size = extract32(insn, 22, 2);
9099     bool u = extract32(insn, 29, 1);
9100     TCGv_i64 tcg_rd;
9101 
9102     if (opcode >= 0x18) {
9103         /* Floating point: U, size[1] and opcode indicate operation */
9104         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
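             /* e.g. FABD: opcode 0x1a, size[1] = 1, U = 1 gives fpopcode 0x7a */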
9105         switch (fpopcode) {
9106         case 0x1b: /* FMULX */
9107         case 0x1f: /* FRECPS */
9108         case 0x3f: /* FRSQRTS */
9109         case 0x5d: /* FACGE */
9110         case 0x7d: /* FACGT */
9111         case 0x1c: /* FCMEQ */
9112         case 0x5c: /* FCMGE */
9113         case 0x7c: /* FCMGT */
9114         case 0x7a: /* FABD */
9115             break;
9116         default:
9117             unallocated_encoding(s);
9118             return;
9119         }
9120 
9121         if (!fp_access_check(s)) {
9122             return;
9123         }
9124 
9125         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
9126         return;
9127     }
9128 
9129     switch (opcode) {
9130     case 0x1: /* SQADD, UQADD */
9131     case 0x5: /* SQSUB, UQSUB */
9132     case 0x9: /* SQSHL, UQSHL */
9133     case 0xb: /* SQRSHL, UQRSHL */
9134         break;
9135     case 0x8: /* SSHL, USHL */
9136     case 0xa: /* SRSHL, URSHL */
9137     case 0x6: /* CMGT, CMHI */
9138     case 0x7: /* CMGE, CMHS */
9139     case 0x11: /* CMTST, CMEQ */
9140     case 0x10: /* ADD, SUB (vector) */
9141         if (size != 3) {
9142             unallocated_encoding(s);
9143             return;
9144         }
9145         break;
9146     case 0x16: /* SQDMULH, SQRDMULH (vector) */
9147         if (size != 1 && size != 2) {
9148             unallocated_encoding(s);
9149             return;
9150         }
9151         break;
9152     default:
9153         unallocated_encoding(s);
9154         return;
9155     }
9156 
9157     if (!fp_access_check(s)) {
9158         return;
9159     }
9160 
9161     tcg_rd = tcg_temp_new_i64();
9162 
9163     if (size == 3) {
9164         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9165         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
9166 
9167         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
9168     } else {
9169         /* Do a single operation on the lowest element in the vector.
9170          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
9171          * no side effects for all these operations.
9172          * OPTME: special-purpose helpers would avoid doing some
9173          * unnecessary work in the helper for the 8 and 16 bit cases.
9174          */
9175         NeonGenTwoOpEnvFn *genenvfn;
9176         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9177         TCGv_i32 tcg_rm = tcg_temp_new_i32();
9178         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
9179 
9180         read_vec_element_i32(s, tcg_rn, rn, 0, size);
9181         read_vec_element_i32(s, tcg_rm, rm, 0, size);
9182 
9183         switch (opcode) {
9184         case 0x1: /* SQADD, UQADD */
9185         {
9186             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9187                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9188                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9189                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9190             };
9191             genenvfn = fns[size][u];
9192             break;
9193         }
9194         case 0x5: /* SQSUB, UQSUB */
9195         {
9196             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9197                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9198                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9199                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9200             };
9201             genenvfn = fns[size][u];
9202             break;
9203         }
9204         case 0x9: /* SQSHL, UQSHL */
9205         {
9206             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9207                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9208                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9209                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9210             };
9211             genenvfn = fns[size][u];
9212             break;
9213         }
9214         case 0xb: /* SQRSHL, UQRSHL */
9215         {
9216             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9217                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9218                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9219                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9220             };
9221             genenvfn = fns[size][u];
9222             break;
9223         }
9224         case 0x16: /* SQDMULH, SQRDMULH */
9225         {
9226             static NeonGenTwoOpEnvFn * const fns[2][2] = {
9227                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9228                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9229             };
9230             assert(size == 1 || size == 2);
9231             genenvfn = fns[size - 1][u];
9232             break;
9233         }
9234         default:
9235             g_assert_not_reached();
9236         }
9237 
9238         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
9239         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
9240     }
9241 
9242     write_fp_dreg(s, rd, tcg_rd);
9243 }
9244 
9245 /* AdvSIMD scalar three same FP16
9246  *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
9247  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9248  * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
9249  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9250  * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9251  * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9252  */
9253 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9254                                                   uint32_t insn)
9255 {
9256     int rd = extract32(insn, 0, 5);
9257     int rn = extract32(insn, 5, 5);
9258     int opcode = extract32(insn, 11, 3);
9259     int rm = extract32(insn, 16, 5);
9260     bool u = extract32(insn, 29, 1);
9261     bool a = extract32(insn, 23, 1);
9262     int fpopcode = opcode | (a << 3) | (u << 4);
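         /* e.g. FABD: opcode 2, a = 1, U = 1 gives fpopcode 0x1a */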
9263     TCGv_ptr fpst;
9264     TCGv_i32 tcg_op1;
9265     TCGv_i32 tcg_op2;
9266     TCGv_i32 tcg_res;
9267 
9268     switch (fpopcode) {
9269     case 0x03: /* FMULX */
9270     case 0x04: /* FCMEQ (reg) */
9271     case 0x07: /* FRECPS */
9272     case 0x0f: /* FRSQRTS */
9273     case 0x14: /* FCMGE (reg) */
9274     case 0x15: /* FACGE */
9275     case 0x1a: /* FABD */
9276     case 0x1c: /* FCMGT (reg) */
9277     case 0x1d: /* FACGT */
9278         break;
9279     default:
9280         unallocated_encoding(s);
9281         return;
9282     }
9283 
9284     if (!dc_isar_feature(aa64_fp16, s)) {
9285         unallocated_encoding(s);
             return;
9286     }
9287 
9288     if (!fp_access_check(s)) {
9289         return;
9290     }
9291 
9292     fpst = fpstatus_ptr(FPST_FPCR_F16);
9293 
9294     tcg_op1 = read_fp_hreg(s, rn);
9295     tcg_op2 = read_fp_hreg(s, rm);
9296     tcg_res = tcg_temp_new_i32();
9297 
9298     switch (fpopcode) {
9299     case 0x03: /* FMULX */
9300         gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9301         break;
9302     case 0x04: /* FCMEQ (reg) */
9303         gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9304         break;
9305     case 0x07: /* FRECPS */
9306         gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9307         break;
9308     case 0x0f: /* FRSQRTS */
9309         gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9310         break;
9311     case 0x14: /* FCMGE (reg) */
9312         gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9313         break;
9314     case 0x15: /* FACGE */
9315         gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9316         break;
9317     case 0x1a: /* FABD */
9318         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9319         tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9320         break;
9321     case 0x1c: /* FCMGT (reg) */
9322         gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9323         break;
9324     case 0x1d: /* FACGT */
9325         gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9326         break;
9327     default:
9328         g_assert_not_reached();
9329     }
9330 
9331     write_fp_sreg(s, rd, tcg_res);
9332 }
9333 
9334 /* AdvSIMD scalar three same extra
9335  *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
9336  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9337  * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
9338  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9339  */
9340 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9341                                                    uint32_t insn)
9342 {
9343     int rd = extract32(insn, 0, 5);
9344     int rn = extract32(insn, 5, 5);
9345     int opcode = extract32(insn, 11, 4);
9346     int rm = extract32(insn, 16, 5);
9347     int size = extract32(insn, 22, 2);
9348     bool u = extract32(insn, 29, 1);
9349     TCGv_i32 ele1, ele2, ele3;
9350     TCGv_i64 res;
9351     bool feature;
9352 
9353     switch (u * 16 + opcode) {
9354     case 0x10: /* SQRDMLAH (vector) */
9355     case 0x11: /* SQRDMLSH (vector) */
9356         if (size != 1 && size != 2) {
9357             unallocated_encoding(s);
9358             return;
9359         }
9360         feature = dc_isar_feature(aa64_rdm, s);
9361         break;
9362     default:
9363         unallocated_encoding(s);
9364         return;
9365     }
9366     if (!feature) {
9367         unallocated_encoding(s);
9368         return;
9369     }
9370     if (!fp_access_check(s)) {
9371         return;
9372     }
9373 
9374     /* Do a single operation on the lowest element in the vector.
9375      * We use the standard Neon helpers and rely on 0 OP 0 == 0
9376      * with no side effects for all these operations.
9377      * OPTME: special-purpose helpers would avoid doing some
9378      * unnecessary work in the helper for the 16 bit cases.
9379      */
9380     ele1 = tcg_temp_new_i32();
9381     ele2 = tcg_temp_new_i32();
9382     ele3 = tcg_temp_new_i32();
9383 
9384     read_vec_element_i32(s, ele1, rn, 0, size);
9385     read_vec_element_i32(s, ele2, rm, 0, size);
9386     read_vec_element_i32(s, ele3, rd, 0, size);
9387 
9388     switch (opcode) {
9389     case 0x0: /* SQRDMLAH */
9390         if (size == 1) {
9391             gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
9392         } else {
9393             gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
9394         }
9395         break;
9396     case 0x1: /* SQRDMLSH */
9397         if (size == 1) {
9398             gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
9399         } else {
9400             gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
9401         }
9402         break;
9403     default:
9404         g_assert_not_reached();
9405     }
9406 
9407     res = tcg_temp_new_i64();
9408     tcg_gen_extu_i32_i64(res, ele3);
9409     write_fp_dreg(s, rd, res);
9410 }
9411 
9412 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9413                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9414                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9415 {
9416     /* Handle 64->64 opcodes which are shared between the scalar and
9417      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9418      * is valid in either group and also the double-precision fp ops.
9419      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
9420      * requires them.
9421      */
9422     TCGCond cond;
9423 
9424     switch (opcode) {
9425     case 0x4: /* CLS, CLZ */
9426         if (u) {
9427             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9428         } else {
9429             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9430         }
9431         break;
9432     case 0x5: /* NOT */
9433         /* This opcode is shared with CNT and RBIT but we have earlier
9434          * enforced that size == 3 if and only if this is the NOT insn.
9435          */
9436         tcg_gen_not_i64(tcg_rd, tcg_rn);
9437         break;
9438     case 0x7: /* SQABS, SQNEG */
9439         if (u) {
9440             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
9441         } else {
9442             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
9443         }
9444         break;
9445     case 0xa: /* CMLT */
9446         /* 64 bit integer comparison against zero, result is
9447          * test ? (2^64 - 1) : 0. We implement this using setcond (test)
9448          * and then negating, as in handle_3same_64() above.
9449          */
9450         cond = TCG_COND_LT;
9451     do_cmop:
9452         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
9453         tcg_gen_neg_i64(tcg_rd, tcg_rd);
9454         break;
9455     case 0x8: /* CMGT, CMGE */
9456         cond = u ? TCG_COND_GE : TCG_COND_GT;
9457         goto do_cmop;
9458     case 0x9: /* CMEQ, CMLE */
9459         cond = u ? TCG_COND_LE : TCG_COND_EQ;
9460         goto do_cmop;
9461     case 0xb: /* ABS, NEG */
9462         if (u) {
9463             tcg_gen_neg_i64(tcg_rd, tcg_rn);
9464         } else {
9465             tcg_gen_abs_i64(tcg_rd, tcg_rn);
9466         }
9467         break;
9468     case 0x2f: /* FABS */
9469         gen_helper_vfp_absd(tcg_rd, tcg_rn);
9470         break;
9471     case 0x6f: /* FNEG */
9472         gen_helper_vfp_negd(tcg_rd, tcg_rn);
9473         break;
9474     case 0x7f: /* FSQRT */
9475         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
9476         break;
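     /* The FCVT* variants below differ only in rounding mode, which the
      * caller has already installed via tcg_rmode.
      */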
9477     case 0x1a: /* FCVTNS */
9478     case 0x1b: /* FCVTMS */
9479     case 0x1c: /* FCVTAS */
9480     case 0x3a: /* FCVTPS */
9481     case 0x3b: /* FCVTZS */
9482         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9483         break;
9484     case 0x5a: /* FCVTNU */
9485     case 0x5b: /* FCVTMU */
9486     case 0x5c: /* FCVTAU */
9487     case 0x7a: /* FCVTPU */
9488     case 0x7b: /* FCVTZU */
9489         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9490         break;
9491     case 0x18: /* FRINTN */
9492     case 0x19: /* FRINTM */
9493     case 0x38: /* FRINTP */
9494     case 0x39: /* FRINTZ */
9495     case 0x58: /* FRINTA */
9496     case 0x79: /* FRINTI */
9497         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
9498         break;
9499     case 0x59: /* FRINTX */
9500         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
9501         break;
9502     case 0x1e: /* FRINT32Z */
9503     case 0x5e: /* FRINT32X */
9504         gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
9505         break;
9506     case 0x1f: /* FRINT64Z */
9507     case 0x5f: /* FRINT64X */
9508         gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
9509         break;
9510     default:
9511         g_assert_not_reached();
9512     }
9513 }
9514 
9515 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
9516                                    bool is_scalar, bool is_u, bool is_q,
9517                                    int size, int rn, int rd)
9518 {
9519     bool is_double = (size == MO_64);
9520     TCGv_ptr fpst;
9521 
9522     if (!fp_access_check(s)) {
9523         return;
9524     }
9525 
9526     fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9527 
9528     if (is_double) {
9529         TCGv_i64 tcg_op = tcg_temp_new_i64();
9530         TCGv_i64 tcg_zero = tcg_constant_i64(0);
9531         TCGv_i64 tcg_res = tcg_temp_new_i64();
9532         NeonGenTwoDoubleOpFn *genfn;
9533         bool swap = false;
9534         int pass;
9535 
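         /* FCMLT/FCMLE (zero) are implemented by swapping the operands
          * of the corresponding GT/GE helper: x < 0 iff 0 > x.
          */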
9536         switch (opcode) {
9537         case 0x2e: /* FCMLT (zero) */
9538             swap = true;
9539             /* fall through */
9540         case 0x2c: /* FCMGT (zero) */
9541             genfn = gen_helper_neon_cgt_f64;
9542             break;
9543         case 0x2d: /* FCMEQ (zero) */
9544             genfn = gen_helper_neon_ceq_f64;
9545             break;
9546         case 0x6d: /* FCMLE (zero) */
9547             swap = true;
9548             /* fall through */
9549         case 0x6c: /* FCMGE (zero) */
9550             genfn = gen_helper_neon_cge_f64;
9551             break;
9552         default:
9553             g_assert_not_reached();
9554         }
9555 
9556         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9557             read_vec_element(s, tcg_op, rn, pass, MO_64);
9558             if (swap) {
9559                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9560             } else {
9561                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9562             }
9563             write_vec_element(s, tcg_res, rd, pass, MO_64);
9564         }
9565 
9566         clear_vec_high(s, !is_scalar, rd);
9567     } else {
9568         TCGv_i32 tcg_op = tcg_temp_new_i32();
9569         TCGv_i32 tcg_zero = tcg_constant_i32(0);
9570         TCGv_i32 tcg_res = tcg_temp_new_i32();
9571         NeonGenTwoSingleOpFn *genfn;
9572         bool swap = false;
9573         int pass, maxpasses;
9574 
9575         if (size == MO_16) {
9576             switch (opcode) {
9577             case 0x2e: /* FCMLT (zero) */
9578                 swap = true;
9579                 /* fall through */
9580             case 0x2c: /* FCMGT (zero) */
9581                 genfn = gen_helper_advsimd_cgt_f16;
9582                 break;
9583             case 0x2d: /* FCMEQ (zero) */
9584                 genfn = gen_helper_advsimd_ceq_f16;
9585                 break;
9586             case 0x6d: /* FCMLE (zero) */
9587                 swap = true;
9588                 /* fall through */
9589             case 0x6c: /* FCMGE (zero) */
9590                 genfn = gen_helper_advsimd_cge_f16;
9591                 break;
9592             default:
9593                 g_assert_not_reached();
9594             }
9595         } else {
9596             switch (opcode) {
9597             case 0x2e: /* FCMLT (zero) */
9598                 swap = true;
9599                 /* fall through */
9600             case 0x2c: /* FCMGT (zero) */
9601                 genfn = gen_helper_neon_cgt_f32;
9602                 break;
9603             case 0x2d: /* FCMEQ (zero) */
9604                 genfn = gen_helper_neon_ceq_f32;
9605                 break;
9606             case 0x6d: /* FCMLE (zero) */
9607                 swap = true;
9608                 /* fall through */
9609             case 0x6c: /* FCMGE (zero) */
9610                 genfn = gen_helper_neon_cge_f32;
9611                 break;
9612             default:
9613                 g_assert_not_reached();
9614             }
9615         }
9616 
9617         if (is_scalar) {
9618             maxpasses = 1;
9619         } else {
9620             int vector_size = 8 << is_q;
9621             maxpasses = vector_size >> size;
9622         }
9623 
9624         for (pass = 0; pass < maxpasses; pass++) {
9625             read_vec_element_i32(s, tcg_op, rn, pass, size);
9626             if (swap) {
9627                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9628             } else {
9629                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9630             }
9631             if (is_scalar) {
9632                 write_fp_sreg(s, rd, tcg_res);
9633             } else {
9634                 write_vec_element_i32(s, tcg_res, rd, pass, size);
9635             }
9636         }
9637 
9638         if (!is_scalar) {
9639             clear_vec_high(s, is_q, rd);
9640         }
9641     }
9642 }
9643 
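     /* Handle the 2-reg-misc reciprocal estimate ops URECPE, FRECPE, FRECPX
      * and FRSQRTE, each implemented as a helper call per element.
      */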
9644 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
9645                                     bool is_scalar, bool is_u, bool is_q,
9646                                     int size, int rn, int rd)
9647 {
9648     bool is_double = (size == 3);
9649     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9650 
9651     if (is_double) {
9652         TCGv_i64 tcg_op = tcg_temp_new_i64();
9653         TCGv_i64 tcg_res = tcg_temp_new_i64();
9654         int pass;
9655 
9656         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9657             read_vec_element(s, tcg_op, rn, pass, MO_64);
9658             switch (opcode) {
9659             case 0x3d: /* FRECPE */
9660                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
9661                 break;
9662             case 0x3f: /* FRECPX */
9663                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
9664                 break;
9665             case 0x7d: /* FRSQRTE */
9666                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
9667                 break;
9668             default:
9669                 g_assert_not_reached();
9670             }
9671             write_vec_element(s, tcg_res, rd, pass, MO_64);
9672         }
9673         clear_vec_high(s, !is_scalar, rd);
9674     } else {
9675         TCGv_i32 tcg_op = tcg_temp_new_i32();
9676         TCGv_i32 tcg_res = tcg_temp_new_i32();
9677         int pass, maxpasses;
9678 
9679         if (is_scalar) {
9680             maxpasses = 1;
9681         } else {
9682             maxpasses = is_q ? 4 : 2;
9683         }
9684 
9685         for (pass = 0; pass < maxpasses; pass++) {
9686             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
9687 
9688             switch (opcode) {
9689             case 0x3c: /* URECPE */
9690                 gen_helper_recpe_u32(tcg_res, tcg_op);
9691                 break;
9692             case 0x3d: /* FRECPE */
9693                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
9694                 break;
9695             case 0x3f: /* FRECPX */
9696                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
9697                 break;
9698             case 0x7d: /* FRSQRTE */
9699                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
9700                 break;
9701             default:
9702                 g_assert_not_reached();
9703             }
9704 
9705             if (is_scalar) {
9706                 write_fp_sreg(s, rd, tcg_res);
9707             } else {
9708                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9709             }
9710         }
9711         if (!is_scalar) {
9712             clear_vec_high(s, is_q, rd);
9713         }
9714     }
9715 }
9716 
9717 static void handle_2misc_narrow(DisasContext *s, bool scalar,
9718                                 int opcode, bool u, bool is_q,
9719                                 int size, int rn, int rd)
9720 {
9721     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
9722      * in the source becomes a size element in the destination).
9723      */
9724     int pass;
9725     TCGv_i32 tcg_res[2];
9726     int destelt = is_q ? 2 : 0;
9727     int passes = scalar ? 1 : 2;
9728 
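         /* For a scalar narrow, only pass 0 computes a real result;
          * tcg_res[1] stays zero so the write loop below clears the
          * adjacent element.
          */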
9729     if (scalar) {
9730         tcg_res[1] = tcg_constant_i32(0);
9731     }
9732 
9733     for (pass = 0; pass < passes; pass++) {
9734         TCGv_i64 tcg_op = tcg_temp_new_i64();
9735         NeonGenNarrowFn *genfn = NULL;
9736         NeonGenNarrowEnvFn *genenvfn = NULL;
9737 
9738         if (scalar) {
9739             read_vec_element(s, tcg_op, rn, pass, size + 1);
9740         } else {
9741             read_vec_element(s, tcg_op, rn, pass, MO_64);
9742         }
9743         tcg_res[pass] = tcg_temp_new_i32();
9744 
9745         switch (opcode) {
9746         case 0x12: /* XTN, SQXTUN */
9747         {
9748             static NeonGenNarrowFn * const xtnfns[3] = {
9749                 gen_helper_neon_narrow_u8,
9750                 gen_helper_neon_narrow_u16,
9751                 tcg_gen_extrl_i64_i32,
9752             };
9753             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
9754                 gen_helper_neon_unarrow_sat8,
9755                 gen_helper_neon_unarrow_sat16,
9756                 gen_helper_neon_unarrow_sat32,
9757             };
9758             if (u) {
9759                 genenvfn = sqxtunfns[size];
9760             } else {
9761                 genfn = xtnfns[size];
9762             }
9763             break;
9764         }
9765         case 0x14: /* SQXTN, UQXTN */
9766         {
9767             static NeonGenNarrowEnvFn * const fns[3][2] = {
9768                 { gen_helper_neon_narrow_sat_s8,
9769                   gen_helper_neon_narrow_sat_u8 },
9770                 { gen_helper_neon_narrow_sat_s16,
9771                   gen_helper_neon_narrow_sat_u16 },
9772                 { gen_helper_neon_narrow_sat_s32,
9773                   gen_helper_neon_narrow_sat_u32 },
9774             };
9775             genenvfn = fns[size][u];
9776             break;
9777         }
9778         case 0x16: /* FCVTN, FCVTN2 */
9779             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9780             if (size == 2) {
9781                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
9782             } else {
9783                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9784                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9785                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9786                 TCGv_i32 ahp = get_ahp_flag();
9787 
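                      /* Convert each 32-bit half to f16 (honouring FPCR.AHP
                       * via the ahp flag) and pack the two results together.
                       */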
9788                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
9789                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9790                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9791                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9792             }
9793             break;
9794         case 0x36: /* BFCVTN, BFCVTN2 */
9795             {
9796                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9797                 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
9798             }
9799             break;
9800         case 0x56:  /* FCVTXN, FCVTXN2 */
9801             /* 64 bit to 32 bit float conversion
9802              * with von Neumann rounding (round to odd)
9803              */
9804             assert(size == 2);
9805             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
9806             break;
9807         default:
9808             g_assert_not_reached();
9809         }
9810 
9811         if (genfn) {
9812             genfn(tcg_res[pass], tcg_op);
9813         } else if (genenvfn) {
9814             genenvfn(tcg_res[pass], cpu_env, tcg_op);
9815         }
9816     }
9817 
9818     for (pass = 0; pass < 2; pass++) {
9819         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9820     }
9821     clear_vec_high(s, is_q, rd);
9822 }
9823 
9824 /* Remaining saturating accumulating ops: SUQADD and USQADD */
9825 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9826                                 bool is_q, int size, int rn, int rd)
9827 {
9828     bool is_double = (size == 3);
9829 
9830     if (is_double) {
9831         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9832         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9833         int pass;
9834 
9835         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9836             read_vec_element(s, tcg_rn, rn, pass, MO_64);
9837             read_vec_element(s, tcg_rd, rd, pass, MO_64);
9838 
9839             if (is_u) { /* USQADD */
9840                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9841             } else { /* SUQADD */
9842                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9843             }
9844             write_vec_element(s, tcg_rd, rd, pass, MO_64);
9845         }
9846         clear_vec_high(s, !is_scalar, rd);
9847     } else {
9848         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9849         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9850         int pass, maxpasses;
9851 
9852         if (is_scalar) {
9853             maxpasses = 1;
9854         } else {
9855             maxpasses = is_q ? 4 : 2;
9856         }
9857 
9858         for (pass = 0; pass < maxpasses; pass++) {
9859             if (is_scalar) {
9860                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
9861                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
9862             } else {
9863                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9864                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9865             }
9866 
9867             if (is_u) { /* USQADD */
9868                 switch (size) {
9869                 case 0:
9870                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9871                     break;
9872                 case 1:
9873                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9874                     break;
9875                 case 2:
9876                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9877                     break;
9878                 default:
9879                     g_assert_not_reached();
9880                 }
9881             } else { /* SUQADD */
9882                 switch (size) {
9883                 case 0:
9884                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9885                     break;
9886                 case 1:
9887                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9888                     break;
9889                 case 2:
9890                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9891                     break;
9892                 default:
9893                     g_assert_not_reached();
9894                 }
9895             }
9896 
9897             if (is_scalar) {
9898                 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64);
9899             }
9900             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9901         }
9902         clear_vec_high(s, is_q, rd);
9903     }
9904 }
9905 
9906 /* AdvSIMD scalar two reg misc
9907  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9908  * +-----+---+-----------+------+-----------+--------+-----+------+------+
9909  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9910  * +-----+---+-----------+------+-----------+--------+-----+------+------+
9911  */
9912 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
9913 {
9914     int rd = extract32(insn, 0, 5);
9915     int rn = extract32(insn, 5, 5);
9916     int opcode = extract32(insn, 12, 5);
9917     int size = extract32(insn, 22, 2);
9918     bool u = extract32(insn, 29, 1);
9919     bool is_fcvt = false;
9920     int rmode;
9921     TCGv_i32 tcg_rmode;
9922     TCGv_ptr tcg_fpstatus;
9923 
9924     switch (opcode) {
9925     case 0x3: /* USQADD / SUQADD */
9926         if (!fp_access_check(s)) {
9927             return;
9928         }
9929         handle_2misc_satacc(s, true, u, false, size, rn, rd);
9930         return;
9931     case 0x7: /* SQABS / SQNEG */
9932         break;
9933     case 0xa: /* CMLT */
9934         if (u) {
9935             unallocated_encoding(s);
9936             return;
9937         }
9938         /* fall through */
9939     case 0x8: /* CMGT, CMGE */
9940     case 0x9: /* CMEQ, CMLE */
9941     case 0xb: /* ABS, NEG */
9942         if (size != 3) {
9943             unallocated_encoding(s);
9944             return;
9945         }
9946         break;
9947     case 0x12: /* SQXTUN */
9948         if (!u) {
9949             unallocated_encoding(s);
9950             return;
9951         }
9952         /* fall through */
9953     case 0x14: /* SQXTN, UQXTN */
9954         if (size == 3) {
9955             unallocated_encoding(s);
9956             return;
9957         }
9958         if (!fp_access_check(s)) {
9959             return;
9960         }
9961         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
9962         return;
9963     case 0xc ... 0xf:
9964     case 0x16 ... 0x1d:
9965     case 0x1f:
9966         /* Floating point: U, size[1] and opcode indicate operation;
9967          * size[0] indicates single or double precision.
9968          */
9969         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9970         size = extract32(size, 0, 1) ? 3 : 2;
9971         switch (opcode) {
9972         case 0x2c: /* FCMGT (zero) */
9973         case 0x2d: /* FCMEQ (zero) */
9974         case 0x2e: /* FCMLT (zero) */
9975         case 0x6c: /* FCMGE (zero) */
9976         case 0x6d: /* FCMLE (zero) */
9977             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
9978             return;
9979         case 0x1d: /* SCVTF */
9980         case 0x5d: /* UCVTF */
9981         {
9982             bool is_signed = (opcode == 0x1d);
9983             if (!fp_access_check(s)) {
9984                 return;
9985             }
9986             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
9987             return;
9988         }
9989         case 0x3d: /* FRECPE */
9990         case 0x3f: /* FRECPX */
9991         case 0x7d: /* FRSQRTE */
9992             if (!fp_access_check(s)) {
9993                 return;
9994             }
9995             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
9996             return;
9997         case 0x1a: /* FCVTNS */
9998         case 0x1b: /* FCVTMS */
9999         case 0x3a: /* FCVTPS */
10000         case 0x3b: /* FCVTZS */
10001         case 0x5a: /* FCVTNU */
10002         case 0x5b: /* FCVTMU */
10003         case 0x7a: /* FCVTPU */
10004         case 0x7b: /* FCVTZU */
10005             is_fcvt = true;
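                  /* These opcode bits map onto FPROUNDING_* values:
                   * FCVT{N,P,M,Z} use TIEEVEN, POSINF, NEGINF and ZERO.
                   */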
10006             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10007             break;
10008         case 0x1c: /* FCVTAS */
10009         case 0x5c: /* FCVTAU */
10010             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
10011             is_fcvt = true;
10012             rmode = FPROUNDING_TIEAWAY;
10013             break;
10014         case 0x56: /* FCVTXN, FCVTXN2 */
10015             if (size == 2) {
10016                 unallocated_encoding(s);
10017                 return;
10018             }
10019             if (!fp_access_check(s)) {
10020                 return;
10021             }
10022             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
10023             return;
10024         default:
10025             unallocated_encoding(s);
10026             return;
10027         }
10028         break;
10029     default:
10030         unallocated_encoding(s);
10031         return;
10032     }
10033 
10034     if (!fp_access_check(s)) {
10035         return;
10036     }
10037 
10038     if (is_fcvt) {
10039         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
10040         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
10041     } else {
10042         tcg_fpstatus = NULL;
10043         tcg_rmode = NULL;
10044     }
10045 
10046     if (size == 3) {
10047         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
10048         TCGv_i64 tcg_rd = tcg_temp_new_i64();
10049 
10050         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
10051         write_fp_dreg(s, rd, tcg_rd);
10052     } else {
10053         TCGv_i32 tcg_rn = tcg_temp_new_i32();
10054         TCGv_i32 tcg_rd = tcg_temp_new_i32();
10055 
10056         read_vec_element_i32(s, tcg_rn, rn, 0, size);
10057 
10058         switch (opcode) {
10059         case 0x7: /* SQABS, SQNEG */
10060         {
10061             NeonGenOneOpEnvFn *genfn;
10062             static NeonGenOneOpEnvFn * const fns[3][2] = {
10063                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10064                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10065                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
10066             };
10067             genfn = fns[size][u];
10068             genfn(tcg_rd, cpu_env, tcg_rn);
10069             break;
10070         }
10071         case 0x1a: /* FCVTNS */
10072         case 0x1b: /* FCVTMS */
10073         case 0x1c: /* FCVTAS */
10074         case 0x3a: /* FCVTPS */
10075         case 0x3b: /* FCVTZS */
10076             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10077                                  tcg_fpstatus);
10078             break;
10079         case 0x5a: /* FCVTNU */
10080         case 0x5b: /* FCVTMU */
10081         case 0x5c: /* FCVTAU */
10082         case 0x7a: /* FCVTPU */
10083         case 0x7b: /* FCVTZU */
10084             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10085                                  tcg_fpstatus);
10086             break;
10087         default:
10088             g_assert_not_reached();
10089         }
10090 
10091         write_fp_sreg(s, rd, tcg_rd);
10092     }
10093 
10094     if (is_fcvt) {
10095         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
10096     }
10097 }
10098 
10099 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
10100 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
10101                                  int immh, int immb, int opcode, int rn, int rd)
10102 {
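          /* Decode esize and shift from immh:immb; for these right shifts
           * the shift count is (2 * esize) - immh:immb, i.e. 1..esize.
           */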
10103     int size = 32 - clz32(immh) - 1;
10104     int immhb = immh << 3 | immb;
10105     int shift = 2 * (8 << size) - immhb;
10106     GVecGen2iFn *gvec_fn;
10107 
10108     if (extract32(immh, 3, 1) && !is_q) {
10109         unallocated_encoding(s);
10110         return;
10111     }
10112     tcg_debug_assert(size <= 3);
10113 
10114     if (!fp_access_check(s)) {
10115         return;
10116     }
10117 
10118     switch (opcode) {
10119     case 0x02: /* SSRA / USRA (accumulate) */
10120         gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
10121         break;
10122 
10123     case 0x08: /* SRI */
10124         gvec_fn = gen_gvec_sri;
10125         break;
10126 
10127     case 0x00: /* SSHR / USHR */
10128         if (is_u) {
10129             if (shift == 8 << size) {
10130                 /* A shift count equal to the element size produces zero. */
10131                 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
10132                                      is_q ? 16 : 8, vec_full_reg_size(s), 0);
10133                 return;
10134             }
10135             gvec_fn = tcg_gen_gvec_shri;
10136         } else {
10137             /* A shift count equal to the element size produces all sign bits. */
10138             if (shift == 8 << size) {
10139                 shift -= 1;
10140             }
10141             gvec_fn = tcg_gen_gvec_sari;
10142         }
10143         break;
10144 
10145     case 0x04: /* SRSHR / URSHR (rounding) */
10146         gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
10147         break;
10148 
10149     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10150         gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
10151         break;
10152 
10153     default:
10154         g_assert_not_reached();
10155     }
10156 
10157     gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
10158 }
10159 
10160 /* SHL/SLI - Vector shift left */
10161 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10162                                  int immh, int immb, int opcode, int rn, int rd)
10163 {
10164     int size = 32 - clz32(immh) - 1;
10165     int immhb = immh << 3 | immb;
10166     int shift = immhb - (8 << size);
10167 
10168     /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10169     assert(size >= 0 && size <= 3);
10170 
10171     if (extract32(immh, 3, 1) && !is_q) {
10172         unallocated_encoding(s);
10173         return;
10174     }
10175 
10176     if (!fp_access_check(s)) {
10177         return;
10178     }
10179 
10180     if (insert) {
10181         gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
10182     } else {
10183         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10184     }
10185 }
10186 
10187 /* USHLL/SHLL - Vector shift left with widening */
10188 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10189                                   int immh, int immb, int opcode, int rn, int rd)
10190 {
10191     int size = 32 - clz32(immh) - 1;
10192     int immhb = immh << 3 | immb;
10193     int shift = immhb - (8 << size);
10194     int dsize = 64;
10195     int esize = 8 << size;
10196     int elements = dsize / esize;
10197     TCGv_i64 tcg_rn = tcg_temp_new_i64();
10198     TCGv_i64 tcg_rd = tcg_temp_new_i64();
10199     int i;
10200 
10201     if (size >= 3) {
10202         unallocated_encoding(s);
10203         return;
10204     }
10205 
10206     if (!fp_access_check(s)) {
10207         return;
10208     }
10209 
10210     /* For the LL variants the store is larger than the load, so if
10211      * rd == rn we would overwrite parts of our input.  Load the whole
10212      * input up front and extract elements with shifts in the main loop.
10213      */
10214     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10215 
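          /* Extract each esize-bit element, extend it to 64 bits (signed
           * for the S forms: "!is_u << 2" selects the SXT variants of
           * ext_and_shift_reg), then shift left and store at double width.
           */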
10216     for (i = 0; i < elements; i++) {
10217         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
10218         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10219         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10220         write_vec_element(s, tcg_rd, rd, i, size + 1);
10221     }
10222 }
10223 
10224 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10225 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10226                                  int immh, int immb, int opcode, int rn, int rd)
10227 {
10228     int immhb = immh << 3 | immb;
10229     int size = 32 - clz32(immh) - 1;
10230     int dsize = 64;
10231     int esize = 8 << size;
10232     int elements = dsize / esize;
10233     int shift = (2 * esize) - immhb;
10234     bool round = extract32(opcode, 0, 1);
10235     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10236     TCGv_i64 tcg_round;
10237     int i;
10238 
10239     if (extract32(immh, 3, 1)) {
10240         unallocated_encoding(s);
10241         return;
10242     }
10243 
10244     if (!fp_access_check(s)) {
10245         return;
10246     }
10247 
10248     tcg_rn = tcg_temp_new_i64();
10249     tcg_rd = tcg_temp_new_i64();
10250     tcg_final = tcg_temp_new_i64();
10251     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10252 
10253     if (round) {
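              /* Round to nearest by adding half of the value shifted out. */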
10254         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
10255     } else {
10256         tcg_round = NULL;
10257     }
10258 
10259     for (i = 0; i < elements; i++) {
10260         read_vec_element(s, tcg_rn, rn, i, size + 1);
10261         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10262                                 false, true, size + 1, shift);
10263 
10264         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10265     }
10266 
10267     if (!is_q) {
10268         write_vec_element(s, tcg_final, rd, 0, MO_64);
10269     } else {
10270         write_vec_element(s, tcg_final, rd, 1, MO_64);
10271     }
10272 
10273     clear_vec_high(s, is_q, rd);
10274 }
10275 
10276 
10277 /* AdvSIMD shift by immediate
10278  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10279  * +---+---+---+-------------+------+------+--------+---+------+------+
10280  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10281  * +---+---+---+-------------+------+------+--------+---+------+------+
10282  */
10283 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10284 {
10285     int rd = extract32(insn, 0, 5);
10286     int rn = extract32(insn, 5, 5);
10287     int opcode = extract32(insn, 11, 5);
10288     int immb = extract32(insn, 16, 3);
10289     int immh = extract32(insn, 19, 4);
10290     bool is_u = extract32(insn, 29, 1);
10291     bool is_q = extract32(insn, 30, 1);
10292 
10293     /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
10294     assert(immh != 0);
10295 
10296     switch (opcode) {
10297     case 0x08: /* SRI */
10298         if (!is_u) {
10299             unallocated_encoding(s);
10300             return;
10301         }
10302         /* fall through */
10303     case 0x00: /* SSHR / USHR */
10304     case 0x02: /* SSRA / USRA (accumulate) */
10305     case 0x04: /* SRSHR / URSHR (rounding) */
10306     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10307         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10308         break;
10309     case 0x0a: /* SHL / SLI */
10310         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10311         break;
10312     case 0x10: /* SHRN */
10313     case 0x11: /* RSHRN / SQRSHRUN */
10314         if (is_u) {
10315             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10316                                    opcode, rn, rd);
10317         } else {
10318             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10319         }
10320         break;
10321     case 0x12: /* SQSHRN / UQSHRN */
10322     case 0x13: /* SQRSHRN / UQRSHRN */
10323         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10324                                opcode, rn, rd);
10325         break;
10326     case 0x14: /* SSHLL / USHLL */
10327         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10328         break;
10329     case 0x1c: /* SCVTF / UCVTF */
10330         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10331                                      opcode, rn, rd);
10332         break;
10333     case 0xc: /* SQSHLU */
10334         if (!is_u) {
10335             unallocated_encoding(s);
10336             return;
10337         }
10338         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10339         break;
10340     case 0xe: /* SQSHL, UQSHL */
10341         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10342         break;
10343     case 0x1f: /* FCVTZS/ FCVTZU */
10344         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10345         return;
10346     default:
10347         unallocated_encoding(s);
10348         return;
10349     }
10350 }
10351 
10352 /* Generate code to do a "long" addition or subtraction, ie one done in
10353  * TCGv_i64 on vector lanes twice the width specified by size.
10354  */
10355 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10356                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10357 {
10358     static NeonGenTwo64OpFn * const fns[3][2] = {
10359         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10360         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10361         { tcg_gen_add_i64, tcg_gen_sub_i64 },
10362     };
10363     NeonGenTwo64OpFn *genfn;
10364     assert(size < 3);
10365 
10366     genfn = fns[size][is_sub];
10367     genfn(tcg_res, tcg_op1, tcg_op2);
10368 }
10369 
10370 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
10371                                 int opcode, int rd, int rn, int rm)
10372 {
10373     /* 3-reg-different widening insns: 64 x 64 -> 128 */
10374     TCGv_i64 tcg_res[2];
10375     int pass, accop;
10376 
10377     tcg_res[0] = tcg_temp_new_i64();
10378     tcg_res[1] = tcg_temp_new_i64();
10379 
10380     /* Does this op do an adding accumulate, a subtracting accumulate,
10381      * or no accumulate at all?
10382      */
10383     switch (opcode) {
10384     case 5:
10385     case 8:
10386     case 9:
10387         accop = 1;
10388         break;
10389     case 10:
10390     case 11:
10391         accop = -1;
10392         break;
10393     default:
10394         accop = 0;
10395         break;
10396     }
10397 
10398     if (accop != 0) {
10399         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
10400         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
10401     }
10402 
10403     /* size == 2 means two 32x32->64 operations; this is worth special
10404      * casing because we can generally handle it inline.
10405      */
10406     if (size == 2) {
10407         for (pass = 0; pass < 2; pass++) {
10408             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10409             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10410             TCGv_i64 tcg_passres;
10411             MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
10412 
10413             int elt = pass + is_q * 2;
10414 
10415             read_vec_element(s, tcg_op1, rn, elt, memop);
10416             read_vec_element(s, tcg_op2, rm, elt, memop);
10417 
10418             if (accop == 0) {
10419                 tcg_passres = tcg_res[pass];
10420             } else {
10421                 tcg_passres = tcg_temp_new_i64();
10422             }
10423 
10424             switch (opcode) {
10425             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10426                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
10427                 break;
10428             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10429                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
10430                 break;
10431             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10432             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10433             {
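                      /* Absolute difference: compute both a - b and b - a,
                       * then use a movcond on a >= b to select whichever
                       * result is non-negative.
                       */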
10434                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
10435                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
10436 
10437                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
10438                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
10439                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
10440                                     tcg_passres,
10441                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
10442                 break;
10443             }
10444             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10445             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10446             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10447                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10448                 break;
10449             case 9: /* SQDMLAL, SQDMLAL2 */
10450             case 11: /* SQDMLSL, SQDMLSL2 */
10451             case 13: /* SQDMULL, SQDMULL2 */
10452                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10453                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10454                                                   tcg_passres, tcg_passres);
10455                 break;
10456             default:
10457                 g_assert_not_reached();
10458             }
10459 
10460             if (opcode == 9 || opcode == 11) {
10461                 /* saturating accumulate ops */
10462                 if (accop < 0) {
10463                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10464                 }
10465                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10466                                                   tcg_res[pass], tcg_passres);
10467             } else if (accop > 0) {
10468                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10469             } else if (accop < 0) {
10470                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10471             }
10472         }
10473     } else {
10474         /* size 0 or 1, generally helper functions */
10475         for (pass = 0; pass < 2; pass++) {
10476             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10477             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10478             TCGv_i64 tcg_passres;
10479             int elt = pass + is_q * 2;
10480 
10481             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
10482             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
10483 
10484             if (accop == 0) {
10485                 tcg_passres = tcg_res[pass];
10486             } else {
10487                 tcg_passres = tcg_temp_new_i64();
10488             }
10489 
10490             switch (opcode) {
10491             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10492             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10493             {
10494                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
10495                 static NeonGenWidenFn * const widenfns[2][2] = {
10496                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10497                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10498                 };
10499                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
10500 
10501                 widenfn(tcg_op2_64, tcg_op2);
10502                 widenfn(tcg_passres, tcg_op1);
10503                 gen_neon_addl(size, (opcode == 2), tcg_passres,
10504                               tcg_passres, tcg_op2_64);
10505                 break;
10506             }
10507             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10508             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10509                 if (size == 0) {
10510                     if (is_u) {
10511                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
10512                     } else {
10513                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
10514                     }
10515                 } else {
10516                     if (is_u) {
10517                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
10518                     } else {
10519                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
10520                     }
10521                 }
10522                 break;
10523             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10524             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10525             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10526                 if (size == 0) {
10527                     if (is_u) {
10528                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
10529                     } else {
10530                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
10531                     }
10532                 } else {
10533                     if (is_u) {
10534                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
10535                     } else {
10536                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10537                     }
10538                 }
10539                 break;
10540             case 9: /* SQDMLAL, SQDMLAL2 */
10541             case 11: /* SQDMLSL, SQDMLSL2 */
10542             case 13: /* SQDMULL, SQDMULL2 */
10543                 assert(size == 1);
10544                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10545                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10546                                                   tcg_passres, tcg_passres);
10547                 break;
10548             default:
10549                 g_assert_not_reached();
10550             }
10551 
10552             if (accop != 0) {
10553                 if (opcode == 9 || opcode == 11) {
10554                     /* saturating accumulate ops */
10555                     if (accop < 0) {
10556                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10557                     }
10558                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10559                                                       tcg_res[pass],
10560                                                       tcg_passres);
10561                 } else {
10562                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
10563                                   tcg_res[pass], tcg_passres);
10564                 }
10565             }
10566         }
10567     }
10568 
10569     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
10570     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
10571 }
10572 
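      /* 3-reg-different "wide" insns: 64 x 128 -> 128, where the second,
       * 64-bit, operand is widened per element before the addition or
       * subtraction (SADDW, UADDW, SSUBW, USUBW and their "2" forms).
       */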
10573 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10574                             int opcode, int rd, int rn, int rm)
10575 {
10576     TCGv_i64 tcg_res[2];
10577     int part = is_q ? 2 : 0;
10578     int pass;
10579 
10580     for (pass = 0; pass < 2; pass++) {
10581         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10582         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10583         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10584         static NeonGenWidenFn * const widenfns[3][2] = {
10585             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10586             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10587             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10588         };
10589         NeonGenWidenFn *widenfn = widenfns[size][is_u];
10590 
10591         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10592         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10593         widenfn(tcg_op2_wide, tcg_op2);
10594         tcg_res[pass] = tcg_temp_new_i64();
10595         gen_neon_addl(size, (opcode == 3),
10596                       tcg_res[pass], tcg_op1, tcg_op2_wide);
10597     }
10598 
10599     for (pass = 0; pass < 2; pass++) {
10600         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10601     }
10602 }
10603 
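      /* Narrow with rounding: add half (1 << 31), then take the high 32
       * bits of the 64-bit input.  Note that this clobbers "in".
       */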
10604 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
10605 {
10606     tcg_gen_addi_i64(in, in, 1U << 31);
10607     tcg_gen_extrh_i64_i32(res, in);
10608 }
10609 
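      /* 3-reg-different narrowing insns: 128 x 128 -> 64, where each result
       * element is the high half of a double-width add or subtract, with
       * optional rounding (ADDHN, RADDHN, SUBHN, RSUBHN and the "2" forms).
       */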
10610 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10611                                  int opcode, int rd, int rn, int rm)
10612 {
10613     TCGv_i32 tcg_res[2];
10614     int part = is_q ? 2 : 0;
10615     int pass;
10616 
10617     for (pass = 0; pass < 2; pass++) {
10618         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10619         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10620         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10621         static NeonGenNarrowFn * const narrowfns[3][2] = {
10622             { gen_helper_neon_narrow_high_u8,
10623               gen_helper_neon_narrow_round_high_u8 },
10624             { gen_helper_neon_narrow_high_u16,
10625               gen_helper_neon_narrow_round_high_u16 },
10626             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10627         };
10628         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10629 
10630         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10631         read_vec_element(s, tcg_op2, rm, pass, MO_64);
10632 
10633         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10634 
10635         tcg_res[pass] = tcg_temp_new_i32();
10636         gennarrow(tcg_res[pass], tcg_wideres);
10637     }
10638 
10639     for (pass = 0; pass < 2; pass++) {
10640         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10641     }
10642     clear_vec_high(s, is_q, rd);
10643 }
10644 
10645 /* AdvSIMD three different
10646  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
10647  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10648  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
10649  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10650  */
10651 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10652 {
10653     /* Instructions in this group fall into three basic classes
10654      * (in each case with the operation working on each element in
10655      * the input vectors):
10656      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10657      *     128 bit input)
10658      * (2) wide 64 x 128 -> 128
10659      * (3) narrowing 128 x 128 -> 64
10660      * Here we do initial decode, catch unallocated cases and
10661      * dispatch to separate functions for each class.
10662      */
10663     int is_q = extract32(insn, 30, 1);
10664     int is_u = extract32(insn, 29, 1);
10665     int size = extract32(insn, 22, 2);
10666     int opcode = extract32(insn, 12, 4);
10667     int rm = extract32(insn, 16, 5);
10668     int rn = extract32(insn, 5, 5);
10669     int rd = extract32(insn, 0, 5);
10670 
10671     switch (opcode) {
10672     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10673     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10674         /* 64 x 128 -> 128 */
10675         if (size == 3) {
10676             unallocated_encoding(s);
10677             return;
10678         }
10679         if (!fp_access_check(s)) {
10680             return;
10681         }
10682         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10683         break;
10684     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10685     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10686         /* 128 x 128 -> 64 */
10687         if (size == 3) {
10688             unallocated_encoding(s);
10689             return;
10690         }
10691         if (!fp_access_check(s)) {
10692             return;
10693         }
10694         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10695         break;
10696     case 14: /* PMULL, PMULL2 */
10697         if (is_u) {
10698             unallocated_encoding(s);
10699             return;
10700         }
10701         switch (size) {
10702         case 0: /* PMULL.P8 */
10703             if (!fp_access_check(s)) {
10704                 return;
10705             }
10706             /* The Q field specifies lo/hi half input for this insn.  */
10707             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10708                              gen_helper_neon_pmull_h);
10709             break;
10710 
10711         case 3: /* PMULL.P64 */
10712             if (!dc_isar_feature(aa64_pmull, s)) {
10713                 unallocated_encoding(s);
10714                 return;
10715             }
10716             if (!fp_access_check(s)) {
10717                 return;
10718             }
10719             /* The Q field specifies lo/hi half input for this insn.  */
10720             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10721                              gen_helper_gvec_pmull_q);
10722             break;
10723 
10724         default:
10725             unallocated_encoding(s);
10726             break;
10727         }
10728         return;
10729     case 9: /* SQDMLAL, SQDMLAL2 */
10730     case 11: /* SQDMLSL, SQDMLSL2 */
10731     case 13: /* SQDMULL, SQDMULL2 */
10732         if (is_u || size == 0) {
10733             unallocated_encoding(s);
10734             return;
10735         }
10736         /* fall through */
10737     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10738     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10739     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10740     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10741     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10742     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10743     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10744         /* 64 x 64 -> 128 */
10745         if (size == 3) {
10746             unallocated_encoding(s);
10747             return;
10748         }
10749         if (!fp_access_check(s)) {
10750             return;
10751         }
10752 
10753         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10754         break;
10755     default:
10756         /* opcode 15 not allocated */
10757         unallocated_encoding(s);
10758         break;
10759     }
10760 }
10761 
10762 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10763 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10764 {
10765     int rd = extract32(insn, 0, 5);
10766     int rn = extract32(insn, 5, 5);
10767     int rm = extract32(insn, 16, 5);
10768     int size = extract32(insn, 22, 2);
10769     bool is_u = extract32(insn, 29, 1);
10770     bool is_q = extract32(insn, 30, 1);
10771 
10772     if (!fp_access_check(s)) {
10773         return;
10774     }
10775 
10776     switch (size + 4 * is_u) {
10777     case 0: /* AND */
10778         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10779         return;
10780     case 1: /* BIC */
10781         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10782         return;
10783     case 2: /* ORR */
10784         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10785         return;
10786     case 3: /* ORN */
10787         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10788         return;
10789     case 4: /* EOR */
10790         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10791         return;
10792 
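          /* BSL, BIT and BIF are all bitwise selects sharing
           * tcg_gen_gvec_bitsel; they differ only in which register
           * supplies the mask and which two supply the data inputs.
           */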
10793     case 5: /* BSL bitwise select */
10794         gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
10795         return;
10796     case 6: /* BIT, bitwise insert if true */
10797         gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
10798         return;
10799     case 7: /* BIF, bitwise insert if false */
10800         gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
10801         return;
10802 
10803     default:
10804         g_assert_not_reached();
10805     }
10806 }
10807 
10808 /* Pairwise op subgroup of C3.6.16.
10809  *
10810  * This is called directly, or from disas_simd_3same_float for the float
10811  * pairwise operations, where the opcode and size are decoded differently.
10812  */
10813 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
10814                                    int size, int rn, int rm, int rd)
10815 {
10816     TCGv_ptr fpst;
10817     int pass;
10818 
10819     /* Floating point operations need fpst */
10820     if (opcode >= 0x58) {
10821         fpst = fpstatus_ptr(FPST_FPCR);
10822     } else {
10823         fpst = NULL;
10824     }
10825 
10826     if (!fp_access_check(s)) {
10827         return;
10828     }
10829 
10830     /* These operations work on the concatenated rm:rn, with each pair of
10831      * adjacent elements being operated on to produce an element in the result.
10832      */
10833     if (size == 3) {
10834         TCGv_i64 tcg_res[2];
10835 
10836         for (pass = 0; pass < 2; pass++) {
10837             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10838             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10839             int passreg = (pass == 0) ? rn : rm;
10840 
10841             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
10842             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
10843             tcg_res[pass] = tcg_temp_new_i64();
10844 
10845             switch (opcode) {
10846             case 0x17: /* ADDP */
10847                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10848                 break;
10849             case 0x58: /* FMAXNMP */
10850                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10851                 break;
10852             case 0x5a: /* FADDP */
10853                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10854                 break;
10855             case 0x5e: /* FMAXP */
10856                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10857                 break;
10858             case 0x78: /* FMINNMP */
10859                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10860                 break;
10861             case 0x7e: /* FMINP */
10862                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10863                 break;
10864             default:
10865                 g_assert_not_reached();
10866             }
10867         }
10868 
10869         for (pass = 0; pass < 2; pass++) {
10870             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10871         }
10872     } else {
10873         int maxpass = is_q ? 4 : 2;
10874         TCGv_i32 tcg_res[4];
10875 
10876         for (pass = 0; pass < maxpass; pass++) {
10877             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10878             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10879             NeonGenTwoOpFn *genfn = NULL;
10880             int passreg = pass < (maxpass / 2) ? rn : rm;
10881             int passelt = (is_q && (pass & 1)) ? 2 : 0;
10882 
10883             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10884             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10885             tcg_res[pass] = tcg_temp_new_i32();
10886 
10887             switch (opcode) {
10888             case 0x17: /* ADDP */
10889             {
10890                 static NeonGenTwoOpFn * const fns[3] = {
10891                     gen_helper_neon_padd_u8,
10892                     gen_helper_neon_padd_u16,
10893                     tcg_gen_add_i32,
10894                 };
10895                 genfn = fns[size];
10896                 break;
10897             }
10898             case 0x14: /* SMAXP, UMAXP */
10899             {
10900                 static NeonGenTwoOpFn * const fns[3][2] = {
10901                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10902                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10903                     { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10904                 };
10905                 genfn = fns[size][u];
10906                 break;
10907             }
10908             case 0x15: /* SMINP, UMINP */
10909             {
10910                 static NeonGenTwoOpFn * const fns[3][2] = {
10911                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10912                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10913                     { tcg_gen_smin_i32, tcg_gen_umin_i32 },
10914                 };
10915                 genfn = fns[size][u];
10916                 break;
10917             }
10918             /* The FP operations are all on single floats (32 bit) */
10919             case 0x58: /* FMAXNMP */
10920                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10921                 break;
10922             case 0x5a: /* FADDP */
10923                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10924                 break;
10925             case 0x5e: /* FMAXP */
10926                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10927                 break;
10928             case 0x78: /* FMINNMP */
10929                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10930                 break;
10931             case 0x7e: /* FMINP */
10932                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10933                 break;
10934             default:
10935                 g_assert_not_reached();
10936             }
10937 
10938             /* FP ops above were emitted directly; integer ops call genfn now */
10939             if (genfn) {
10940                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
10941             }
10942         }
10943 
10944         for (pass = 0; pass < maxpass; pass++) {
10945             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10946         }
10947         clear_vec_high(s, is_q, rd);
10948     }
10949 }
10950 
10951 /* Floating point op subgroup of C3.6.16. */
10952 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
10953 {
10954     /* For floating point ops, the U, size[1] and opcode bits
10955      * together indicate the operation. size[0] indicates single
10956      * or double.
10957      */
10958     int fpopcode = extract32(insn, 11, 5)
10959         | (extract32(insn, 23, 1) << 5)
10960         | (extract32(insn, 29, 1) << 6);
10961     int is_q = extract32(insn, 30, 1);
10962     int size = extract32(insn, 22, 1);
10963     int rm = extract32(insn, 16, 5);
10964     int rn = extract32(insn, 5, 5);
10965     int rd = extract32(insn, 0, 5);
10966 
10967     int datasize = is_q ? 128 : 64;
10968     int esize = 32 << size;
10969     int elements = datasize / esize;
10970 
10971     if (size == 1 && !is_q) {
10972         unallocated_encoding(s);
10973         return;
10974     }
10975 
10976     switch (fpopcode) {
10977     case 0x58: /* FMAXNMP */
10978     case 0x5a: /* FADDP */
10979     case 0x5e: /* FMAXP */
10980     case 0x78: /* FMINNMP */
10981     case 0x7e: /* FMINP */
10982         if (size && !is_q) {
10983             unallocated_encoding(s);
10984             return;
10985         }
10986         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
10987                                rn, rm, rd);
10988         return;
10989     case 0x1b: /* FMULX */
10990     case 0x1f: /* FRECPS */
10991     case 0x3f: /* FRSQRTS */
10992     case 0x5d: /* FACGE */
10993     case 0x7d: /* FACGT */
10994     case 0x19: /* FMLA */
10995     case 0x39: /* FMLS */
10996     case 0x18: /* FMAXNM */
10997     case 0x1a: /* FADD */
10998     case 0x1c: /* FCMEQ */
10999     case 0x1e: /* FMAX */
11000     case 0x38: /* FMINNM */
11001     case 0x3a: /* FSUB */
11002     case 0x3e: /* FMIN */
11003     case 0x5b: /* FMUL */
11004     case 0x5c: /* FCMGE */
11005     case 0x5f: /* FDIV */
11006     case 0x7a: /* FABD */
11007     case 0x7c: /* FCMGT */
11008         if (!fp_access_check(s)) {
11009             return;
11010         }
11011         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
11012         return;
11013 
11014     case 0x1d: /* FMLAL  */
11015     case 0x3d: /* FMLSL  */
11016     case 0x59: /* FMLAL2 */
11017     case 0x79: /* FMLSL2 */
11018         if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
11019             unallocated_encoding(s);
11020             return;
11021         }
11022         if (fp_access_check(s)) {
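                  /* The helper "data" field encodes the variant: bit 0 is
                   * set for the subtracting FMLSL forms and bit 1 for the
                   * FMLAL2/FMLSL2 forms that read the high input halves.
                   */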
11023             int is_s = extract32(insn, 23, 1);
11024             int is_2 = extract32(insn, 29, 1);
11025             int data = (is_2 << 1) | is_s;
11026             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
11027                                vec_full_reg_offset(s, rn),
11028                                vec_full_reg_offset(s, rm), cpu_env,
11029                                is_q ? 16 : 8, vec_full_reg_size(s),
11030                                data, gen_helper_gvec_fmlal_a64);
11031         }
11032         return;
11033 
11034     default:
11035         unallocated_encoding(s);
11036         return;
11037     }
11038 }
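      /*
       * Worked example of the fpopcode packing above (an illustrative
       * note, not part of the original file):
       * fpopcode = opcode | size[1] << 5 | U << 6, so one base opcode
       * yields a family of operations, e.g. for opcode = 0x1a:
       *
       *     U = 0, size[1] = 0  ->  0x1a  FADD
       *     U = 0, size[1] = 1  ->  0x3a  FSUB
       *     U = 1, size[1] = 1  ->  0x7a  FABD
       *
       * size[0] is decoded separately and selects single or double
       * precision.
       */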
11039 
11040 /* Integer op subgroup of C3.6.16. */
11041 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
11042 {
11043     int is_q = extract32(insn, 30, 1);
11044     int u = extract32(insn, 29, 1);
11045     int size = extract32(insn, 22, 2);
11046     int opcode = extract32(insn, 11, 5);
11047     int rm = extract32(insn, 16, 5);
11048     int rn = extract32(insn, 5, 5);
11049     int rd = extract32(insn, 0, 5);
11050     int pass;
11051     TCGCond cond;
11052 
11053     switch (opcode) {
11054     case 0x13: /* MUL, PMUL */
11055         if (u && size != 0) {
11056             unallocated_encoding(s);
11057             return;
11058         }
11059         /* fall through */
11060     case 0x0: /* SHADD, UHADD */
11061     case 0x2: /* SRHADD, URHADD */
11062     case 0x4: /* SHSUB, UHSUB */
11063     case 0xc: /* SMAX, UMAX */
11064     case 0xd: /* SMIN, UMIN */
11065     case 0xe: /* SABD, UABD */
11066     case 0xf: /* SABA, UABA */
11067     case 0x12: /* MLA, MLS */
11068         if (size == 3) {
11069             unallocated_encoding(s);
11070             return;
11071         }
11072         break;
11073     case 0x16: /* SQDMULH, SQRDMULH */
11074         if (size == 0 || size == 3) {
11075             unallocated_encoding(s);
11076             return;
11077         }
11078         break;
11079     default:
11080         if (size == 3 && !is_q) {
11081             unallocated_encoding(s);
11082             return;
11083         }
11084         break;
11085     }
11086 
11087     if (!fp_access_check(s)) {
11088         return;
11089     }
11090 
11091     switch (opcode) {
11092     case 0x01: /* SQADD, UQADD */
11093         if (u) {
11094             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
11095         } else {
11096             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
11097         }
11098         return;
11099     case 0x05: /* SQSUB, UQSUB */
11100         if (u) {
11101             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
11102         } else {
11103             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
11104         }
11105         return;
11106     case 0x08: /* SSHL, USHL */
11107         if (u) {
11108             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
11109         } else {
11110             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
11111         }
11112         return;
11113     case 0x0c: /* SMAX, UMAX */
11114         if (u) {
11115             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
11116         } else {
11117             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
11118         }
11119         return;
11120     case 0x0d: /* SMIN, UMIN */
11121         if (u) {
11122             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
11123         } else {
11124             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
11125         }
11126         return;
11127     case 0xe: /* SABD, UABD */
11128         if (u) {
11129             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
11130         } else {
11131             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
11132         }
11133         return;
11134     case 0xf: /* SABA, UABA */
11135         if (u) {
11136             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
11137         } else {
11138             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
11139         }
11140         return;
11141     case 0x10: /* ADD, SUB */
11142         if (u) {
11143             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
11144         } else {
11145             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
11146         }
11147         return;
11148     case 0x13: /* MUL, PMUL */
11149         if (!u) { /* MUL */
11150             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
11151         } else {  /* PMUL */
11152             gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
11153         }
11154         return;
11155     case 0x12: /* MLA, MLS */
11156         if (u) {
11157             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
11158         } else {
11159             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
11160         }
11161         return;
11162     case 0x16: /* SQDMULH, SQRDMULH */
11163         {
11164             static gen_helper_gvec_3_ptr * const fns[2][2] = {
11165                 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
11166                 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
11167             };
11168             gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
11169         }
11170         return;
11171     case 0x11:
11172         if (!u) { /* CMTST */
11173             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
11174             return;
11175         }
11176         /* else CMEQ */
11177         cond = TCG_COND_EQ;
11178         goto do_gvec_cmp;
11179     case 0x06: /* CMGT, CMHI */
11180         cond = u ? TCG_COND_GTU : TCG_COND_GT;
11181         goto do_gvec_cmp;
11182     case 0x07: /* CMGE, CMHS */
11183         cond = u ? TCG_COND_GEU : TCG_COND_GE;
11184     do_gvec_cmp:
11185         tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
11186                          vec_full_reg_offset(s, rn),
11187                          vec_full_reg_offset(s, rm),
11188                          is_q ? 16 : 8, vec_full_reg_size(s));
11189         return;
11190     }
11191 
11192     if (size == 3) {
11193         assert(is_q);
11194         for (pass = 0; pass < 2; pass++) {
11195             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11196             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11197             TCGv_i64 tcg_res = tcg_temp_new_i64();
11198 
11199             read_vec_element(s, tcg_op1, rn, pass, MO_64);
11200             read_vec_element(s, tcg_op2, rm, pass, MO_64);
11201 
11202             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
11203 
11204             write_vec_element(s, tcg_res, rd, pass, MO_64);
11205         }
11206     } else {
11207         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11208             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11209             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11210             TCGv_i32 tcg_res = tcg_temp_new_i32();
11211             NeonGenTwoOpFn *genfn = NULL;
11212             NeonGenTwoOpEnvFn *genenvfn = NULL;
11213 
11214             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
11215             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
11216 
11217             switch (opcode) {
11218             case 0x0: /* SHADD, UHADD */
11219             {
11220                 static NeonGenTwoOpFn * const fns[3][2] = {
11221                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
11222                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
11223                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
11224                 };
11225                 genfn = fns[size][u];
11226                 break;
11227             }
11228             case 0x2: /* SRHADD, URHADD */
11229             {
11230                 static NeonGenTwoOpFn * const fns[3][2] = {
11231                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
11232                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
11233                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
11234                 };
11235                 genfn = fns[size][u];
11236                 break;
11237             }
11238             case 0x4: /* SHSUB, UHSUB */
11239             {
11240                 static NeonGenTwoOpFn * const fns[3][2] = {
11241                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
11242                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
11243                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
11244                 };
11245                 genfn = fns[size][u];
11246                 break;
11247             }
11248             case 0x9: /* SQSHL, UQSHL */
11249             {
11250                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11251                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
11252                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
11253                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
11254                 };
11255                 genenvfn = fns[size][u];
11256                 break;
11257             }
11258             case 0xa: /* SRSHL, URSHL */
11259             {
11260                 static NeonGenTwoOpFn * const fns[3][2] = {
11261                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
11262                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
11263                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
11264                 };
11265                 genfn = fns[size][u];
11266                 break;
11267             }
11268             case 0xb: /* SQRSHL, UQRSHL */
11269             {
11270                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11271                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
11272                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
11273                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
11274                 };
11275                 genenvfn = fns[size][u];
11276                 break;
11277             }
11278             default:
11279                 g_assert_not_reached();
11280             }
11281 
11282             if (genenvfn) {
11283                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
11284             } else {
11285                 genfn(tcg_res, tcg_op1, tcg_op2);
11286             }
11287 
11288             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11289         }
11290     }
11291     clear_vec_high(s, is_q, rd);
11292 }
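      /*
       * Illustrative note (not part of the original file): in the integer
       * group the two-bit size field selects the element width directly
       * (0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit) and U picks the
       * unsigned or subtracting variant.  E.g. SUB V0.4S, V1.4S, V2.4S
       * (opcode 0x10, U = 1, size = MO_32) expands to a single gvec op:
       *
       *     tcg_gen_gvec_sub(MO_32, vec_full_reg_offset(s, 0),
       *                      vec_full_reg_offset(s, 1),
       *                      vec_full_reg_offset(s, 2),
       *                      16, vec_full_reg_size(s));
       */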
11293 
11294 /* AdvSIMD three same
11295  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11296  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11297  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11298  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11299  */
11300 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11301 {
11302     int opcode = extract32(insn, 11, 5);
11303 
11304     switch (opcode) {
11305     case 0x3: /* logic ops */
11306         disas_simd_3same_logic(s, insn);
11307         break;
11308     case 0x17: /* ADDP */
11309     case 0x14: /* SMAXP, UMAXP */
11310     case 0x15: /* SMINP, UMINP */
11311     {
11312         /* Pairwise operations */
11313         int is_q = extract32(insn, 30, 1);
11314         int u = extract32(insn, 29, 1);
11315         int size = extract32(insn, 22, 2);
11316         int rm = extract32(insn, 16, 5);
11317         int rn = extract32(insn, 5, 5);
11318         int rd = extract32(insn, 0, 5);
11319         if (opcode == 0x17) {
11320             if (u || (size == 3 && !is_q)) {
11321                 unallocated_encoding(s);
11322                 return;
11323             }
11324         } else {
11325             if (size == 3) {
11326                 unallocated_encoding(s);
11327                 return;
11328             }
11329         }
11330         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11331         break;
11332     }
11333     case 0x18 ... 0x31:
11334         /* floating point ops: size[1] and U are part of the opcode */
11335         disas_simd_3same_float(s, insn);
11336         break;
11337     default:
11338         disas_simd_3same_int(s, insn);
11339         break;
11340     }
11341 }
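      /*
       * Decode summary for the switch above (an illustrative note, not
       * part of the original file): opcode 0x3 is the logic group,
       * 0x14/0x15/0x17 are the integer pairwise ops, and 0x18..0x1f
       * route to the float group, where size[1] and U complete the
       * operation.  Everything else is an integer op.  Since the opcode
       * field is only 5 bits wide, the "... 0x31" upper bound of the
       * case range can never match above 0x1f.
       */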
11342 
11343 /*
11344  * Advanced SIMD three same (ARMv8.2 FP16 variants)
11345  *
11346  *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11347  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11348  * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11349  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11350  *
11351  * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11352  * (register), FACGE, FABD, FCMGT (register) and FACGT.
11353  *
11354  */
11355 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11356 {
11357     int opcode = extract32(insn, 11, 3);
11358     int u = extract32(insn, 29, 1);
11359     int a = extract32(insn, 23, 1);
11360     int is_q = extract32(insn, 30, 1);
11361     int rm = extract32(insn, 16, 5);
11362     int rn = extract32(insn, 5, 5);
11363     int rd = extract32(insn, 0, 5);
11364     /*
11365      * For these floating point ops, the U, a and opcode bits
11366      * together indicate the operation.
11367      */
11368     int fpopcode = opcode | (a << 3) | (u << 4);
11369     int datasize = is_q ? 128 : 64;
11370     int elements = datasize / 16;
11371     bool pairwise;
11372     TCGv_ptr fpst;
11373     int pass;
11374 
11375     switch (fpopcode) {
11376     case 0x0: /* FMAXNM */
11377     case 0x1: /* FMLA */
11378     case 0x2: /* FADD */
11379     case 0x3: /* FMULX */
11380     case 0x4: /* FCMEQ */
11381     case 0x6: /* FMAX */
11382     case 0x7: /* FRECPS */
11383     case 0x8: /* FMINNM */
11384     case 0x9: /* FMLS */
11385     case 0xa: /* FSUB */
11386     case 0xe: /* FMIN */
11387     case 0xf: /* FRSQRTS */
11388     case 0x13: /* FMUL */
11389     case 0x14: /* FCMGE */
11390     case 0x15: /* FACGE */
11391     case 0x17: /* FDIV */
11392     case 0x1a: /* FABD */
11393     case 0x1c: /* FCMGT */
11394     case 0x1d: /* FACGT */
11395         pairwise = false;
11396         break;
11397     case 0x10: /* FMAXNMP */
11398     case 0x12: /* FADDP */
11399     case 0x16: /* FMAXP */
11400     case 0x18: /* FMINNMP */
11401     case 0x1e: /* FMINP */
11402         pairwise = true;
11403         break;
11404     default:
11405         unallocated_encoding(s);
11406         return;
11407     }
11408 
11409     if (!dc_isar_feature(aa64_fp16, s)) {
11410         unallocated_encoding(s);
11411         return;
11412     }
11413 
11414     if (!fp_access_check(s)) {
11415         return;
11416     }
11417 
11418     fpst = fpstatus_ptr(FPST_FPCR_F16);
11419 
11420     if (pairwise) {
11421         int maxpass = is_q ? 8 : 4;
11422         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11423         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11424         TCGv_i32 tcg_res[8];
11425 
11426         for (pass = 0; pass < maxpass; pass++) {
11427             int passreg = pass < (maxpass / 2) ? rn : rm;
11428             int passelt = (pass << 1) & (maxpass - 1);
11429 
11430             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11431             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11432             tcg_res[pass] = tcg_temp_new_i32();
11433 
11434             switch (fpopcode) {
11435             case 0x10: /* FMAXNMP */
11436                 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11437                                            fpst);
11438                 break;
11439             case 0x12: /* FADDP */
11440                 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11441                 break;
11442             case 0x16: /* FMAXP */
11443                 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11444                 break;
11445             case 0x18: /* FMINNMP */
11446                 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11447                                            fpst);
11448                 break;
11449             case 0x1e: /* FMINP */
11450                 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11451                 break;
11452             default:
11453                 g_assert_not_reached();
11454             }
11455         }
11456 
11457         for (pass = 0; pass < maxpass; pass++) {
11458             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11459         }
11460     } else {
11461         for (pass = 0; pass < elements; pass++) {
11462             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11463             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11464             TCGv_i32 tcg_res = tcg_temp_new_i32();
11465 
11466             read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11467             read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11468 
11469             switch (fpopcode) {
11470             case 0x0: /* FMAXNM */
11471                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11472                 break;
11473             case 0x1: /* FMLA */
11474                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11475                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11476                                            fpst);
11477                 break;
11478             case 0x2: /* FADD */
11479                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11480                 break;
11481             case 0x3: /* FMULX */
11482                 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11483                 break;
11484             case 0x4: /* FCMEQ */
11485                 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11486                 break;
11487             case 0x6: /* FMAX */
11488                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11489                 break;
11490             case 0x7: /* FRECPS */
11491                 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11492                 break;
11493             case 0x8: /* FMINNM */
11494                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11495                 break;
11496             case 0x9: /* FMLS */
11497                 /* As usual for ARM, separate negation for fused multiply-add */
11498                 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11499                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11500                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11501                                            fpst);
11502                 break;
11503             case 0xa: /* FSUB */
11504                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11505                 break;
11506             case 0xe: /* FMIN */
11507                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11508                 break;
11509             case 0xf: /* FRSQRTS */
11510                 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11511                 break;
11512             case 0x13: /* FMUL */
11513                 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11514                 break;
11515             case 0x14: /* FCMGE */
11516                 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11517                 break;
11518             case 0x15: /* FACGE */
11519                 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11520                 break;
11521             case 0x17: /* FDIV */
11522                 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11523                 break;
11524             case 0x1a: /* FABD */
11525                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11526                 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
11527                 break;
11528             case 0x1c: /* FCMGT */
11529                 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11530                 break;
11531             case 0x1d: /* FACGT */
11532                 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11533                 break;
11534             default:
11535                 g_assert_not_reached();
11536             }
11537 
11538             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11539         }
11540     }
11541 
11542     clear_vec_high(s, is_q, rd);
11543 }
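      /*
       * Worked example of the pairwise indexing above (an illustrative
       * note, not part of the original file): with is_q set, maxpass = 8
       * and passreg/passelt walk the concatenation of Vn and Vm:
       *
       *     pass:     0   1   2   3   4   5   6   7
       *     passreg:  rn  rn  rn  rn  rm  rm  rm  rm
       *     passelt:  0   2   4   6   0   2   4   6
       *
       * so result element p combines elements 2p and 2p+1 of Vn for the
       * low half and of Vm for the high half, as the architecture
       * requires.
       */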
11544 
11545 /* AdvSIMD three same extra
11546  *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
11547  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11548  * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
11549  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11550  */
11551 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
11552 {
11553     int rd = extract32(insn, 0, 5);
11554     int rn = extract32(insn, 5, 5);
11555     int opcode = extract32(insn, 11, 4);
11556     int rm = extract32(insn, 16, 5);
11557     int size = extract32(insn, 22, 2);
11558     bool u = extract32(insn, 29, 1);
11559     bool is_q = extract32(insn, 30, 1);
11560     bool feature;
11561     int rot;
11562 
11563     switch (u * 16 + opcode) {
11564     case 0x10: /* SQRDMLAH (vector) */
11565     case 0x11: /* SQRDMLSH (vector) */
11566         if (size != 1 && size != 2) {
11567             unallocated_encoding(s);
11568             return;
11569         }
11570         feature = dc_isar_feature(aa64_rdm, s);
11571         break;
11572     case 0x02: /* SDOT (vector) */
11573     case 0x12: /* UDOT (vector) */
11574         if (size != MO_32) {
11575             unallocated_encoding(s);
11576             return;
11577         }
11578         feature = dc_isar_feature(aa64_dp, s);
11579         break;
11580     case 0x03: /* USDOT */
11581         if (size != MO_32) {
11582             unallocated_encoding(s);
11583             return;
11584         }
11585         feature = dc_isar_feature(aa64_i8mm, s);
11586         break;
11587     case 0x04: /* SMMLA */
11588     case 0x14: /* UMMLA */
11589     case 0x05: /* USMMLA */
11590         if (!is_q || size != MO_32) {
11591             unallocated_encoding(s);
11592             return;
11593         }
11594         feature = dc_isar_feature(aa64_i8mm, s);
11595         break;
11596     case 0x18: /* FCMLA, #0 */
11597     case 0x19: /* FCMLA, #90 */
11598     case 0x1a: /* FCMLA, #180 */
11599     case 0x1b: /* FCMLA, #270 */
11600     case 0x1c: /* FCADD, #90 */
11601     case 0x1e: /* FCADD, #270 */
11602         if (size == 0
11603             || (size == 1 && !dc_isar_feature(aa64_fp16, s))
11604             || (size == 3 && !is_q)) {
11605             unallocated_encoding(s);
11606             return;
11607         }
11608         feature = dc_isar_feature(aa64_fcma, s);
11609         break;
11610     case 0x1d: /* BFMMLA */
11611         if (size != MO_16 || !is_q) {
11612             unallocated_encoding(s);
11613             return;
11614         }
11615         feature = dc_isar_feature(aa64_bf16, s);
11616         break;
11617     case 0x1f:
11618         switch (size) {
11619         case 1: /* BFDOT */
11620         case 3: /* BFMLAL{B,T} */
11621             feature = dc_isar_feature(aa64_bf16, s);
11622             break;
11623         default:
11624             unallocated_encoding(s);
11625             return;
11626         }
11627         break;
11628     default:
11629         unallocated_encoding(s);
11630         return;
11631     }
11632     if (!feature) {
11633         unallocated_encoding(s);
11634         return;
11635     }
11636     if (!fp_access_check(s)) {
11637         return;
11638     }
11639 
11640     switch (opcode) {
11641     case 0x0: /* SQRDMLAH (vector) */
11642         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
11643         return;
11644 
11645     case 0x1: /* SQRDMLSH (vector) */
11646         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
11647         return;
11648 
11649     case 0x2: /* SDOT / UDOT */
11650         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
11651                          u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
11652         return;
11653 
11654     case 0x3: /* USDOT */
11655         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
11656         return;
11657 
11658     case 0x04: /* SMMLA, UMMLA */
11659         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
11660                          u ? gen_helper_gvec_ummla_b
11661                          : gen_helper_gvec_smmla_b);
11662         return;
11663     case 0x05: /* USMMLA */
11664         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
11665         return;
11666 
11667     case 0x8: /* FCMLA, #0 */
11668     case 0x9: /* FCMLA, #90 */
11669     case 0xa: /* FCMLA, #180 */
11670     case 0xb: /* FCMLA, #270 */
11671         rot = extract32(opcode, 0, 2);
11672         switch (size) {
11673         case 1:
11674             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
11675                               gen_helper_gvec_fcmlah);
11676             break;
11677         case 2:
11678             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11679                               gen_helper_gvec_fcmlas);
11680             break;
11681         case 3:
11682             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11683                               gen_helper_gvec_fcmlad);
11684             break;
11685         default:
11686             g_assert_not_reached();
11687         }
11688         return;
11689 
11690     case 0xc: /* FCADD, #90 */
11691     case 0xe: /* FCADD, #270 */
11692         rot = extract32(opcode, 1, 1);
11693         switch (size) {
11694         case 1:
11695             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11696                               gen_helper_gvec_fcaddh);
11697             break;
11698         case 2:
11699             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11700                               gen_helper_gvec_fcadds);
11701             break;
11702         case 3:
11703             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11704                               gen_helper_gvec_fcaddd);
11705             break;
11706         default:
11707             g_assert_not_reached();
11708         }
11709         return;
11710 
11711     case 0xd: /* BFMMLA */
11712         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
11713         return;
11714     case 0xf:
11715         switch (size) {
11716         case 1: /* BFDOT */
11717             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
11718             break;
11719         case 3: /* BFMLAL{B,T} */
11720             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
11721                               gen_helper_gvec_bfmlal);
11722             break;
11723         default:
11724             g_assert_not_reached();
11725         }
11726         return;
11727 
11728     default:
11729         g_assert_not_reached();
11730     }
11731 }
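      /*
       * Illustrative note (not part of the original file): the first
       * switch keys on u * 16 + opcode, so SDOT is 0x02 and its unsigned
       * twin UDOT is 0x12.  For FCMLA the two low opcode bits are the
       * rotation:
       *
       *     rot = extract32(opcode, 0, 2);  0 -> #0,   1 -> #90,
       *                                     2 -> #180, 3 -> #270
       *
       * while FCADD only distinguishes #90/#270 and so needs just
       * opcode[1].
       */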
11732 
11733 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11734                                   int size, int rn, int rd)
11735 {
11736     /* Handle 2-reg-misc ops which are widening (so each size element
11737      * in the source becomes a 2*size element in the destination).
11738      * The only instruction like this is FCVTL.
11739      */
11740     int pass;
11741 
11742     if (size == 3) {
11743         /* 32 -> 64 bit fp conversion */
11744         TCGv_i64 tcg_res[2];
11745         int srcelt = is_q ? 2 : 0;
11746 
11747         for (pass = 0; pass < 2; pass++) {
11748             TCGv_i32 tcg_op = tcg_temp_new_i32();
11749             tcg_res[pass] = tcg_temp_new_i64();
11750 
11751             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11752             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
11753         }
11754         for (pass = 0; pass < 2; pass++) {
11755             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11756         }
11757     } else {
11758         /* 16 -> 32 bit fp conversion */
11759         int srcelt = is_q ? 4 : 0;
11760         TCGv_i32 tcg_res[4];
11761         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
11762         TCGv_i32 ahp = get_ahp_flag();
11763 
11764         for (pass = 0; pass < 4; pass++) {
11765             tcg_res[pass] = tcg_temp_new_i32();
11766 
11767             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11768             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11769                                            fpst, ahp);
11770         }
11771         for (pass = 0; pass < 4; pass++) {
11772             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11773         }
11774     }
11775 }
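      /*
       * Worked example (an illustrative note, not part of the original
       * file): in the 32 -> 64 bit case, FCVTL reads source elements
       * 0 and 1 while FCVTL2 (is_q set) reads elements 2 and 3, hence
       * srcelt = is_q ? 2 : 0:
       *
       *     FCVTL  Vd.2D, Vn.2S    d[0] = fcvt(n[0]); d[1] = fcvt(n[1]);
       *     FCVTL2 Vd.2D, Vn.4S    d[0] = fcvt(n[2]); d[1] = fcvt(n[3]);
       */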
11776 
11777 static void handle_rev(DisasContext *s, int opcode, bool u,
11778                        bool is_q, int size, int rn, int rd)
11779 {
11780     int op = (opcode << 1) | u;
11781     int opsz = op + size;
11782     int grp_size = 3 - opsz;
11783     int dsize = is_q ? 128 : 64;
11784     int i;
11785 
11786     if (opsz >= 3) {
11787         unallocated_encoding(s);
11788         return;
11789     }
11790 
11791     if (!fp_access_check(s)) {
11792         return;
11793     }
11794 
11795     if (size == 0) {
11796         /* Special case bytes, use bswap op on each group of elements */
11797         int groups = dsize / (8 << grp_size);
11798 
11799         for (i = 0; i < groups; i++) {
11800             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11801 
11802             read_vec_element(s, tcg_tmp, rn, i, grp_size);
11803             switch (grp_size) {
11804             case MO_16:
11805                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11806                 break;
11807             case MO_32:
11808                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11809                 break;
11810             case MO_64:
11811                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11812                 break;
11813             default:
11814                 g_assert_not_reached();
11815             }
11816             write_vec_element(s, tcg_tmp, rd, i, grp_size);
11817         }
11818         clear_vec_high(s, is_q, rd);
11819     } else {
11820         int revmask = (1 << grp_size) - 1;
11821         int esize = 8 << size;
11822         int elements = dsize / esize;
11823         TCGv_i64 tcg_rn = tcg_temp_new_i64();
11824         TCGv_i64 tcg_rd[2];
11825 
11826         for (i = 0; i < 2; i++) {
11827             tcg_rd[i] = tcg_temp_new_i64();
11828             tcg_gen_movi_i64(tcg_rd[i], 0);
11829         }
11830 
11831         for (i = 0; i < elements; i++) {
11832             int e_rev = (i & 0xf) ^ revmask;
11833             int w = (e_rev * esize) / 64;
11834             int o = (e_rev * esize) % 64;
11835 
11836             read_vec_element(s, tcg_rn, rn, i, size);
11837             tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
11838         }
11839 
11840         for (i = 0; i < 2; i++) {
11841             write_vec_element(s, tcg_rd[i], rd, i, MO_64);
11842         }
11843         clear_vec_high(s, true, rd);
11844     }
11845 }
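      /*
       * Worked example of the reversal arithmetic above (an illustrative
       * note, not part of the original file): REV64 Vd.8H has op = 0 and
       * size = 1, so grp_size = 2 and revmask = 3.  Element i lands at
       * e_rev = i ^ 3, reversing the 16-bit elements within each 64-bit
       * group:
       *
       *     i:      0 1 2 3   4 5 6 7
       *     e_rev:  3 2 1 0   7 6 5 4
       *
       * and (w, o) = (e_rev * 16 / 64, e_rev * 16 % 64) place each
       * element in the right doubleword and bit offset of the result.
       */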
11846 
11847 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11848                                   bool is_q, int size, int rn, int rd)
11849 {
11850     /* Implement the pairwise operations from 2-misc:
11851      * SADDLP, UADDLP, SADALP, UADALP.
11852      * These all add pairs of elements in the input to produce a
11853      * double-width result element in the output (possibly accumulating).
11854      */
11855     bool accum = (opcode == 0x6);
11856     int maxpass = is_q ? 2 : 1;
11857     int pass;
11858     TCGv_i64 tcg_res[2];
11859 
11860     if (size == 2) {
11861         /* 32 + 32 -> 64 op */
11862         MemOp memop = size + (u ? 0 : MO_SIGN);
11863 
11864         for (pass = 0; pass < maxpass; pass++) {
11865             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11866             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11867 
11868             tcg_res[pass] = tcg_temp_new_i64();
11869 
11870             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11871             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11872             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11873             if (accum) {
11874                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
11875                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11876             }
11877         }
11878     } else {
11879         for (pass = 0; pass < maxpass; pass++) {
11880             TCGv_i64 tcg_op = tcg_temp_new_i64();
11881             NeonGenOne64OpFn *genfn;
11882             static NeonGenOne64OpFn * const fns[2][2] = {
11883                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
11884                 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
11885             };
11886 
11887             genfn = fns[size][u];
11888 
11889             tcg_res[pass] = tcg_temp_new_i64();
11890 
11891             read_vec_element(s, tcg_op, rn, pass, MO_64);
11892             genfn(tcg_res[pass], tcg_op);
11893 
11894             if (accum) {
11895                 read_vec_element(s, tcg_op, rd, pass, MO_64);
11896                 if (size == 0) {
11897                     gen_helper_neon_addl_u16(tcg_res[pass],
11898                                              tcg_res[pass], tcg_op);
11899                 } else {
11900                     gen_helper_neon_addl_u32(tcg_res[pass],
11901                                              tcg_res[pass], tcg_op);
11902                 }
11903             }
11904         }
11905     }
11906     if (!is_q) {
11907         tcg_res[1] = tcg_constant_i64(0);
11908     }
11909     for (pass = 0; pass < 2; pass++) {
11910         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11911     }
11912 }
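      /*
       * Worked example (an illustrative note, not part of the original
       * file): SADDLP Vd.2D, Vn.4S sign-extends adjacent 32-bit pairs
       * and adds them into 64-bit results, so for n = { 1, -2, 3, 4 }:
       *
       *     d[0] = (int64_t)1 + (int64_t)-2 = -1;
       *     d[1] = (int64_t)3 + (int64_t)4  =  7;
       *
       * SADALP is the same but also accumulates the previous contents
       * of Vd.
       */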
11913 
11914 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11915 {
11916     /* Implement SHLL and SHLL2 */
11917     int pass;
11918     int part = is_q ? 2 : 0;
11919     TCGv_i64 tcg_res[2];
11920 
11921     for (pass = 0; pass < 2; pass++) {
11922         static NeonGenWidenFn * const widenfns[3] = {
11923             gen_helper_neon_widen_u8,
11924             gen_helper_neon_widen_u16,
11925             tcg_gen_extu_i32_i64,
11926         };
11927         NeonGenWidenFn *widenfn = widenfns[size];
11928         TCGv_i32 tcg_op = tcg_temp_new_i32();
11929 
11930         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11931         tcg_res[pass] = tcg_temp_new_i64();
11932         widenfn(tcg_res[pass], tcg_op);
11933         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11934     }
11935 
11936     for (pass = 0; pass < 2; pass++) {
11937         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11938     }
11939 }
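      /*
       * Illustrative note (not part of the original file): the single
       * 64-bit shift above is safe because the widen step leaves the
       * top half of every widened lane zero.  E.g. for SHLL Vd.8H,
       * Vn.8B, #8, one 32-bit pass over source bytes { 01, 02, 03, 04 }:
       *
       *     widen_u8: 0x0004000300020001
       *     << 8:     0x0400030002000100
       *
       * so each 16-bit lane now holds its byte shifted left by 8, with
       * no carry between lanes.
       */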
11940 
11941 /* AdvSIMD two reg misc
11942  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
11943  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11944  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11945  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11946  */
11947 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11948 {
11949     int size = extract32(insn, 22, 2);
11950     int opcode = extract32(insn, 12, 5);
11951     bool u = extract32(insn, 29, 1);
11952     bool is_q = extract32(insn, 30, 1);
11953     int rn = extract32(insn, 5, 5);
11954     int rd = extract32(insn, 0, 5);
11955     bool need_fpstatus = false;
11956     int rmode = -1;
11957     TCGv_i32 tcg_rmode;
11958     TCGv_ptr tcg_fpstatus;
11959 
11960     switch (opcode) {
11961     case 0x0: /* REV64, REV32 */
11962     case 0x1: /* REV16 */
11963         handle_rev(s, opcode, u, is_q, size, rn, rd);
11964         return;
11965     case 0x5: /* CNT, NOT, RBIT */
11966         if (u && size == 0) {
11967             /* NOT */
11968             break;
11969         } else if (u && size == 1) {
11970             /* RBIT */
11971             break;
11972         } else if (!u && size == 0) {
11973             /* CNT */
11974             break;
11975         }
11976         unallocated_encoding(s);
11977         return;
11978     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11979     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11980         if (size == 3) {
11981             unallocated_encoding(s);
11982             return;
11983         }
11984         if (!fp_access_check(s)) {
11985             return;
11986         }
11987 
11988         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
11989         return;
11990     case 0x4: /* CLS, CLZ */
11991         if (size == 3) {
11992             unallocated_encoding(s);
11993             return;
11994         }
11995         break;
11996     case 0x2: /* SADDLP, UADDLP */
11997     case 0x6: /* SADALP, UADALP */
11998         if (size == 3) {
11999             unallocated_encoding(s);
12000             return;
12001         }
12002         if (!fp_access_check(s)) {
12003             return;
12004         }
12005         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
12006         return;
12007     case 0x13: /* SHLL, SHLL2 */
12008         if (u == 0 || size == 3) {
12009             unallocated_encoding(s);
12010             return;
12011         }
12012         if (!fp_access_check(s)) {
12013             return;
12014         }
12015         handle_shll(s, is_q, size, rn, rd);
12016         return;
12017     case 0xa: /* CMLT */
12018         if (u == 1) {
12019             unallocated_encoding(s);
12020             return;
12021         }
12022         /* fall through */
12023     case 0x8: /* CMGT, CMGE */
12024     case 0x9: /* CMEQ, CMLE */
12025     case 0xb: /* ABS, NEG */
12026         if (size == 3 && !is_q) {
12027             unallocated_encoding(s);
12028             return;
12029         }
12030         break;
12031     case 0x3: /* SUQADD, USQADD */
12032         if (size == 3 && !is_q) {
12033             unallocated_encoding(s);
12034             return;
12035         }
12036         if (!fp_access_check(s)) {
12037             return;
12038         }
12039         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
12040         return;
12041     case 0x7: /* SQABS, SQNEG */
12042         if (size == 3 && !is_q) {
12043             unallocated_encoding(s);
12044             return;
12045         }
12046         break;
12047     case 0xc ... 0xf:
12048     case 0x16 ... 0x1f:
12049     {
12050         /* Floating point: U, size[1] and opcode indicate operation;
12051          * size[0] indicates single or double precision.
12052          */
12053         int is_double = extract32(size, 0, 1);
12054         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
12055         size = is_double ? 3 : 2;
12056         switch (opcode) {
12057         case 0x2f: /* FABS */
12058         case 0x6f: /* FNEG */
12059             if (size == 3 && !is_q) {
12060                 unallocated_encoding(s);
12061                 return;
12062             }
12063             break;
12064         case 0x1d: /* SCVTF */
12065         case 0x5d: /* UCVTF */
12066         {
12067             bool is_signed = (opcode == 0x1d);
12068             int elements = is_double ? 2 : is_q ? 4 : 2;
12069             if (is_double && !is_q) {
12070                 unallocated_encoding(s);
12071                 return;
12072             }
12073             if (!fp_access_check(s)) {
12074                 return;
12075             }
12076             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12077             return;
12078         }
12079         case 0x2c: /* FCMGT (zero) */
12080         case 0x2d: /* FCMEQ (zero) */
12081         case 0x2e: /* FCMLT (zero) */
12082         case 0x6c: /* FCMGE (zero) */
12083         case 0x6d: /* FCMLE (zero) */
12084             if (size == 3 && !is_q) {
12085                 unallocated_encoding(s);
12086                 return;
12087             }
12088             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12089             return;
12090         case 0x7f: /* FSQRT */
12091             if (size == 3 && !is_q) {
12092                 unallocated_encoding(s);
12093                 return;
12094             }
12095             break;
12096         case 0x1a: /* FCVTNS */
12097         case 0x1b: /* FCVTMS */
12098         case 0x3a: /* FCVTPS */
12099         case 0x3b: /* FCVTZS */
12100         case 0x5a: /* FCVTNU */
12101         case 0x5b: /* FCVTMU */
12102         case 0x7a: /* FCVTPU */
12103         case 0x7b: /* FCVTZU */
12104             need_fpstatus = true;
12105             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12106             if (size == 3 && !is_q) {
12107                 unallocated_encoding(s);
12108                 return;
12109             }
12110             break;
12111         case 0x5c: /* FCVTAU */
12112         case 0x1c: /* FCVTAS */
12113             need_fpstatus = true;
12114             rmode = FPROUNDING_TIEAWAY;
12115             if (size == 3 && !is_q) {
12116                 unallocated_encoding(s);
12117                 return;
12118             }
12119             break;
12120         case 0x3c: /* URECPE */
12121             if (size == 3) {
12122                 unallocated_encoding(s);
12123                 return;
12124             }
12125             /* fall through */
12126         case 0x3d: /* FRECPE */
12127         case 0x7d: /* FRSQRTE */
12128             if (size == 3 && !is_q) {
12129                 unallocated_encoding(s);
12130                 return;
12131             }
12132             if (!fp_access_check(s)) {
12133                 return;
12134             }
12135             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12136             return;
12137         case 0x56: /* FCVTXN, FCVTXN2 */
12138             if (size == 2) {
12139                 unallocated_encoding(s);
12140                 return;
12141             }
12142             /* fall through */
12143         case 0x16: /* FCVTN, FCVTN2 */
12144             /* handle_2misc_narrow does a 2*size -> size operation, but these
12145              * instructions encode the source size rather than dest size.
12146              */
12147             if (!fp_access_check(s)) {
12148                 return;
12149             }
12150             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12151             return;
12152         case 0x36: /* BFCVTN, BFCVTN2 */
12153             if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
12154                 unallocated_encoding(s);
12155                 return;
12156             }
12157             if (!fp_access_check(s)) {
12158                 return;
12159             }
12160             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12161             return;
12162         case 0x17: /* FCVTL, FCVTL2 */
12163             if (!fp_access_check(s)) {
12164                 return;
12165             }
12166             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12167             return;
12168         case 0x18: /* FRINTN */
12169         case 0x19: /* FRINTM */
12170         case 0x38: /* FRINTP */
12171         case 0x39: /* FRINTZ */
12172             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12173             /* fall through */
12174         case 0x59: /* FRINTX */
12175         case 0x79: /* FRINTI */
12176             need_fpstatus = true;
12177             if (size == 3 && !is_q) {
12178                 unallocated_encoding(s);
12179                 return;
12180             }
12181             break;
12182         case 0x58: /* FRINTA */
12183             rmode = FPROUNDING_TIEAWAY;
12184             need_fpstatus = true;
12185             if (size == 3 && !is_q) {
12186                 unallocated_encoding(s);
12187                 return;
12188             }
12189             break;
12190         case 0x7c: /* URSQRTE */
12191             if (size == 3) {
12192                 unallocated_encoding(s);
12193                 return;
12194             }
12195             break;
12196         case 0x1e: /* FRINT32Z */
12197         case 0x1f: /* FRINT64Z */
12198             rmode = FPROUNDING_ZERO;
12199             /* fall through */
12200         case 0x5e: /* FRINT32X */
12201         case 0x5f: /* FRINT64X */
12202             need_fpstatus = true;
12203             if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12204                 unallocated_encoding(s);
12205                 return;
12206             }
12207             break;
12208         default:
12209             unallocated_encoding(s);
12210             return;
12211         }
12212         break;
12213     }
12214     default:
12215         unallocated_encoding(s);
12216         return;
12217     }
12218 
12219     if (!fp_access_check(s)) {
12220         return;
12221     }
12222 
12223     if (need_fpstatus || rmode >= 0) {
12224         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
12225     } else {
12226         tcg_fpstatus = NULL;
12227     }
12228     if (rmode >= 0) {
12229         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12230     } else {
12231         tcg_rmode = NULL;
12232     }
12233 
12234     switch (opcode) {
12235     case 0x5:
12236         if (u && size == 0) { /* NOT */
12237             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12238             return;
12239         }
12240         break;
12241     case 0x8: /* CMGT, CMGE */
12242         if (u) {
12243             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
12244         } else {
12245             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
12246         }
12247         return;
12248     case 0x9: /* CMEQ, CMLE */
12249         if (u) {
12250             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
12251         } else {
12252             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
12253         }
12254         return;
12255     case 0xa: /* CMLT */
12256         gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
12257         return;
12258     case 0xb:
12259         if (u) { /* ABS, NEG */
12260             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12261         } else {
12262             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12263         }
12264         return;
12265     }
12266 
12267     if (size == 3) {
12268         /* All 64-bit element operations can be shared with scalar 2misc */
12269         int pass;
12270 
12271         /* Coverity claims (size == 3 && !is_q) has been eliminated
12272          * from all paths leading to here.
12273          */
12274         tcg_debug_assert(is_q);
12275         for (pass = 0; pass < 2; pass++) {
12276             TCGv_i64 tcg_op = tcg_temp_new_i64();
12277             TCGv_i64 tcg_res = tcg_temp_new_i64();
12278 
12279             read_vec_element(s, tcg_op, rn, pass, MO_64);
12280 
12281             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12282                             tcg_rmode, tcg_fpstatus);
12283 
12284             write_vec_element(s, tcg_res, rd, pass, MO_64);
12285         }
12286     } else {
12287         int pass;
12288 
12289         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12290             TCGv_i32 tcg_op = tcg_temp_new_i32();
12291             TCGv_i32 tcg_res = tcg_temp_new_i32();
12292 
12293             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12294 
12295             if (size == 2) {
12296                 /* Special cases for 32 bit elements */
12297                 switch (opcode) {
12298                 case 0x4: /* CLS */
12299                     if (u) {
12300                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12301                     } else {
12302                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
12303                     }
12304                     break;
12305                 case 0x7: /* SQABS, SQNEG */
12306                     if (u) {
12307                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12308                     } else {
12309                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12310                     }
12311                     break;
12312                 case 0x2f: /* FABS */
12313                     gen_helper_vfp_abss(tcg_res, tcg_op);
12314                     break;
12315                 case 0x6f: /* FNEG */
12316                     gen_helper_vfp_negs(tcg_res, tcg_op);
12317                     break;
12318                 case 0x7f: /* FSQRT */
12319                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12320                     break;
12321                 case 0x1a: /* FCVTNS */
12322                 case 0x1b: /* FCVTMS */
12323                 case 0x1c: /* FCVTAS */
12324                 case 0x3a: /* FCVTPS */
12325                 case 0x3b: /* FCVTZS */
12326                     gen_helper_vfp_tosls(tcg_res, tcg_op,
12327                                          tcg_constant_i32(0), tcg_fpstatus);
12328                     break;
12329                 case 0x5a: /* FCVTNU */
12330                 case 0x5b: /* FCVTMU */
12331                 case 0x5c: /* FCVTAU */
12332                 case 0x7a: /* FCVTPU */
12333                 case 0x7b: /* FCVTZU */
12334                     gen_helper_vfp_touls(tcg_res, tcg_op,
12335                                          tcg_constant_i32(0), tcg_fpstatus);
12336                     break;
12337                 case 0x18: /* FRINTN */
12338                 case 0x19: /* FRINTM */
12339                 case 0x38: /* FRINTP */
12340                 case 0x39: /* FRINTZ */
12341                 case 0x58: /* FRINTA */
12342                 case 0x79: /* FRINTI */
12343                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12344                     break;
12345                 case 0x59: /* FRINTX */
12346                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12347                     break;
12348                 case 0x7c: /* URSQRTE */
12349                     gen_helper_rsqrte_u32(tcg_res, tcg_op);
12350                     break;
12351                 case 0x1e: /* FRINT32Z */
12352                 case 0x5e: /* FRINT32X */
12353                     gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12354                     break;
12355                 case 0x1f: /* FRINT64Z */
12356                 case 0x5f: /* FRINT64X */
12357                     gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12358                     break;
12359                 default:
12360                     g_assert_not_reached();
12361                 }
12362             } else {
12363                 /* Use helpers for 8 and 16 bit elements */
12364                 switch (opcode) {
12365                 case 0x5: /* CNT, RBIT */
12366                     /* For these two insns size is part of the opcode specifier
12367                      * (handled earlier); they always operate on byte elements.
12368                      */
12369                     if (u) {
12370                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12371                     } else {
12372                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12373                     }
12374                     break;
12375                 case 0x7: /* SQABS, SQNEG */
12376                 {
12377                     NeonGenOneOpEnvFn *genfn;
12378                     static NeonGenOneOpEnvFn * const fns[2][2] = {
12379                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12380                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12381                     };
12382                     genfn = fns[size][u];
12383                     genfn(tcg_res, cpu_env, tcg_op);
12384                     break;
12385                 }
12386                 case 0x4: /* CLS, CLZ */
12387                     if (u) {
12388                         if (size == 0) {
12389                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
12390                         } else {
12391                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
12392                         }
12393                     } else {
12394                         if (size == 0) {
12395                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
12396                         } else {
12397                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
12398                         }
12399                     }
12400                     break;
12401                 default:
12402                     g_assert_not_reached();
12403                 }
12404             }
12405 
12406             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12407         }
12408     }
12409     clear_vec_high(s, is_q, rd);
12410 
12411     if (tcg_rmode) {
12412         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12413     }
12414 }
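      /*
       * Worked example of the FCVT* rmode extraction above (an
       * illustrative note, not part of the original file):
       * rmode = opcode[5] | opcode[0] << 1, which lines up with the
       * FPCR RMode encoding used by the FPROUNDING_* values:
       *
       *     FCVTNS 0x1a -> rmode 0 (FPROUNDING_TIEEVEN)
       *     FCVTPS 0x3a -> rmode 1 (FPROUNDING_POSINF)
       *     FCVTMS 0x1b -> rmode 2 (FPROUNDING_NEGINF)
       *     FCVTZS 0x3b -> rmode 3 (FPROUNDING_ZERO)
       *
       * The unsigned forms extract identically; only the helper called
       * per element differs.
       */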
12415 
12416 /* AdvSIMD [scalar] two register miscellaneous (FP16)
12417  *
12418  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
12419  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12420  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12421  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12422  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12423  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12424  *
12425  * This actually covers two groups, with the scalar variant selected by
12426  * bit 28. Several of the instructions (float to integral) exist only
12427  * in the vector form and are unallocated in the scalar decode. Also,
12428  * in the scalar decode Q is always 1.
12429  */
12430 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12431 {
12432     int fpop, opcode, a, u;
12433     int rn, rd;
12434     bool is_q;
12435     bool is_scalar;
12436     bool only_in_vector = false;
12437 
12438     int pass;
12439     TCGv_i32 tcg_rmode = NULL;
12440     TCGv_ptr tcg_fpstatus = NULL;
12441     bool need_fpst = true;
12442     int rmode = -1;
12443 
12444     if (!dc_isar_feature(aa64_fp16, s)) {
12445         unallocated_encoding(s);
12446         return;
12447     }
12448 
12449     rd = extract32(insn, 0, 5);
12450     rn = extract32(insn, 5, 5);
12451 
12452     a = extract32(insn, 23, 1);
12453     u = extract32(insn, 29, 1);
12454     is_scalar = extract32(insn, 28, 1);
12455     is_q = extract32(insn, 30, 1);
12456 
12457     opcode = extract32(insn, 12, 5);
12458     fpop = deposit32(opcode, 5, 1, a);
12459     fpop = deposit32(fpop, 6, 1, u);
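          /*
           * Illustrative note (not part of the original file): the two
           * deposits build fpop = opcode | a << 5 | u << 6, mirroring
           * the packing used for the non-FP16 groups.  E.g. FCVTZU
           * appears below as 0x7b because u = 1, a = 1, opcode = 0x1b.
           */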
12460 
12461     switch (fpop) {
12462     case 0x1d: /* SCVTF */
12463     case 0x5d: /* UCVTF */
12464     {
12465         int elements;
12466 
12467         if (is_scalar) {
12468             elements = 1;
12469         } else {
12470             elements = (is_q ? 8 : 4);
12471         }
12472 
12473         if (!fp_access_check(s)) {
12474             return;
12475         }
12476         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12477         return;
12478     }
12479     break;
12480     case 0x2c: /* FCMGT (zero) */
12481     case 0x2d: /* FCMEQ (zero) */
12482     case 0x2e: /* FCMLT (zero) */
12483     case 0x6c: /* FCMGE (zero) */
12484     case 0x6d: /* FCMLE (zero) */
12485         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12486         return;
12487     case 0x3d: /* FRECPE */
12488     case 0x3f: /* FRECPX */
12489         break;
12490     case 0x18: /* FRINTN */
12491         only_in_vector = true;
12492         rmode = FPROUNDING_TIEEVEN;
12493         break;
12494     case 0x19: /* FRINTM */
12495         only_in_vector = true;
12496         rmode = FPROUNDING_NEGINF;
12497         break;
12498     case 0x38: /* FRINTP */
12499         only_in_vector = true;
12500         rmode = FPROUNDING_POSINF;
12501         break;
12502     case 0x39: /* FRINTZ */
12503         only_in_vector = true;
12504         rmode = FPROUNDING_ZERO;
12505         break;
12506     case 0x58: /* FRINTA */
12507         only_in_vector = true;
12508         rmode = FPROUNDING_TIEAWAY;
12509         break;
12510     case 0x59: /* FRINTX */
12511     case 0x79: /* FRINTI */
12512         only_in_vector = true;
12513         /* current rounding mode */
12514         break;
12515     case 0x1a: /* FCVTNS */
12516         rmode = FPROUNDING_TIEEVEN;
12517         break;
12518     case 0x1b: /* FCVTMS */
12519         rmode = FPROUNDING_NEGINF;
12520         break;
12521     case 0x1c: /* FCVTAS */
12522         rmode = FPROUNDING_TIEAWAY;
12523         break;
12524     case 0x3a: /* FCVTPS */
12525         rmode = FPROUNDING_POSINF;
12526         break;
12527     case 0x3b: /* FCVTZS */
12528         rmode = FPROUNDING_ZERO;
12529         break;
12530     case 0x5a: /* FCVTNU */
12531         rmode = FPROUNDING_TIEEVEN;
12532         break;
12533     case 0x5b: /* FCVTMU */
12534         rmode = FPROUNDING_NEGINF;
12535         break;
12536     case 0x5c: /* FCVTAU */
12537         rmode = FPROUNDING_TIEAWAY;
12538         break;
12539     case 0x7a: /* FCVTPU */
12540         rmode = FPROUNDING_POSINF;
12541         break;
12542     case 0x7b: /* FCVTZU */
12543         rmode = FPROUNDING_ZERO;
12544         break;
12545     case 0x2f: /* FABS */
12546     case 0x6f: /* FNEG */
12547         need_fpst = false;
12548         break;
12549     case 0x7d: /* FRSQRTE */
12550     case 0x7f: /* FSQRT (vector) */
12551         break;
12552     default:
12553         unallocated_encoding(s);
12554         return;
12555     }
12556 
12558     /* Check additional constraints for the scalar encoding */
12559     if (is_scalar) {
12560         if (!is_q) {
12561             unallocated_encoding(s);
12562             return;
12563         }
12564         /* FRINTxx is only in the vector form */
12565         if (only_in_vector) {
12566             unallocated_encoding(s);
12567             return;
12568         }
12569     }
12570 
12571     if (!fp_access_check(s)) {
12572         return;
12573     }
12574 
12575     if (rmode >= 0 || need_fpst) {
12576         tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
12577     }
12578 
12579     if (rmode >= 0) {
12580         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12581     }
12582 
12583     if (is_scalar) {
12584         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12585         TCGv_i32 tcg_res = tcg_temp_new_i32();
12586 
12587         switch (fpop) {
12588         case 0x1a: /* FCVTNS */
12589         case 0x1b: /* FCVTMS */
12590         case 0x1c: /* FCVTAS */
12591         case 0x3a: /* FCVTPS */
12592         case 0x3b: /* FCVTZS */
12593             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12594             break;
12595         case 0x3d: /* FRECPE */
12596             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12597             break;
12598         case 0x3f: /* FRECPX */
12599             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12600             break;
12601         case 0x5a: /* FCVTNU */
12602         case 0x5b: /* FCVTMU */
12603         case 0x5c: /* FCVTAU */
12604         case 0x7a: /* FCVTPU */
12605         case 0x7b: /* FCVTZU */
12606             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12607             break;
12608         case 0x6f: /* FNEG */
12609             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12610             break;
12611         case 0x7d: /* FRSQRTE */
12612             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12613             break;
12614         default:
12615             g_assert_not_reached();
12616         }
12617 
12618         /* limit any sign extension going on */
12619         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12620         write_fp_sreg(s, rd, tcg_res);
12621     } else {
12622         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
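            /* One pass per fp16 lane: 8 in a 128-bit Vd, 4 in a 64-bit Vd. */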
12623             TCGv_i32 tcg_op = tcg_temp_new_i32();
12624             TCGv_i32 tcg_res = tcg_temp_new_i32();
12625 
12626             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12627 
12628             switch (fpop) {
12629             case 0x1a: /* FCVTNS */
12630             case 0x1b: /* FCVTMS */
12631             case 0x1c: /* FCVTAS */
12632             case 0x3a: /* FCVTPS */
12633             case 0x3b: /* FCVTZS */
12634                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12635                 break;
12636             case 0x3d: /* FRECPE */
12637                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12638                 break;
12639             case 0x5a: /* FCVTNU */
12640             case 0x5b: /* FCVTMU */
12641             case 0x5c: /* FCVTAU */
12642             case 0x7a: /* FCVTPU */
12643             case 0x7b: /* FCVTZU */
12644                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12645                 break;
12646             case 0x18: /* FRINTN */
12647             case 0x19: /* FRINTM */
12648             case 0x38: /* FRINTP */
12649             case 0x39: /* FRINTZ */
12650             case 0x58: /* FRINTA */
12651             case 0x79: /* FRINTI */
12652                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12653                 break;
12654             case 0x59: /* FRINTX */
12655                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12656                 break;
12657             case 0x2f: /* FABS */
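                /* fp16 FABS/FNEG just clear or flip the sign bit (bit 15). */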
12658                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12659                 break;
12660             case 0x6f: /* FNEG */
12661                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12662                 break;
12663             case 0x7d: /* FRSQRTE */
12664                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12665                 break;
12666             case 0x7f: /* FSQRT */
12667                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12668                 break;
12669             default:
12670                 g_assert_not_reached();
12671             }
12672 
12673             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12674         }
12675 
12676         clear_vec_high(s, is_q, rd);
12677     }
12678 
12679     if (tcg_rmode) {
12680         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12681     }
12682 }
12683 
12684 /* AdvSIMD scalar x indexed element
12685  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12686  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12687  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12688  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12689  * AdvSIMD vector x indexed element
12690  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12691  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12692  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12693  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12694  */
12695 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12696 {
12697     /* This encoding has two kinds of instruction:
12698      *  normal, where we perform elt x idxelt => elt for each
12699      *     element in the vector
12700      *  long, where we perform elt x idxelt and generate a result of
12701      *     double the width of the input element
12702      * The long ops have a 'part' specifier (i.e. they come in INSN, INSN2 pairs).
12703      */
12704     bool is_scalar = extract32(insn, 28, 1);
12705     bool is_q = extract32(insn, 30, 1);
12706     bool u = extract32(insn, 29, 1);
12707     int size = extract32(insn, 22, 2);
12708     int l = extract32(insn, 21, 1);
12709     int m = extract32(insn, 20, 1);
12710     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12711     int rm = extract32(insn, 16, 4);
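    /* For 32- and 64-bit elements the M bit supplies Rm bit 4 below. */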
12712     int opcode = extract32(insn, 12, 4);
12713     int h = extract32(insn, 11, 1);
12714     int rn = extract32(insn, 5, 5);
12715     int rd = extract32(insn, 0, 5);
12716     bool is_long = false;
12717     int is_fp = 0;
12718     bool is_fp16 = false;
12719     int index;
12720     TCGv_ptr fpst;
12721 
12722     switch (16 * u + opcode) {
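    /*
     * The switch key folds the U bit into the opcode: e.g. SMULL is
     * u == 0 with opcode 0xa (key 0x0a), UMULL is u == 1 with the same
     * opcode (key 0x1a).
     */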
12723     case 0x08: /* MUL */
12724     case 0x10: /* MLA */
12725     case 0x14: /* MLS */
12726         if (is_scalar) {
12727             unallocated_encoding(s);
12728             return;
12729         }
12730         break;
12731     case 0x02: /* SMLAL, SMLAL2 */
12732     case 0x12: /* UMLAL, UMLAL2 */
12733     case 0x06: /* SMLSL, SMLSL2 */
12734     case 0x16: /* UMLSL, UMLSL2 */
12735     case 0x0a: /* SMULL, SMULL2 */
12736     case 0x1a: /* UMULL, UMULL2 */
12737         if (is_scalar) {
12738             unallocated_encoding(s);
12739             return;
12740         }
12741         is_long = true;
12742         break;
12743     case 0x03: /* SQDMLAL, SQDMLAL2 */
12744     case 0x07: /* SQDMLSL, SQDMLSL2 */
12745     case 0x0b: /* SQDMULL, SQDMULL2 */
12746         is_long = true;
12747         break;
12748     case 0x0c: /* SQDMULH */
12749     case 0x0d: /* SQRDMULH */
12750         break;
12751     case 0x01: /* FMLA */
12752     case 0x05: /* FMLS */
12753     case 0x09: /* FMUL */
12754     case 0x19: /* FMULX */
12755         is_fp = 1;
12756         break;
12757     case 0x1d: /* SQRDMLAH */
12758     case 0x1f: /* SQRDMLSH */
12759         if (!dc_isar_feature(aa64_rdm, s)) {
12760             unallocated_encoding(s);
12761             return;
12762         }
12763         break;
12764     case 0x0e: /* SDOT */
12765     case 0x1e: /* UDOT */
12766         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12767             unallocated_encoding(s);
12768             return;
12769         }
12770         break;
12771     case 0x0f:
12772         switch (size) {
12773         case 0: /* SUDOT */
12774         case 2: /* USDOT */
12775             if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
12776                 unallocated_encoding(s);
12777                 return;
12778             }
12779             size = MO_32;
12780             break;
12781         case 1: /* BFDOT */
12782             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12783                 unallocated_encoding(s);
12784                 return;
12785             }
12786             size = MO_32;
12787             break;
12788         case 3: /* BFMLAL{B,T} */
12789             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12790                 unallocated_encoding(s);
12791                 return;
12792             }
12793             /* can't set is_fp: it would trigger the wrong fp size checks below */
12794             size = MO_16;
12795             break;
12796         default:
12797             unallocated_encoding(s);
12798             return;
12799         }
12800         break;
12801     case 0x11: /* FCMLA #0 */
12802     case 0x13: /* FCMLA #90 */
12803     case 0x15: /* FCMLA #180 */
12804     case 0x17: /* FCMLA #270 */
12805         if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
12806             unallocated_encoding(s);
12807             return;
12808         }
12809         is_fp = 2;
12810         break;
12811     case 0x00: /* FMLAL */
12812     case 0x04: /* FMLSL */
12813     case 0x18: /* FMLAL2 */
12814     case 0x1c: /* FMLSL2 */
12815         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
12816             unallocated_encoding(s);
12817             return;
12818         }
12819         size = MO_16;
12820         /* is_fp, but we pass cpu_env not fp_status.  */
12821         break;
12822     default:
12823         unallocated_encoding(s);
12824         return;
12825     }
12826 
12827     switch (is_fp) {
12828     case 1: /* normal fp */
12829         /* convert insn encoded size to MemOp size */
12830         switch (size) {
12831         case 0: /* half-precision */
12832             size = MO_16;
12833             is_fp16 = true;
12834             break;
12835         case MO_32: /* single precision */
12836         case MO_64: /* double precision */
12837             break;
12838         default:
12839             unallocated_encoding(s);
12840             return;
12841         }
12842         break;
12843 
12844     case 2: /* complex fp */
12845         /* Each indexable element is a complex pair.  */
12846         size += 1;
12847         switch (size) {
12848         case MO_32:
12849             if (h && !is_q) {
12850                 unallocated_encoding(s);
12851                 return;
12852             }
12853             is_fp16 = true;
12854             break;
12855         case MO_64:
12856             break;
12857         default:
12858             unallocated_encoding(s);
12859             return;
12860         }
12861         break;
12862 
12863     default: /* integer */
12864         switch (size) {
12865         case MO_8:
12866         case MO_64:
12867             unallocated_encoding(s);
12868             return;
12869         }
12870         break;
12871     }
12872     if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
12873         unallocated_encoding(s);
12874         return;
12875     }
12876 
12877     /* Given MemOp size, adjust register and indexing.  */
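    /*
     * For MO_16 elements, H:L:M form a 3-bit index selecting one of the
     * 8 halfwords of Vm; for MO_32 and MO_64, M instead becomes bit 4 of
     * Rm and H:L (or H alone) select the element.
     */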
12878     switch (size) {
12879     case MO_16:
12880         index = h << 2 | l << 1 | m;
12881         break;
12882     case MO_32:
12883         index = h << 1 | l;
12884         rm |= m << 4;
12885         break;
12886     case MO_64:
12887         if (l || !is_q) {
12888             unallocated_encoding(s);
12889             return;
12890         }
12891         index = h;
12892         rm |= m << 4;
12893         break;
12894     default:
12895         g_assert_not_reached();
12896     }
12897 
12898     if (!fp_access_check(s)) {
12899         return;
12900     }
12901 
12902     if (is_fp) {
12903         fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
12904     } else {
12905         fpst = NULL;
12906     }
12907 
12908     switch (16 * u + opcode) {
12909     case 0x0e: /* SDOT */
12910     case 0x1e: /* UDOT */
12911         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12912                          u ? gen_helper_gvec_udot_idx_b
12913                          : gen_helper_gvec_sdot_idx_b);
12914         return;
12915     case 0x0f:
12916         switch (extract32(insn, 22, 2)) {
12917         case 0: /* SUDOT */
12918             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12919                              gen_helper_gvec_sudot_idx_b);
12920             return;
12921         case 1: /* BFDOT */
12922             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12923                              gen_helper_gvec_bfdot_idx);
12924             return;
12925         case 2: /* USDOT */
12926             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12927                              gen_helper_gvec_usdot_idx_b);
12928             return;
12929         case 3: /* BFMLAL{B,T} */
12930             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
12931                               gen_helper_gvec_bfmlal_idx);
12932             return;
12933         }
12934         g_assert_not_reached();
12935     case 0x11: /* FCMLA #0 */
12936     case 0x13: /* FCMLA #90 */
12937     case 0x15: /* FCMLA #180 */
12938     case 0x17: /* FCMLA #270 */
12939         {
12940             int rot = extract32(insn, 13, 2);
12941             int data = (index << 2) | rot;
12942             tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
12943                                vec_full_reg_offset(s, rn),
12944                                vec_full_reg_offset(s, rm),
12945                                vec_full_reg_offset(s, rd), fpst,
12946                                is_q ? 16 : 8, vec_full_reg_size(s), data,
12947                                size == MO_64
12948                                ? gen_helper_gvec_fcmlas_idx
12949                                : gen_helper_gvec_fcmlah_idx);
12950         }
12951         return;
12952 
12953     case 0x00: /* FMLAL */
12954     case 0x04: /* FMLSL */
12955     case 0x18: /* FMLAL2 */
12956     case 0x1c: /* FMLSL2 */
12957         {
12958             int is_s = extract32(opcode, 2, 1);
12959             int is_2 = u;
12960             int data = (index << 2) | (is_2 << 1) | is_s;
12961             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12962                                vec_full_reg_offset(s, rn),
12963                                vec_full_reg_offset(s, rm), cpu_env,
12964                                is_q ? 16 : 8, vec_full_reg_size(s),
12965                                data, gen_helper_gvec_fmlal_idx_a64);
12966         }
12967         return;
12968 
12969     case 0x08: /* MUL */
12970         if (!is_long && !is_scalar) {
12971             static gen_helper_gvec_3 * const fns[3] = {
12972                 gen_helper_gvec_mul_idx_h,
12973                 gen_helper_gvec_mul_idx_s,
12974                 gen_helper_gvec_mul_idx_d,
12975             };
12976             tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
12977                                vec_full_reg_offset(s, rn),
12978                                vec_full_reg_offset(s, rm),
12979                                is_q ? 16 : 8, vec_full_reg_size(s),
12980                                index, fns[size - 1]);
12981             return;
12982         }
12983         break;
12984 
12985     case 0x10: /* MLA */
12986         if (!is_long && !is_scalar) {
12987             static gen_helper_gvec_4 * const fns[3] = {
12988                 gen_helper_gvec_mla_idx_h,
12989                 gen_helper_gvec_mla_idx_s,
12990                 gen_helper_gvec_mla_idx_d,
12991             };
12992             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
12993                                vec_full_reg_offset(s, rn),
12994                                vec_full_reg_offset(s, rm),
12995                                vec_full_reg_offset(s, rd),
12996                                is_q ? 16 : 8, vec_full_reg_size(s),
12997                                index, fns[size - 1]);
12998             return;
12999         }
13000         break;
13001 
13002     case 0x14: /* MLS */
13003         if (!is_long && !is_scalar) {
13004             static gen_helper_gvec_4 * const fns[3] = {
13005                 gen_helper_gvec_mls_idx_h,
13006                 gen_helper_gvec_mls_idx_s,
13007                 gen_helper_gvec_mls_idx_d,
13008             };
13009             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13010                                vec_full_reg_offset(s, rn),
13011                                vec_full_reg_offset(s, rm),
13012                                vec_full_reg_offset(s, rd),
13013                                is_q ? 16 : 8, vec_full_reg_size(s),
13014                                index, fns[size - 1]);
13015             return;
13016         }
13017         break;
13018     }
13019 
13020     if (size == 3) {
13021         TCGv_i64 tcg_idx = tcg_temp_new_i64();
13022         int pass;
13023 
13024         assert(is_fp && is_q && !is_long);
13025 
13026         read_vec_element(s, tcg_idx, rm, index, MO_64);
13027 
13028         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13029             TCGv_i64 tcg_op = tcg_temp_new_i64();
13030             TCGv_i64 tcg_res = tcg_temp_new_i64();
13031 
13032             read_vec_element(s, tcg_op, rn, pass, MO_64);
13033 
13034             switch (16 * u + opcode) {
13035             case 0x05: /* FMLS */
13036                 /* As usual for ARM, separate negation for fused multiply-add */
13037                 gen_helper_vfp_negd(tcg_op, tcg_op);
13038                 /* fall through */
13039             case 0x01: /* FMLA */
13040                 read_vec_element(s, tcg_res, rd, pass, MO_64);
13041                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
13042                 break;
13043             case 0x09: /* FMUL */
13044                 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
13045                 break;
13046             case 0x19: /* FMULX */
13047                 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
13048                 break;
13049             default:
13050                 g_assert_not_reached();
13051             }
13052 
13053             write_vec_element(s, tcg_res, rd, pass, MO_64);
13054         }
13055 
13056         clear_vec_high(s, !is_scalar, rd);
13057     } else if (!is_long) {
13058         /* 32 bit floating point, or 16 or 32 bit integer.
13059          * For the 16 bit scalar case we use the usual Neon helpers and
13060          * rely on the fact that 0 op 0 == 0 with no side effects.
13061          */
13062         TCGv_i32 tcg_idx = tcg_temp_new_i32();
13063         int pass, maxpasses;
13064 
13065         if (is_scalar) {
13066             maxpasses = 1;
13067         } else {
13068             maxpasses = is_q ? 4 : 2;
13069         }
13070 
13071         read_vec_element_i32(s, tcg_idx, rm, index, size);
13072 
13073         if (size == 1 && !is_scalar) {
13074             /* The simplest way to handle the 16x16 indexed ops is to duplicate
13075              * the index into both halves of the 32 bit tcg_idx and then use
13076              * the usual Neon helpers.
13077              */
13078             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
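            /* e.g. tcg_idx 0x0000abcd becomes 0xabcdabcd */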
13079         }
13080 
13081         for (pass = 0; pass < maxpasses; pass++) {
13082             TCGv_i32 tcg_op = tcg_temp_new_i32();
13083             TCGv_i32 tcg_res = tcg_temp_new_i32();
13084 
13085             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13086 
13087             switch (16 * u + opcode) {
13088             case 0x08: /* MUL */
13089             case 0x10: /* MLA */
13090             case 0x14: /* MLS */
13091             {
13092                 static NeonGenTwoOpFn * const fns[2][2] = {
13093                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13094                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
13095                 };
13096                 NeonGenTwoOpFn *genfn;
13097                 bool is_sub = opcode == 0x4;
13098 
13099                 if (size == 1) {
13100                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13101                 } else {
13102                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13103                 }
13104                 if (opcode == 0x8) {
13105                     break;
13106                 }
13107                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13108                 genfn = fns[size - 1][is_sub];
13109                 genfn(tcg_res, tcg_op, tcg_res);
13110                 break;
13111             }
13112             case 0x05: /* FMLS */
13113             case 0x01: /* FMLA */
13114                 read_vec_element_i32(s, tcg_res, rd, pass,
13115                                      is_scalar ? size : MO_32);
13116                 switch (size) {
13117                 case 1:
13118                     if (opcode == 0x5) {
13119                         /* As usual for ARM, separate negation for fused
13120                          * multiply-add */
13121                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13122                     }
13123                     if (is_scalar) {
13124                         gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13125                                                    tcg_res, fpst);
13126                     } else {
13127                         gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13128                                                     tcg_res, fpst);
13129                     }
13130                     break;
13131                 case 2:
13132                     if (opcode == 0x5) {
13133                         /* As usual for ARM, separate negation for
13134                          * fused multiply-add */
13135                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13136                     }
13137                     gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13138                                            tcg_res, fpst);
13139                     break;
13140                 default:
13141                     g_assert_not_reached();
13142                 }
13143                 break;
13144             case 0x09: /* FMUL */
13145                 switch (size) {
13146                 case 1:
13147                     if (is_scalar) {
13148                         gen_helper_advsimd_mulh(tcg_res, tcg_op,
13149                                                 tcg_idx, fpst);
13150                     } else {
13151                         gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13152                                                  tcg_idx, fpst);
13153                     }
13154                     break;
13155                 case 2:
13156                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13157                     break;
13158                 default:
13159                     g_assert_not_reached();
13160                 }
13161                 break;
13162             case 0x19: /* FMULX */
13163                 switch (size) {
13164                 case 1:
13165                     if (is_scalar) {
13166                         gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13167                                                  tcg_idx, fpst);
13168                     } else {
13169                         gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13170                                                   tcg_idx, fpst);
13171                     }
13172                     break;
13173                 case 2:
13174                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13175                     break;
13176                 default:
13177                     g_assert_not_reached();
13178                 }
13179                 break;
13180             case 0x0c: /* SQDMULH */
13181                 if (size == 1) {
13182                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
13183                                                tcg_op, tcg_idx);
13184                 } else {
13185                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
13186                                                tcg_op, tcg_idx);
13187                 }
13188                 break;
13189             case 0x0d: /* SQRDMULH */
13190                 if (size == 1) {
13191                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
13192                                                 tcg_op, tcg_idx);
13193                 } else {
13194                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
13195                                                 tcg_op, tcg_idx);
13196                 }
13197                 break;
13198             case 0x1d: /* SQRDMLAH */
13199                 read_vec_element_i32(s, tcg_res, rd, pass,
13200                                      is_scalar ? size : MO_32);
13201                 if (size == 1) {
13202                     gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
13203                                                 tcg_op, tcg_idx, tcg_res);
13204                 } else {
13205                     gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
13206                                                 tcg_op, tcg_idx, tcg_res);
13207                 }
13208                 break;
13209             case 0x1f: /* SQRDMLSH */
13210                 read_vec_element_i32(s, tcg_res, rd, pass,
13211                                      is_scalar ? size : MO_32);
13212                 if (size == 1) {
13213                     gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
13214                                                 tcg_op, tcg_idx, tcg_res);
13215                 } else {
13216                     gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
13217                                                 tcg_op, tcg_idx, tcg_res);
13218                 }
13219                 break;
13220             default:
13221                 g_assert_not_reached();
13222             }
13223 
13224             if (is_scalar) {
13225                 write_fp_sreg(s, rd, tcg_res);
13226             } else {
13227                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13228             }
13229         }
13230 
13231         clear_vec_high(s, is_q, rd);
13232     } else {
13233         /* long ops: 16x16->32 or 32x32->64 */
13234         TCGv_i64 tcg_res[2];
13235         int pass;
13236         bool satop = extract32(opcode, 0, 1);
13237         MemOp memop = MO_32;
13238 
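        /*
         * The saturating-doubling ops (SQDMLAL etc) are always signed;
         * the others take their signedness from the U bit.
         */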
13239         if (satop || !u) {
13240             memop |= MO_SIGN;
13241         }
13242 
13243         if (size == 2) {
13244             TCGv_i64 tcg_idx = tcg_temp_new_i64();
13245 
13246             read_vec_element(s, tcg_idx, rm, index, memop);
13247 
13248             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13249                 TCGv_i64 tcg_op = tcg_temp_new_i64();
13250                 TCGv_i64 tcg_passres;
13251                 int passelt;
13252 
13253                 if (is_scalar) {
13254                     passelt = 0;
13255                 } else {
13256                     passelt = pass + (is_q * 2);
13257                 }
13258 
13259                 read_vec_element(s, tcg_op, rn, passelt, memop);
13260 
13261                 tcg_res[pass] = tcg_temp_new_i64();
13262 
13263                 if (opcode == 0xa || opcode == 0xb) {
13264                     /* Non-accumulating ops */
13265                     tcg_passres = tcg_res[pass];
13266                 } else {
13267                     tcg_passres = tcg_temp_new_i64();
13268                 }
13269 
13270                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13271 
13272                 if (satop) {
13273                     /* saturating doubling: add the product to itself */
13274                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
13275                                                       tcg_passres, tcg_passres);
13276                 }
13277 
13278                 if (opcode == 0xa || opcode == 0xb) {
13279                     continue;
13280                 }
13281 
13282                 /* Accumulating op: handle accumulate step */
13283                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13284 
13285                 switch (opcode) {
13286                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13287                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13288                     break;
13289                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13290                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13291                     break;
13292                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13293                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
13294                     /* fall through */
13295                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13296                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
13297                                                       tcg_res[pass],
13298                                                       tcg_passres);
13299                     break;
13300                 default:
13301                     g_assert_not_reached();
13302                 }
13303             }
13304 
13305             clear_vec_high(s, !is_scalar, rd);
13306         } else {
13307             TCGv_i32 tcg_idx = tcg_temp_new_i32();
13308 
13309             assert(size == 1);
13310             read_vec_element_i32(s, tcg_idx, rm, index, size);
13311 
13312             if (!is_scalar) {
13313                 /* The simplest way to handle the 16x16 indexed ops is to
13314                  * duplicate the index into both halves of the 32 bit tcg_idx
13315                  * and then use the usual Neon helpers.
13316                  */
13317                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
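                /* e.g. tcg_idx 0x0000abcd becomes 0xabcdabcd */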
13318             }
13319 
13320             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13321                 TCGv_i32 tcg_op = tcg_temp_new_i32();
13322                 TCGv_i64 tcg_passres;
13323 
13324                 if (is_scalar) {
13325                     read_vec_element_i32(s, tcg_op, rn, pass, size);
13326                 } else {
13327                     read_vec_element_i32(s, tcg_op, rn,
13328                                          pass + (is_q * 2), MO_32);
13329                 }
13330 
13331                 tcg_res[pass] = tcg_temp_new_i64();
13332 
13333                 if (opcode == 0xa || opcode == 0xb) {
13334                     /* Non-accumulating ops */
13335                     tcg_passres = tcg_res[pass];
13336                 } else {
13337                     tcg_passres = tcg_temp_new_i64();
13338                 }
13339 
13340                 if (memop & MO_SIGN) {
13341                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13342                 } else {
13343                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13344                 }
13345                 if (satop) {
13346                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13347                                                       tcg_passres, tcg_passres);
13348                 }
13349 
13350                 if (opcode == 0xa || opcode == 0xb) {
13351                     continue;
13352                 }
13353 
13354                 /* Accumulating op: handle accumulate step */
13355                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13356 
13357                 switch (opcode) {
13358                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13359                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13360                                              tcg_passres);
13361                     break;
13362                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13363                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13364                                              tcg_passres);
13365                     break;
13366                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13367                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13368                     /* fall through */
13369                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13370                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13371                                                       tcg_res[pass],
13372                                                       tcg_passres);
13373                     break;
13374                 default:
13375                     g_assert_not_reached();
13376                 }
13377             }
13378 
13379             if (is_scalar) {
13380                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13381             }
13382         }
13383 
13384         if (is_scalar) {
13385             tcg_res[1] = tcg_constant_i64(0);
13386         }
13387 
13388         for (pass = 0; pass < 2; pass++) {
13389             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13390         }
13391     }
13392 }
13393 
13394 /* Crypto AES
13395  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13396  * +-----------------+------+-----------+--------+-----+------+------+
13397  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13398  * +-----------------+------+-----------+--------+-----+------+------+
13399  */
13400 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13401 {
13402     int size = extract32(insn, 22, 2);
13403     int opcode = extract32(insn, 12, 5);
13404     int rn = extract32(insn, 5, 5);
13405     int rd = extract32(insn, 0, 5);
13406     int decrypt;
13407     gen_helper_gvec_2 *genfn2 = NULL;
13408     gen_helper_gvec_3 *genfn3 = NULL;
13409 
13410     if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13411         unallocated_encoding(s);
13412         return;
13413     }
13414 
13415     switch (opcode) {
13416     case 0x4: /* AESE */
13417         decrypt = 0;
13418         genfn3 = gen_helper_crypto_aese;
13419         break;
13420     case 0x6: /* AESMC */
13421         decrypt = 0;
13422         genfn2 = gen_helper_crypto_aesmc;
13423         break;
13424     case 0x5: /* AESD */
13425         decrypt = 1;
13426         genfn3 = gen_helper_crypto_aese;
13427         break;
13428     case 0x7: /* AESIMC */
13429         decrypt = 1;
13430         genfn2 = gen_helper_crypto_aesmc;
13431         break;
13432     default:
13433         unallocated_encoding(s);
13434         return;
13435     }
13436 
13437     if (!fp_access_check(s)) {
13438         return;
13439     }
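    /* AESD/AESIMC reuse the AESE/AESMC helpers, selected by 'decrypt'. */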
13440     if (genfn2) {
13441         gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2);
13442     } else {
13443         gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3);
13444     }
13445 }
13446 
13447 /* Crypto three-reg SHA
13448  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
13449  * +-----------------+------+---+------+---+--------+-----+------+------+
13450  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
13451  * +-----------------+------+---+------+---+--------+-----+------+------+
13452  */
13453 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13454 {
13455     int size = extract32(insn, 22, 2);
13456     int opcode = extract32(insn, 12, 3);
13457     int rm = extract32(insn, 16, 5);
13458     int rn = extract32(insn, 5, 5);
13459     int rd = extract32(insn, 0, 5);
13460     gen_helper_gvec_3 *genfn;
13461     bool feature;
13462 
13463     if (size != 0) {
13464         unallocated_encoding(s);
13465         return;
13466     }
13467 
13468     switch (opcode) {
13469     case 0: /* SHA1C */
13470         genfn = gen_helper_crypto_sha1c;
13471         feature = dc_isar_feature(aa64_sha1, s);
13472         break;
13473     case 1: /* SHA1P */
13474         genfn = gen_helper_crypto_sha1p;
13475         feature = dc_isar_feature(aa64_sha1, s);
13476         break;
13477     case 2: /* SHA1M */
13478         genfn = gen_helper_crypto_sha1m;
13479         feature = dc_isar_feature(aa64_sha1, s);
13480         break;
13481     case 3: /* SHA1SU0 */
13482         genfn = gen_helper_crypto_sha1su0;
13483         feature = dc_isar_feature(aa64_sha1, s);
13484         break;
13485     case 4: /* SHA256H */
13486         genfn = gen_helper_crypto_sha256h;
13487         feature = dc_isar_feature(aa64_sha256, s);
13488         break;
13489     case 5: /* SHA256H2 */
13490         genfn = gen_helper_crypto_sha256h2;
13491         feature = dc_isar_feature(aa64_sha256, s);
13492         break;
13493     case 6: /* SHA256SU1 */
13494         genfn = gen_helper_crypto_sha256su1;
13495         feature = dc_isar_feature(aa64_sha256, s);
13496         break;
13497     default:
13498         unallocated_encoding(s);
13499         return;
13500     }
13501 
13502     if (!feature) {
13503         unallocated_encoding(s);
13504         return;
13505     }
13506 
13507     if (!fp_access_check(s)) {
13508         return;
13509     }
13510     gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
13511 }
13512 
13513 /* Crypto two-reg SHA
13514  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13515  * +-----------------+------+-----------+--------+-----+------+------+
13516  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13517  * +-----------------+------+-----------+--------+-----+------+------+
13518  */
13519 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13520 {
13521     int size = extract32(insn, 22, 2);
13522     int opcode = extract32(insn, 12, 5);
13523     int rn = extract32(insn, 5, 5);
13524     int rd = extract32(insn, 0, 5);
13525     gen_helper_gvec_2 *genfn;
13526     bool feature;
13527 
13528     if (size != 0) {
13529         unallocated_encoding(s);
13530         return;
13531     }
13532 
13533     switch (opcode) {
13534     case 0: /* SHA1H */
13535         feature = dc_isar_feature(aa64_sha1, s);
13536         genfn = gen_helper_crypto_sha1h;
13537         break;
13538     case 1: /* SHA1SU1 */
13539         feature = dc_isar_feature(aa64_sha1, s);
13540         genfn = gen_helper_crypto_sha1su1;
13541         break;
13542     case 2: /* SHA256SU0 */
13543         feature = dc_isar_feature(aa64_sha256, s);
13544         genfn = gen_helper_crypto_sha256su0;
13545         break;
13546     default:
13547         unallocated_encoding(s);
13548         return;
13549     }
13550 
13551     if (!feature) {
13552         unallocated_encoding(s);
13553         return;
13554     }
13555 
13556     if (!fp_access_check(s)) {
13557         return;
13558     }
13559     gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
13560 }
13561 
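/* RAX1 (FEAT_SHA3): d = n ^ rol64(m, 1), applied per 64-bit lane. */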
13562 static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
13563 {
13564     tcg_gen_rotli_i64(d, m, 1);
13565     tcg_gen_xor_i64(d, d, n);
13566 }
13567 
13568 static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
13569 {
13570     tcg_gen_rotli_vec(vece, d, m, 1);
13571     tcg_gen_xor_vec(vece, d, d, n);
13572 }
13573 
13574 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
13575                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
13576 {
13577     static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
13578     static const GVecGen3 op = {
13579         .fni8 = gen_rax1_i64,
13580         .fniv = gen_rax1_vec,
13581         .opt_opc = vecop_list,
13582         .fno = gen_helper_crypto_rax1,
13583         .vece = MO_64,
13584     };
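    /*
     * tcg_gen_gvec_3 then picks an expansion: roughly, the .fniv vector
     * form when the host supports rotli_vec, else the per-lane .fni8
     * form, with the .fno out-of-line helper as a fallback.
     */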
13585     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
13586 }
13587 
13588 /* Crypto three-reg SHA512
13589  *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
13590  * +-----------------------+------+---+---+-----+--------+------+------+
13591  * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
13592  * +-----------------------+------+---+---+-----+--------+------+------+
13593  */
13594 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13595 {
13596     int opcode = extract32(insn, 10, 2);
13597     int o = extract32(insn, 14, 1);
13598     int rm = extract32(insn, 16, 5);
13599     int rn = extract32(insn, 5, 5);
13600     int rd = extract32(insn, 0, 5);
13601     bool feature;
13602     gen_helper_gvec_3 *oolfn = NULL;
13603     GVecGen3Fn *gvecfn = NULL;
13604 
13605     if (o == 0) {
13606         switch (opcode) {
13607         case 0: /* SHA512H */
13608             feature = dc_isar_feature(aa64_sha512, s);
13609             oolfn = gen_helper_crypto_sha512h;
13610             break;
13611         case 1: /* SHA512H2 */
13612             feature = dc_isar_feature(aa64_sha512, s);
13613             oolfn = gen_helper_crypto_sha512h2;
13614             break;
13615         case 2: /* SHA512SU1 */
13616             feature = dc_isar_feature(aa64_sha512, s);
13617             oolfn = gen_helper_crypto_sha512su1;
13618             break;
13619         case 3: /* RAX1 */
13620             feature = dc_isar_feature(aa64_sha3, s);
13621             gvecfn = gen_gvec_rax1;
13622             break;
13623         default:
13624             g_assert_not_reached();
13625         }
13626     } else {
13627         switch (opcode) {
13628         case 0: /* SM3PARTW1 */
13629             feature = dc_isar_feature(aa64_sm3, s);
13630             oolfn = gen_helper_crypto_sm3partw1;
13631             break;
13632         case 1: /* SM3PARTW2 */
13633             feature = dc_isar_feature(aa64_sm3, s);
13634             oolfn = gen_helper_crypto_sm3partw2;
13635             break;
13636         case 2: /* SM4EKEY */
13637             feature = dc_isar_feature(aa64_sm4, s);
13638             oolfn = gen_helper_crypto_sm4ekey;
13639             break;
13640         default:
13641             unallocated_encoding(s);
13642             return;
13643         }
13644     }
13645 
13646     if (!feature) {
13647         unallocated_encoding(s);
13648         return;
13649     }
13650 
13651     if (!fp_access_check(s)) {
13652         return;
13653     }
13654 
13655     if (oolfn) {
13656         gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
13657     } else {
13658         gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
13659     }
13660 }
13661 
13662 /* Crypto two-reg SHA512
13663  *  31                                     12  11  10  9    5 4    0
13664  * +-----------------------------------------+--------+------+------+
13665  * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
13666  * +-----------------------------------------+--------+------+------+
13667  */
13668 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13669 {
13670     int opcode = extract32(insn, 10, 2);
13671     int rn = extract32(insn, 5, 5);
13672     int rd = extract32(insn, 0, 5);
13673     bool feature;
13674 
13675     switch (opcode) {
13676     case 0: /* SHA512SU0 */
13677         feature = dc_isar_feature(aa64_sha512, s);
13678         break;
13679     case 1: /* SM4E */
13680         feature = dc_isar_feature(aa64_sm4, s);
13681         break;
13682     default:
13683         unallocated_encoding(s);
13684         return;
13685     }
13686 
13687     if (!feature) {
13688         unallocated_encoding(s);
13689         return;
13690     }
13691 
13692     if (!fp_access_check(s)) {
13693         return;
13694     }
13695 
13696     switch (opcode) {
13697     case 0: /* SHA512SU0 */
13698         gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
13699         break;
13700     case 1: /* SM4E */
13701         gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
13702         break;
13703     default:
13704         g_assert_not_reached();
13705     }
13706 }
13707 
13708 /* Crypto four-register
13709  *  31               23 22 21 20  16 15  14  10 9    5 4    0
13710  * +-------------------+-----+------+---+------+------+------+
13711  * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
13712  * +-------------------+-----+------+---+------+------+------+
13713  */
13714 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13715 {
13716     int op0 = extract32(insn, 21, 2);
13717     int rm = extract32(insn, 16, 5);
13718     int ra = extract32(insn, 10, 5);
13719     int rn = extract32(insn, 5, 5);
13720     int rd = extract32(insn, 0, 5);
13721     bool feature;
13722 
13723     switch (op0) {
13724     case 0: /* EOR3 */
13725     case 1: /* BCAX */
13726         feature = dc_isar_feature(aa64_sha3, s);
13727         break;
13728     case 2: /* SM3SS1 */
13729         feature = dc_isar_feature(aa64_sm3, s);
13730         break;
13731     default:
13732         unallocated_encoding(s);
13733         return;
13734     }
13735 
13736     if (!feature) {
13737         unallocated_encoding(s);
13738         return;
13739     }
13740 
13741     if (!fp_access_check(s)) {
13742         return;
13743     }
13744 
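    /* EOR3: Vd = Vn ^ Vm ^ Va;  BCAX: Vd = Vn ^ (Vm & ~Va). */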
13745     if (op0 < 2) {
13746         TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13747         int pass;
13748 
13749         tcg_op1 = tcg_temp_new_i64();
13750         tcg_op2 = tcg_temp_new_i64();
13751         tcg_op3 = tcg_temp_new_i64();
13752         tcg_res[0] = tcg_temp_new_i64();
13753         tcg_res[1] = tcg_temp_new_i64();
13754 
13755         for (pass = 0; pass < 2; pass++) {
13756             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13757             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13758             read_vec_element(s, tcg_op3, ra, pass, MO_64);
13759 
13760             if (op0 == 0) {
13761                 /* EOR3 */
13762                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13763             } else {
13764                 /* BCAX */
13765                 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13766             }
13767             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13768         }
13769         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13770         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13771     } else {
13772         TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13773 
13774         tcg_op1 = tcg_temp_new_i32();
13775         tcg_op2 = tcg_temp_new_i32();
13776         tcg_op3 = tcg_temp_new_i32();
13777         tcg_res = tcg_temp_new_i32();
13778         tcg_zero = tcg_constant_i32(0);
13779 
13780         read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13781         read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13782         read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13783 
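        /* SM3SS1: lane 3 = rol32(rol32(n, 12) + m + a, 7); rotri by 20/25. */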
13784         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13785         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13786         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13787         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13788 
13789         write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13790         write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13791         write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13792         write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13793     }
13794 }
13795 
13796 /* Crypto XAR
13797  *  31                   21 20  16 15    10 9    5 4    0
13798  * +-----------------------+------+--------+------+------+
13799  * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13800  * +-----------------------+------+--------+------+------+
13801  */
13802 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13803 {
13804     int rm = extract32(insn, 16, 5);
13805     int imm6 = extract32(insn, 10, 6);
13806     int rn = extract32(insn, 5, 5);
13807     int rd = extract32(insn, 0, 5);
13808 
13809     if (!dc_isar_feature(aa64_sha3, s)) {
13810         unallocated_encoding(s);
13811         return;
13812     }
13813 
13814     if (!fp_access_check(s)) {
13815         return;
13816     }
13817 
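    /* XAR: Vd = ror64(Vn ^ Vm, imm6), per 64-bit lane. */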
13818     gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
13819                  vec_full_reg_offset(s, rn),
13820                  vec_full_reg_offset(s, rm), imm6, 16,
13821                  vec_full_reg_size(s));
13822 }
13823 
13824 /* Crypto three-reg imm2
13825  *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
13826  * +-----------------------+------+-----+------+--------+------+------+
13827  * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
13828  * +-----------------------+------+-----+------+--------+------+------+
13829  */
13830 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13831 {
13832     static gen_helper_gvec_3 * const fns[4] = {
13833         gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
13834         gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
13835     };
13836     int opcode = extract32(insn, 10, 2);
13837     int imm2 = extract32(insn, 12, 2);
13838     int rm = extract32(insn, 16, 5);
13839     int rn = extract32(insn, 5, 5);
13840     int rd = extract32(insn, 0, 5);
13841 
13842     if (!dc_isar_feature(aa64_sm3, s)) {
13843         unallocated_encoding(s);
13844         return;
13845     }
13846 
13847     if (!fp_access_check(s)) {
13848         return;
13849     }
13850 
13851     gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
13852 }
13853 
13854 /* C3.6 Data processing - SIMD, inc Crypto
13855  *
13856  * As the decode gets a little complex we are using a table based
13857  * approach for this part of the decode.
13858  */
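/*
 * An entry matches when (insn & mask) == pattern; lookup_disas_fn scans
 * the table in order and returns the first match, so more specific
 * patterns must precede overlapping general ones (see the simd_mod_imm
 * note below).
 */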
13859 static const AArch64DecodeTable data_proc_simd[] = {
13860     /* pattern  ,  mask     ,  fn                        */
13861     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
13862     { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
13863     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
13864     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
13865     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
13866     { 0x0e000400, 0x9fe08400, disas_simd_copy },
13867     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
13868     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
13869     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
13870     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
13871     { 0x0e000000, 0xbf208c00, disas_simd_tb },
13872     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
13873     { 0x2e000000, 0xbf208400, disas_simd_ext },
13874     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
13875     { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
13876     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
13877     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
13878     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
13879     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
13880     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
13881     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
13882     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
13883     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
13884     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
13885     { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
13886     { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
13887     { 0xce000000, 0xff808000, disas_crypto_four_reg },
13888     { 0xce800000, 0xffe00000, disas_crypto_xar },
13889     { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
13890     { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
13891     { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
13892     { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
13893     { 0x00000000, 0x00000000, NULL }
13894 };
13895 
13896 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13897 {
13898     /* Note that this is called with all non-FP cases from
13899      * table C3-6 so it must UNDEF for entries not specifically
13900      * allocated to instructions in that table.
13901      */
13902     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13903     if (fn) {
13904         fn(s, insn);
13905     } else {
13906         unallocated_encoding(s);
13907     }
13908 }
13909 
13910 /* C3.6 Data processing - SIMD and floating point */
13911 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13912 {
13913     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13914         disas_data_proc_fp(s, insn);
13915     } else {
13916         /* SIMD, including crypto */
13917         disas_data_proc_simd(s, insn);
13918     }
13919 }
13920 
13921 static bool trans_OK(DisasContext *s, arg_OK *a)
13922 {
13923     return true;
13924 }
13925 
13926 static bool trans_FAIL(DisasContext *s, arg_OK *a)
13927 {
13928     s->is_nonstreaming = true;
13929     return true;
13930 }
13931 
13932 /**
13933  * is_guarded_page:
13934  * @env: The cpu environment
13935  * @s: The DisasContext
13936  *
13937  * Return true if the page is guarded.
13938  */
13939 static bool is_guarded_page(CPUARMState *env, DisasContext *s)
13940 {
13941     uint64_t addr = s->base.pc_first;
13942 #ifdef CONFIG_USER_ONLY
13943     return page_get_flags(addr) & PAGE_BTI;
13944 #else
13945     CPUTLBEntryFull *full;
13946     void *host;
13947     int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
13948     int flags;
13949 
13950     /*
13951      * We test this immediately after reading an insn, which means
13952      * that the TLB entry must be present and valid, and thus this
13953      * access will never raise an exception.
13954      */
13955     flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
13956                               false, &host, &full, 0);
13957     assert(!(flags & TLB_INVALID_MASK));
13958 
13959     return full->guarded;
13960 #endif
13961 }
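/*
 * Note (an assumption about the softmmu page-table walker): full->guarded
 * caches the stage-1 GP descriptor bit collected when the TLB entry was
 * filled, so no additional page-table walk is needed here; the user-mode
 * path keeps the equivalent information in the PAGE_BTI page flag.
 */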
13962 
13963 /**
13964  * btype_destination_ok:
13965  * @insn: The instruction at the branch destination
13966  * @bt: SCTLR_ELx.BT
13967  * @btype: PSTATE.BTYPE, which is known to be non-zero here
13968  *
13969  * On a guarded page, only a limited set of insns may be
13970  * present at the branch target:
13971  *   - branch target identifiers (BTI),
13972  *   - PACIASP and PACIBSP,
13973  *   - the BRK insn,
13974  *   - the HLT insn.
13975  * Anything else causes a Branch Target Exception.
13976  *
13977  * Return true if the branch is compatible, false to raise BTITRAP.
13978  */
13979 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
13980 {
13981     if ((insn & 0xfffff01fu) == 0xd503201fu) {
13982         /* HINT space */
13983         switch (extract32(insn, 5, 7)) {
13984         case 0b011001: /* PACIASP */
13985         case 0b011011: /* PACIBSP */
13986             /*
13987              * If SCTLR_ELx.BT, then PACI*SP are not compatible
13988              * with btype == 3.  Otherwise all btype are ok.
13989              */
13990             return !bt || btype != 3;
13991         case 0b100000: /* BTI */
13992             /* Not compatible with any btype.  */
13993             return false;
13994         case 0b100010: /* BTI c */
13995             /* Not compatible with btype == 3 */
13996             return btype != 3;
13997         case 0b100100: /* BTI j */
13998             /* Not compatible with btype == 2 */
13999             return btype != 2;
14000         case 0b100110: /* BTI jc */
14001             /* Compatible with any btype.  */
14002             return true;
14003         }
14004     } else {
14005         switch (insn & 0xffe0001fu) {
14006         case 0xd4200000u: /* BRK */
14007         case 0xd4400000u: /* HLT */
14008             /* Give priority to the breakpoint exception.  */
14009             return true;
14010         }
14011     }
14012     return false;
14013 }
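/*
 * Worked example (illustrative): BTI c assembles to 0xd503245f, so
 * (insn & 0xfffff01f) == 0xd503201f places it in HINT space, and
 * extract32(insn, 5, 7) == 0b0100010 selects the "BTI c" case above,
 * which is compatible with every btype except 3.
 */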
14014 
14015 /* C3.1 A64 instruction index by encoding */
14016 static void disas_a64_legacy(DisasContext *s, uint32_t insn)
14017 {
14018     switch (extract32(insn, 25, 4)) {
14019     case 0xa: case 0xb: /* Branch, exception generation and system insns */
14020         disas_b_exc_sys(s, insn);
14021         break;
14022     case 0x4:
14023     case 0x6:
14024     case 0xc:
14025     case 0xe:      /* Loads and stores */
14026         disas_ldst(s, insn);
14027         break;
14028     case 0x5:
14029     case 0xd:      /* Data processing - register */
14030         disas_data_proc_reg(s, insn);
14031         break;
14032     case 0x7:
14033     case 0xf:      /* Data processing - SIMD and floating point */
14034         disas_data_proc_simd_fp(s, insn);
14035         break;
14036     default:
14037         unallocated_encoding(s);
14038         break;
14039     }
14040 }
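/*
 * Worked example (illustrative): LDR X0, [X1] encodes as 0xf9400020;
 * extract32(insn, 25, 4) == 0xc, so it is dispatched to disas_ldst().
 */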
14041 
14042 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
14043                                           CPUState *cpu)
14044 {
14045     DisasContext *dc = container_of(dcbase, DisasContext, base);
14046     CPUARMState *env = cpu->env_ptr;
14047     ARMCPU *arm_cpu = env_archcpu(env);
14048     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
14049     int bound, core_mmu_idx;
14050 
14051     dc->isar = &arm_cpu->isar;
14052     dc->condjmp = 0;
14053     dc->pc_save = dc->base.pc_first;
14054     dc->aarch64 = true;
14055     dc->thumb = false;
14056     dc->sctlr_b = 0;
14057     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
14058     dc->condexec_mask = 0;
14059     dc->condexec_cond = 0;
14060     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
14061     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
14062     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
14063     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
14064     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
14065     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
14066 #if !defined(CONFIG_USER_ONLY)
14067     dc->user = (dc->current_el == 0);
14068 #endif
14069     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
14070     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
14071     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
14072     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
14073     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
14074     dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET);
14075     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
14076     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
14077     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
14078     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
14079     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
14080     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
14081     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
14082     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
14083     dc->ata = EX_TBFLAG_A64(tb_flags, ATA);
14084     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
14085     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
14086     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
14087     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
14088     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
14089     dc->vec_len = 0;
14090     dc->vec_stride = 0;
14091     dc->cp_regs = arm_cpu->cp_regs;
14092     dc->features = env->features;
14093     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
14094 
14095 #ifdef CONFIG_USER_ONLY
14096     /* In sve_probe_page, we assume TBI is enabled. */
14097     tcg_debug_assert(dc->tbid & 1);
14098 #endif
14099 
14100     dc->lse2 = dc_isar_feature(aa64_lse2, dc);
14101 
14102     /* Single step state. The code-generation logic here is:
14103      *  SS_ACTIVE == 0:
14104      *   generate code with no special handling for single-stepping (except
14105      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
14106      *   this happens anyway because those changes are all system register or
14107      *   PSTATE writes).
14108      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
14109      *   emit code for one insn
14110      *   emit code to clear PSTATE.SS
14111      *   emit code to generate software step exception for completed step
14112      *   end TB (as usual for having generated an exception)
14113      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
14114      *   emit code to generate a software step exception
14115      *   end the TB
14116      */
14117     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
14118     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
14119     dc->is_ldex = false;
14120 
14121     /* Bound the number of insns to execute to those left on the page.  */
14122     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
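    /*
     * Illustrative arithmetic: with 4 KiB pages, TARGET_PAGE_MASK is
     * 0xfffffffffffff000, so for pc_first == 0x400ff0 this computes
     * -(0xfffffffffffffff0) / 4 == 0x10 / 4 == 4 insns left on the page.
     */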
14123 
14124     /* If architectural single step active, limit to 1.  */
14125     if (dc->ss_active) {
14126         bound = 1;
14127     }
14128     dc->base.max_insns = MIN(dc->base.max_insns, bound);
14129 }
14130 
14131 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
14132 {
14133 }
14134 
14135 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
14136 {
14137     DisasContext *dc = container_of(dcbase, DisasContext, base);
14138     target_ulong pc_arg = dc->base.pc_next;
14139 
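    /*
     * With CF_PCREL the TB may execute at any virtual address sharing
     * the same page offset, so (as an illustrative note) only the
     * page-relative part of the PC is recorded here; the absolute PC
     * is reconstructed from the CPU state when unwinding.
     */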
14140     if (tb_cflags(dcbase->tb) & CF_PCREL) {
14141         pc_arg &= ~TARGET_PAGE_MASK;
14142     }
14143     tcg_gen_insn_start(pc_arg, 0, 0);
14144     dc->insn_start = tcg_last_op();
14145 }
14146 
14147 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
14148 {
14149     DisasContext *s = container_of(dcbase, DisasContext, base);
14150     CPUARMState *env = cpu->env_ptr;
14151     uint64_t pc = s->base.pc_next;
14152     uint32_t insn;
14153 
14154     /* Singlestep exceptions have the highest priority. */
14155     if (s->ss_active && !s->pstate_ss) {
14156         /* Singlestep state is Active-pending.
14157          * If we're in this state at the start of a TB then either
14158          *  a) we just took an exception to an EL which is being debugged
14159          *     and this is the first insn in the exception handler
14160          *  b) debug exceptions were masked and we just unmasked them
14161          *     without changing EL (e.g. by clearing PSTATE.D)
14162          * In either case we're going to take a swstep exception in the
14163          * "did not step an insn" case, and so the syndrome ISV and EX
14164          * bits should be zero.
14165          */
14166         assert(s->base.num_insns == 1);
14167         gen_swstep_exception(s, 0, 0);
14168         s->base.is_jmp = DISAS_NORETURN;
14169         s->base.pc_next = pc + 4;
14170         return;
14171     }
14172 
14173     if (pc & 3) {
14174         /*
14175          * PC alignment fault.  This has priority over the instruction abort
14176          * that we would receive from a translation fault via arm_ldl_code.
14177          * This should only be possible after an indirect branch, at the
14178          * start of the TB.
14179          */
14180         assert(s->base.num_insns == 1);
14181         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
14182         s->base.is_jmp = DISAS_NORETURN;
14183         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
14184         return;
14185     }
14186 
14187     s->pc_curr = pc;
14188     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
14189     s->insn = insn;
14190     s->base.pc_next = pc + 4;
14191 
14192     s->fp_access_checked = false;
14193     s->sve_access_checked = false;
14194 
14195     if (s->pstate_il) {
14196         /*
14197          * Illegal execution state. This has priority over BTI
14198          * exceptions, but comes after instruction abort exceptions.
14199          */
14200         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
14201         return;
14202     }
14203 
14204     if (dc_isar_feature(aa64_bti, s)) {
14205         if (s->base.num_insns == 1) {
14206             /*
14207              * At the first insn of the TB, compute s->guarded_page.
14208              * We delayed computing this until successfully reading
14209              * the first insn of the TB, above.  This (mostly) ensures
14210              * that the softmmu tlb entry has been populated, and the
14211              * page table GP bit is available.
14212              *
14213              * Note that we need to compute this even if btype == 0,
14214              * because this value is used for BR instructions later
14215              * where ENV is not available.
14216              */
14217             s->guarded_page = is_guarded_page(env, s);
14218 
14219             /* First insn can have btype set to non-zero.  */
14220             tcg_debug_assert(s->btype >= 0);
14221 
14222             /*
14223              * Note that the Branch Target Exception has fairly high
14224              * priority: below debugging exceptions but above almost
14225              * everything else.  This allows us to handle it now
14226              * instead of waiting until the insn is otherwise decoded.
14227              */
14228             if (s->btype != 0
14229                 && s->guarded_page
14230                 && !btype_destination_ok(insn, s->bt, s->btype)) {
14231                 gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
14232                 return;
14233             }
14234         } else {
14235             /* Not the first insn: btype must be 0.  */
14236             tcg_debug_assert(s->btype == 0);
14237         }
14238     }
14239 
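    /*
     * The generated disas_sme_fa64() pass below does not translate
     * anything; insns matching one of its FAIL patterns reach
     * trans_FAIL() above, which sets s->is_nonstreaming so that a
     * later access check can raise the SME trap.
     */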
14240     s->is_nonstreaming = false;
14241     if (s->sme_trap_nonstreaming) {
14242         disas_sme_fa64(s, insn);
14243     }
14244 
14245     if (!disas_a64(s, insn) &&
14246         !disas_sme(s, insn) &&
14247         !disas_sve(s, insn)) {
14248         disas_a64_legacy(s, insn);
14249     }
14250 
14251     /*
14252      * After execution of most insns, btype is reset to 0.
14253      * Note that we set btype == -1 when the insn sets btype.
14254      */
14255     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
14256         reset_btype(s);
14257     }
14258 }
14259 
14260 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
14261 {
14262     DisasContext *dc = container_of(dcbase, DisasContext, base);
14263 
14264     if (unlikely(dc->ss_active)) {
14265         /* Note that this means single stepping WFI doesn't halt the CPU.
14266          * For conditional branch insns this is harmless unreachable code as
14267          * gen_goto_tb() has already handled emitting the debug exception
14268          * (and thus a tb-jump is not possible when singlestepping).
14269          */
14270         switch (dc->base.is_jmp) {
14271         default:
14272             gen_a64_update_pc(dc, 4);
14273             /* fall through */
14274         case DISAS_EXIT:
14275         case DISAS_JUMP:
14276             gen_step_complete_exception(dc);
14277             break;
14278         case DISAS_NORETURN:
14279             break;
14280         }
14281     } else {
14282         switch (dc->base.is_jmp) {
14283         case DISAS_NEXT:
14284         case DISAS_TOO_MANY:
14285             gen_goto_tb(dc, 1, 4);
14286             break;
14287         default:
14288         case DISAS_UPDATE_EXIT:
14289             gen_a64_update_pc(dc, 4);
14290             /* fall through */
14291         case DISAS_EXIT:
14292             tcg_gen_exit_tb(NULL, 0);
14293             break;
14294         case DISAS_UPDATE_NOCHAIN:
14295             gen_a64_update_pc(dc, 4);
14296             /* fall through */
14297         case DISAS_JUMP:
14298             tcg_gen_lookup_and_goto_ptr();
14299             break;
14300         case DISAS_NORETURN:
14301         case DISAS_SWI:
14302             break;
14303         case DISAS_WFE:
14304             gen_a64_update_pc(dc, 4);
14305             gen_helper_wfe(cpu_env);
14306             break;
14307         case DISAS_YIELD:
14308             gen_a64_update_pc(dc, 4);
14309             gen_helper_yield(cpu_env);
14310             break;
14311         case DISAS_WFI:
14312             /*
14313              * This is a special case because we don't want to just halt
14314              * the CPU if trying to debug across a WFI.
14315              */
14316             gen_a64_update_pc(dc, 4);
14317             gen_helper_wfi(cpu_env, tcg_constant_i32(4));
14318             /*
14319              * The helper doesn't necessarily throw an exception, but we
14320              * must go back to the main loop to check for interrupts anyway.
14321              */
14322             tcg_gen_exit_tb(NULL, 0);
14323             break;
14324         }
14325     }
14326 }
14327 
14328 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
14329                                  CPUState *cpu, FILE *logfile)
14330 {
14331     DisasContext *dc = container_of(dcbase, DisasContext, base);
14332 
14333     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
14334     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
14335 }
14336 
14337 const TranslatorOps aarch64_translator_ops = {
14338     .init_disas_context = aarch64_tr_init_disas_context,
14339     .tb_start           = aarch64_tr_tb_start,
14340     .insn_start         = aarch64_tr_insn_start,
14341     .translate_insn     = aarch64_tr_translate_insn,
14342     .tb_stop            = aarch64_tr_tb_stop,
14343     .disas_log          = aarch64_tr_disas_log,
14344 };
14345