/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
#include "disas/disas.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"
#include "cpregs.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* Table-based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (e.g. SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
 */
static int get_a64_user_mem_index(DisasContext *s)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), cpu_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

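/*
 * Worked example (illustrative): with CF_PCREL, cpu_pc still holds the
 * value from the last update, i.e. s->pc_save.  If pc_curr == pc_save + 4
 * and diff == 8, the emitted op is dest = cpu_pc + 12, which equals
 * pc_curr + 8 without baking the absolute PC into the generated code.
 */
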
void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}

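/*
 * Worked example (illustrative): take tbi == 1 (TBI0 set, TBI1 clear).
 * For 0xAB00123456789ABCull, bit 55 is 0, so the sign-extension yields
 * 0x0000123456789ABC; ANDing with the source leaves the tag byte
 * cleared, as required.  For an address with bit 55 set, the extension
 * has bits [63:56] all ones, so the AND restores the original tag byte
 * and the address passes through unmodified.
 */
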
static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register.  But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

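/*
 * Illustrative note: MAKE_64BIT_MASK(56, 4) covers bits [59:56], the
 * allocation tag within the top byte, so e.g. 0xFA00123456789ABC
 * becomes 0xF000123456789ABC while bits [63:60] are preserved.
 */
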
static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(cpu_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access.  This probes a single
 * address, the exact one specified.  The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      int log2_size, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int log2_size)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int size)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

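/*
 * Illustrative note: the only difference from gen_mte_check1 is that
 * SIZEM1 covers the whole transfer, so e.g. a pair load of two 64-bit
 * registers would pass size == 16 and pack SIZEM1 == 15 into the
 * descriptor.
 */
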
typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

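/*
 * Illustrative note: arm_test_cc() yields a 32-bit value compared
 * against zero.  Sign-extension preserves the sign bit, so the GE/LT
 * conditions test the same thing on the 64-bit copy, and a
 * zero/non-zero (EQ/NE) test is likewise unchanged by the extension.
 */
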
static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(cpu_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments.  For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path.  A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

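/*
 * Illustrative sketch only (hypothetical helper, not used by the
 * translator): for an encoding where register 31 is XZR as an operand
 * but SP as a base, the two accessors combine like this.
 */
static void G_GNUC_UNUSED example_reg31_sketch(DisasContext *s)
{
    TCGv_i64 base = cpu_reg_sp(s, 31);   /* reads the real SP */
    TCGv_i64 zero = cpu_reg(s, 31);      /* a discardable zeroed temp */

    /* x0 = sp + xzr; writes to cpu_reg(s, 0) land in the real x0 */
    tcg_gen_add_i64(cpu_reg(s, 0), base, zero);
}
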
/* Read a cpu register in 32-bit/64-bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (i.e.
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * As with the GP register accessors, the values returned by the
 * read functions are fresh temporaries.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, cpu_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper.  */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + qc + operation using an out-of-line helper.  */
static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
                            int rm, gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), qc_ptr,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
}

/* Expand a 4-operand operation using an out-of-line helper.  */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}

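/*
 * Why this works (illustrative): tcg_gen_extr_i64_i32 splits the 64-bit
 * result into its low half (into ZF) and high half (into NF).  QEMU's Z
 * flag is "set" iff cpu_ZF == 0, and low|high is zero exactly when the
 * full 64-bit result is zero.  NF's bit 31 is the result's bit 63,
 * which is all the N flag needs.
 */
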
/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 result, flag, tmp;
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    tcg_gen_movi_i64(tmp, 0);
    tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    gen_set_NZ64(result);

    tcg_gen_xor_i64(flag, result, t0);
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_andc_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);

    tcg_gen_mov_i64(dest, result);
}

static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp = tcg_temp_new_i32();

    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_add64_CC(dest, t0, t1);
    } else {
        gen_add32_CC(dest, t0, t1);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 64 bit arithmetic */
    TCGv_i64 result, flag, tmp;

    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tcg_gen_sub_i64(result, t0, t1);

    gen_set_NZ64(result);

    tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    tcg_gen_xor_i64(flag, result, t0);
    tmp = tcg_temp_new_i64();
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_and_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);
    tcg_gen_mov_i64(dest, result);
}

static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 32 bit arithmetic */
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp;

    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_sub64_CC(dest, t0, t1);
    } else {
        gen_sub32_CC(dest, t0, t1);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

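    /*
     * A signed narrow load has sign-extended into all 64 bits of dest;
     * when the destination is a W register ("extend") the upper 32 bits
     * must then be zeroed to match the architectural X-register view.
     */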
    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();

    tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i64 tmphi = tcg_temp_new_i64();
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);

        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);

        tmphi = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between these and an FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (e.g. for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = true;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        assert(dc_isar_feature(aa64_sme, s));
        if (!sme_sm_enabled_check(s)) {
            goto fail_exit;
        }
    } else if (s->sve_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        goto fail_exit;
    }
    s->sve_access_checked = true;
    return fp_access_check(s);

 fail_exit:
    /* Assert that we only raise one exception per instruction. */
    assert(!s->sve_access_checked);
    s->sve_access_checked = true;
    return false;
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority.  This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        s->fp_access_checked = true;
        return sme_access_check(s);
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}

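/*
 * Illustrative sketch only (hypothetical, not used by the translator):
 * the extended-register form "ADD X0, X1, W2, UXTW #2" corresponds to
 * option == 0b010 (unsigned, 32-bit source) and shift == 2.
 */
static void G_GNUC_UNUSED example_ext_reg_sketch(DisasContext *s)
{
    TCGv_i64 offset = tcg_temp_new_i64();

    ext_and_shift_reg(offset, cpu_reg(s, 2), 2, 2); /* (uint32_t)x2 << 2 */
    tcg_gen_add_i64(cpu_reg(s, 0), cpu_reg(s, 1), offset);
}
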
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is intended
 * to be used when the relevant bits for decode are too awkwardly
 * placed and switch/if based logic would be confusing and deeply
 * nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}

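/*
 * Illustrative sketch only (hypothetical pattern/mask values, not a
 * real decode table): each entry matches when insn & mask == pattern,
 * and a zero mask terminates the table.
 */
static void G_GNUC_UNUSED example_disas_foo(DisasContext *s, uint32_t insn)
{
}

static void G_GNUC_UNUSED example_table_sketch(DisasContext *s, uint32_t insn)
{
    static const AArch64DecodeTable example_table[] = {
        { 0x0e200400, 0x9f200400, example_disas_foo },
        { 0x00000000, 0x00000000, NULL }
    };
    AArch64DecodeFn *fn = lookup_disas_fn(&example_table[0], insn);

    if (fn) {
        fn(s, insn);
    } else {
        unallocated_encoding(s);
    }
}
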
/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

static bool trans_B(DisasContext *s, arg_i *a)
{
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_BL(DisasContext *s, arg_i *a)
{
    gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_CBZ(DisasContext *s, arg_cbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_TBZ(DisasContext *s, arg_tbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
{
    reset_btype(s);
    if (a->cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(a->cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, a->imm);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, a->imm);
    }
    return true;
}

static void set_btype_for_br(DisasContext *s, int rn)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BR to {x16,x17} or !guard -> 1, else 3.  */
        set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
    }
}

static void set_btype_for_blr(DisasContext *s)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BLR sets BTYPE to 2, regardless of source guarded page.  */
        set_btype(s, 2);
    }
}

static bool trans_BR(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    set_btype_for_br(s, a->rn);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLR(DisasContext *s, arg_r *a)
{
    TCGv_i64 dst = cpu_reg(s, a->rn);
    TCGv_i64 lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RET(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
                                   TCGv_i64 modifier, bool use_key_a)
{
    TCGv_i64 truedst;
    /*
     * Return the branch target for a BRAA/RETA/etc, which is either
     * just the destination dst, or that value with the pauth check
     * done and the code removed from the high bits.
     */
    if (!s->pauth_active) {
        return dst;
    }

    truedst = tcg_temp_new_i64();
    if (use_key_a) {
        gen_helper_autia(truedst, cpu_env, dst, modifier);
    } else {
        gen_helper_autib(truedst, cpu_env, dst, modifier);
    }
    return truedst;
}

static bool trans_BRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    gen_a64_set_pc(s, dst);
    set_btype_for_br(s, a->rn);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst, lr;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
    gen_a64_set_pc(s, dst);
    set_btype_for_br(s, a->rn);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

1494 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1495 {
1496     TCGv_i64 dst, lr;
1497 
1498     if (!dc_isar_feature(aa64_pauth, s)) {
1499         return false;
1500     }
1501     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1502     lr = cpu_reg(s, 30);
1503     if (dst == lr) {
1504         TCGv_i64 tmp = tcg_temp_new_i64();
1505         tcg_gen_mov_i64(tmp, dst);
1506         dst = tmp;
1507     }
1508     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1509     gen_a64_set_pc(s, dst);
1510     set_btype_for_blr(s);
1511     s->base.is_jmp = DISAS_JUMP;
1512     return true;
1513 }
1514 
1515 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1516 {
1517     TCGv_i64 dst;
1518 
1519     if (s->current_el == 0) {
1520         return false;
1521     }
1522     if (s->fgt_eret) {
1523         gen_exception_insn_el(s, 0, EXCP_UDEF, 0, 2);
1524         return true;
1525     }
1526     dst = tcg_temp_new_i64();
1527     tcg_gen_ld_i64(dst, cpu_env,
1528                    offsetof(CPUARMState, elr_el[s->current_el]));
1529 
1530     translator_io_start(&s->base);
1531 
1532     gen_helper_exception_return(cpu_env, dst);
1533     /* Must exit loop to check un-masked IRQs */
1534     s->base.is_jmp = DISAS_EXIT;
1535     return true;
1536 }
1537 
1538 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1539 {
1540     TCGv_i64 dst;
1541 
1542     if (!dc_isar_feature(aa64_pauth, s)) {
1543         return false;
1544     }
1545     if (s->current_el == 0) {
1546         return false;
1547     }
1548     /* The FGT trap takes precedence over an auth trap. */
1549     if (s->fgt_eret) {
1550         gen_exception_insn_el(s, 0, EXCP_UDEF, a->m ? 3 : 2, 2);
1551         return true;
1552     }
1553     dst = tcg_temp_new_i64();
1554     tcg_gen_ld_i64(dst, cpu_env,
1555                    offsetof(CPUARMState, elr_el[s->current_el]));
1556 
1557     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1558 
1559     translator_io_start(&s->base);
1560 
1561     gen_helper_exception_return(cpu_env, dst);
1562     /* Must exit loop to check un-masked IRQs */
1563     s->base.is_jmp = DISAS_EXIT;
1564     return true;
1565 }
1566 
1567 /* HINT instruction group, including various allocated HINTs */
1568 static void handle_hint(DisasContext *s, uint32_t insn,
1569                         unsigned int op1, unsigned int op2, unsigned int crm)
1570 {
1571     unsigned int selector = crm << 3 | op2;
1572 
1573     if (op1 != 3) {
1574         unallocated_encoding(s);
1575         return;
1576     }
1577 
1578     switch (selector) {
1579     case 0b00000: /* NOP */
1580         break;
1581     case 0b00011: /* WFI */
1582         s->base.is_jmp = DISAS_WFI;
1583         break;
1584     case 0b00001: /* YIELD */
1585         /* When running in MTTCG we don't generate jumps to the yield and
1586          * WFE helpers as it won't affect the scheduling of other vCPUs.
1587          * If we wanted to more completely model WFE/SEV so we don't busy
1588          * spin unnecessarily we would need to do something more involved.
1589          */
1590         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1591             s->base.is_jmp = DISAS_YIELD;
1592         }
1593         break;
1594     case 0b00010: /* WFE */
1595         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1596             s->base.is_jmp = DISAS_WFE;
1597         }
1598         break;
1599     case 0b00100: /* SEV */
1600     case 0b00101: /* SEVL */
1601     case 0b00110: /* DGH */
1602         /* we treat all as NOP at least for now */
1603         break;
1604     case 0b00111: /* XPACLRI */
1605         if (s->pauth_active) {
1606             gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
1607         }
1608         break;
1609     case 0b01000: /* PACIA1716 */
1610         if (s->pauth_active) {
1611             gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1612         }
1613         break;
1614     case 0b01010: /* PACIB1716 */
1615         if (s->pauth_active) {
1616             gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1617         }
1618         break;
1619     case 0b01100: /* AUTIA1716 */
1620         if (s->pauth_active) {
1621             gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1622         }
1623         break;
1624     case 0b01110: /* AUTIB1716 */
1625         if (s->pauth_active) {
1626             gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1627         }
1628         break;
1629     case 0b10000: /* ESB */
1630         /* Without RAS, we must implement this as NOP. */
1631         if (dc_isar_feature(aa64_ras, s)) {
1632             /*
1633              * QEMU does not have a source of physical SErrors,
1634              * so we are only concerned with virtual SErrors.
1635              * The pseudocode in the ARM for this case is
1636              *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
1637              *      AArch64.vESBOperation();
1638              * Most of the condition can be evaluated at translation time.
1639              * Test for EL2 present, and defer test for SEL2 to runtime.
1640              */
1641             if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
1642                 gen_helper_vesb(cpu_env);
1643             }
1644         }
1645         break;
1646     case 0b11000: /* PACIAZ */
1647         if (s->pauth_active) {
1648             gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
1649                              tcg_constant_i64(0));
1650         }
1651         break;
1652     case 0b11001: /* PACIASP */
1653         if (s->pauth_active) {
1654             gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1655         }
1656         break;
1657     case 0b11010: /* PACIBZ */
1658         if (s->pauth_active) {
1659             gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
1660                              tcg_constant_i64(0));
1661         }
1662         break;
1663     case 0b11011: /* PACIBSP */
1664         if (s->pauth_active) {
1665             gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1666         }
1667         break;
1668     case 0b11100: /* AUTIAZ */
1669         if (s->pauth_active) {
1670             gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
1671                              tcg_constant_i64(0));
1672         }
1673         break;
1674     case 0b11101: /* AUTIASP */
1675         if (s->pauth_active) {
1676             gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1677         }
1678         break;
1679     case 0b11110: /* AUTIBZ */
1680         if (s->pauth_active) {
1681             gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
1682                              tcg_constant_i64(0));
1683         }
1684         break;
1685     case 0b11111: /* AUTIBSP */
1686         if (s->pauth_active) {
1687             gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1688         }
1689         break;
1690     default:
1691         /* default specified as NOP equivalent */
1692         break;
1693     }
1694 }
1695 
1696 static void gen_clrex(DisasContext *s, uint32_t insn)
1697 {
1698     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1699 }
1700 
1701 /* CLREX, DSB, DMB, ISB */
1702 static void handle_sync(DisasContext *s, uint32_t insn,
1703                         unsigned int op1, unsigned int op2, unsigned int crm)
1704 {
1705     TCGBar bar;
1706 
1707     if (op1 != 3) {
1708         unallocated_encoding(s);
1709         return;
1710     }
1711 
1712     switch (op2) {
1713     case 2: /* CLREX */
1714         gen_clrex(s, insn);
1715         return;
1716     case 4: /* DSB */
1717     case 5: /* DMB */
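        /*
         * CRm<1:0> gives the required access types: 01 -> reads
         * (e.g. DMB ISHLD), 10 -> writes (e.g. DMB ISHST), otherwise
         * all; the shareability domain in CRm<3:2> is ignored here.
         */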
1718         switch (crm & 3) {
1719         case 1: /* MBReqTypes_Reads */
1720             bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1721             break;
1722         case 2: /* MBReqTypes_Writes */
1723             bar = TCG_BAR_SC | TCG_MO_ST_ST;
1724             break;
1725         default: /* MBReqTypes_All */
1726             bar = TCG_BAR_SC | TCG_MO_ALL;
1727             break;
1728         }
1729         tcg_gen_mb(bar);
1730         return;
1731     case 6: /* ISB */
1732         /* We need to break the TB after this insn to execute
1733          * self-modifying code correctly and also to take
1734          * any pending interrupts immediately.
1735          */
1736         reset_btype(s);
1737         gen_goto_tb(s, 0, 4);
1738         return;
1739 
1740     case 7: /* SB */
1741         if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
1742             goto do_unallocated;
1743         }
1744         /*
1745          * TODO: There is no speculation barrier opcode for TCG;
1746          * MB and end the TB instead.
1747          */
1748         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1749         gen_goto_tb(s, 0, 4);
1750         return;
1751 
1752     default:
1753     do_unallocated:
1754         unallocated_encoding(s);
1755         return;
1756     }
1757 }
1758 
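/*
 * XAFlag (FEAT_FlagM2) converts a float-compare result from the
 * "external" flag format back to the Arm format:
 *   N = !C & !Z,  Z = Z & C,  C = C | Z,  V = !C & Z
 * The TCG below computes this on QEMU's internal flag representation
 * (cpu_NF/cpu_VF hold the flag in bit 31, cpu_ZF is zero iff Z is set).
 */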
1759 static void gen_xaflag(void)
1760 {
1761     TCGv_i32 z = tcg_temp_new_i32();
1762 
1763     tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1764 
1765     /*
1766      * (!C & !Z) << 31
1767      * (!(C | Z)) << 31
1768      * ~((C | Z) << 31)
1769      * ~-(C | Z)
1770      * (C | Z) - 1
1771      */
1772     tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1773     tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1774 
1775     /* !(Z & C) */
1776     tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1777     tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1778 
1779     /* (!C & Z) << 31 -> -(Z & ~C) */
1780     tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1781     tcg_gen_neg_i32(cpu_VF, cpu_VF);
1782 
1783     /* C | Z */
1784     tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1785 }
1786 
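/*
 * AXFlag (FEAT_FlagM2) converts Arm-format float-compare flags to the
 * "external" format:  N = 0,  Z = Z | V,  C = C & !V,  V = 0.
 */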
1787 static void gen_axflag(void)
1788 {
1789     tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
1790     tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
1791 
1792     /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1793     tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1794 
1795     tcg_gen_movi_i32(cpu_NF, 0);
1796     tcg_gen_movi_i32(cpu_VF, 0);
1797 }
1798 
1799 /* MSR (immediate) - move immediate to processor state field */
1800 static void handle_msr_i(DisasContext *s, uint32_t insn,
1801                          unsigned int op1, unsigned int op2, unsigned int crm)
1802 {
1803     int op = op1 << 3 | op2;
1804 
1805     /* End the TB by default; chaining is ok.  */
1806     s->base.is_jmp = DISAS_TOO_MANY;
1807 
1808     switch (op) {
1809     case 0x00: /* CFINV */
1810         if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
1811             goto do_unallocated;
1812         }
1813         tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1814         s->base.is_jmp = DISAS_NEXT;
1815         break;
1816 
1817     case 0x01: /* XAFlag */
1818         if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1819             goto do_unallocated;
1820         }
1821         gen_xaflag();
1822         s->base.is_jmp = DISAS_NEXT;
1823         break;
1824 
1825     case 0x02: /* AXFlag */
1826         if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1827             goto do_unallocated;
1828         }
1829         gen_axflag();
1830         s->base.is_jmp = DISAS_NEXT;
1831         break;
1832 
1833     case 0x03: /* UAO */
1834         if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
1835             goto do_unallocated;
1836         }
1837         if (crm & 1) {
1838             set_pstate_bits(PSTATE_UAO);
1839         } else {
1840             clear_pstate_bits(PSTATE_UAO);
1841         }
1842         gen_rebuild_hflags(s);
1843         break;
1844 
1845     case 0x04: /* PAN */
1846         if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
1847             goto do_unallocated;
1848         }
1849         if (crm & 1) {
1850             set_pstate_bits(PSTATE_PAN);
1851         } else {
1852             clear_pstate_bits(PSTATE_PAN);
1853         }
1854         gen_rebuild_hflags(s);
1855         break;
1856 
1857     case 0x05: /* SPSel */
1858         if (s->current_el == 0) {
1859             goto do_unallocated;
1860         }
1861         gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(crm & PSTATE_SP));
1862         break;
1863 
1864     case 0x19: /* SSBS */
1865         if (!dc_isar_feature(aa64_ssbs, s)) {
1866             goto do_unallocated;
1867         }
1868         if (crm & 1) {
1869             set_pstate_bits(PSTATE_SSBS);
1870         } else {
1871             clear_pstate_bits(PSTATE_SSBS);
1872         }
1873         /* Don't need to rebuild hflags since SSBS is a nop */
1874         break;
1875 
1876     case 0x1a: /* DIT */
1877         if (!dc_isar_feature(aa64_dit, s)) {
1878             goto do_unallocated;
1879         }
1880         if (crm & 1) {
1881             set_pstate_bits(PSTATE_DIT);
1882         } else {
1883             clear_pstate_bits(PSTATE_DIT);
1884         }
1885         /* There's no need to rebuild hflags because DIT is a nop */
1886         break;
1887 
1888     case 0x1e: /* DAIFSet */
1889         gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(crm));
1890         break;
1891 
1892     case 0x1f: /* DAIFClear */
1893         gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(crm));
1894         /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
1895         s->base.is_jmp = DISAS_UPDATE_EXIT;
1896         break;
1897 
1898     case 0x1c: /* TCO */
1899         if (dc_isar_feature(aa64_mte, s)) {
1900             /* Full MTE is enabled -- set the TCO bit as directed. */
1901             if (crm & 1) {
1902                 set_pstate_bits(PSTATE_TCO);
1903             } else {
1904                 clear_pstate_bits(PSTATE_TCO);
1905             }
1906             gen_rebuild_hflags(s);
1907             /* Many factors, including TCO, go into MTE_ACTIVE. */
1908             s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
1909         } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
1910             /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
1911             s->base.is_jmp = DISAS_NEXT;
1912         } else {
1913             goto do_unallocated;
1914         }
1915         break;
1916 
1917     case 0x1b: /* SVCR* */
1918         if (!dc_isar_feature(aa64_sme, s) || crm < 2 || crm > 7) {
1919             goto do_unallocated;
1920         }
1921         if (sme_access_check(s)) {
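            /*
             * CRm<2:1> selects which of PSTATE.{SM,ZA} to write
             * (01 = SM, 10 = ZA, 11 = both) and CRm<0> is the value,
             * hence the derivation of new and msk below.
             */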
1922             int old = s->pstate_sm | (s->pstate_za << 1);
1923             int new = (crm & 1) * 3;
1924             int msk = (crm >> 1) & 3;
1925 
1926             if ((old ^ new) & msk) {
1927                 /* At least one bit changes. */
1928                 gen_helper_set_svcr(cpu_env, tcg_constant_i32(new),
1929                                     tcg_constant_i32(msk));
1930             } else {
1931                 s->base.is_jmp = DISAS_NEXT;
1932             }
1933         }
1934         break;
1935 
1936     default:
1937     do_unallocated:
1938         unallocated_encoding(s);
1939         return;
1940     }
1941 }
1942 
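/*
 * QEMU keeps the flags in four separate TCG globals: cpu_NF and cpu_VF
 * hold N and V in bit 31, cpu_CF holds C as 0 or 1, and cpu_ZF holds a
 * value that is zero iff Z is set.  The two helpers below convert
 * between that representation and the architectural NZCV[31:28] layout.
 */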
1943 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1944 {
1945     TCGv_i32 tmp = tcg_temp_new_i32();
1946     TCGv_i32 nzcv = tcg_temp_new_i32();
1947 
1948     /* build bit 31, N */
1949     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1950     /* build bit 30, Z */
1951     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1952     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1953     /* build bit 29, C */
1954     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1955     /* build bit 28, V */
1956     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1957     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1958     /* generate result */
1959     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1960 }
1961 
1962 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1963 {
1964     TCGv_i32 nzcv = tcg_temp_new_i32();
1965 
1966     /* take NZCV from R[t] */
1967     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1968 
1969     /* bit 31, N */
1970     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1971     /* bit 30, Z */
1972     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1973     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1974     /* bit 29, C */
1975     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1976     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1977     /* bit 28, V */
1978     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1979     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1980 }
1981 
1982 static void gen_sysreg_undef(DisasContext *s, bool isread,
1983                              uint8_t op0, uint8_t op1, uint8_t op2,
1984                              uint8_t crn, uint8_t crm, uint8_t rt)
1985 {
1986     /*
1987      * Generate code to emit an UNDEF with correct syndrome
1988      * information for a failed system register access.
1989      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
1990      * but if FEAT_IDST is implemented then read accesses to registers
1991      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
1992      * syndrome.
1993      */
1994     uint32_t syndrome;
1995 
1996     if (isread && dc_isar_feature(aa64_ids, s) &&
1997         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
1998         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1999     } else {
2000         syndrome = syn_uncategorized();
2001     }
2002     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2003 }
2004 
2005 /* MRS - move from system register
2006  * MSR (register) - move to system register
2007  * SYS
2008  * SYSL
2009  * These are all essentially the same insn in 'read' and 'write'
2010  * versions, with varying op0 fields.
2011  */
2012 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
2013                        unsigned int op0, unsigned int op1, unsigned int op2,
2014                        unsigned int crn, unsigned int crm, unsigned int rt)
2015 {
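    /*
     * The (op0, op1, crn, crm, op2) tuple uniquely identifies a system
     * register; encode it as the key used to look the register up in
     * the cp_regs hash table.
     */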
2016     uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2017                                       crn, crm, op0, op1, op2);
2018     const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2019     bool need_exit_tb = false;
2020     TCGv_ptr tcg_ri = NULL;
2021     TCGv_i64 tcg_rt;
2022 
2023     if (!ri) {
2024         /* Unknown register; this might be a guest error or a QEMU
2025          * unimplemented feature.
2026          */
2027         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2028                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2029                       isread ? "read" : "write", op0, op1, crn, crm, op2);
2030         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2031         return;
2032     }
2033 
2034     /* Check access permissions */
2035     if (!cp_access_ok(s->current_el, ri, isread)) {
2036         gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2037         return;
2038     }
2039 
2040     if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2041         /* Emit code to perform further access permissions checks at
2042          * runtime; this may result in an exception.
2043          */
2044         uint32_t syndrome;
2045 
2046         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2047         gen_a64_update_pc(s, 0);
2048         tcg_ri = tcg_temp_new_ptr();
2049         gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
2050                                        tcg_constant_i32(key),
2051                                        tcg_constant_i32(syndrome),
2052                                        tcg_constant_i32(isread));
2053     } else if (ri->type & ARM_CP_RAISES_EXC) {
2054         /*
2055          * The readfn or writefn might raise an exception;
2056          * synchronize the CPU state in case it does.
2057          */
2058         gen_a64_update_pc(s, 0);
2059     }
2060 
2061     /* Handle special cases first */
2062     switch (ri->type & ARM_CP_SPECIAL_MASK) {
2063     case 0:
2064         break;
2065     case ARM_CP_NOP:
2066         return;
2067     case ARM_CP_NZCV:
2068         tcg_rt = cpu_reg(s, rt);
2069         if (isread) {
2070             gen_get_nzcv(tcg_rt);
2071         } else {
2072             gen_set_nzcv(tcg_rt);
2073         }
2074         return;
2075     case ARM_CP_CURRENTEL:
2076         /* Reads as the current EL value from pstate, which is
2077          * guaranteed to be constant by the tb flags.
2078          */
2079         tcg_rt = cpu_reg(s, rt);
2080         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
2081         return;
2082     case ARM_CP_DC_ZVA:
2083         /* Writes clear the aligned block of memory which rt points into. */
2084         if (s->mte_active[0]) {
2085             int desc = 0;
2086 
2087             desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2088             desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2089             desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2090 
2091             tcg_rt = tcg_temp_new_i64();
2092             gen_helper_mte_check_zva(tcg_rt, cpu_env,
2093                                      tcg_constant_i32(desc), cpu_reg(s, rt));
2094         } else {
2095             tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2096         }
2097         gen_helper_dc_zva(cpu_env, tcg_rt);
2098         return;
2099     case ARM_CP_DC_GVA:
2100         {
2101             TCGv_i64 clean_addr, tag;
2102 
2103             /*
2104              * DC_GVA, like DC_ZVA, requires that we supply the original
2105              * pointer for an invalid page.  Probe that address first.
2106              */
2107             tcg_rt = cpu_reg(s, rt);
2108             clean_addr = clean_data_tbi(s, tcg_rt);
2109             gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2110 
2111             if (s->ata) {
2112                 /* Extract the tag from the register to match STZGM.  */
2113                 tag = tcg_temp_new_i64();
2114                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2115                 gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
2116             }
2117         }
2118         return;
2119     case ARM_CP_DC_GZVA:
2120         {
2121             TCGv_i64 clean_addr, tag;
2122 
2123             /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2124             tcg_rt = cpu_reg(s, rt);
2125             clean_addr = clean_data_tbi(s, tcg_rt);
2126             gen_helper_dc_zva(cpu_env, clean_addr);
2127 
2128             if (s->ata) {
2129                 /* Extract the tag from the register to match STZGM.  */
2130                 tag = tcg_temp_new_i64();
2131                 tcg_gen_shri_i64(tag, tcg_rt, 56);
2132                 gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
2133             }
2134         }
2135         return;
2136     default:
2137         g_assert_not_reached();
2138     }
2139     if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2140         return;
2141     } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2142         return;
2143     } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2144         return;
2145     }
2146 
2147     if (ri->type & ARM_CP_IO) {
2148         /* I/O operations must end the TB here (whether read or write) */
2149         need_exit_tb = translator_io_start(&s->base);
2150     }
2151 
2152     tcg_rt = cpu_reg(s, rt);
2153 
2154     if (isread) {
2155         if (ri->type & ARM_CP_CONST) {
2156             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2157         } else if (ri->readfn) {
2158             if (!tcg_ri) {
2159                 tcg_ri = gen_lookup_cp_reg(key);
2160             }
2161             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tcg_ri);
2162         } else {
2163             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
2164         }
2165     } else {
2166         if (ri->type & ARM_CP_CONST) {
2167             /* If not forbidden by access permissions, treat as WI */
2168             return;
2169         } else if (ri->writefn) {
2170             if (!tcg_ri) {
2171                 tcg_ri = gen_lookup_cp_reg(key);
2172             }
2173             gen_helper_set_cp_reg64(cpu_env, tcg_ri, tcg_rt);
2174         } else {
2175             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
2176         }
2177     }
2178 
2179     if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2180         /*
2181          * A write to any coprocessor register that ends a TB
2182          * must rebuild the hflags for the next TB.
2183          */
2184         gen_rebuild_hflags(s);
2185         /*
2186          * We default to ending the TB on a coprocessor register write,
2187          * but allow this to be suppressed by the register definition
2188          * (usually only necessary to work around guest bugs).
2189          */
2190         need_exit_tb = true;
2191     }
2192     if (need_exit_tb) {
2193         s->base.is_jmp = DISAS_UPDATE_EXIT;
2194     }
2195 }
2196 
2197 /* System
2198  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
2199  * +---------------------+---+-----+-----+-------+-------+-----+------+
2200  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
2201  * +---------------------+---+-----+-----+-------+-------+-----+------+
2202  */
2203 static void disas_system(DisasContext *s, uint32_t insn)
2204 {
2205     unsigned int l, op0, op1, crn, crm, op2, rt;
2206     l = extract32(insn, 21, 1);
2207     op0 = extract32(insn, 19, 2);
2208     op1 = extract32(insn, 16, 3);
2209     crn = extract32(insn, 12, 4);
2210     crm = extract32(insn, 8, 4);
2211     op2 = extract32(insn, 5, 3);
2212     rt = extract32(insn, 0, 5);
2213 
2214     if (op0 == 0) {
2215         if (l || rt != 31) {
2216             unallocated_encoding(s);
2217             return;
2218         }
2219         switch (crn) {
2220         case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
2221             handle_hint(s, insn, op1, op2, crm);
2222             break;
2223         case 3: /* CLREX, DSB, DMB, ISB */
2224             handle_sync(s, insn, op1, op2, crm);
2225             break;
2226         case 4: /* MSR (immediate) */
2227             handle_msr_i(s, insn, op1, op2, crm);
2228             break;
2229         default:
2230             unallocated_encoding(s);
2231             break;
2232         }
2233         return;
2234     }
2235     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
2236 }
2237 
2238 /* Exception generation
2239  *
2240  *  31             24 23 21 20                     5 4   2 1  0
2241  * +-----------------+-----+------------------------+-----+----+
2242  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
2243  * +-----------------+-----+------------------------+-----+----+
2244  */
2245 static void disas_exc(DisasContext *s, uint32_t insn)
2246 {
2247     int opc = extract32(insn, 21, 3);
2248     int op2_ll = extract32(insn, 0, 5);
2249     int imm16 = extract32(insn, 5, 16);
2250     uint32_t syndrome;
2251 
2252     switch (opc) {
2253     case 0:
2254         /* For SVC, HVC and SMC we advance the single-step state
2255          * machine before taking the exception. This is architecturally
2256          * mandated, to ensure that single-stepping a system call
2257          * instruction works properly.
2258          */
2259         switch (op2_ll) {
2260         case 1:                                                     /* SVC */
2261             syndrome = syn_aa64_svc(imm16);
2262             if (s->fgt_svc) {
2263                 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2264                 break;
2265             }
2266             gen_ss_advance(s);
2267             gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2268             break;
2269         case 2:                                                     /* HVC */
2270             if (s->current_el == 0) {
2271                 unallocated_encoding(s);
2272                 break;
2273             }
2274             /* The pre HVC helper handles cases when HVC gets trapped
2275              * as an undefined insn by runtime configuration.
2276              */
2277             gen_a64_update_pc(s, 0);
2278             gen_helper_pre_hvc(cpu_env);
2279             gen_ss_advance(s);
2280             gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(imm16), 2);
2281             break;
2282         case 3:                                                     /* SMC */
2283             if (s->current_el == 0) {
2284                 unallocated_encoding(s);
2285                 break;
2286             }
2287             gen_a64_update_pc(s, 0);
2288             gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(imm16)));
2289             gen_ss_advance(s);
2290             gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(imm16), 3);
2291             break;
2292         default:
2293             unallocated_encoding(s);
2294             break;
2295         }
2296         break;
2297     case 1:
2298         if (op2_ll != 0) {
2299             unallocated_encoding(s);
2300             break;
2301         }
2302         /* BRK */
2303         gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
2304         break;
2305     case 2:
2306         if (op2_ll != 0) {
2307             unallocated_encoding(s);
2308             break;
2309         }
2310         /* HLT. This has two purposes.
2311          * Architecturally, it is an external halting debug instruction.
2312          * Since QEMU doesn't implement external debug, we treat this as
2313          * required when halting debug is disabled: it will UNDEF.
2314          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2315          */
2316         if (semihosting_enabled(s->current_el == 0) && imm16 == 0xf000) {
2317             gen_exception_internal_insn(s, EXCP_SEMIHOST);
2318         } else {
2319             unallocated_encoding(s);
2320         }
2321         break;
2322     case 5:
2323         if (op2_ll < 1 || op2_ll > 3) {
2324             unallocated_encoding(s);
2325             break;
2326         }
2327         /* DCPS1, DCPS2, DCPS3: always UNDEF, since QEMU does not
2327            implement Debug state. */
2328         unallocated_encoding(s);
2329         break;
2330     default:
2331         unallocated_encoding(s);
2332         break;
2333     }
2334 }
2335 
2336 /* Branches, exception generating and system instructions */
2337 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2338 {
2339     switch (extract32(insn, 25, 7)) {
2340     case 0x6a: /* Exception generation / System */
2341         if (insn & (1 << 24)) {
2342             if (extract32(insn, 22, 2) == 0) {
2343                 disas_system(s, insn);
2344             } else {
2345                 unallocated_encoding(s);
2346             }
2347         } else {
2348             disas_exc(s, insn);
2349         }
2350         break;
2351     default:
2352         unallocated_encoding(s);
2353         break;
2354     }
2355 }
2356 
2357 /*
2358  * Load/Store exclusive instructions are implemented by remembering
2359  * the value/address loaded, and seeing if these are the same
2360  * when the store is performed. This is not actually the architecturally
2361  * mandated semantics, but it works for typical guest code sequences
2362  * and avoids having to monitor regular stores.
2363  *
2364  * The store exclusive uses the atomic cmpxchg primitives to avoid
2365  * races in multi-threaded linux-user and when MTTCG softmmu is
2366  * enabled.
2367  */
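/*
 * An illustrative guest retry loop of the kind being modelled here:
 *     loop: ldxr  x0, [x1]         // load and arm the monitor
 *           add   x0, x0, #1
 *           stxr  w2, x0, [x1]     // w2 = 0 on success, 1 on failure
 *           cbnz  w2, loop
 */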
2368 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2369                                int size, bool is_pair)
2370 {
2371     int idx = get_mem_index(s);
2372     MemOp memop;
2373     TCGv_i64 dirty_addr, clean_addr;
2374 
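    /*
     * The "dirty" address may still carry TBI/MTE tag bits in its top
     * byte; gen_mte_check1 performs any MTE check and returns the
     * cleaned address to use for the access.
     */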
2375     s->is_ldex = true;
2376     dirty_addr = cpu_reg_sp(s, rn);
2377     clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, size);
2378 
2379     g_assert(size <= 3);
2380     if (is_pair) {
2381         g_assert(size >= 2);
2382         if (size == 2) {
2383             /* The pair must be single-copy atomic for the doubleword.  */
2384             memop = finalize_memop(s, MO_64 | MO_ALIGN);
2385             tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2386             if (s->be_data == MO_LE) {
2387                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2388                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2389             } else {
2390                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2391                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2392             }
2393         } else {
2394             /*
2395              * The pair must be single-copy atomic for *each* doubleword, not
2396              * the entire quadword; however, it must be quadword aligned.
2397              * Expose the complete load to tcg, for ease of tlb lookup,
2398              * but indicate that only 8-byte atomicity is required.
2399              */
2400             TCGv_i128 t16 = tcg_temp_new_i128();
2401 
2402             memop = finalize_memop_atom(s, MO_128 | MO_ALIGN_16,
2403                                         MO_ATOM_IFALIGN_PAIR);
2404             tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2405 
2406             if (s->be_data == MO_LE) {
2407                 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2408                                       cpu_exclusive_high, t16);
2409             } else {
2410                 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2411                                       cpu_exclusive_val, t16);
2412             }
2413             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2414             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2415         }
2416     } else {
2417         memop = finalize_memop(s, size | MO_ALIGN);
2418         tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2419         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2420     }
2421     tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2422 }
2423 
2424 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2425                                 int rn, int size, int is_pair)
2426 {
2427     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2428      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2429      *     [addr] = {Rt};
2430      *     if (is_pair) {
2431      *         [addr + datasize] = {Rt2};
2432      *     }
2433      *     {Rd} = 0;
2434      * } else {
2435      *     {Rd} = 1;
2436      * }
2437      * env->exclusive_addr = -1;
2438      */
2439     TCGLabel *fail_label = gen_new_label();
2440     TCGLabel *done_label = gen_new_label();
2441     TCGv_i64 tmp, dirty_addr, clean_addr;
2442 
2443     dirty_addr = cpu_reg_sp(s, rn);
2444     clean_addr = gen_mte_check1(s, dirty_addr, true, rn != 31, size);
2445 
2446     tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2447 
2448     tmp = tcg_temp_new_i64();
2449     if (is_pair) {
2450         if (size == 2) {
2451             if (s->be_data == MO_LE) {
2452                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2453             } else {
2454                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2455             }
2456             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2457                                        cpu_exclusive_val, tmp,
2458                                        get_mem_index(s),
2459                                        MO_64 | MO_ALIGN | s->be_data);
2460             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2461         } else {
2462             TCGv_i128 t16 = tcg_temp_new_i128();
2463             TCGv_i128 c16 = tcg_temp_new_i128();
2464             TCGv_i64 a, b;
2465 
2466             if (s->be_data == MO_LE) {
2467                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2468                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2469                                         cpu_exclusive_high);
2470             } else {
2471                 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2472                 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2473                                         cpu_exclusive_val);
2474             }
2475 
2476             tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
2477                                         get_mem_index(s),
2478                                         MO_128 | MO_ALIGN | s->be_data);
2479 
2480             a = tcg_temp_new_i64();
2481             b = tcg_temp_new_i64();
2482             if (s->be_data == MO_LE) {
2483                 tcg_gen_extr_i128_i64(a, b, t16);
2484             } else {
2485                 tcg_gen_extr_i128_i64(b, a, t16);
2486             }
2487 
2488             tcg_gen_xor_i64(a, a, cpu_exclusive_val);
2489             tcg_gen_xor_i64(b, b, cpu_exclusive_high);
2490             tcg_gen_or_i64(tmp, a, b);
2491 
2492             tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
2493         }
2494     } else {
2495         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2496                                    cpu_reg(s, rt), get_mem_index(s),
2497                                    size | MO_ALIGN | s->be_data);
2498         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2499     }
2500     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2501     tcg_gen_br(done_label);
2502 
2503     gen_set_label(fail_label);
2504     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2505     gen_set_label(done_label);
2506     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2507 }
2508 
2509 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2510                                  int rn, int size)
2511 {
2512     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2513     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2514     int memidx = get_mem_index(s);
2515     TCGv_i64 clean_addr;
2516 
2517     if (rn == 31) {
2518         gen_check_sp_alignment(s);
2519     }
2520     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size);
2521     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
2522                                size | MO_ALIGN | s->be_data);
2523 }
2524 
2525 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2526                                       int rn, int size)
2527 {
2528     TCGv_i64 s1 = cpu_reg(s, rs);
2529     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2530     TCGv_i64 t1 = cpu_reg(s, rt);
2531     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2532     TCGv_i64 clean_addr;
2533     int memidx = get_mem_index(s);
2534 
2535     if (rn == 31) {
2536         gen_check_sp_alignment(s);
2537     }
2538 
2539     /* This is a single atomic access, despite the "pair". */
2540     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1);
2541 
2542     if (size == 2) {
2543         TCGv_i64 cmp = tcg_temp_new_i64();
2544         TCGv_i64 val = tcg_temp_new_i64();
2545 
2546         if (s->be_data == MO_LE) {
2547             tcg_gen_concat32_i64(val, t1, t2);
2548             tcg_gen_concat32_i64(cmp, s1, s2);
2549         } else {
2550             tcg_gen_concat32_i64(val, t2, t1);
2551             tcg_gen_concat32_i64(cmp, s2, s1);
2552         }
2553 
2554         tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
2555                                    MO_64 | MO_ALIGN | s->be_data);
2556 
2557         if (s->be_data == MO_LE) {
2558             tcg_gen_extr32_i64(s1, s2, cmp);
2559         } else {
2560             tcg_gen_extr32_i64(s2, s1, cmp);
2561         }
2562     } else {
2563         TCGv_i128 cmp = tcg_temp_new_i128();
2564         TCGv_i128 val = tcg_temp_new_i128();
2565 
2566         if (s->be_data == MO_LE) {
2567             tcg_gen_concat_i64_i128(val, t1, t2);
2568             tcg_gen_concat_i64_i128(cmp, s1, s2);
2569         } else {
2570             tcg_gen_concat_i64_i128(val, t2, t1);
2571             tcg_gen_concat_i64_i128(cmp, s2, s1);
2572         }
2573 
2574         tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx,
2575                                     MO_128 | MO_ALIGN | s->be_data);
2576 
2577         if (s->be_data == MO_LE) {
2578             tcg_gen_extr_i128_i64(s1, s2, cmp);
2579         } else {
2580             tcg_gen_extr_i128_i64(s2, s1, cmp);
2581         }
2582     }
2583 }
2584 
2585 /* Compute the Sixty-Four bit (SF) register size. This logic is derived
2586  * from the ARMv8 specs for LDR (Shared decode for all encodings).
2587  */
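/*
 * For example, LDRSW (signed, opc<0> = 0) targets a 64-bit Xt while
 * LDRSH Wt (signed, opc<0> = 1) targets a 32-bit Wt; unsigned loads
 * target a 64-bit register only when size == 3.
 */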
2588 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
2589 {
2590     int opc0 = extract32(opc, 0, 1);
2591     int regsize;
2592 
2593     if (is_signed) {
2594         regsize = opc0 ? 32 : 64;
2595     } else {
2596         regsize = size == 3 ? 64 : 32;
2597     }
2598     return regsize == 64;
2599 }
2600 
2601 /* Load/store exclusive
2602  *
2603  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2604  * +-----+-------------+----+---+----+------+----+-------+------+------+
2605  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2606  * +-----+-------------+----+---+----+------+----+-------+------+------+
2607  *
2608  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2609  *   L: 0 -> store, 1 -> load
2610  *  o2: 0 -> exclusive, 1 -> not
2611  *  o1: 0 -> single register, 1 -> register pair
2612  *  o0: 1 -> load-acquire/store-release, 0 -> not
2613  */
2614 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2615 {
2616     int rt = extract32(insn, 0, 5);
2617     int rn = extract32(insn, 5, 5);
2618     int rt2 = extract32(insn, 10, 5);
2619     int rs = extract32(insn, 16, 5);
2620     int is_lasr = extract32(insn, 15, 1);
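    /* Pack the o2:L:o1 bits and o0 (is_lasr) into one switch selector. */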
2621     int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2622     int size = extract32(insn, 30, 2);
2623     TCGv_i64 clean_addr;
2624     MemOp memop;
2625 
2626     switch (o2_L_o1_o0) {
2627     case 0x0: /* STXR */
2628     case 0x1: /* STLXR */
2629         if (rn == 31) {
2630             gen_check_sp_alignment(s);
2631         }
2632         if (is_lasr) {
2633             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2634         }
2635         gen_store_exclusive(s, rs, rt, rt2, rn, size, false);
2636         return;
2637 
2638     case 0x4: /* LDXR */
2639     case 0x5: /* LDAXR */
2640         if (rn == 31) {
2641             gen_check_sp_alignment(s);
2642         }
2643         gen_load_exclusive(s, rt, rt2, rn, size, false);
2644         if (is_lasr) {
2645             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2646         }
2647         return;
2648 
2649     case 0x8: /* STLLR */
2650         if (!dc_isar_feature(aa64_lor, s)) {
2651             break;
2652         }
2653         /* StoreLORelease is the same as Store-Release for QEMU.  */
2654         /* fall through */
2655     case 0x9: /* STLR */
2656         /* Generate ISS for non-exclusive accesses including LASR.  */
2657         if (rn == 31) {
2658             gen_check_sp_alignment(s);
2659         }
2660         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2661         /* TODO: ARMv8.4-LSE SCTLR.nAA */
2662         memop = finalize_memop(s, size | MO_ALIGN);
2663         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2664                                     true, rn != 31, size);
2665         do_gpr_st(s, cpu_reg(s, rt), clean_addr, memop, true, rt,
2666                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2667         return;
2668 
2669     case 0xc: /* LDLAR */
2670         if (!dc_isar_feature(aa64_lor, s)) {
2671             break;
2672         }
2673         /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2674         /* fall through */
2675     case 0xd: /* LDAR */
2676         /* Generate ISS for non-exclusive accesses including LASR.  */
2677         if (rn == 31) {
2678             gen_check_sp_alignment(s);
2679         }
2680         /* TODO: ARMv8.4-LSE SCTLR.nAA */
2681         memop = finalize_memop(s, size | MO_ALIGN);
2682         clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
2683                                     false, rn != 31, size);
2684         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, memop, false, true,
2685                   rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2686         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2687         return;
2688 
2689     case 0x2: case 0x3: /* CASP / STXP */
2690         if (size & 2) { /* STXP / STLXP */
2691             if (rn == 31) {
2692                 gen_check_sp_alignment(s);
2693             }
2694             if (is_lasr) {
2695                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2696             }
2697             gen_store_exclusive(s, rs, rt, rt2, rn, size, true);
2698             return;
2699         }
2700         if (rt2 == 31
2701             && ((rt | rs) & 1) == 0
2702             && dc_isar_feature(aa64_atomics, s)) {
2703             /* CASP / CASPL */
2704             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2705             return;
2706         }
2707         break;
2708 
2709     case 0x6: case 0x7: /* CASPA / LDXP */
2710         if (size & 2) { /* LDXP / LDAXP */
2711             if (rn == 31) {
2712                 gen_check_sp_alignment(s);
2713             }
2714             gen_load_exclusive(s, rt, rt2, rn, size, true);
2715             if (is_lasr) {
2716                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2717             }
2718             return;
2719         }
2720         if (rt2 == 31
2721             && ((rt | rs) & 1) == 0
2722             && dc_isar_feature(aa64_atomics, s)) {
2723             /* CASPA / CASPAL */
2724             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2725             return;
2726         }
2727         break;
2728 
2729     case 0xa: /* CAS */
2730     case 0xb: /* CASL */
2731     case 0xe: /* CASA */
2732     case 0xf: /* CASAL */
2733         if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
2734             gen_compare_and_swap(s, rs, rt, rn, size);
2735             return;
2736         }
2737         break;
2738     }
2739     unallocated_encoding(s);
2740 }
2741 
2742 /*
2743  * Load register (literal)
2744  *
2745  *  31 30 29   27  26 25 24 23                5 4     0
2746  * +-----+-------+---+-----+-------------------+-------+
2747  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2748  * +-----+-------+---+-----+-------------------+-------+
2749  *
2750  * V: 1 -> vector (simd/fp)
2751  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2752  *                   10 -> 32 bit signed, 11 -> prefetch
2753  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2754  */
2755 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2756 {
2757     int rt = extract32(insn, 0, 5);
2758     int64_t imm = sextract32(insn, 5, 19) << 2;
2759     bool is_vector = extract32(insn, 26, 1);
2760     int opc = extract32(insn, 30, 2);
2761     bool is_signed = false;
2762     int size = 2;
2763     TCGv_i64 tcg_rt, clean_addr;
2764     MemOp memop;
2765 
2766     if (is_vector) {
2767         if (opc == 3) {
2768             unallocated_encoding(s);
2769             return;
2770         }
2771         size = 2 + opc;
2772         if (!fp_access_check(s)) {
2773             return;
2774         }
2775         memop = finalize_memop_asimd(s, size);
2776     } else {
2777         if (opc == 3) {
2778             /* PRFM (literal) : prefetch */
2779             return;
2780         }
2781         size = 2 + extract32(opc, 0, 1);
2782         is_signed = extract32(opc, 1, 1);
2783         memop = finalize_memop(s, size + is_signed * MO_SIGN);
2784     }
2785 
2786     tcg_rt = cpu_reg(s, rt);
2787 
2788     clean_addr = tcg_temp_new_i64();
2789     gen_pc_plus_diff(s, clean_addr, imm);
2790 
2791     if (is_vector) {
2792         do_fp_ld(s, rt, clean_addr, memop);
2793     } else {
2794         /* Only unsigned 32bit loads target 32bit registers.  */
2795         bool iss_sf = opc != 0;
2796         do_gpr_ld(s, tcg_rt, clean_addr, memop, false, true, rt, iss_sf, false);
2797     }
2798 }
2799 
2800 /*
2801  * LDNP (Load Pair - non-temporal hint)
2802  * LDP (Load Pair - non vector)
2803  * LDPSW (Load Pair Signed Word - non vector)
2804  * STNP (Store Pair - non-temporal hint)
2805  * STP (Store Pair - non vector)
2806  * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2807  * LDP (Load Pair of SIMD&FP)
2808  * STNP (Store Pair of SIMD&FP - non-temporal hint)
2809  * STP (Store Pair of SIMD&FP)
2810  *
2811  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2812  * +-----+-------+---+---+-------+---+-----------------------------+
2813  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2814  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2815  *
2816  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2817  *      LDPSW/STGP               01
2818  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2819  *   V: 0 -> GPR, 1 -> Vector
2820  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2821  *      10 -> signed offset, 11 -> pre-index
2822  *   L: 0 -> Store 1 -> Load
2823  *   L: 0 -> Store, 1 -> Load
2824  * Rt, Rt2 = GPR or SIMD registers to be stored
2825  * Rt, Rt2 = GPR or SIMD registers to be transferred
2826  * imm7 = signed offset (multiple of 4 or 8 depending on size)
2827  */
2828 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2829 {
2830     int rt = extract32(insn, 0, 5);
2831     int rn = extract32(insn, 5, 5);
2832     int rt2 = extract32(insn, 10, 5);
2833     uint64_t offset = sextract64(insn, 15, 7);
2834     int index = extract32(insn, 23, 2);
2835     bool is_vector = extract32(insn, 26, 1);
2836     bool is_load = extract32(insn, 22, 1);
2837     int opc = extract32(insn, 30, 2);
2838 
2839     bool is_signed = false;
2840     bool postindex = false;
2841     bool wback = false;
2842     bool set_tag = false;
2843 
2844     TCGv_i64 clean_addr, dirty_addr;
2845 
2846     int size;
2847 
2848     if (opc == 3) {
2849         unallocated_encoding(s);
2850         return;
2851     }
2852 
2853     if (is_vector) {
2854         size = 2 + opc;
2855     } else if (opc == 1 && !is_load) {
2856         /* STGP */
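        /*
         * STGP stores the register pair and also writes the MTE
         * allocation tag for the address, which is why set_tag makes
         * the imm7 offset below scale by the tag granule rather than
         * the access size.
         */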
2857         if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) {
2858             unallocated_encoding(s);
2859             return;
2860         }
2861         size = 3;
2862         set_tag = true;
2863     } else {
2864         size = 2 + extract32(opc, 1, 1);
2865         is_signed = extract32(opc, 0, 1);
2866         if (!is_load && is_signed) {
2867             unallocated_encoding(s);
2868             return;
2869         }
2870     }
2871 
2872     switch (index) {
2873     case 1: /* post-index */
2874         postindex = true;
2875         wback = true;
2876         break;
2877     case 0:
2878         /* Signed offset with "non-temporal" hint. Since we don't emulate
2879          * caches, we don't care about hints to the cache system about
2880          * data access patterns, and handle this identically to a plain
2881          * signed offset.
2882          */
2883         if (is_signed) {
2884             /* There is no non-temporal-hint version of LDPSW */
2885             unallocated_encoding(s);
2886             return;
2887         }
2888         postindex = false;
2889         break;
2890     case 2: /* signed offset, rn not updated */
2891         postindex = false;
2892         break;
2893     case 3: /* pre-index */
2894         postindex = false;
2895         wback = true;
2896         break;
2897     }
2898 
2899     if (is_vector && !fp_access_check(s)) {
2900         return;
2901     }
2902 
2903     offset <<= (set_tag ? LOG2_TAG_GRANULE : size);
2904 
2905     if (rn == 31) {
2906         gen_check_sp_alignment(s);
2907     }
2908 
2909     dirty_addr = read_cpu_reg_sp(s, rn, 1);
2910     if (!postindex) {
2911         tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2912     }
2913 
2914     if (set_tag) {
2915         if (!s->ata) {
2916             /*
2917              * TODO: We could rely on the stores below, at least for
2918              * system mode, if we arrange to add MO_ALIGN_16.
2919              */
2920             gen_helper_stg_stub(cpu_env, dirty_addr);
2921         } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2922             gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
2923         } else {
2924             gen_helper_stg(cpu_env, dirty_addr, dirty_addr);
2925         }
2926     }
2927 
2928     clean_addr = gen_mte_checkN(s, dirty_addr, !is_load,
2929                                 (wback || rn != 31) && !set_tag, 2 << size);
2930 
2931     if (is_vector) {
2932         MemOp mop = finalize_memop_asimd(s, size);
2933 
2934         if (is_load) {
2935             do_fp_ld(s, rt, clean_addr, mop);
2936         } else {
2937             do_fp_st(s, rt, clean_addr, mop);
2938         }
2939         tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2940         if (is_load) {
2941             do_fp_ld(s, rt2, clean_addr, mop);
2942         } else {
2943             do_fp_st(s, rt2, clean_addr, mop);
2944         }
2945     } else {
2946         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2947         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2948         MemOp mop = size + 1;
2949 
2950         /*
2951          * With LSE2, non-sign-extending pairs are treated atomically if
2952          * aligned, and if unaligned one of the pair will be completely
2953          * within a 16-byte block and that element will be atomic.
2954          * Otherwise each element is separately atomic.
2955          * In all cases, issue one operation with the correct atomicity.
2956          *
2957          * This treats sign-extending loads like zero-extending loads,
2958          * since that reuses the most code below.
2959          */
2960         if (s->align_mem) {
2961             mop |= (size == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
2962         }
2963         mop = finalize_memop_pair(s, mop);
2964 
2965         if (is_load) {
2966             if (size == 2) {
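                /*
                 * A single 64-bit access covers both 32-bit halves of
                 * the pair; o1/o2 are the bit offsets of Rt/Rt2 within
                 * it, swapped for big-endian data.
                 */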
2967                 int o2 = s->be_data == MO_LE ? 32 : 0;
2968                 int o1 = o2 ^ 32;
2969 
2970                 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
2971                 if (is_signed) {
2972                     tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
2973                     tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
2974                 } else {
2975                     tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
2976                     tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
2977                 }
2978             } else {
2979                 TCGv_i128 tmp = tcg_temp_new_i128();
2980 
2981                 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
2982                 if (s->be_data == MO_LE) {
2983                     tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
2984                 } else {
2985                     tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
2986                 }
2987             }
2988         } else {
2989             if (size == 2) {
2990                 TCGv_i64 tmp = tcg_temp_new_i64();
2991 
2992                 if (s->be_data == MO_LE) {
2993                     tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
2994                 } else {
2995                     tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
2996                 }
2997                 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
2998             } else {
2999                 TCGv_i128 tmp = tcg_temp_new_i128();
3000 
3001                 if (s->be_data == MO_LE) {
3002                     tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3003                 } else {
3004                     tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3005                 }
3006                 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3007             }
3008         }
3009     }
3010 
3011     if (wback) {
3012         if (postindex) {
3013             tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3014         }
3015         tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3016     }
3017 }
3018 
3019 /*
3020  * Load/store (immediate post-indexed)
3021  * Load/store (immediate pre-indexed)
3022  * Load/store (unscaled immediate)
3023  *
3024  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
3025  * +----+-------+---+-----+-----+---+--------+-----+------+------+
3026  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
3027  * +----+-------+---+-----+-----+---+--------+-----+------+------+
3028  *
3029  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
3030  *       10 -> unprivileged
3031  * V = 0 -> non-vector
3032  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
3033  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3034  */
3035 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
3036                                 int opc,
3037                                 int size,
3038                                 int rt,
3039                                 bool is_vector)
3040 {
3041     int rn = extract32(insn, 5, 5);
3042     int imm9 = sextract32(insn, 12, 9);
3043     int idx = extract32(insn, 10, 2);
3044     bool is_signed = false;
3045     bool is_store = false;
3046     bool is_extended = false;
3047     bool is_unpriv = (idx == 2);
3048     bool iss_valid;
3049     bool post_index;
3050     bool writeback;
3051     int memidx;
3052     MemOp memop;
3053     TCGv_i64 clean_addr, dirty_addr;
3054 
3055     if (is_vector) {
3056         size |= (opc & 2) << 1;
3057         if (size > 4 || is_unpriv) {
3058             unallocated_encoding(s);
3059             return;
3060         }
3061         is_store = ((opc & 1) == 0);
3062         if (!fp_access_check(s)) {
3063             return;
3064         }
3065         memop = finalize_memop_asimd(s, size);
3066     } else {
3067         if (size == 3 && opc == 2) {
3068             /* PRFM - prefetch */
3069             if (idx != 0) {
3070                 unallocated_encoding(s);
3071                 return;
3072             }
3073             return;
3074         }
3075         if (opc == 3 && size > 1) {
3076             unallocated_encoding(s);
3077             return;
3078         }
3079         is_store = (opc == 0);
3080         is_signed = !is_store && extract32(opc, 1, 1);
3081         is_extended = (size < 3) && extract32(opc, 0, 1);
3082         memop = finalize_memop(s, size + is_signed * MO_SIGN);
3083     }
3084 
3085     switch (idx) {
3086     case 0:
3087     case 2:
3088         post_index = false;
3089         writeback = false;
3090         break;
3091     case 1:
3092         post_index = true;
3093         writeback = true;
3094         break;
3095     case 3:
3096         post_index = false;
3097         writeback = true;
3098         break;
3099     default:
3100         g_assert_not_reached();
3101     }
3102 
3103     iss_valid = !is_vector && !writeback;
3104 
3105     if (rn == 31) {
3106         gen_check_sp_alignment(s);
3107     }
3108 
3109     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3110     if (!post_index) {
3111         tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3112     }
3113 
3114     memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
3115 
3116     clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store,
3117                                        writeback || rn != 31,
3118                                        size, is_unpriv, memidx);
3119 
3120     if (is_vector) {
3121         if (is_store) {
3122             do_fp_st(s, rt, clean_addr, memop);
3123         } else {
3124             do_fp_ld(s, rt, clean_addr, memop);
3125         }
3126     } else {
3127         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3128         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3129 
3130         if (is_store) {
3131             do_gpr_st_memidx(s, tcg_rt, clean_addr, memop, memidx,
3132                              iss_valid, rt, iss_sf, false);
3133         } else {
3134             do_gpr_ld_memidx(s, tcg_rt, clean_addr, memop,
3135                              is_extended, memidx,
3136                              iss_valid, rt, iss_sf, false);
3137         }
3138     }
3139 
3140     if (writeback) {
3141         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3142         if (post_index) {
3143             tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
3144         }
3145         tcg_gen_mov_i64(tcg_rn, dirty_addr);
3146     }
3147 }
3148 
3149 /*
3150  * Load/store (register offset)
3151  *
3152  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
3153  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3154  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
3155  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3156  *
3157  * For non-vector:
3158  *   size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
3159  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3160  * For vector:
3161  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3162  *   opc<0>: 0 -> store, 1 -> load
3163  * V: 1 -> vector/simd
3164  * opt: extend encoding (see DecodeRegExtend)
3165  * S: if S=1 then the offset register is scaled by the access size (LSL #size)
3166  * Rt: register to transfer into/out of
3167  * Rn: address register or SP for base
3168  * Rm: offset register or ZR for offset
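      *
      * Worked example (illustrative): LDR X0, [X1, W2, SXTW #3] has
      * size=3, opt=110 (SXTW) and S=1, so the address is
      * X1 + (SXTW(W2) << 3); with S=0 the extended register is not
      * shifted.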
3169  */
3170 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
3171                                    int opc,
3172                                    int size,
3173                                    int rt,
3174                                    bool is_vector)
3175 {
3176     int rn = extract32(insn, 5, 5);
3177     int shift = extract32(insn, 12, 1);
3178     int rm = extract32(insn, 16, 5);
3179     int opt = extract32(insn, 13, 3);
3180     bool is_signed = false;
3181     bool is_store = false;
3182     bool is_extended = false;
3183     TCGv_i64 tcg_rm, clean_addr, dirty_addr;
3184     MemOp memop;
3185 
3186     if (extract32(opt, 1, 1) == 0) {
3187         unallocated_encoding(s);
3188         return;
3189     }
3190 
3191     if (is_vector) {
3192         size |= (opc & 2) << 1;
3193         if (size > 4) {
3194             unallocated_encoding(s);
3195             return;
3196         }
3197         is_store = !extract32(opc, 0, 1);
3198         if (!fp_access_check(s)) {
3199             return;
3200         }
3201     } else {
3202         if (size == 3 && opc == 2) {
3203             /* PRFM - prefetch */
3204             return;
3205         }
3206         if (opc == 3 && size > 1) {
3207             unallocated_encoding(s);
3208             return;
3209         }
3210         is_store = (opc == 0);
3211         is_signed = !is_store && extract32(opc, 1, 1);
3212         is_extended = (size < 3) && extract32(opc, 0, 1);
3213     }
3214 
3215     if (rn == 31) {
3216         gen_check_sp_alignment(s);
3217     }
3218     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3219 
3220     tcg_rm = read_cpu_reg(s, rm, 1);
3221     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
3222 
3223     tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
3224 
3225     memop = finalize_memop(s, size + is_signed * MO_SIGN);
3226     clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size);
3227 
3228     if (is_vector) {
3229         if (is_store) {
3230             do_fp_st(s, rt, clean_addr, memop);
3231         } else {
3232             do_fp_ld(s, rt, clean_addr, memop);
3233         }
3234     } else {
3235         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3236         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3237 
3238         if (is_store) {
3239             do_gpr_st(s, tcg_rt, clean_addr, memop,
3240                       true, rt, iss_sf, false);
3241         } else {
3242             do_gpr_ld(s, tcg_rt, clean_addr, memop,
3243                       is_extended, true, rt, iss_sf, false);
3244         }
3245     }
3246 }
3247 
3248 /*
3249  * Load/store (unsigned immediate)
3250  *
3251  * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
3252  * +----+-------+---+-----+-----+------------+-------+------+
3253  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
3254  * +----+-------+---+-----+-----+------------+-------+------+
3255  *
3256  * For non-vector:
3257  *   size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
3258  *   opc: 00 -> store, 01 -> load unsigned, 10 -> load signed 64, 11 -> load signed 32
3259  * For vector:
3260  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3261  *   opc<0>: 0 -> store, 1 -> load
3262  * Rn: base address register (inc SP)
3263  * Rt: target register
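      *
      * Worked example (illustrative): LDR X0, [X1, #32] has size=3 and
      * imm12=4, since offset = imm12 << size; the reachable offsets are
      * 0 .. (4095 << size), in steps of the access size.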
3264  */
3265 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
3266                                         int opc,
3267                                         int size,
3268                                         int rt,
3269                                         bool is_vector)
3270 {
3271     int rn = extract32(insn, 5, 5);
3272     unsigned int imm12 = extract32(insn, 10, 12);
3273     unsigned int offset;
3274     TCGv_i64 clean_addr, dirty_addr;
3275     bool is_store;
3276     bool is_signed = false;
3277     bool is_extended = false;
3278     MemOp memop;
3279 
3280     if (is_vector) {
3281         size |= (opc & 2) << 1;
3282         if (size > 4) {
3283             unallocated_encoding(s);
3284             return;
3285         }
3286         is_store = !extract32(opc, 0, 1);
3287         if (!fp_access_check(s)) {
3288             return;
3289         }
3290     } else {
3291         if (size == 3 && opc == 2) {
3292             /* PRFM - prefetch */
3293             return;
3294         }
3295         if (opc == 3 && size > 1) {
3296             unallocated_encoding(s);
3297             return;
3298         }
3299         is_store = (opc == 0);
3300         is_signed = !is_store && extract32(opc, 1, 1);
3301         is_extended = (size < 3) && extract32(opc, 0, 1);
3302     }
3303 
3304     if (rn == 31) {
3305         gen_check_sp_alignment(s);
3306     }
3307     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3308     offset = imm12 << size;
3309     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3310 
3311     memop = finalize_memop(s, size + is_signed * MO_SIGN);
3312     clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size);
3313 
3314     if (is_vector) {
3315         if (is_store) {
3316             do_fp_st(s, rt, clean_addr, memop);
3317         } else {
3318             do_fp_ld(s, rt, clean_addr, memop);
3319         }
3320     } else {
3321         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3322         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3323         if (is_store) {
3324             do_gpr_st(s, tcg_rt, clean_addr, memop, true, rt, iss_sf, false);
3325         } else {
3326             do_gpr_ld(s, tcg_rt, clean_addr, memop,
3327                       is_extended, true, rt, iss_sf, false);
3328         }
3329     }
3330 }
3331 
3332 /* Atomic memory operations
3333  *
3334  *  31  30      27  26    24    22  21   16   15    12    10    5     0
3335  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3336  * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn |  Rt |
3337  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3338  *
3339  * Rt: the result register
3340  * Rn: base address or SP
3341  * Rs: the source register for the operation
3342  * V: vector flag (always 0 as of v8.3)
3343  * A: acquire flag
3344  * R: release flag
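      *
      * E.g. (illustrative): LDADD W1, W0, [X2] has size=10, A=0, R=0,
      * o3:opc=0000 and atomically fetch-adds W1 into [X2]; LDADDA,
      * LDADDL and LDADDAL differ only in the A and R bits.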
3345  */
3346 static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
3347                               int size, int rt, bool is_vector)
3348 {
3349     int rs = extract32(insn, 16, 5);
3350     int rn = extract32(insn, 5, 5);
3351     int o3_opc = extract32(insn, 12, 4);
3352     bool r = extract32(insn, 22, 1);
3353     bool a = extract32(insn, 23, 1);
3354     TCGv_i64 tcg_rs, tcg_rt, clean_addr;
3355     AtomicThreeOpFn *fn = NULL;
3356     MemOp mop = finalize_memop(s, size | MO_ALIGN);
3357 
3358     if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
3359         unallocated_encoding(s);
3360         return;
3361     }
3362     switch (o3_opc) {
3363     case 000: /* LDADD */
3364         fn = tcg_gen_atomic_fetch_add_i64;
3365         break;
3366     case 001: /* LDCLR */
3367         fn = tcg_gen_atomic_fetch_and_i64;
3368         break;
3369     case 002: /* LDEOR */
3370         fn = tcg_gen_atomic_fetch_xor_i64;
3371         break;
3372     case 003: /* LDSET */
3373         fn = tcg_gen_atomic_fetch_or_i64;
3374         break;
3375     case 004: /* LDSMAX */
3376         fn = tcg_gen_atomic_fetch_smax_i64;
3377         mop |= MO_SIGN;
3378         break;
3379     case 005: /* LDSMIN */
3380         fn = tcg_gen_atomic_fetch_smin_i64;
3381         mop |= MO_SIGN;
3382         break;
3383     case 006: /* LDUMAX */
3384         fn = tcg_gen_atomic_fetch_umax_i64;
3385         break;
3386     case 007: /* LDUMIN */
3387         fn = tcg_gen_atomic_fetch_umin_i64;
3388         break;
3389     case 010: /* SWP */
3390         fn = tcg_gen_atomic_xchg_i64;
3391         break;
3392     case 014: /* LDAPR, LDAPRH, LDAPRB */
3393         if (!dc_isar_feature(aa64_rcpc_8_3, s) ||
3394             rs != 31 || a != 1 || r != 0) {
3395             unallocated_encoding(s);
3396             return;
3397         }
3398         break;
3399     default:
3400         unallocated_encoding(s);
3401         return;
3402     }
3403 
3404     if (rn == 31) {
3405         gen_check_sp_alignment(s);
3406     }
3407     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size);
3408 
3409     if (o3_opc == 014) {
3410         /*
3411          * LDAPR* are a special case because they are a simple load, not a
3412          * fetch-and-do-something op.
3413          * The architectural consistency requirements here are weaker than
3414          * full load-acquire (we only need "load-acquire processor consistent"),
3415          * but we choose to implement them as full LDAQ.
3416          */
3417         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop, false,
3418                   true, rt, disas_ldst_compute_iss_sf(size, false, 0), true);
3419         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3420         return;
3421     }
3422 
3423     tcg_rs = read_cpu_reg(s, rs, true);
3424     tcg_rt = cpu_reg(s, rt);
3425 
3426     if (o3_opc == 1) { /* LDCLR */
3427         tcg_gen_not_i64(tcg_rs, tcg_rs);
3428     }
3429 
3430     /* The tcg atomic primitives are all full barriers.  Therefore we
3431      * can ignore the Acquire and Release bits of this instruction.
3432      */
3433     fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3434 
3435     if ((mop & MO_SIGN) && size != MO_64) {
3436         tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3437     }
3438 }
3439 
3440 /*
3441  * PAC memory operations
3442  *
3443  *  31  30      27  26    24    22  21       12  11  10    5     0
3444  * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3445  * | size | 1 1 1 | V | 0 0 | M S | 1 |  imm9  | W | 1 | Rn |  Rt |
3446  * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3447  *
3448  * Rt: the result register
3449  * Rn: base address or SP
3450  * V: vector flag (always 0 as of v8.3)
3451  * M: clear for key DA, set for key DB
3452  * W: pre-indexing flag
3453  * S: sign for imm9.
3454  */
3455 static void disas_ldst_pac(DisasContext *s, uint32_t insn,
3456                            int size, int rt, bool is_vector)
3457 {
3458     int rn = extract32(insn, 5, 5);
3459     bool is_wback = extract32(insn, 11, 1);
3460     bool use_key_a = !extract32(insn, 23, 1);
3461     int offset;
3462     TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3463     MemOp memop;
3464 
3465     if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
3466         unallocated_encoding(s);
3467         return;
3468     }
3469 
3470     if (rn == 31) {
3471         gen_check_sp_alignment(s);
3472     }
3473     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3474 
3475     if (s->pauth_active) {
3476         if (use_key_a) {
3477             gen_helper_autda(dirty_addr, cpu_env, dirty_addr,
3478                              tcg_constant_i64(0));
3479         } else {
3480             gen_helper_autdb(dirty_addr, cpu_env, dirty_addr,
3481                              tcg_constant_i64(0));
3482         }
3483     }
3484 
3485     /* Form the 10-bit signed, scaled offset.  */
3486     offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
3487     offset = sextract32(offset << size, 0, 10 + size);
3488     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
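         /*
          * Worked example for the offset formed above (illustrative):
          * with S=1 and imm9=0 the raw field is 0x200; 0x200 << 3 is
          * 0x1000 and sextract32(0x1000, 0, 13) == -4096, the minimum.
          * With S=0 and imm9=0x1ff the offset is 0x1ff << 3 = 4088,
          * the maximum.
          */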
3489 
3490     memop = finalize_memop(s, size);
3491 
3492     /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3493     clean_addr = gen_mte_check1(s, dirty_addr, false,
3494                                 is_wback || rn != 31, size);
3495 
3496     tcg_rt = cpu_reg(s, rt);
3497     do_gpr_ld(s, tcg_rt, clean_addr, memop,
3498               /* extend */ false, /* iss_valid */ !is_wback,
3499               /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
3500 
3501     if (is_wback) {
3502         tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3503     }
3504 }
3505 
3506 /*
3507  * LDAPR/STLR (unscaled immediate)
3508  *
3509  *  31  30            24    22  21       12    10    5     0
3510  * +------+-------------+-----+---+--------+-----+----+-----+
3511  * | size | 0 1 1 0 0 1 | opc | 0 |  imm9  | 0 0 | Rn |  Rt |
3512  * +------+-------------+-----+---+--------+-----+----+-----+
3513  *
3514  * Rt: source or destination register
3515  * Rn: base register
3516  * imm9: unscaled immediate offset
3517  * opc: 00: STLUR*, 01/10/11: various LDAPUR*
3518  * size: size of load/store
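      *
      * E.g. (illustrative): LDAPURSH W0, [X1, #-2] has size=01 and
      * opc=11: a halfword load, sign-extended to 32 bits and then
      * zero-extended into the 64-bit register.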
3519  */
3520 static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn)
3521 {
3522     int rt = extract32(insn, 0, 5);
3523     int rn = extract32(insn, 5, 5);
3524     int offset = sextract32(insn, 12, 9);
3525     int opc = extract32(insn, 22, 2);
3526     int size = extract32(insn, 30, 2);
3527     TCGv_i64 clean_addr, dirty_addr;
3528     bool is_store = false;
3529     bool extend = false;
3530     bool iss_sf;
3531     MemOp mop;
3532 
3533     if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3534         unallocated_encoding(s);
3535         return;
3536     }
3537 
3538     /* TODO: ARMv8.4-LSE SCTLR.nAA */
3539     mop = finalize_memop(s, size | MO_ALIGN);
3540 
3541     switch (opc) {
3542     case 0: /* STLURB */
3543         is_store = true;
3544         break;
3545     case 1: /* LDAPUR* */
3546         break;
3547     case 2: /* LDAPURS* 64-bit variant */
3548         if (size == 3) {
3549             unallocated_encoding(s);
3550             return;
3551         }
3552         mop |= MO_SIGN;
3553         break;
3554     case 3: /* LDAPURS* 32-bit variant */
3555         if (size > 1) {
3556             unallocated_encoding(s);
3557             return;
3558         }
3559         mop |= MO_SIGN;
3560         extend = true; /* zero-extend 32->64 after signed load */
3561         break;
3562     default:
3563         g_assert_not_reached();
3564     }
3565 
3566     iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc);
3567 
3568     if (rn == 31) {
3569         gen_check_sp_alignment(s);
3570     }
3571 
3572     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3573     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3574     clean_addr = clean_data_tbi(s, dirty_addr);
3575 
3576     if (is_store) {
3577         /* Store-Release semantics */
3578         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3579         do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true);
3580     } else {
3581         /*
3582          * Load-AcquirePC semantics; we implement as the slightly more
3583          * restrictive Load-Acquire.
3584          */
3585         do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop,
3586                   extend, true, rt, iss_sf, true);
3587         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3588     }
3589 }
3590 
3591 /* Load/store register (all forms) */
3592 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
3593 {
3594     int rt = extract32(insn, 0, 5);
3595     int opc = extract32(insn, 22, 2);
3596     bool is_vector = extract32(insn, 26, 1);
3597     int size = extract32(insn, 30, 2);
3598 
3599     switch (extract32(insn, 24, 2)) {
3600     case 0:
3601         if (extract32(insn, 21, 1) == 0) {
3602             /* Load/store register (unscaled immediate)
3603              * Load/store immediate pre/post-indexed
3604              * Load/store register unprivileged
3605              */
3606             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
3607             return;
3608         }
3609         switch (extract32(insn, 10, 2)) {
3610         case 0:
3611             disas_ldst_atomic(s, insn, size, rt, is_vector);
3612             return;
3613         case 2:
3614             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
3615             return;
3616         default:
3617             disas_ldst_pac(s, insn, size, rt, is_vector);
3618             return;
3619         }
3620         break;
3621     case 1:
3622         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
3623         return;
3624     }
3625     unallocated_encoding(s);
3626 }
3627 
3628 /* AdvSIMD load/store multiple structures
3629  *
3630  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
3631  * +---+---+---------------+---+-------------+--------+------+------+------+
3632  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
3633  * +---+---+---------------+---+-------------+--------+------+------+------+
3634  *
3635  * AdvSIMD load/store multiple structures (post-indexed)
3636  *
3637  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
3638  * +---+---+---------------+---+---+---------+--------+------+------+------+
3639  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
3640  * +---+---+---------------+---+---+---------+--------+------+------+------+
3641  *
3642  * Rt: first (or only) SIMD&FP register to be transferred
3643  * Rn: base address or SP
3644  * Rm (post-index only): post-index register (when Rm != 31) or size-dependent #imm
3645  */
3646 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
3647 {
3648     int rt = extract32(insn, 0, 5);
3649     int rn = extract32(insn, 5, 5);
3650     int rm = extract32(insn, 16, 5);
3651     int size = extract32(insn, 10, 2);
3652     int opcode = extract32(insn, 12, 4);
3653     bool is_store = !extract32(insn, 22, 1);
3654     bool is_postidx = extract32(insn, 23, 1);
3655     bool is_q = extract32(insn, 30, 1);
3656     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3657     MemOp endian, align, mop;
3658 
3659     int total;    /* total bytes */
3660     int elements; /* elements per vector */
3661     int rpt;    /* num iterations */
3662     int selem;  /* structure elements */
3663     int r;
3664 
3665     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
3666         unallocated_encoding(s);
3667         return;
3668     }
3669 
3670     if (!is_postidx && rm != 0) {
3671         unallocated_encoding(s);
3672         return;
3673     }
3674 
3675     /* From the shared decode logic */
3676     switch (opcode) {
3677     case 0x0:
3678         rpt = 1;
3679         selem = 4;
3680         break;
3681     case 0x2:
3682         rpt = 4;
3683         selem = 1;
3684         break;
3685     case 0x4:
3686         rpt = 1;
3687         selem = 3;
3688         break;
3689     case 0x6:
3690         rpt = 3;
3691         selem = 1;
3692         break;
3693     case 0x7:
3694         rpt = 1;
3695         selem = 1;
3696         break;
3697     case 0x8:
3698         rpt = 1;
3699         selem = 2;
3700         break;
3701     case 0xa:
3702         rpt = 2;
3703         selem = 1;
3704         break;
3705     default:
3706         unallocated_encoding(s);
3707         return;
3708     }
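         /*
          * For instance (illustrative): LD4 {v0.8b-v3.8b}, [x0] has
          * opcode=0000 (rpt=1, selem=4), while LD1 {v0.16b-v3.16b}, [x0]
          * has opcode=0010 (rpt=4, selem=1); either way rpt * selem
          * registers are transferred.
          */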
3709 
3710     if (size == 3 && !is_q && selem != 1) {
3711         /* reserved */
3712         unallocated_encoding(s);
3713         return;
3714     }
3715 
3716     if (!fp_access_check(s)) {
3717         return;
3718     }
3719 
3720     if (rn == 31) {
3721         gen_check_sp_alignment(s);
3722     }
3723 
3724     /* For our purposes, bytes are always little-endian.  */
3725     endian = s->be_data;
3726     if (size == 0) {
3727         endian = MO_LE;
3728     }
3729 
3730     total = rpt * selem * (is_q ? 16 : 8);
3731     tcg_rn = cpu_reg_sp(s, rn);
3732 
3733     /*
3734      * Issue the MTE check vs the logical repeat count, before we
3735      * promote consecutive little-endian elements below.
3736      */
3737     clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31,
3738                                 total);
3739 
3740     /*
3741      * Consecutive little-endian elements from a single register
3742      * can be promoted to a larger little-endian operation.
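          *
          * E.g. (illustrative): a little-endian LD1 {v0.16b}, [x0] has
          * selem == 1, so instead of sixteen byte accesses it is done
          * as two 8-byte little-endian accesses (size promoted to 3).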
3743      */
3744     align = MO_ALIGN;
3745     if (selem == 1 && endian == MO_LE) {
3746         align = pow2_align(size);
3747         size = 3;
3748     }
3749     if (!s->align_mem) {
3750         align = 0;
3751     }
3752     mop = endian | size | align;
3753 
3754     elements = (is_q ? 16 : 8) >> size;
3755     tcg_ebytes = tcg_constant_i64(1 << size);
3756     for (r = 0; r < rpt; r++) {
3757         int e;
3758         for (e = 0; e < elements; e++) {
3759             int xs;
3760             for (xs = 0; xs < selem; xs++) {
3761                 int tt = (rt + r + xs) % 32;
3762                 if (is_store) {
3763                     do_vec_st(s, tt, e, clean_addr, mop);
3764                 } else {
3765                     do_vec_ld(s, tt, e, clean_addr, mop);
3766                 }
3767                 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3768             }
3769         }
3770     }
3771 
3772     if (!is_store) {
3773         /* For non-quad operations, setting a slice of the low
3774          * 64 bits of the register clears the high 64 bits (in
3775          * the ARM ARM pseudocode this is implicit in the fact
3776          * that 'rval' is a 64 bit wide variable).
3777          * For quad operations, we might still need to zero the
3778          * high bits of SVE.
3779          */
3780         for (r = 0; r < rpt * selem; r++) {
3781             int tt = (rt + r) % 32;
3782             clear_vec_high(s, is_q, tt);
3783         }
3784     }
3785 
3786     if (is_postidx) {
3787         if (rm == 31) {
3788             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3789         } else {
3790             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3791         }
3792     }
3793 }
3794 
3795 /* AdvSIMD load/store single structure
3796  *
3797  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3798  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3799  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
3800  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3801  *
3802  * AdvSIMD load/store single structure (post-indexed)
3803  *
3804  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3805  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3806  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
3807  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3808  *
3809  * Rt: first (or only) SIMD&FP register to be transferred
3810  * Rn: base address or SP
3811  * Rm (post-index only): post-index register (when Rm != 31) or size-dependent #imm
3812  * index = encoded in Q:S:size dependent on size
3813  *
3814  * lane_size = encoded in R, opc
3815  * transfer width = encoded in opc, S, size
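      *
      * Worked example (illustrative): LD1 {v2.s}[3], [x0] is a 32-bit
      * lane access, so scale=2 with size<0>=0, and the lane index is
      * (Q:S:size) >> 2 = Q:S = 0b11 = 3.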
3816  */
3817 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
3818 {
3819     int rt = extract32(insn, 0, 5);
3820     int rn = extract32(insn, 5, 5);
3821     int rm = extract32(insn, 16, 5);
3822     int size = extract32(insn, 10, 2);
3823     int S = extract32(insn, 12, 1);
3824     int opc = extract32(insn, 13, 3);
3825     int R = extract32(insn, 21, 1);
3826     int is_load = extract32(insn, 22, 1);
3827     int is_postidx = extract32(insn, 23, 1);
3828     int is_q = extract32(insn, 30, 1);
3829 
3830     int scale = extract32(opc, 1, 2);
3831     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
3832     bool replicate = false;
3833     int index = is_q << 3 | S << 2 | size;
3834     int xs, total;
3835     TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3836     MemOp mop;
3837 
3838     if (extract32(insn, 31, 1)) {
3839         unallocated_encoding(s);
3840         return;
3841     }
3842     if (!is_postidx && rm != 0) {
3843         unallocated_encoding(s);
3844         return;
3845     }
3846 
3847     switch (scale) {
3848     case 3:
3849         if (!is_load || S) {
3850             unallocated_encoding(s);
3851             return;
3852         }
3853         scale = size;
3854         replicate = true;
3855         break;
3856     case 0:
3857         break;
3858     case 1:
3859         if (extract32(size, 0, 1)) {
3860             unallocated_encoding(s);
3861             return;
3862         }
3863         index >>= 1;
3864         break;
3865     case 2:
3866         if (extract32(size, 1, 1)) {
3867             unallocated_encoding(s);
3868             return;
3869         }
3870         if (!extract32(size, 0, 1)) {
3871             index >>= 2;
3872         } else {
3873             if (S) {
3874                 unallocated_encoding(s);
3875                 return;
3876             }
3877             index >>= 3;
3878             scale = 3;
3879         }
3880         break;
3881     default:
3882         g_assert_not_reached();
3883     }
3884 
3885     if (!fp_access_check(s)) {
3886         return;
3887     }
3888 
3889     if (rn == 31) {
3890         gen_check_sp_alignment(s);
3891     }
3892 
3893     total = selem << scale;
3894     tcg_rn = cpu_reg_sp(s, rn);
3895 
3896     clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31,
3897                                 total);
3898     mop = finalize_memop(s, scale);
3899 
3900     tcg_ebytes = tcg_constant_i64(1 << scale);
3901     for (xs = 0; xs < selem; xs++) {
3902         if (replicate) {
3903             /* Load and replicate to all elements */
3904             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3905 
3906             tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
3907             tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
3908                                  (is_q + 1) * 8, vec_full_reg_size(s),
3909                                  tcg_tmp);
3910         } else {
3911             /* Load/store one element per register */
3912             if (is_load) {
3913                 do_vec_ld(s, rt, index, clean_addr, mop);
3914             } else {
3915                 do_vec_st(s, rt, index, clean_addr, mop);
3916             }
3917         }
3918         tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3919         rt = (rt + 1) % 32;
3920     }
3921 
3922     if (is_postidx) {
3923         if (rm == 31) {
3924             tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3925         } else {
3926             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3927         }
3928     }
3929 }
3930 
3931 /*
3932  * Load/Store memory tags
3933  *
3934  *  31 30 29         24     22  21     12    10      5      0
3935  * +-----+-------------+-----+---+------+-----+------+------+
3936  * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 |  Rn  |  Rt  |
3937  * +-----+-------------+-----+---+------+-----+------+------+
3938  */
3939 static void disas_ldst_tag(DisasContext *s, uint32_t insn)
3940 {
3941     int rt = extract32(insn, 0, 5);
3942     int rn = extract32(insn, 5, 5);
3943     uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
3944     int op2 = extract32(insn, 10, 2);
3945     int op1 = extract32(insn, 22, 2);
3946     bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
3947     int index = 0;
3948     TCGv_i64 addr, clean_addr, tcg_rt;
3949 
3950     /* We checked insn bits [29:24,21] in the caller.  */
3951     if (extract32(insn, 30, 2) != 3) {
3952         goto do_unallocated;
3953     }
3954 
3955     /*
3956      * @index is a tri-state variable:
3957      * < 0 : post-index, writeback
3958      * = 0 : signed offset
3959      * > 0 : pre-index, writeback
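          *
          * E.g. (illustrative): STG X0, [X1, #16]! has op1=0, op2=3,
          * so index = op2 - 2 = 1 (pre-index); STG X0, [X1], #16 has
          * op2=1, so index = -1 (post-index).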
3960      */
3961     switch (op1) {
3962     case 0:
3963         if (op2 != 0) {
3964             /* STG */
3965             index = op2 - 2;
3966         } else {
3967             /* STZGM */
3968             if (s->current_el == 0 || offset != 0) {
3969                 goto do_unallocated;
3970             }
3971             is_mult = is_zero = true;
3972         }
3973         break;
3974     case 1:
3975         if (op2 != 0) {
3976             /* STZG */
3977             is_zero = true;
3978             index = op2 - 2;
3979         } else {
3980             /* LDG */
3981             is_load = true;
3982         }
3983         break;
3984     case 2:
3985         if (op2 != 0) {
3986             /* ST2G */
3987             is_pair = true;
3988             index = op2 - 2;
3989         } else {
3990             /* STGM */
3991             if (s->current_el == 0 || offset != 0) {
3992                 goto do_unallocated;
3993             }
3994             is_mult = true;
3995         }
3996         break;
3997     case 3:
3998         if (op2 != 0) {
3999             /* STZ2G */
4000             is_pair = is_zero = true;
4001             index = op2 - 2;
4002         } else {
4003             /* LDGM */
4004             if (s->current_el == 0 || offset != 0) {
4005                 goto do_unallocated;
4006             }
4007             is_mult = is_load = true;
4008         }
4009         break;
4010 
4011     default:
4012     do_unallocated:
4013         unallocated_encoding(s);
4014         return;
4015     }
4016 
4017     if (is_mult
4018         ? !dc_isar_feature(aa64_mte, s)
4019         : !dc_isar_feature(aa64_mte_insn_reg, s)) {
4020         goto do_unallocated;
4021     }
4022 
4023     if (rn == 31) {
4024         gen_check_sp_alignment(s);
4025     }
4026 
4027     addr = read_cpu_reg_sp(s, rn, true);
4028     if (index >= 0) {
4029         /* pre-index or signed offset */
4030         tcg_gen_addi_i64(addr, addr, offset);
4031     }
4032 
4033     if (is_mult) {
4034         tcg_rt = cpu_reg(s, rt);
4035 
4036         if (is_zero) {
4037             int size = 4 << s->dcz_blocksize;
4038 
4039             if (s->ata) {
4040                 gen_helper_stzgm_tags(cpu_env, addr, tcg_rt);
4041             }
4042             /*
4043              * The non-tags portion of STZGM is mostly like DC_ZVA,
4044              * except the alignment happens before the access.
4045              */
4046             clean_addr = clean_data_tbi(s, addr);
4047             tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4048             gen_helper_dc_zva(cpu_env, clean_addr);
4049         } else if (s->ata) {
4050             if (is_load) {
4051                 gen_helper_ldgm(tcg_rt, cpu_env, addr);
4052             } else {
4053                 gen_helper_stgm(cpu_env, addr, tcg_rt);
4054             }
4055         } else {
4056             MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
4057             int size = 4 << GMID_EL1_BS;
4058 
4059             clean_addr = clean_data_tbi(s, addr);
4060             tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4061             gen_probe_access(s, clean_addr, acc, size);
4062 
4063             if (is_load) {
4064                 /* The result tags are zeros.  */
4065                 tcg_gen_movi_i64(tcg_rt, 0);
4066             }
4067         }
4068         return;
4069     }
4070 
4071     if (is_load) {
4072         tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4073         tcg_rt = cpu_reg(s, rt);
4074         if (s->ata) {
4075             gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt);
4076         } else {
4077             clean_addr = clean_data_tbi(s, addr);
4078             gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4079             gen_address_with_allocation_tag0(tcg_rt, addr);
4080         }
4081     } else {
4082         tcg_rt = cpu_reg_sp(s, rt);
4083         if (!s->ata) {
4084             /*
4085              * For STG and ST2G, we need to check alignment and probe memory.
4086              * TODO: For STZG and STZ2G, we could rely on the stores below,
4087              * at least for system mode; user-only won't enforce alignment.
4088              */
4089             if (is_pair) {
4090                 gen_helper_st2g_stub(cpu_env, addr);
4091             } else {
4092                 gen_helper_stg_stub(cpu_env, addr);
4093             }
4094         } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4095             if (is_pair) {
4096                 gen_helper_st2g_parallel(cpu_env, addr, tcg_rt);
4097             } else {
4098                 gen_helper_stg_parallel(cpu_env, addr, tcg_rt);
4099             }
4100         } else {
4101             if (is_pair) {
4102                 gen_helper_st2g(cpu_env, addr, tcg_rt);
4103             } else {
4104                 gen_helper_stg(cpu_env, addr, tcg_rt);
4105             }
4106         }
4107     }
4108 
4109     if (is_zero) {
4110         TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4111         TCGv_i64 zero64 = tcg_constant_i64(0);
4112         TCGv_i128 zero128 = tcg_temp_new_i128();
4113         int mem_index = get_mem_index(s);
4114         MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4115 
4116         tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4117 
4118         /* This is 1 or 2 atomic 16-byte operations. */
4119         tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4120         if (is_pair) {
4121             tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4122             tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4123         }
4124     }
4125 
4126     if (index != 0) {
4127         /* pre-index or post-index */
4128         if (index < 0) {
4129             /* post-index */
4130             tcg_gen_addi_i64(addr, addr, offset);
4131         }
4132         tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr);
4133     }
4134 }
4135 
4136 /* Loads and stores */
4137 static void disas_ldst(DisasContext *s, uint32_t insn)
4138 {
4139     switch (extract32(insn, 24, 6)) {
4140     case 0x08: /* Load/store exclusive */
4141         disas_ldst_excl(s, insn);
4142         break;
4143     case 0x18: case 0x1c: /* Load register (literal) */
4144         disas_ld_lit(s, insn);
4145         break;
4146     case 0x28: case 0x29:
4147     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
4148         disas_ldst_pair(s, insn);
4149         break;
4150     case 0x38: case 0x39:
4151     case 0x3c: case 0x3d: /* Load/store register (all forms) */
4152         disas_ldst_reg(s, insn);
4153         break;
4154     case 0x0c: /* AdvSIMD load/store multiple structures */
4155         disas_ldst_multiple_struct(s, insn);
4156         break;
4157     case 0x0d: /* AdvSIMD load/store single structure */
4158         disas_ldst_single_struct(s, insn);
4159         break;
4160     case 0x19:
4161         if (extract32(insn, 21, 1) != 0) {
4162             disas_ldst_tag(s, insn);
4163         } else if (extract32(insn, 10, 2) == 0) {
4164             disas_ldst_ldapr_stlr(s, insn);
4165         } else {
4166             unallocated_encoding(s);
4167         }
4168         break;
4169     default:
4170         unallocated_encoding(s);
4171         break;
4172     }
4173 }
4174 
4175 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4176 
4177 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4178                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4179 {
4180     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4181     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4182     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4183 
4184     fn(tcg_rd, tcg_rn, tcg_imm);
4185     if (!a->sf) {
4186         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4187     }
4188     return true;
4189 }
4190 
4191 /*
4192  * PC-rel. addressing
4193  */
4194 
4195 static bool trans_ADR(DisasContext *s, arg_ri *a)
4196 {
4197     gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4198     return true;
4199 }
4200 
4201 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4202 {
4203     int64_t offset = (int64_t)a->imm << 12;
4204 
4205     /* The page offset is ok for CF_PCREL. */
4206     offset -= s->pc_curr & 0xfff;
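         /*
          * Worked example (illustrative): with the ADRP itself at
          * 0x40001234 and imm=1, offset = 0x1000 - 0x234 = 0xdcc, so
          * the result is 0x40001234 + 0xdcc = 0x40002000, the base of
          * the next 4KiB page.
          */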
4207     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4208     return true;
4209 }
4210 
4211 /*
4212  * Add/subtract (immediate)
4213  */
4214 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4215 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4216 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4217 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4218 
4219 /*
4220  * Add/subtract (immediate, with tags)
4221  */
4222 
4223 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4224                                       bool sub_op)
4225 {
4226     TCGv_i64 tcg_rn, tcg_rd;
4227     int imm;
4228 
4229     imm = a->uimm6 << LOG2_TAG_GRANULE;
4230     if (sub_op) {
4231         imm = -imm;
4232     }
4233 
4234     tcg_rn = cpu_reg_sp(s, a->rn);
4235     tcg_rd = cpu_reg_sp(s, a->rd);
4236 
4237     if (s->ata) {
4238         gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn,
4239                            tcg_constant_i32(imm),
4240                            tcg_constant_i32(a->uimm4));
4241     } else {
4242         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4243         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4244     }
4245     return true;
4246 }
4247 
4248 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4249 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4250 
4251 /* The input should be a value in the bottom e bits (with higher
4252  * bits zero); returns that value replicated into every element
4253  * of size e in a 64 bit integer.
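      *
      * E.g. (illustrative): bitfield_replicate(0x1, 4) returns
      * 0x1111111111111111; the element doubles in width on each
      * iteration until it fills 64 bits.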
4254  */
4255 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4256 {
4257     assert(e != 0);
4258     while (e < 64) {
4259         mask |= mask << e;
4260         e *= 2;
4261     }
4262     return mask;
4263 }
4264 
4265 /*
4266  * Logical (immediate)
4267  */
4268 
4269 /*
4270  * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4271  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4272  * value (ie should cause a guest UNDEF exception), and true if they are
4273  * valid, in which case the decoded bit pattern is written to result.
4274  */
4275 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4276                             unsigned int imms, unsigned int immr)
4277 {
4278     uint64_t mask;
4279     unsigned e, levels, s, r;
4280     int len;
4281 
4282     assert(immn < 2 && imms < 64 && immr < 64);
4283 
4284     /* The bit patterns we create here are 64 bit patterns which
4285      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4286      * 64 bits each. Each element contains the same value: a run
4287      * of between 1 and e-1 non-zero bits, rotated within the
4288      * element by between 0 and e-1 bits.
4289      *
4290      * The element size and run length are encoded into immn (1 bit)
4291      * and imms (6 bits) as follows:
4292      * 64 bit elements: immn = 1, imms = <length of run - 1>
4293      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4294      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4295      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4296      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4297      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4298      * Notice that immn = 0, imms = 11111x is the only combination
4299      * not covered by one of the above options; this is reserved.
4300      * Further, <length of run - 1> all-ones is a reserved pattern.
4301      *
4302      * In all cases the rotation is by immr % e (and immr is 6 bits).
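          *
          * Worked example (illustrative): immn=0, imms=000111,
          * immr=000010 gives len=5, so e=32, s=7, r=2: an 8-bit run
          * of ones (0xff) rotated right by 2 within 32 bits is
          * 0xc000003f, replicated to 0xc000003fc000003f.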
4303      */
4304 
4305     /* First determine the element size */
4306     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4307     if (len < 1) {
4308         /* This is the immn == 0, imms == 0b11111x case */
4309         return false;
4310     }
4311     e = 1 << len;
4312 
4313     levels = e - 1;
4314     s = imms & levels;
4315     r = immr & levels;
4316 
4317     if (s == levels) {
4318         /* <length of run - 1> mustn't be all-ones. */
4319         return false;
4320     }
4321 
4322     /* Create the value of one element: s+1 set bits rotated
4323      * by r within the element (which is e bits wide)...
4324      */
4325     mask = MAKE_64BIT_MASK(0, s + 1);
4326     if (r) {
4327         mask = (mask >> r) | (mask << (e - r));
4328         mask &= MAKE_64BIT_MASK(0, e);
4329     }
4330     /* ...then replicate the element over the whole 64 bit value */
4331     mask = bitfield_replicate(mask, e);
4332     *result = mask;
4333     return true;
4334 }
4335 
4336 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4337                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4338 {
4339     TCGv_i64 tcg_rd, tcg_rn;
4340     uint64_t imm;
4341 
4342     /* Some immediate field values are reserved. */
4343     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4344                                 extract32(a->dbm, 0, 6),
4345                                 extract32(a->dbm, 6, 6))) {
4346         return false;
4347     }
4348     if (!a->sf) {
4349         imm &= 0xffffffffull;
4350     }
4351 
4352     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4353     tcg_rn = cpu_reg(s, a->rn);
4354 
4355     fn(tcg_rd, tcg_rn, imm);
4356     if (set_cc) {
4357         gen_logic_CC(a->sf, tcg_rd);
4358     }
4359     if (!a->sf) {
4360         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4361     }
4362     return true;
4363 }
4364 
4365 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4366 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4367 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4368 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4369 
4370 /*
4371  * Move wide (immediate)
4372  */
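     /*
      * For illustration: a full 64-bit constant is typically built with
      * one MOVZ plus up to three MOVKs.  E.g. MOVZ X0, #0x1234, LSL #48
      * sets X0 = 0x1234000000000000, and a following MOVK X0, #0x5678,
      * LSL #32 deposits 0x5678 into bits [47:32], leaving the other
      * bits intact.
      */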
4373 
4374 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4375 {
4376     int pos = a->hw << 4;
4377     tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4378     return true;
4379 }
4380 
4381 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4382 {
4383     int pos = a->hw << 4;
4384     uint64_t imm = a->imm;
4385 
4386     imm = ~(imm << pos);
4387     if (!a->sf) {
4388         imm = (uint32_t)imm;
4389     }
4390     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4391     return true;
4392 }
4393 
4394 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4395 {
4396     int pos = a->hw << 4;
4397     TCGv_i64 tcg_rd, tcg_im;
4398 
4399     tcg_rd = cpu_reg(s, a->rd);
4400     tcg_im = tcg_constant_i64(a->imm);
4401     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4402     if (!a->sf) {
4403         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4404     }
4405     return true;
4406 }
4407 
4408 /*
4409  * Bitfield
4410  */
4411 
4412 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4413 {
4414     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4415     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4416     unsigned int bitsize = a->sf ? 64 : 32;
4417     unsigned int ri = a->immr;
4418     unsigned int si = a->imms;
4419     unsigned int pos, len;
4420 
4421     if (si >= ri) {
4422         /* Wd<s-r:0> = Wn<s:r> */
4423         len = (si - ri) + 1;
4424         tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4425         if (!a->sf) {
4426             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4427         }
4428     } else {
4429         /* Wd<32+s-r,32-r> = Wn<s:0> */
4430         len = si + 1;
4431         pos = (bitsize - ri) & (bitsize - 1);
4432 
4433         if (len < ri) {
4434             /*
4435              * Sign extend the destination field from len to fill the
4436              * balance of the word.  Let the deposit below insert all
4437              * of those sign bits.
4438              */
4439             tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4440             len = ri;
4441         }
4442 
4443         /*
4444          * We start with zero, and we haven't modified any bits outside
4445          * bitsize, therefore no final zero-extension is needed for !sf.
4446          */
4447         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4448     }
4449     return true;
4450 }
4451 
4452 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4453 {
4454     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4455     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4456     unsigned int bitsize = a->sf ? 64 : 32;
4457     unsigned int ri = a->immr;
4458     unsigned int si = a->imms;
4459     unsigned int pos, len;
4460 
4464     if (si >= ri) {
4465         /* Wd<s-r:0> = Wn<s:r> */
4466         len = (si - ri) + 1;
4467         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4468     } else {
4469         /* Wd<32+s-r,32-r> = Wn<s:0> */
4470         len = si + 1;
4471         pos = (bitsize - ri) & (bitsize - 1);
4472         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4473     }
4474     return true;
4475 }
4476 
4477 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4478 {
4479     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4480     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4481     unsigned int bitsize = a->sf ? 64 : 32;
4482     unsigned int ri = a->immr;
4483     unsigned int si = a->imms;
4484     unsigned int pos, len;
4485 
4489     if (si >= ri) {
4490         /* Wd<s-r:0> = Wn<s:r> */
4491         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4492         len = (si - ri) + 1;
4493         pos = 0;
4494     } else {
4495         /* Wd<32+s-r,32-r> = Wn<s:0> */
4496         len = si + 1;
4497         pos = (bitsize - ri) & (bitsize - 1);
4498     }
4499 
4500     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4501     if (!a->sf) {
4502         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4503     }
4504     return true;
4505 }
4506 
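     /*
      * EXTR computes Rd = (Rn:Rm)[imm + datasize - 1 : imm].  E.g.
      * (illustrative) EXTR X0, X1, X2, #8 yields bits [71:8] of the
      * 128-bit value X1:X2; with Rn == Rm it degenerates to a rotate,
      * so EXTR W0, W1, W1, #8 is ROR W0, W1, #8.
      */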
4507 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4508 {
4509     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4510 
4511     tcg_rd = cpu_reg(s, a->rd);
4512 
4513     if (unlikely(a->imm == 0)) {
4514         /*
4515          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4516          * so an extract from bit 0 is a special case.
4517          */
4518         if (a->sf) {
4519             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4520         } else {
4521             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4522         }
4523     } else {
4524         tcg_rm = cpu_reg(s, a->rm);
4525         tcg_rn = cpu_reg(s, a->rn);
4526 
4527         if (a->sf) {
4528             /* Specialization to ROR happens in EXTRACT2.  */
4529             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4530         } else {
4531             TCGv_i32 t0 = tcg_temp_new_i32();
4532 
4533             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4534             if (a->rm == a->rn) {
4535                 tcg_gen_rotri_i32(t0, t0, a->imm);
4536             } else {
4537                 TCGv_i32 t1 = tcg_temp_new_i32();
4538                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4539                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4540             }
4541             tcg_gen_extu_i32_i64(tcg_rd, t0);
4542         }
4543     }
4544     return true;
4545 }
4546 
4547 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
4548  * Note that it is the caller's responsibility to ensure that the
4549  * shift amount is in range (ie 0..31 or 0..63) and to provide the
4550  * ARM mandated semantics for out-of-range shifts.
4551  */
4552 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4553                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4554 {
4555     switch (shift_type) {
4556     case A64_SHIFT_TYPE_LSL:
4557         tcg_gen_shl_i64(dst, src, shift_amount);
4558         break;
4559     case A64_SHIFT_TYPE_LSR:
4560         tcg_gen_shr_i64(dst, src, shift_amount);
4561         break;
4562     case A64_SHIFT_TYPE_ASR:
4563         if (!sf) {
4564             tcg_gen_ext32s_i64(dst, src);
4565         }
4566         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4567         break;
4568     case A64_SHIFT_TYPE_ROR:
4569         if (sf) {
4570             tcg_gen_rotr_i64(dst, src, shift_amount);
4571         } else {
4572             TCGv_i32 t0, t1;
4573             t0 = tcg_temp_new_i32();
4574             t1 = tcg_temp_new_i32();
4575             tcg_gen_extrl_i64_i32(t0, src);
4576             tcg_gen_extrl_i64_i32(t1, shift_amount);
4577             tcg_gen_rotr_i32(t0, t0, t1);
4578             tcg_gen_extu_i32_i64(dst, t0);
4579         }
4580         break;
4581     default:
4582         g_assert_not_reached();
4584     }
4585 
4586     if (!sf) { /* zero extend final result */
4587         tcg_gen_ext32u_i64(dst, dst);
4588     }
4589 }
4590 
4591 /* Shift a TCGv src by immediate, put result in dst.
4592  * The shift amount must be in range (this should always be true as the
4593  * relevant instructions will UNDEF on bad shift immediates).
4594  */
4595 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4596                           enum a64_shift_type shift_type, unsigned int shift_i)
4597 {
4598     assert(shift_i < (sf ? 64 : 32));
4599 
4600     if (shift_i == 0) {
4601         tcg_gen_mov_i64(dst, src);
4602     } else {
4603         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
4604     }
4605 }
4606 
4607 /* Logical (shifted register)
4608  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4609  * +----+-----+-----------+-------+---+------+--------+------+------+
4610  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4611  * +----+-----+-----------+-------+---+------+--------+------+------+
4612  */
4613 static void disas_logic_reg(DisasContext *s, uint32_t insn)
4614 {
4615     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4616     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4617 
4618     sf = extract32(insn, 31, 1);
4619     opc = extract32(insn, 29, 2);
4620     shift_type = extract32(insn, 22, 2);
4621     invert = extract32(insn, 21, 1);
4622     rm = extract32(insn, 16, 5);
4623     shift_amount = extract32(insn, 10, 6);
4624     rn = extract32(insn, 5, 5);
4625     rd = extract32(insn, 0, 5);
4626 
4627     if (!sf && (shift_amount & (1 << 5))) {
4628         unallocated_encoding(s);
4629         return;
4630     }
4631 
4632     tcg_rd = cpu_reg(s, rd);
4633 
4634     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4635         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
4636          * register-register MOV and MVN, so it is worth special casing.
4637          */
4638         tcg_rm = cpu_reg(s, rm);
4639         if (invert) {
4640             tcg_gen_not_i64(tcg_rd, tcg_rm);
4641             if (!sf) {
4642                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4643             }
4644         } else {
4645             if (sf) {
4646                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
4647             } else {
4648                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4649             }
4650         }
4651         return;
4652     }
4653 
4654     tcg_rm = read_cpu_reg(s, rm, sf);
4655 
4656     if (shift_amount) {
4657         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4658     }
4659 
4660     tcg_rn = cpu_reg(s, rn);
4661 
4662     switch (opc | (invert << 2)) {
4663     case 0: /* AND */
4664     case 3: /* ANDS */
4665         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4666         break;
4667     case 1: /* ORR */
4668         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4669         break;
4670     case 2: /* EOR */
4671         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4672         break;
4673     case 4: /* BIC */
4674     case 7: /* BICS */
4675         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4676         break;
4677     case 5: /* ORN */
4678         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4679         break;
4680     case 6: /* EON */
4681         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4682         break;
4683     default:
4684         g_assert_not_reached();
4686     }
4687 
4688     if (!sf) {
4689         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4690     }
4691 
4692     if (opc == 3) {
4693         gen_logic_CC(sf, tcg_rd);
4694     }
4695 }
4696 
4697 /*
4698  * Add/subtract (extended register)
4699  *
4700  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4701  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4702  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4703  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4704  *
4705  *  sf: 0 -> 32bit, 1 -> 64bit
4706  *  op: 0 -> add  , 1 -> sub
4707  *   S: 1 -> set flags
4708  * opt: 00
4709  * option: extension type (see DecodeRegExtend)
4710  * imm3: optional shift to Rm
4711  *
4712  * Rd = Rn + LSL(extend(Rm), amount)
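      *
      * Worked example (illustrative): ADD X0, SP, W1, UXTW #2 has
      * option=010 (UXTW) and imm3=2, so X0 = SP + (ZeroExtend(W1) << 2);
      * as a non-flag-setting form, Rn and Rd may be SP.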
4713  */
4714 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4715 {
4716     int rd = extract32(insn, 0, 5);
4717     int rn = extract32(insn, 5, 5);
4718     int imm3 = extract32(insn, 10, 3);
4719     int option = extract32(insn, 13, 3);
4720     int rm = extract32(insn, 16, 5);
4721     int opt = extract32(insn, 22, 2);
4722     bool setflags = extract32(insn, 29, 1);
4723     bool sub_op = extract32(insn, 30, 1);
4724     bool sf = extract32(insn, 31, 1);
4725 
4726     TCGv_i64 tcg_rm, tcg_rn; /* temps */
4727     TCGv_i64 tcg_rd;
4728     TCGv_i64 tcg_result;
4729 
4730     if (imm3 > 4 || opt != 0) {
4731         unallocated_encoding(s);
4732         return;
4733     }
4734 
4735     /* non-flag setting ops may use SP */
4736     if (!setflags) {
4737         tcg_rd = cpu_reg_sp(s, rd);
4738     } else {
4739         tcg_rd = cpu_reg(s, rd);
4740     }
4741     tcg_rn = read_cpu_reg_sp(s, rn, sf);
4742 
4743     tcg_rm = read_cpu_reg(s, rm, sf);
4744     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4745 
4746     tcg_result = tcg_temp_new_i64();
4747 
4748     if (!setflags) {
4749         if (sub_op) {
4750             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4751         } else {
4752             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4753         }
4754     } else {
4755         if (sub_op) {
4756             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4757         } else {
4758             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4759         }
4760     }
4761 
4762     if (sf) {
4763         tcg_gen_mov_i64(tcg_rd, tcg_result);
4764     } else {
4765         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4766     }
4767 }
4768 
4769 /*
4770  * Add/subtract (shifted register)
4771  *
4772  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4773  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4774  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4775  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4776  *
4777  *    sf: 0 -> 32bit, 1 -> 64bit
4778  *    op: 0 -> add  , 1 -> sub
4779  *     S: 1 -> set flags
4780  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4781  *  imm6: Shift amount to apply to Rm before the add/sub
4782  */
4783 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4784 {
4785     int rd = extract32(insn, 0, 5);
4786     int rn = extract32(insn, 5, 5);
4787     int imm6 = extract32(insn, 10, 6);
4788     int rm = extract32(insn, 16, 5);
4789     int shift_type = extract32(insn, 22, 2);
4790     bool setflags = extract32(insn, 29, 1);
4791     bool sub_op = extract32(insn, 30, 1);
4792     bool sf = extract32(insn, 31, 1);
4793 
4794     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4795     TCGv_i64 tcg_rn, tcg_rm;
4796     TCGv_i64 tcg_result;
4797 
4798     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4799         unallocated_encoding(s);
4800         return;
4801     }
4802 
4803     tcg_rn = read_cpu_reg(s, rn, sf);
4804     tcg_rm = read_cpu_reg(s, rm, sf);
4805 
4806     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4807 
4808     tcg_result = tcg_temp_new_i64();
4809 
4810     if (!setflags) {
4811         if (sub_op) {
4812             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4813         } else {
4814             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4815         }
4816     } else {
4817         if (sub_op) {
4818             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4819         } else {
4820             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4821         }
4822     }
4823 
4824     if (sf) {
4825         tcg_gen_mov_i64(tcg_rd, tcg_result);
4826     } else {
4827         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4828     }
4829 }
4830 
4831 /* Data-processing (3 source)
4832  *
4833  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
4834  *  +--+------+-----------+------+------+----+------+------+------+
4835  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4836  *  +--+------+-----------+------+------+----+------+------+------+
4837  */
4838 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4839 {
4840     int rd = extract32(insn, 0, 5);
4841     int rn = extract32(insn, 5, 5);
4842     int ra = extract32(insn, 10, 5);
4843     int rm = extract32(insn, 16, 5);
4844     int op_id = (extract32(insn, 29, 3) << 4) |
4845         (extract32(insn, 21, 3) << 1) |
4846         extract32(insn, 15, 1);
4847     bool sf = extract32(insn, 31, 1);
4848     bool is_sub = extract32(op_id, 0, 1);
4849     bool is_high = extract32(op_id, 2, 1);
4850     bool is_signed = false;
4851     TCGv_i64 tcg_op1;
4852     TCGv_i64 tcg_op2;
4853     TCGv_i64 tcg_tmp;
4854 
4855     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
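         /* e.g. SMADDL: sf:op54 = 100, op31 = 001, o0 = 0 => op_id = 0x42 */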
4856     switch (op_id) {
4857     case 0x42: /* SMADDL */
4858     case 0x43: /* SMSUBL */
4859     case 0x44: /* SMULH */
4860         is_signed = true;
4861         break;
4862     case 0x0: /* MADD (32bit) */
4863     case 0x1: /* MSUB (32bit) */
4864     case 0x40: /* MADD (64bit) */
4865     case 0x41: /* MSUB (64bit) */
4866     case 0x4a: /* UMADDL */
4867     case 0x4b: /* UMSUBL */
4868     case 0x4c: /* UMULH */
4869         break;
4870     default:
4871         unallocated_encoding(s);
4872         return;
4873     }
4874 
4875     if (is_high) {
4876         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4877         TCGv_i64 tcg_rd = cpu_reg(s, rd);
4878         TCGv_i64 tcg_rn = cpu_reg(s, rn);
4879         TCGv_i64 tcg_rm = cpu_reg(s, rm);
4880 
4881         if (is_signed) {
4882             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4883         } else {
4884             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4885         }
4886         return;
4887     }
4888 
4889     tcg_op1 = tcg_temp_new_i64();
4890     tcg_op2 = tcg_temp_new_i64();
4891     tcg_tmp = tcg_temp_new_i64();
4892 
4893     if (op_id < 0x42) {
4894         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4895         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4896     } else {
4897         if (is_signed) {
4898             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4899             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4900         } else {
4901             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4902             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4903         }
4904     }
4905 
4906     if (ra == 31 && !is_sub) {
4907         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4908         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4909     } else {
4910         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4911         if (is_sub) {
4912             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4913         } else {
4914             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4915         }
4916     }
4917 
4918     if (!sf) {
4919         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4920     }
4921 }
4922 
4923 /* Add/subtract (with carry)
4924  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
4925  * +--+--+--+------------------------+------+-------------+------+-----+
4926  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
4927  * +--+--+--+------------------------+------+-------------+------+-----+
4928  */
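     /*
      * SBC computes Rn + NOT(Rm) + C, so inverting Rm for the sub case
      * lets the same gen_adc/gen_adc_CC paths implement both ADC and SBC.
      */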
4929 
4930 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4931 {
4932     unsigned int sf, op, setflags, rm, rn, rd;
4933     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4934 
4935     sf = extract32(insn, 31, 1);
4936     op = extract32(insn, 30, 1);
4937     setflags = extract32(insn, 29, 1);
4938     rm = extract32(insn, 16, 5);
4939     rn = extract32(insn, 5, 5);
4940     rd = extract32(insn, 0, 5);
4941 
4942     tcg_rd = cpu_reg(s, rd);
4943     tcg_rn = cpu_reg(s, rn);
4944 
4945     if (op) {
4946         tcg_y = tcg_temp_new_i64();
4947         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4948     } else {
4949         tcg_y = cpu_reg(s, rm);
4950     }
4951 
4952     if (setflags) {
4953         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4954     } else {
4955         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4956     }
4957 }
4958 
4959 /*
4960  * Rotate right into flags
4961  *  31 30 29                21       15          10      5  4      0
4962  * +--+--+--+-----------------+--------+-----------+------+--+------+
4963  * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
4964  * +--+--+--+-----------------+--------+-----------+------+--+------+
4965  */
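     /*
      * RMIF (FEAT_FlagM): rotate Rn right by imm6, then copy bits <3:0>
      * of the result, filtered by mask, into {N,Z,C,V}.
      */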
4966 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
4967 {
4968     int mask = extract32(insn, 0, 4);
4969     int o2 = extract32(insn, 4, 1);
4970     int rn = extract32(insn, 5, 5);
4971     int imm6 = extract32(insn, 15, 6);
4972     int sf_op_s = extract32(insn, 29, 3);
4973     TCGv_i64 tcg_rn;
4974     TCGv_i32 nzcv;
4975 
4976     if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
4977         unallocated_encoding(s);
4978         return;
4979     }
4980 
4981     tcg_rn = read_cpu_reg(s, rn, 1);
4982     tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
4983 
4984     nzcv = tcg_temp_new_i32();
4985     tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
4986 
4987     if (mask & 8) { /* N */
4988         tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
4989     }
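         /*
          * cpu_ZF uses an inverted representation (Z is set iff cpu_ZF == 0),
          * so Z is formed by inverting bit 2 of the rotated value.
          */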
4990     if (mask & 4) { /* Z */
4991         tcg_gen_not_i32(cpu_ZF, nzcv);
4992         tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
4993     }
4994     if (mask & 2) { /* C */
4995         tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
4996     }
4997     if (mask & 1) { /* V */
4998         tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
4999     }
5000 }
5001 
5002 /*
5003  * Evaluate into flags
5004  *  31 30 29                21        15   14        10      5  4      0
5005  * +--+--+--+-----------------+---------+----+---------+------+--+------+
5006  * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
5007  * +--+--+--+-----------------+---------+----+---------+------+--+------+
5008  */
5009 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
5010 {
5011     int o3_mask = extract32(insn, 0, 5);
5012     int rn = extract32(insn, 5, 5);
5013     int o2 = extract32(insn, 15, 6);
5014     int sz = extract32(insn, 14, 1);
5015     int sf_op_s = extract32(insn, 29, 3);
5016     TCGv_i32 tmp;
5017     int shift;
5018 
5019     if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
5020         !dc_isar_feature(aa64_condm_4, s)) {
5021         unallocated_encoding(s);
5022         return;
5023     }
5024     shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
5025 
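         /*
          * Shifting the low 8/16 bits to the top of the word makes N the
          * sign bit and makes Z test exactly those bits (the rest are
          * shifted out); V is bit <sz> XOR bit <sz-1>, formed below by
          * xoring the two shifted copies.  C is left unchanged.
          */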
5026     tmp = tcg_temp_new_i32();
5027     tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
5028     tcg_gen_shli_i32(cpu_NF, tmp, shift);
5029     tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
5030     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
5031     tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
5032 }
5033 
5034 /* Conditional compare (immediate / register)
5035  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
5036  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
5037  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
5038  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
5039  *        [1]                             y                [0]       [0]
5040  */
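     /*
      * If <cond> holds, NZCV is set from comparing Rn with the second
      * operand; otherwise NZCV is set to the #nzcv immediate.  E.g.
      * CCMP x0, x1, #0, EQ acts as CMP x0, x1 when Z was set, and forces
      * NZCV to 0000 otherwise.
      */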
5041 static void disas_cc(DisasContext *s, uint32_t insn)
5042 {
5043     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
5044     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
5045     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
5046     DisasCompare c;
5047 
5048     if (!extract32(insn, 29, 1)) {
5049         unallocated_encoding(s);
5050         return;
5051     }
5052     if (insn & (1 << 10 | 1 << 4)) {
5053         unallocated_encoding(s);
5054         return;
5055     }
5056     sf = extract32(insn, 31, 1);
5057     op = extract32(insn, 30, 1);
5058     is_imm = extract32(insn, 11, 1);
5059     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
5060     cond = extract32(insn, 12, 4);
5061     rn = extract32(insn, 5, 5);
5062     nzcv = extract32(insn, 0, 4);
5063 
5064     /* Set T0 = !COND.  */
5065     tcg_t0 = tcg_temp_new_i32();
5066     arm_test_cc(&c, cond);
5067     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
5068 
5069     /* Load the arguments for the new comparison.  */
5070     if (is_imm) {
5071         tcg_y = tcg_temp_new_i64();
5072         tcg_gen_movi_i64(tcg_y, y);
5073     } else {
5074         tcg_y = cpu_reg(s, y);
5075     }
5076     tcg_rn = cpu_reg(s, rn);
5077 
5078     /* Set the flags for the new comparison.  */
5079     tcg_tmp = tcg_temp_new_i64();
5080     if (op) {
5081         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5082     } else {
5083         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
5084     }
5085 
5086     /* If COND was false, force the flags to #nzcv.  Compute two masks
5087      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
5088      * For tcg hosts that support ANDC, we can make do with just T1.
5089      * In either case, allow the tcg optimizer to delete any unused mask.
5090      */
5091     tcg_t1 = tcg_temp_new_i32();
5092     tcg_t2 = tcg_temp_new_i32();
5093     tcg_gen_neg_i32(tcg_t1, tcg_t0);
5094     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
5095 
5096     if (nzcv & 8) { /* N */
5097         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
5098     } else {
5099         if (TCG_TARGET_HAS_andc_i32) {
5100             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
5101         } else {
5102             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
5103         }
5104     }
5105     if (nzcv & 4) { /* Z */
5106         if (TCG_TARGET_HAS_andc_i32) {
5107             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
5108         } else {
5109             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
5110         }
5111     } else {
5112         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
5113     }
5114     if (nzcv & 2) { /* C */
5115         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
5116     } else {
5117         if (TCG_TARGET_HAS_andc_i32) {
5118             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
5119         } else {
5120             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
5121         }
5122     }
5123     if (nzcv & 1) { /* V */
5124         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
5125     } else {
5126         if (TCG_TARGET_HAS_andc_i32) {
5127             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
5128         } else {
5129             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
5130         }
5131     }
5132 }
5133 
5134 /* Conditional select
5135  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
5136  * +----+----+---+-----------------+------+------+-----+------+------+
5137  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
5138  * +----+----+---+-----------------+------+------+-----+------+------+
5139  */
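     /*
      * op (else_inv) and op2<0> (else_inc) select between CSEL, CSINC,
      * CSINV and CSNEG; CSET/CSETM are the Rn == Rm == XZR aliases of
      * CSINC/CSINV, special-cased below.
      */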
5140 static void disas_cond_select(DisasContext *s, uint32_t insn)
5141 {
5142     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
5143     TCGv_i64 tcg_rd, zero;
5144     DisasCompare64 c;
5145 
5146     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
5147         /* S == 1 or op2<1> == 1 */
5148         unallocated_encoding(s);
5149         return;
5150     }
5151     sf = extract32(insn, 31, 1);
5152     else_inv = extract32(insn, 30, 1);
5153     rm = extract32(insn, 16, 5);
5154     cond = extract32(insn, 12, 4);
5155     else_inc = extract32(insn, 10, 1);
5156     rn = extract32(insn, 5, 5);
5157     rd = extract32(insn, 0, 5);
5158 
5159     tcg_rd = cpu_reg(s, rd);
5160 
5161     a64_test_cc(&c, cond);
5162     zero = tcg_constant_i64(0);
5163 
5164     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
5165         /* CSET & CSETM.  */
5166         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
5167         if (else_inv) {
5168             tcg_gen_neg_i64(tcg_rd, tcg_rd);
5169         }
5170     } else {
5171         TCGv_i64 t_true = cpu_reg(s, rn);
5172         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
5173         if (else_inv && else_inc) {
5174             tcg_gen_neg_i64(t_false, t_false);
5175         } else if (else_inv) {
5176             tcg_gen_not_i64(t_false, t_false);
5177         } else if (else_inc) {
5178             tcg_gen_addi_i64(t_false, t_false, 1);
5179         }
5180         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
5181     }
5182 
5183     if (!sf) {
5184         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5185     }
5186 }
5187 
5188 static void handle_clz(DisasContext *s, unsigned int sf,
5189                        unsigned int rn, unsigned int rd)
5190 {
5191     TCGv_i64 tcg_rd, tcg_rn;
5192     tcg_rd = cpu_reg(s, rd);
5193     tcg_rn = cpu_reg(s, rn);
5194 
5195     if (sf) {
5196         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
5197     } else {
5198         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5199         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5200         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
5201         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5202     }
5203 }
5204 
5205 static void handle_cls(DisasContext *s, unsigned int sf,
5206                        unsigned int rn, unsigned int rd)
5207 {
5208     TCGv_i64 tcg_rd, tcg_rn;
5209     tcg_rd = cpu_reg(s, rd);
5210     tcg_rn = cpu_reg(s, rn);
5211 
5212     if (sf) {
5213         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
5214     } else {
5215         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5216         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5217         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
5218         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5219     }
5220 }
5221 
5222 static void handle_rbit(DisasContext *s, unsigned int sf,
5223                         unsigned int rn, unsigned int rd)
5224 {
5225     TCGv_i64 tcg_rd, tcg_rn;
5226     tcg_rd = cpu_reg(s, rd);
5227     tcg_rn = cpu_reg(s, rn);
5228 
5229     if (sf) {
5230         gen_helper_rbit64(tcg_rd, tcg_rn);
5231     } else {
5232         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5233         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5234         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
5235         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5236     }
5237 }
5238 
5239 /* REV with sf==1, opcode==3 ("REV64") */
5240 static void handle_rev64(DisasContext *s, unsigned int sf,
5241                          unsigned int rn, unsigned int rd)
5242 {
5243     if (!sf) {
5244         unallocated_encoding(s);
5245         return;
5246     }
5247     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
5248 }
5249 
5250 /* REV (sf==0, opcode==2)
5251  * REV32 (sf==1, opcode==2)
5252  */
5253 static void handle_rev32(DisasContext *s, unsigned int sf,
5254                          unsigned int rn, unsigned int rd)
5255 {
5256     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5257     TCGv_i64 tcg_rn = cpu_reg(s, rn);
5258 
5259     if (sf) {
5260         tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
5261         tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
5262     } else {
5263         tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
5264     }
5265 }
5266 
5267 /* REV16 (opcode==1) */
5268 static void handle_rev16(DisasContext *s, unsigned int sf,
5269                          unsigned int rn, unsigned int rd)
5270 {
5271     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5272     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5273     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5274     TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
5275 
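         /* Byte-swap each 16-bit lane: mask keeps the even bytes of Rn
          * and of Rn >> 8, which are recombined with the former shifted
          * left by 8. */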
5276     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
5277     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
5278     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
5279     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
5280     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
5281 }
5282 
5283 /* Data-processing (1 source)
5284  *   31  30  29  28             21 20     16 15    10 9    5 4    0
5285  * +----+---+---+-----------------+---------+--------+------+------+
5286  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
5287  * +----+---+---+-----------------+---------+--------+------+------+
5288  */
5289 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
5290 {
5291     unsigned int sf, opcode, opcode2, rn, rd;
5292     TCGv_i64 tcg_rd;
5293 
5294     if (extract32(insn, 29, 1)) {
5295         unallocated_encoding(s);
5296         return;
5297     }
5298 
5299     sf = extract32(insn, 31, 1);
5300     opcode = extract32(insn, 10, 6);
5301     opcode2 = extract32(insn, 16, 5);
5302     rn = extract32(insn, 5, 5);
5303     rd = extract32(insn, 0, 5);
5304 
5305 #define MAP(SF, O2, O1) ((SF) | ((O1) << 1) | ((O2) << 7))
5306 
5307     switch (MAP(sf, opcode2, opcode)) {
5308     case MAP(0, 0x00, 0x00): /* RBIT */
5309     case MAP(1, 0x00, 0x00):
5310         handle_rbit(s, sf, rn, rd);
5311         break;
5312     case MAP(0, 0x00, 0x01): /* REV16 */
5313     case MAP(1, 0x00, 0x01):
5314         handle_rev16(s, sf, rn, rd);
5315         break;
5316     case MAP(0, 0x00, 0x02): /* REV/REV32 */
5317     case MAP(1, 0x00, 0x02):
5318         handle_rev32(s, sf, rn, rd);
5319         break;
5320     case MAP(1, 0x00, 0x03): /* REV64 */
5321         handle_rev64(s, sf, rn, rd);
5322         break;
5323     case MAP(0, 0x00, 0x04): /* CLZ */
5324     case MAP(1, 0x00, 0x04):
5325         handle_clz(s, sf, rn, rd);
5326         break;
5327     case MAP(0, 0x00, 0x05): /* CLS */
5328     case MAP(1, 0x00, 0x05):
5329         handle_cls(s, sf, rn, rd);
5330         break;
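         /*
          * For the PAC*, AUT* and XPAC* cases below: when the feature is
          * implemented but the keys are disabled (!pauth_active), the
          * instructions behave as NOPs, hence the empty success paths.
          */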
5331     case MAP(1, 0x01, 0x00): /* PACIA */
5332         if (s->pauth_active) {
5333             tcg_rd = cpu_reg(s, rd);
5334             gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5335         } else if (!dc_isar_feature(aa64_pauth, s)) {
5336             goto do_unallocated;
5337         }
5338         break;
5339     case MAP(1, 0x01, 0x01): /* PACIB */
5340         if (s->pauth_active) {
5341             tcg_rd = cpu_reg(s, rd);
5342             gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5343         } else if (!dc_isar_feature(aa64_pauth, s)) {
5344             goto do_unallocated;
5345         }
5346         break;
5347     case MAP(1, 0x01, 0x02): /* PACDA */
5348         if (s->pauth_active) {
5349             tcg_rd = cpu_reg(s, rd);
5350             gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5351         } else if (!dc_isar_feature(aa64_pauth, s)) {
5352             goto do_unallocated;
5353         }
5354         break;
5355     case MAP(1, 0x01, 0x03): /* PACDB */
5356         if (s->pauth_active) {
5357             tcg_rd = cpu_reg(s, rd);
5358             gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5359         } else if (!dc_isar_feature(aa64_pauth, s)) {
5360             goto do_unallocated;
5361         }
5362         break;
5363     case MAP(1, 0x01, 0x04): /* AUTIA */
5364         if (s->pauth_active) {
5365             tcg_rd = cpu_reg(s, rd);
5366             gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5367         } else if (!dc_isar_feature(aa64_pauth, s)) {
5368             goto do_unallocated;
5369         }
5370         break;
5371     case MAP(1, 0x01, 0x05): /* AUTIB */
5372         if (s->pauth_active) {
5373             tcg_rd = cpu_reg(s, rd);
5374             gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5375         } else if (!dc_isar_feature(aa64_pauth, s)) {
5376             goto do_unallocated;
5377         }
5378         break;
5379     case MAP(1, 0x01, 0x06): /* AUTDA */
5380         if (s->pauth_active) {
5381             tcg_rd = cpu_reg(s, rd);
5382             gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5383         } else if (!dc_isar_feature(aa64_pauth, s)) {
5384             goto do_unallocated;
5385         }
5386         break;
5387     case MAP(1, 0x01, 0x07): /* AUTDB */
5388         if (s->pauth_active) {
5389             tcg_rd = cpu_reg(s, rd);
5390             gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
5391         } else if (!dc_isar_feature(aa64_pauth, s)) {
5392             goto do_unallocated;
5393         }
5394         break;
5395     case MAP(1, 0x01, 0x08): /* PACIZA */
5396         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5397             goto do_unallocated;
5398         } else if (s->pauth_active) {
5399             tcg_rd = cpu_reg(s, rd);
5400             gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5401         }
5402         break;
5403     case MAP(1, 0x01, 0x09): /* PACIZB */
5404         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5405             goto do_unallocated;
5406         } else if (s->pauth_active) {
5407             tcg_rd = cpu_reg(s, rd);
5408             gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5409         }
5410         break;
5411     case MAP(1, 0x01, 0x0a): /* PACDZA */
5412         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5413             goto do_unallocated;
5414         } else if (s->pauth_active) {
5415             tcg_rd = cpu_reg(s, rd);
5416             gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5417         }
5418         break;
5419     case MAP(1, 0x01, 0x0b): /* PACDZB */
5420         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5421             goto do_unallocated;
5422         } else if (s->pauth_active) {
5423             tcg_rd = cpu_reg(s, rd);
5424             gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5425         }
5426         break;
5427     case MAP(1, 0x01, 0x0c): /* AUTIZA */
5428         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5429             goto do_unallocated;
5430         } else if (s->pauth_active) {
5431             tcg_rd = cpu_reg(s, rd);
5432             gen_helper_autia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5433         }
5434         break;
5435     case MAP(1, 0x01, 0x0d): /* AUTIZB */
5436         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5437             goto do_unallocated;
5438         } else if (s->pauth_active) {
5439             tcg_rd = cpu_reg(s, rd);
5440             gen_helper_autib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5441         }
5442         break;
5443     case MAP(1, 0x01, 0x0e): /* AUTDZA */
5444         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5445             goto do_unallocated;
5446         } else if (s->pauth_active) {
5447             tcg_rd = cpu_reg(s, rd);
5448             gen_helper_autda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5449         }
5450         break;
5451     case MAP(1, 0x01, 0x0f): /* AUTDZB */
5452         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5453             goto do_unallocated;
5454         } else if (s->pauth_active) {
5455             tcg_rd = cpu_reg(s, rd);
5456             gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
5457         }
5458         break;
5459     case MAP(1, 0x01, 0x10): /* XPACI */
5460         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5461             goto do_unallocated;
5462         } else if (s->pauth_active) {
5463             tcg_rd = cpu_reg(s, rd);
5464             gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
5465         }
5466         break;
5467     case MAP(1, 0x01, 0x11): /* XPACD */
5468         if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5469             goto do_unallocated;
5470         } else if (s->pauth_active) {
5471             tcg_rd = cpu_reg(s, rd);
5472             gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
5473         }
5474         break;
5475     default:
5476     do_unallocated:
5477         unallocated_encoding(s);
5478         break;
5479     }
5480 
5481 #undef MAP
5482 }
5483 
5484 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5485                        unsigned int rm, unsigned int rn, unsigned int rd)
5486 {
5487     TCGv_i64 tcg_n, tcg_m, tcg_rd;
5488     tcg_rd = cpu_reg(s, rd);
5489 
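         /* A 32-bit SDIV must sign-extend its operands to 64 bits so the
          * 64-bit helper computes the correct 32-bit quotient. */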
5490     if (!sf && is_signed) {
5491         tcg_n = tcg_temp_new_i64();
5492         tcg_m = tcg_temp_new_i64();
5493         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5494         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5495     } else {
5496         tcg_n = read_cpu_reg(s, rn, sf);
5497         tcg_m = read_cpu_reg(s, rm, sf);
5498     }
5499 
5500     if (is_signed) {
5501         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5502     } else {
5503         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5504     }
5505 
5506     if (!sf) { /* zero extend final result */
5507         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5508     }
5509 }
5510 
5511 /* LSLV, LSRV, ASRV, RORV */
5512 static void handle_shift_reg(DisasContext *s,
5513                              enum a64_shift_type shift_type, unsigned int sf,
5514                              unsigned int rm, unsigned int rn, unsigned int rd)
5515 {
5516     TCGv_i64 tcg_shift = tcg_temp_new_i64();
5517     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5518     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5519 
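         /* The shift amount is Rm modulo the register width (64 or 32). */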
5520     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5521     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5522 }
5523 
5524 /* CRC32[BHWX], CRC32C[BHWX] */
5525 static void handle_crc32(DisasContext *s,
5526                          unsigned int sf, unsigned int sz, bool crc32c,
5527                          unsigned int rm, unsigned int rn, unsigned int rd)
5528 {
5529     TCGv_i64 tcg_acc, tcg_val;
5530     TCGv_i32 tcg_bytes;
5531 
5532     if (!dc_isar_feature(aa64_crc32, s)
5533         || (sf == 1 && sz != 3)
5534         || (sf == 0 && sz == 3)) {
5535         unallocated_encoding(s);
5536         return;
5537     }
5538 
5539     if (sz == 3) {
5540         tcg_val = cpu_reg(s, rm);
5541     } else {
5542         uint64_t mask;
5543         switch (sz) {
5544         case 0:
5545             mask = 0xFF;
5546             break;
5547         case 1:
5548             mask = 0xFFFF;
5549             break;
5550         case 2:
5551             mask = 0xFFFFFFFF;
5552             break;
5553         default:
5554             g_assert_not_reached();
5555         }
5556         tcg_val = tcg_temp_new_i64();
5557         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5558     }
5559 
5560     tcg_acc = cpu_reg(s, rn);
5561     tcg_bytes = tcg_constant_i32(1 << sz);
5562 
5563     if (crc32c) {
5564         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5565     } else {
5566         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5567     }
5568 }
5569 
5570 /* Data-processing (2 source)
5571  *   31   30  29 28             21 20  16 15    10 9    5 4    0
5572  * +----+---+---+-----------------+------+--------+------+------+
5573  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
5574  * +----+---+---+-----------------+------+--------+------+------+
5575  */
5576 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5577 {
5578     unsigned int sf, rm, opcode, rn, rd, setflag;
5579     sf = extract32(insn, 31, 1);
5580     setflag = extract32(insn, 29, 1);
5581     rm = extract32(insn, 16, 5);
5582     opcode = extract32(insn, 10, 6);
5583     rn = extract32(insn, 5, 5);
5584     rd = extract32(insn, 0, 5);
5585 
5586     if (setflag && opcode != 0) {
5587         unallocated_encoding(s);
5588         return;
5589     }
5590 
5591     switch (opcode) {
5592     case 0: /* SUBP(S) */
5593         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5594             goto do_unallocated;
5595         } else {
5596             TCGv_i64 tcg_n, tcg_m, tcg_d;
5597 
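                 /* SUBP subtracts 56-bit address fields: both operands
                  * are sign-extended from bit 55, ignoring the tag byte. */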
5598             tcg_n = read_cpu_reg_sp(s, rn, true);
5599             tcg_m = read_cpu_reg_sp(s, rm, true);
5600             tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
5601             tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
5602             tcg_d = cpu_reg(s, rd);
5603 
5604             if (setflag) {
5605                 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
5606             } else {
5607                 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
5608             }
5609         }
5610         break;
5611     case 2: /* UDIV */
5612         handle_div(s, false, sf, rm, rn, rd);
5613         break;
5614     case 3: /* SDIV */
5615         handle_div(s, true, sf, rm, rn, rd);
5616         break;
5617     case 4: /* IRG */
5618         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5619             goto do_unallocated;
5620         }
5621         if (s->ata) {
5622             gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
5623                            cpu_reg_sp(s, rn), cpu_reg(s, rm));
5624         } else {
5625             gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
5626                                              cpu_reg_sp(s, rn));
5627         }
5628         break;
5629     case 5: /* GMI */
5630         if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
5631             goto do_unallocated;
5632         } else {
5633             TCGv_i64 t = tcg_temp_new_i64();
5634 
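                 /* GMI: extract the allocation tag from Xn<59:56> and OR
                  * (1 << tag) into the exclusion mask from Xm. */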
5635             tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
5636             tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
5637             tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
5638         }
5639         break;
5640     case 8: /* LSLV */
5641         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5642         break;
5643     case 9: /* LSRV */
5644         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5645         break;
5646     case 10: /* ASRV */
5647         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5648         break;
5649     case 11: /* RORV */
5650         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5651         break;
5652     case 12: /* PACGA */
5653         if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5654             goto do_unallocated;
5655         }
5656         gen_helper_pacga(cpu_reg(s, rd), cpu_env,
5657                          cpu_reg(s, rn), cpu_reg_sp(s, rm));
5658         break;
5659     case 16:
5660     case 17:
5661     case 18:
5662     case 19:
5663     case 20:
5664     case 21:
5665     case 22:
5666     case 23: /* CRC32 */
5667     {
5668         int sz = extract32(opcode, 0, 2);
5669         bool crc32c = extract32(opcode, 2, 1);
5670         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5671         break;
5672     }
5673     default:
5674     do_unallocated:
5675         unallocated_encoding(s);
5676         break;
5677     }
5678 }
5679 
5680 /*
5681  * Data processing - register
5682  *  31  30 29  28      25    21  20  16      10         0
5683  * +--+---+--+---+-------+-----+-------+-------+---------+
5684  * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
5685  * +--+---+--+---+-------+-----+-------+-------+---------+
5686  */
5687 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5688 {
5689     int op0 = extract32(insn, 30, 1);
5690     int op1 = extract32(insn, 28, 1);
5691     int op2 = extract32(insn, 21, 4);
5692     int op3 = extract32(insn, 10, 6);
5693 
5694     if (!op1) {
5695         if (op2 & 8) {
5696             if (op2 & 1) {
5697                 /* Add/sub (extended register) */
5698                 disas_add_sub_ext_reg(s, insn);
5699             } else {
5700                 /* Add/sub (shifted register) */
5701                 disas_add_sub_reg(s, insn);
5702             }
5703         } else {
5704             /* Logical (shifted register) */
5705             disas_logic_reg(s, insn);
5706         }
5707         return;
5708     }
5709 
5710     switch (op2) {
5711     case 0x0:
5712         switch (op3) {
5713         case 0x00: /* Add/subtract (with carry) */
5714             disas_adc_sbc(s, insn);
5715             break;
5716 
5717         case 0x01: /* Rotate right into flags */
5718         case 0x21:
5719             disas_rotate_right_into_flags(s, insn);
5720             break;
5721 
5722         case 0x02: /* Evaluate into flags */
5723         case 0x12:
5724         case 0x22:
5725         case 0x32:
5726             disas_evaluate_into_flags(s, insn);
5727             break;
5728 
5729         default:
5730             goto do_unallocated;
5731         }
5732         break;
5733 
5734     case 0x2: /* Conditional compare */
5735         disas_cc(s, insn); /* both imm and reg forms */
5736         break;
5737 
5738     case 0x4: /* Conditional select */
5739         disas_cond_select(s, insn);
5740         break;
5741 
5742     case 0x6: /* Data-processing */
5743         if (op0) {    /* (1 source) */
5744             disas_data_proc_1src(s, insn);
5745         } else {      /* (2 source) */
5746             disas_data_proc_2src(s, insn);
5747         }
5748         break;
5749     case 0x8 ... 0xf: /* (3 source) */
5750         disas_data_proc_3src(s, insn);
5751         break;
5752 
5753     default:
5754     do_unallocated:
5755         unallocated_encoding(s);
5756         break;
5757     }
5758 }
5759 
5760 static void handle_fp_compare(DisasContext *s, int size,
5761                               unsigned int rn, unsigned int rm,
5762                               bool cmp_with_zero, bool signal_all_nans)
5763 {
5764     TCGv_i64 tcg_flags = tcg_temp_new_i64();
5765     TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
5766 
5767     if (size == MO_64) {
5768         TCGv_i64 tcg_vn, tcg_vm;
5769 
5770         tcg_vn = read_fp_dreg(s, rn);
5771         if (cmp_with_zero) {
5772             tcg_vm = tcg_constant_i64(0);
5773         } else {
5774             tcg_vm = read_fp_dreg(s, rm);
5775         }
5776         if (signal_all_nans) {
5777             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5778         } else {
5779             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5780         }
5781     } else {
5782         TCGv_i32 tcg_vn = tcg_temp_new_i32();
5783         TCGv_i32 tcg_vm = tcg_temp_new_i32();
5784 
5785         read_vec_element_i32(s, tcg_vn, rn, 0, size);
5786         if (cmp_with_zero) {
5787             tcg_gen_movi_i32(tcg_vm, 0);
5788         } else {
5789             read_vec_element_i32(s, tcg_vm, rm, 0, size);
5790         }
5791 
5792         switch (size) {
5793         case MO_32:
5794             if (signal_all_nans) {
5795                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5796             } else {
5797                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5798             }
5799             break;
5800         case MO_16:
5801             if (signal_all_nans) {
5802                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5803             } else {
5804                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5805             }
5806             break;
5807         default:
5808             g_assert_not_reached();
5809         }
5810     }
5811 
5812     gen_set_nzcv(tcg_flags);
5813 }
5814 
5815 /* Floating point compare
5816  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
5817  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5818  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
5819  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5820  */
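     /*
      * opc<0> selects comparison with zero (the #0.0 forms) and opc<1>
      * selects the signaling variant (FCMPE), as passed down to
      * handle_fp_compare.
      */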
5821 static void disas_fp_compare(DisasContext *s, uint32_t insn)
5822 {
5823     unsigned int mos, type, rm, op, rn, opc, op2r;
5824     int size;
5825 
5826     mos = extract32(insn, 29, 3);
5827     type = extract32(insn, 22, 2);
5828     rm = extract32(insn, 16, 5);
5829     op = extract32(insn, 14, 2);
5830     rn = extract32(insn, 5, 5);
5831     opc = extract32(insn, 3, 2);
5832     op2r = extract32(insn, 0, 3);
5833 
5834     if (mos || op || op2r) {
5835         unallocated_encoding(s);
5836         return;
5837     }
5838 
5839     switch (type) {
5840     case 0:
5841         size = MO_32;
5842         break;
5843     case 1:
5844         size = MO_64;
5845         break;
5846     case 3:
5847         size = MO_16;
5848         if (dc_isar_feature(aa64_fp16, s)) {
5849             break;
5850         }
5851         /* fallthru */
5852     default:
5853         unallocated_encoding(s);
5854         return;
5855     }
5856 
5857     if (!fp_access_check(s)) {
5858         return;
5859     }
5860 
5861     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
5862 }
5863 
5864 /* Floating point conditional compare
5865  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
5866  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5867  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
5868  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5869  */
5870 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
5871 {
5872     unsigned int mos, type, rm, cond, rn, op, nzcv;
5873     TCGLabel *label_continue = NULL;
5874     int size;
5875 
5876     mos = extract32(insn, 29, 3);
5877     type = extract32(insn, 22, 2);
5878     rm = extract32(insn, 16, 5);
5879     cond = extract32(insn, 12, 4);
5880     rn = extract32(insn, 5, 5);
5881     op = extract32(insn, 4, 1);
5882     nzcv = extract32(insn, 0, 4);
5883 
5884     if (mos) {
5885         unallocated_encoding(s);
5886         return;
5887     }
5888 
5889     switch (type) {
5890     case 0:
5891         size = MO_32;
5892         break;
5893     case 1:
5894         size = MO_64;
5895         break;
5896     case 3:
5897         size = MO_16;
5898         if (dc_isar_feature(aa64_fp16, s)) {
5899             break;
5900         }
5901         /* fallthru */
5902     default:
5903         unallocated_encoding(s);
5904         return;
5905     }
5906 
5907     if (!fp_access_check(s)) {
5908         return;
5909     }
5910 
5911     if (cond < 0x0e) { /* not always */
5912         TCGLabel *label_match = gen_new_label();
5913         label_continue = gen_new_label();
5914         arm_gen_test_cc(cond, label_match);
5915         /* nomatch: */
5916         gen_set_nzcv(tcg_constant_i64(nzcv << 28));
5917         tcg_gen_br(label_continue);
5918         gen_set_label(label_match);
5919     }
5920 
5921     handle_fp_compare(s, size, rn, rm, false, op);
5922 
5923     if (cond < 0x0e) {
5924         gen_set_label(label_continue);
5925     }
5926 }
5927 
5928 /* Floating point conditional select
5929  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
5930  * +---+---+---+-----------+------+---+------+------+-----+------+------+
5931  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
5932  * +---+---+---+-----------+------+---+------+------+-----+------+------+
5933  */
5934 static void disas_fp_csel(DisasContext *s, uint32_t insn)
5935 {
5936     unsigned int mos, type, rm, cond, rn, rd;
5937     TCGv_i64 t_true, t_false;
5938     DisasCompare64 c;
5939     MemOp sz;
5940 
5941     mos = extract32(insn, 29, 3);
5942     type = extract32(insn, 22, 2);
5943     rm = extract32(insn, 16, 5);
5944     cond = extract32(insn, 12, 4);
5945     rn = extract32(insn, 5, 5);
5946     rd = extract32(insn, 0, 5);
5947 
5948     if (mos) {
5949         unallocated_encoding(s);
5950         return;
5951     }
5952 
5953     switch (type) {
5954     case 0:
5955         sz = MO_32;
5956         break;
5957     case 1:
5958         sz = MO_64;
5959         break;
5960     case 3:
5961         sz = MO_16;
5962         if (dc_isar_feature(aa64_fp16, s)) {
5963             break;
5964         }
5965         /* fallthru */
5966     default:
5967         unallocated_encoding(s);
5968         return;
5969     }
5970 
5971     if (!fp_access_check(s)) {
5972         return;
5973     }
5974 
5975     /* Zero extend sreg & hreg inputs to 64 bits now.  */
5976     t_true = tcg_temp_new_i64();
5977     t_false = tcg_temp_new_i64();
5978     read_vec_element(s, t_true, rn, 0, sz);
5979     read_vec_element(s, t_false, rm, 0, sz);
5980 
5981     a64_test_cc(&c, cond);
5982     tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
5983                         t_true, t_false);
5984 
5985     /* Note that sregs & hregs write back zeros to the high bits,
5986        and we've already done the zero-extension.  */
5987     write_fp_dreg(s, rd, t_true);
5988 }
5989 
5990 /* Floating-point data-processing (1 source) - half precision */
5991 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
5992 {
5993     TCGv_ptr fpst = NULL;
5994     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
5995     TCGv_i32 tcg_res = tcg_temp_new_i32();
5996 
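         /* FMOV is a plain move and FABS/FNEG just clear or flip the f16
          * sign bit; only FSQRT and the FRINT* group need an fpstatus. */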
5997     switch (opcode) {
5998     case 0x0: /* FMOV */
5999         tcg_gen_mov_i32(tcg_res, tcg_op);
6000         break;
6001     case 0x1: /* FABS */
6002         tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
6003         break;
6004     case 0x2: /* FNEG */
6005         tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
6006         break;
6007     case 0x3: /* FSQRT */
6008         fpst = fpstatus_ptr(FPST_FPCR_F16);
6009         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
6010         break;
6011     case 0x8: /* FRINTN */
6012     case 0x9: /* FRINTP */
6013     case 0xa: /* FRINTM */
6014     case 0xb: /* FRINTZ */
6015     case 0xc: /* FRINTA */
6016     {
6017         TCGv_i32 tcg_rmode;
6018 
6019         fpst = fpstatus_ptr(FPST_FPCR_F16);
6020         tcg_rmode = gen_set_rmode(opcode & 7, fpst);
6021         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
6022         gen_restore_rmode(tcg_rmode, fpst);
6023         break;
6024     }
6025     case 0xe: /* FRINTX */
6026         fpst = fpstatus_ptr(FPST_FPCR_F16);
6027         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
6028         break;
6029     case 0xf: /* FRINTI */
6030         fpst = fpstatus_ptr(FPST_FPCR_F16);
6031         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
6032         break;
6033     default:
6034         g_assert_not_reached();
6035     }
6036 
6037     write_fp_sreg(s, rd, tcg_res);
6038 }
6039 
6040 /* Floating-point data-processing (1 source) - single precision */
6041 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
6042 {
6043     void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
6044     TCGv_i32 tcg_op, tcg_res;
6045     TCGv_ptr fpst;
6046     int rmode = -1;
6047 
6048     tcg_op = read_fp_sreg(s, rn);
6049     tcg_res = tcg_temp_new_i32();
6050 
6051     switch (opcode) {
6052     case 0x0: /* FMOV */
6053         tcg_gen_mov_i32(tcg_res, tcg_op);
6054         goto done;
6055     case 0x1: /* FABS */
6056         gen_helper_vfp_abss(tcg_res, tcg_op);
6057         goto done;
6058     case 0x2: /* FNEG */
6059         gen_helper_vfp_negs(tcg_res, tcg_op);
6060         goto done;
6061     case 0x3: /* FSQRT */
6062         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
6063         goto done;
6064     case 0x6: /* BFCVT */
6065         gen_fpst = gen_helper_bfcvt;
6066         break;
6067     case 0x8: /* FRINTN */
6068     case 0x9: /* FRINTP */
6069     case 0xa: /* FRINTM */
6070     case 0xb: /* FRINTZ */
6071     case 0xc: /* FRINTA */
6072         rmode = opcode & 7;
6073         gen_fpst = gen_helper_rints;
6074         break;
6075     case 0xe: /* FRINTX */
6076         gen_fpst = gen_helper_rints_exact;
6077         break;
6078     case 0xf: /* FRINTI */
6079         gen_fpst = gen_helper_rints;
6080         break;
6081     case 0x10: /* FRINT32Z */
6082         rmode = FPROUNDING_ZERO;
6083         gen_fpst = gen_helper_frint32_s;
6084         break;
6085     case 0x11: /* FRINT32X */
6086         gen_fpst = gen_helper_frint32_s;
6087         break;
6088     case 0x12: /* FRINT64Z */
6089         rmode = FPROUNDING_ZERO;
6090         gen_fpst = gen_helper_frint64_s;
6091         break;
6092     case 0x13: /* FRINT64X */
6093         gen_fpst = gen_helper_frint64_s;
6094         break;
6095     default:
6096         g_assert_not_reached();
6097     }
6098 
6099     fpst = fpstatus_ptr(FPST_FPCR);
6100     if (rmode >= 0) {
6101         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6102         gen_fpst(tcg_res, tcg_op, fpst);
6103         gen_restore_rmode(tcg_rmode, fpst);
6104     } else {
6105         gen_fpst(tcg_res, tcg_op, fpst);
6106     }
6107 
6108  done:
6109     write_fp_sreg(s, rd, tcg_res);
6110 }
6111 
6112 /* Floating-point data-processing (1 source) - double precision */
6113 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
6114 {
6115     void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
6116     TCGv_i64 tcg_op, tcg_res;
6117     TCGv_ptr fpst;
6118     int rmode = -1;
6119 
6120     switch (opcode) {
6121     case 0x0: /* FMOV */
6122         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
6123         return;
6124     }
6125 
6126     tcg_op = read_fp_dreg(s, rn);
6127     tcg_res = tcg_temp_new_i64();
6128 
6129     switch (opcode) {
6130     case 0x1: /* FABS */
6131         gen_helper_vfp_absd(tcg_res, tcg_op);
6132         goto done;
6133     case 0x2: /* FNEG */
6134         gen_helper_vfp_negd(tcg_res, tcg_op);
6135         goto done;
6136     case 0x3: /* FSQRT */
6137         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
6138         goto done;
6139     case 0x8: /* FRINTN */
6140     case 0x9: /* FRINTP */
6141     case 0xa: /* FRINTM */
6142     case 0xb: /* FRINTZ */
6143     case 0xc: /* FRINTA */
6144         rmode = opcode & 7;
6145         gen_fpst = gen_helper_rintd;
6146         break;
6147     case 0xe: /* FRINTX */
6148         gen_fpst = gen_helper_rintd_exact;
6149         break;
6150     case 0xf: /* FRINTI */
6151         gen_fpst = gen_helper_rintd;
6152         break;
6153     case 0x10: /* FRINT32Z */
6154         rmode = FPROUNDING_ZERO;
6155         gen_fpst = gen_helper_frint32_d;
6156         break;
6157     case 0x11: /* FRINT32X */
6158         gen_fpst = gen_helper_frint32_d;
6159         break;
6160     case 0x12: /* FRINT64Z */
6161         rmode = FPROUNDING_ZERO;
6162         gen_fpst = gen_helper_frint64_d;
6163         break;
6164     case 0x13: /* FRINT64X */
6165         gen_fpst = gen_helper_frint64_d;
6166         break;
6167     default:
6168         g_assert_not_reached();
6169     }
6170 
6171     fpst = fpstatus_ptr(FPST_FPCR);
6172     if (rmode >= 0) {
6173         TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
6174         gen_fpst(tcg_res, tcg_op, fpst);
6175         gen_restore_rmode(tcg_rmode, fpst);
6176     } else {
6177         gen_fpst(tcg_res, tcg_op, fpst);
6178     }
6179 
6180  done:
6181     write_fp_dreg(s, rd, tcg_res);
6182 }
6183 
6184 static void handle_fp_fcvt(DisasContext *s, int opcode,
6185                            int rd, int rn, int dtype, int ntype)
6186 {
6187     switch (ntype) {
6188     case 0x0:
6189     {
6190         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6191         if (dtype == 1) {
6192             /* Single to double */
6193             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6194             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
6195             write_fp_dreg(s, rd, tcg_rd);
6196         } else {
6197             /* Single to half */
6198             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6199             TCGv_i32 ahp = get_ahp_flag();
6200             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6201 
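                 /* FPCR.AHP selects between IEEE half-precision and the
                  * Arm alternative format for the f16 conversion. */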
6202             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6203             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6204             write_fp_sreg(s, rd, tcg_rd);
6205         }
6206         break;
6207     }
6208     case 0x1:
6209     {
6210         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
6211         TCGv_i32 tcg_rd = tcg_temp_new_i32();
6212         if (dtype == 0) {
6213             /* Double to single */
6214             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
6215         } else {
6216             TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6217             TCGv_i32 ahp = get_ahp_flag();
6218             /* Double to half */
6219             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6220             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6221         }
6222         write_fp_sreg(s, rd, tcg_rd);
6223         break;
6224     }
6225     case 0x3:
6226     {
6227         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6228         TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
6229         TCGv_i32 tcg_ahp = get_ahp_flag();
6230         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
6231         if (dtype == 0) {
6232             /* Half to single */
6233             TCGv_i32 tcg_rd = tcg_temp_new_i32();
6234             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6235             write_fp_sreg(s, rd, tcg_rd);
6236         } else {
6237             /* Half to double */
6238             TCGv_i64 tcg_rd = tcg_temp_new_i64();
6239             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6240             write_fp_dreg(s, rd, tcg_rd);
6241         }
6242         break;
6243     }
6244     default:
6245         g_assert_not_reached();
6246     }
6247 }
6248 
6249 /* Floating point data-processing (1 source)
6250  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
6251  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6252  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
6253  * +---+---+---+-----------+------+---+--------+-----------+------+------+
6254  */
6255 static void disas_fp_1src(DisasContext *s, uint32_t insn)
6256 {
6257     int mos = extract32(insn, 29, 3);
6258     int type = extract32(insn, 22, 2);
6259     int opcode = extract32(insn, 15, 6);
6260     int rn = extract32(insn, 5, 5);
6261     int rd = extract32(insn, 0, 5);
6262 
6263     if (mos) {
6264         goto do_unallocated;
6265     }
6266 
6267     switch (opcode) {
6268     case 0x4: case 0x5: case 0x7:
6269     {
6270         /* FCVT between half, single and double precision */
6271         int dtype = extract32(opcode, 0, 2);
6272         if (type == 2 || dtype == type) {
6273             goto do_unallocated;
6274         }
6275         if (!fp_access_check(s)) {
6276             return;
6277         }
6278 
6279         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
6280         break;
6281     }
6282 
6283     case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
6284         if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
6285             goto do_unallocated;
6286         }
6287         /* fall through */
6288     case 0x0 ... 0x3:
6289     case 0x8 ... 0xc:
6290     case 0xe ... 0xf:
6291         /* 32-to-32 and 64-to-64 ops */
6292         switch (type) {
6293         case 0:
6294             if (!fp_access_check(s)) {
6295                 return;
6296             }
6297             handle_fp_1src_single(s, opcode, rd, rn);
6298             break;
6299         case 1:
6300             if (!fp_access_check(s)) {
6301                 return;
6302             }
6303             handle_fp_1src_double(s, opcode, rd, rn);
6304             break;
6305         case 3:
6306             if (!dc_isar_feature(aa64_fp16, s)) {
6307                 goto do_unallocated;
6308             }
6309 
6310             if (!fp_access_check(s)) {
6311                 return;
6312             }
6313             handle_fp_1src_half(s, opcode, rd, rn);
6314             break;
6315         default:
6316             goto do_unallocated;
6317         }
6318         break;
6319 
6320     case 0x6:
6321         switch (type) {
6322         case 1: /* BFCVT */
6323             if (!dc_isar_feature(aa64_bf16, s)) {
6324                 goto do_unallocated;
6325             }
6326             if (!fp_access_check(s)) {
6327                 return;
6328             }
6329             handle_fp_1src_single(s, opcode, rd, rn);
6330             break;
6331         default:
6332             goto do_unallocated;
6333         }
6334         break;
6335 
6336     default:
6337     do_unallocated:
6338         unallocated_encoding(s);
6339         break;
6340     }
6341 }
6342 
6343 /* Floating-point data-processing (2 source) - single precision */
6344 static void handle_fp_2src_single(DisasContext *s, int opcode,
6345                                   int rd, int rn, int rm)
6346 {
6347     TCGv_i32 tcg_op1;
6348     TCGv_i32 tcg_op2;
6349     TCGv_i32 tcg_res;
6350     TCGv_ptr fpst;
6351 
6352     tcg_res = tcg_temp_new_i32();
6353     fpst = fpstatus_ptr(FPST_FPCR);
6354     tcg_op1 = read_fp_sreg(s, rn);
6355     tcg_op2 = read_fp_sreg(s, rm);
6356 
6357     switch (opcode) {
6358     case 0x0: /* FMUL */
6359         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6360         break;
6361     case 0x1: /* FDIV */
6362         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
6363         break;
6364     case 0x2: /* FADD */
6365         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6366         break;
6367     case 0x3: /* FSUB */
6368         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
6369         break;
6370     case 0x4: /* FMAX */
6371         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6372         break;
6373     case 0x5: /* FMIN */
6374         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6375         break;
6376     case 0x6: /* FMAXNM */
6377         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6378         break;
6379     case 0x7: /* FMINNM */
6380         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6381         break;
6382     case 0x8: /* FNMUL */
6383         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6384         gen_helper_vfp_negs(tcg_res, tcg_res);
6385         break;
6386     }
6387 
6388     write_fp_sreg(s, rd, tcg_res);
6389 }
6390 
6391 /* Floating-point data-processing (2 source) - double precision */
6392 static void handle_fp_2src_double(DisasContext *s, int opcode,
6393                                   int rd, int rn, int rm)
6394 {
6395     TCGv_i64 tcg_op1;
6396     TCGv_i64 tcg_op2;
6397     TCGv_i64 tcg_res;
6398     TCGv_ptr fpst;
6399 
6400     tcg_res = tcg_temp_new_i64();
6401     fpst = fpstatus_ptr(FPST_FPCR);
6402     tcg_op1 = read_fp_dreg(s, rn);
6403     tcg_op2 = read_fp_dreg(s, rm);
6404 
6405     switch (opcode) {
6406     case 0x0: /* FMUL */
6407         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6408         break;
6409     case 0x1: /* FDIV */
6410         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
6411         break;
6412     case 0x2: /* FADD */
6413         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6414         break;
6415     case 0x3: /* FSUB */
6416         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
6417         break;
6418     case 0x4: /* FMAX */
6419         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6420         break;
6421     case 0x5: /* FMIN */
6422         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6423         break;
6424     case 0x6: /* FMAXNM */
6425         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6426         break;
6427     case 0x7: /* FMINNM */
6428         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6429         break;
6430     case 0x8: /* FNMUL */
6431         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6432         gen_helper_vfp_negd(tcg_res, tcg_res);
6433         break;
6434     }
6435 
6436     write_fp_dreg(s, rd, tcg_res);
6437 }
6438 
6439 /* Floating-point data-processing (2 source) - half precision */
6440 static void handle_fp_2src_half(DisasContext *s, int opcode,
6441                                 int rd, int rn, int rm)
6442 {
6443     TCGv_i32 tcg_op1;
6444     TCGv_i32 tcg_op2;
6445     TCGv_i32 tcg_res;
6446     TCGv_ptr fpst;
6447 
6448     tcg_res = tcg_temp_new_i32();
6449     fpst = fpstatus_ptr(FPST_FPCR_F16);
6450     tcg_op1 = read_fp_hreg(s, rn);
6451     tcg_op2 = read_fp_hreg(s, rm);
6452 
6453     switch (opcode) {
6454     case 0x0: /* FMUL */
6455         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6456         break;
6457     case 0x1: /* FDIV */
6458         gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
6459         break;
6460     case 0x2: /* FADD */
6461         gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6462         break;
6463     case 0x3: /* FSUB */
6464         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6465         break;
6466     case 0x4: /* FMAX */
6467         gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6468         break;
6469     case 0x5: /* FMIN */
6470         gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6471         break;
6472     case 0x6: /* FMAXNM */
6473         gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6474         break;
6475     case 0x7: /* FMINNM */
6476         gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6477         break;
6478     case 0x8: /* FNMUL */
6479         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6480         tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6481         break;
6482     default:
6483         g_assert_not_reached();
6484     }
6485 
6486     write_fp_sreg(s, rd, tcg_res);
6487 }
6488 
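/*
 * In the half-precision FNMUL case above the sign is flipped by
 * XORing bit 15, the float16 sign bit, rather than via a neg helper.
 * For example, 0x3c00 (+1.0) ^ 0x8000 = 0xbc00 (-1.0). This is safe
 * even for NaN inputs, since negation only toggles the sign bit.
 */
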
6489 /* Floating point data-processing (2 source)
6490  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6491  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6492  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
6493  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6494  */
6495 static void disas_fp_2src(DisasContext *s, uint32_t insn)
6496 {
6497     int mos = extract32(insn, 29, 3);
6498     int type = extract32(insn, 22, 2);
6499     int rd = extract32(insn, 0, 5);
6500     int rn = extract32(insn, 5, 5);
6501     int rm = extract32(insn, 16, 5);
6502     int opcode = extract32(insn, 12, 4);
6503 
6504     if (opcode > 8 || mos) {
6505         unallocated_encoding(s);
6506         return;
6507     }
6508 
6509     switch (type) {
6510     case 0:
6511         if (!fp_access_check(s)) {
6512             return;
6513         }
6514         handle_fp_2src_single(s, opcode, rd, rn, rm);
6515         break;
6516     case 1:
6517         if (!fp_access_check(s)) {
6518             return;
6519         }
6520         handle_fp_2src_double(s, opcode, rd, rn, rm);
6521         break;
6522     case 3:
6523         if (!dc_isar_feature(aa64_fp16, s)) {
6524             unallocated_encoding(s);
6525             return;
6526         }
6527         if (!fp_access_check(s)) {
6528             return;
6529         }
6530         handle_fp_2src_half(s, opcode, rd, rn, rm);
6531         break;
6532     default:
6533         unallocated_encoding(s);
6534     }
6535 }
6536 
6537 /* Floating-point data-processing (3 source) - single precision */
6538 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6539                                   int rd, int rn, int rm, int ra)
6540 {
6541     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6542     TCGv_i32 tcg_res = tcg_temp_new_i32();
6543     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6544 
6545     tcg_op1 = read_fp_sreg(s, rn);
6546     tcg_op2 = read_fp_sreg(s, rm);
6547     tcg_op3 = read_fp_sreg(s, ra);
6548 
6549     /* These are fused multiply-add, and must be done as one
6550      * floating point operation with no rounding between the
6551      * multiplication and addition steps.
6552      * NB that doing the negations here as separate steps is
6553      * correct: an input NaN should come out with its sign bit
6554      * flipped if it is a negated input.
6555      */
6556     if (o1) {
6557         gen_helper_vfp_negs(tcg_op3, tcg_op3);
6558     }
6559 
6560     if (o0 != o1) {
6561         gen_helper_vfp_negs(tcg_op1, tcg_op1);
6562     }
6563 
6564     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6565 
6566     write_fp_sreg(s, rd, tcg_res);
6567 }
6568 
6569 /* Floating-point data-processing (3 source) - double precision */
6570 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6571                                   int rd, int rn, int rm, int ra)
6572 {
6573     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6574     TCGv_i64 tcg_res = tcg_temp_new_i64();
6575     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6576 
6577     tcg_op1 = read_fp_dreg(s, rn);
6578     tcg_op2 = read_fp_dreg(s, rm);
6579     tcg_op3 = read_fp_dreg(s, ra);
6580 
6581     /* These are fused multiply-add, and must be done as one
6582      * floating point operation with no rounding between the
6583      * multiplication and addition steps.
6584      * NB that doing the negations here as separate steps is
6585      * correct: an input NaN should come out with its sign bit
6586      * flipped if it is a negated input.
6587      */
6588     if (o1) {
6589         gen_helper_vfp_negd(tcg_op3, tcg_op3);
6590     }
6591 
6592     if (o0 != o1) {
6593         gen_helper_vfp_negd(tcg_op1, tcg_op1);
6594     }
6595 
6596     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6597 
6598     write_fp_dreg(s, rd, tcg_res);
6599 }
6600 
6601 /* Floating-point data-processing (3 source) - half precision */
6602 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6603                                 int rd, int rn, int rm, int ra)
6604 {
6605     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6606     TCGv_i32 tcg_res = tcg_temp_new_i32();
6607     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
6608 
6609     tcg_op1 = read_fp_hreg(s, rn);
6610     tcg_op2 = read_fp_hreg(s, rm);
6611     tcg_op3 = read_fp_hreg(s, ra);
6612 
6613     /* These are fused multiply-add, and must be done as one
6614      * floating point operation with no rounding between the
6615      * multiplication and addition steps.
6616      * NB that doing the negations here as separate steps is
6617      * correct: an input NaN should come out with its sign bit
6618      * flipped if it is a negated input.
6619      */
6620     if (o1) {
6621         tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6622     }
6623 
6624     if (o0 != o1) {
6625         tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6626     }
6627 
6628     gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6629 
6630     write_fp_sreg(s, rd, tcg_res);
6631 }
6632 
6633 /* Floating point data-processing (3 source)
6634  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
6635  * +---+---+---+-----------+------+----+------+----+------+------+------+
6636  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6637  * +---+---+---+-----------+------+----+------+----+------+------+------+
6638  */
6639 static void disas_fp_3src(DisasContext *s, uint32_t insn)
6640 {
6641     int mos = extract32(insn, 29, 3);
6642     int type = extract32(insn, 22, 2);
6643     int rd = extract32(insn, 0, 5);
6644     int rn = extract32(insn, 5, 5);
6645     int ra = extract32(insn, 10, 5);
6646     int rm = extract32(insn, 16, 5);
6647     bool o0 = extract32(insn, 15, 1);
6648     bool o1 = extract32(insn, 21, 1);
6649 
6650     if (mos) {
6651         unallocated_encoding(s);
6652         return;
6653     }
6654 
6655     switch (type) {
6656     case 0:
6657         if (!fp_access_check(s)) {
6658             return;
6659         }
6660         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6661         break;
6662     case 1:
6663         if (!fp_access_check(s)) {
6664             return;
6665         }
6666         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6667         break;
6668     case 3:
6669         if (!dc_isar_feature(aa64_fp16, s)) {
6670             unallocated_encoding(s);
6671             return;
6672         }
6673         if (!fp_access_check(s)) {
6674             return;
6675         }
6676         handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6677         break;
6678     default:
6679         unallocated_encoding(s);
6680     }
6681 }
6682 
6683 /* Floating point immediate
6684  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
6685  * +---+---+---+-----------+------+---+------------+-------+------+------+
6686  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
6687  * +---+---+---+-----------+------+---+------------+-------+------+------+
6688  */
6689 static void disas_fp_imm(DisasContext *s, uint32_t insn)
6690 {
6691     int rd = extract32(insn, 0, 5);
6692     int imm5 = extract32(insn, 5, 5);
6693     int imm8 = extract32(insn, 13, 8);
6694     int type = extract32(insn, 22, 2);
6695     int mos = extract32(insn, 29, 3);
6696     uint64_t imm;
6697     MemOp sz;
6698 
6699     if (mos || imm5) {
6700         unallocated_encoding(s);
6701         return;
6702     }
6703 
6704     switch (type) {
6705     case 0:
6706         sz = MO_32;
6707         break;
6708     case 1:
6709         sz = MO_64;
6710         break;
6711     case 3:
6712         sz = MO_16;
6713         if (dc_isar_feature(aa64_fp16, s)) {
6714             break;
6715         }
6716         /* fallthru */
6717     default:
6718         unallocated_encoding(s);
6719         return;
6720     }
6721 
6722     if (!fp_access_check(s)) {
6723         return;
6724     }
6725 
6726     imm = vfp_expand_imm(sz, imm8);
6727     write_fp_dreg(s, rd, tcg_constant_i64(imm));
6728 }
6729 
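/*
 * vfp_expand_imm() reconstructs a full-width constant from the 8-bit
 * imm8 field: sign from imm8<7>, a small biased exponent derived from
 * imm8<6:4>, and a 4-bit fraction from imm8<3:0>. As a worked example,
 * imm8 = 0x70 expands to 1.0 in every precision: 0x3c00 (MO_16),
 * 0x3f800000 (MO_32) or 0x3ff0000000000000 (MO_64).
 */
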
6730 /* Handle floating point <=> fixed point conversions. Note that we can
6731  * also deal with fp <=> integer conversions as a special case (scale == 64)
6732  * OPTME: consider handling that special case specially or at least skipping
6733  * the call to scalbn in the helpers for zero shifts.
6734  */
6735 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
6736                            bool itof, int rmode, int scale, int sf, int type)
6737 {
6738     bool is_signed = !(opcode & 1);
6739     TCGv_ptr tcg_fpstatus;
6740     TCGv_i32 tcg_shift, tcg_single;
6741     TCGv_i64 tcg_double;
6742 
6743     tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
6744 
6745     tcg_shift = tcg_constant_i32(64 - scale);
6746 
6747     if (itof) {
6748         TCGv_i64 tcg_int = cpu_reg(s, rn);
6749         if (!sf) {
6750             TCGv_i64 tcg_extend = tcg_temp_new_i64();
6751 
6752             if (is_signed) {
6753                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
6754             } else {
6755                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
6756             }
6757 
6758             tcg_int = tcg_extend;
6759         }
6760 
6761         switch (type) {
6762         case 1: /* float64 */
6763             tcg_double = tcg_temp_new_i64();
6764             if (is_signed) {
6765                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6766                                      tcg_shift, tcg_fpstatus);
6767             } else {
6768                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6769                                      tcg_shift, tcg_fpstatus);
6770             }
6771             write_fp_dreg(s, rd, tcg_double);
6772             break;
6773 
6774         case 0: /* float32 */
6775             tcg_single = tcg_temp_new_i32();
6776             if (is_signed) {
6777                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6778                                      tcg_shift, tcg_fpstatus);
6779             } else {
6780                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6781                                      tcg_shift, tcg_fpstatus);
6782             }
6783             write_fp_sreg(s, rd, tcg_single);
6784             break;
6785 
6786         case 3: /* float16 */
6787             tcg_single = tcg_temp_new_i32();
6788             if (is_signed) {
6789                 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
6790                                      tcg_shift, tcg_fpstatus);
6791             } else {
6792                 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
6793                                      tcg_shift, tcg_fpstatus);
6794             }
6795             write_fp_sreg(s, rd, tcg_single);
6796             break;
6797 
6798         default:
6799             g_assert_not_reached();
6800         }
6801     } else {
6802         TCGv_i64 tcg_int = cpu_reg(s, rd);
6803         TCGv_i32 tcg_rmode;
6804 
6805         if (extract32(opcode, 2, 1)) {
6806             /* There are too many rounding modes to all fit into rmode,
6807              * so FCVTA[US] is a special case.
6808              */
6809             rmode = FPROUNDING_TIEAWAY;
6810         }
6811 
6812         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
6813 
6814         switch (type) {
6815         case 1: /* float64 */
6816             tcg_double = read_fp_dreg(s, rn);
6817             if (is_signed) {
6818                 if (!sf) {
6819                     gen_helper_vfp_tosld(tcg_int, tcg_double,
6820                                          tcg_shift, tcg_fpstatus);
6821                 } else {
6822                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
6823                                          tcg_shift, tcg_fpstatus);
6824                 }
6825             } else {
6826                 if (!sf) {
6827                     gen_helper_vfp_tould(tcg_int, tcg_double,
6828                                          tcg_shift, tcg_fpstatus);
6829                 } else {
6830                     gen_helper_vfp_touqd(tcg_int, tcg_double,
6831                                          tcg_shift, tcg_fpstatus);
6832                 }
6833             }
6834             if (!sf) {
6835                 tcg_gen_ext32u_i64(tcg_int, tcg_int);
6836             }
6837             break;
6838 
6839         case 0: /* float32 */
6840             tcg_single = read_fp_sreg(s, rn);
6841             if (sf) {
6842                 if (is_signed) {
6843                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
6844                                          tcg_shift, tcg_fpstatus);
6845                 } else {
6846                     gen_helper_vfp_touqs(tcg_int, tcg_single,
6847                                          tcg_shift, tcg_fpstatus);
6848                 }
6849             } else {
6850                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6851                 if (is_signed) {
6852                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
6853                                          tcg_shift, tcg_fpstatus);
6854                 } else {
6855                     gen_helper_vfp_touls(tcg_dest, tcg_single,
6856                                          tcg_shift, tcg_fpstatus);
6857                 }
6858                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6859             }
6860             break;
6861 
6862         case 3: /* float16 */
6863             tcg_single = read_fp_sreg(s, rn);
6864             if (sf) {
6865                 if (is_signed) {
6866                     gen_helper_vfp_tosqh(tcg_int, tcg_single,
6867                                          tcg_shift, tcg_fpstatus);
6868                 } else {
6869                     gen_helper_vfp_touqh(tcg_int, tcg_single,
6870                                          tcg_shift, tcg_fpstatus);
6871                 }
6872             } else {
6873                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
6874                 if (is_signed) {
6875                     gen_helper_vfp_toslh(tcg_dest, tcg_single,
6876                                          tcg_shift, tcg_fpstatus);
6877                 } else {
6878                     gen_helper_vfp_toulh(tcg_dest, tcg_single,
6879                                          tcg_shift, tcg_fpstatus);
6880                 }
6881                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6882             }
6883             break;
6884 
6885         default:
6886             g_assert_not_reached();
6887         }
6888 
6889         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
6890     }
6891 }
6892 
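/*
 * Here tcg_shift = 64 - scale is the number of fractional bits in the
 * fixed-point value, since the instruction's scale field encodes
 * 64 - fbits. For example, FCVTZS with 16 fractional bits has
 * scale = 48, so the helper scales by 2^16 before converting;
 * scale == 64 gives a shift of 0, i.e. a plain fp <-> integer
 * conversion.
 */
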
6893 /* Floating point <-> fixed point conversions
6894  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
6895  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6896  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
6897  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6898  */
6899 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
6900 {
6901     int rd = extract32(insn, 0, 5);
6902     int rn = extract32(insn, 5, 5);
6903     int scale = extract32(insn, 10, 6);
6904     int opcode = extract32(insn, 16, 3);
6905     int rmode = extract32(insn, 19, 2);
6906     int type = extract32(insn, 22, 2);
6907     bool sbit = extract32(insn, 29, 1);
6908     bool sf = extract32(insn, 31, 1);
6909     bool itof;
6910 
6911     if (sbit || (!sf && scale < 32)) {
6912         unallocated_encoding(s);
6913         return;
6914     }
6915 
6916     switch (type) {
6917     case 0: /* float32 */
6918     case 1: /* float64 */
6919         break;
6920     case 3: /* float16 */
6921         if (dc_isar_feature(aa64_fp16, s)) {
6922             break;
6923         }
6924         /* fallthru */
6925     default:
6926         unallocated_encoding(s);
6927         return;
6928     }
6929 
6930     switch ((rmode << 3) | opcode) {
6931     case 0x2: /* SCVTF */
6932     case 0x3: /* UCVTF */
6933         itof = true;
6934         break;
6935     case 0x18: /* FCVTZS */
6936     case 0x19: /* FCVTZU */
6937         itof = false;
6938         break;
6939     default:
6940         unallocated_encoding(s);
6941         return;
6942     }
6943 
6944     if (!fp_access_check(s)) {
6945         return;
6946     }
6947 
6948     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
6949 }
6950 
6951 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
6952 {
6953     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
6954      * without conversion.
6955      */
6956 
6957     if (itof) {
6958         TCGv_i64 tcg_rn = cpu_reg(s, rn);
6959         TCGv_i64 tmp;
6960 
6961         switch (type) {
6962         case 0:
6963             /* 32 bit */
6964             tmp = tcg_temp_new_i64();
6965             tcg_gen_ext32u_i64(tmp, tcg_rn);
6966             write_fp_dreg(s, rd, tmp);
6967             break;
6968         case 1:
6969             /* 64 bit */
6970             write_fp_dreg(s, rd, tcg_rn);
6971             break;
6972         case 2:
6973             /* 64 bit to top half. */
6974             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
6975             clear_vec_high(s, true, rd);
6976             break;
6977         case 3:
6978             /* 16 bit */
6979             tmp = tcg_temp_new_i64();
6980             tcg_gen_ext16u_i64(tmp, tcg_rn);
6981             write_fp_dreg(s, rd, tmp);
6982             break;
6983         default:
6984             g_assert_not_reached();
6985         }
6986     } else {
6987         TCGv_i64 tcg_rd = cpu_reg(s, rd);
6988 
6989         switch (type) {
6990         case 0:
6991             /* 32 bit */
6992             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
6993             break;
6994         case 1:
6995             /* 64 bit */
6996             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
6997             break;
6998         case 2:
6999             /* 64 bits from top half */
7000             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
7001             break;
7002         case 3:
7003             /* 16 bit */
7004             tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
7005             break;
7006         default:
7007             g_assert_not_reached();
7008         }
7009     }
7010 }
7011 
7012 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
7013 {
7014     TCGv_i64 t = read_fp_dreg(s, rn);
7015     TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
7016 
7017     gen_helper_fjcvtzs(t, t, fpstatus);
7018 
7019     tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
7020     tcg_gen_extrh_i64_i32(cpu_ZF, t);
7021     tcg_gen_movi_i32(cpu_CF, 0);
7022     tcg_gen_movi_i32(cpu_NF, 0);
7023     tcg_gen_movi_i32(cpu_VF, 0);
7024 }
7025 
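/*
 * FJCVTZS implements the JavaScript ToInt32 conversion: the double is
 * converted to a signed 32-bit integer rounding toward zero, with
 * out-of-range values wrapping modulo 2^32 rather than saturating.
 * The helper returns the result in the low 32 bits and the Z-flag
 * information in the high bits, hence the extrh into cpu_ZF (which
 * holds an inverted-sense value: Z is set iff cpu_ZF == 0). Z is set
 * only when the conversion was exact.
 */
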
7026 /* Floating point <-> integer conversions
7027  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
7028  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
7029  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
7030  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
7031  */
7032 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
7033 {
7034     int rd = extract32(insn, 0, 5);
7035     int rn = extract32(insn, 5, 5);
7036     int opcode = extract32(insn, 16, 3);
7037     int rmode = extract32(insn, 19, 2);
7038     int type = extract32(insn, 22, 2);
7039     bool sbit = extract32(insn, 29, 1);
7040     bool sf = extract32(insn, 31, 1);
7041     bool itof = false;
7042 
7043     if (sbit) {
7044         goto do_unallocated;
7045     }
7046 
7047     switch (opcode) {
7048     case 2: /* SCVTF */
7049     case 3: /* UCVTF */
7050         itof = true;
7051         /* fallthru */
7052     case 4: /* FCVTAS */
7053     case 5: /* FCVTAU */
7054         if (rmode != 0) {
7055             goto do_unallocated;
7056         }
7057         /* fallthru */
7058     case 0: /* FCVT[NPMZ]S */
7059     case 1: /* FCVT[NPMZ]U */
7060         switch (type) {
7061         case 0: /* float32 */
7062         case 1: /* float64 */
7063             break;
7064         case 3: /* float16 */
7065             if (!dc_isar_feature(aa64_fp16, s)) {
7066                 goto do_unallocated;
7067             }
7068             break;
7069         default:
7070             goto do_unallocated;
7071         }
7072         if (!fp_access_check(s)) {
7073             return;
7074         }
7075         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
7076         break;
7077 
7078     default:
7079         switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
7080         case 0b01100110: /* FMOV half <-> 32-bit int */
7081         case 0b01100111:
7082         case 0b11100110: /* FMOV half <-> 64-bit int */
7083         case 0b11100111:
7084             if (!dc_isar_feature(aa64_fp16, s)) {
7085                 goto do_unallocated;
7086             }
7087             /* fallthru */
7088         case 0b00000110: /* FMOV 32-bit */
7089         case 0b00000111:
7090         case 0b10100110: /* FMOV 64-bit */
7091         case 0b10100111:
7092         case 0b11001110: /* FMOV top half of 128-bit */
7093         case 0b11001111:
7094             if (!fp_access_check(s)) {
7095                 return;
7096             }
7097             itof = opcode & 1;
7098             handle_fmov(s, rd, rn, type, itof);
7099             break;
7100 
7101         case 0b00111110: /* FJCVTZS */
7102             if (!dc_isar_feature(aa64_jscvt, s)) {
7103                 goto do_unallocated;
7104             } else if (fp_access_check(s)) {
7105                 handle_fjcvtzs(s, rd, rn);
7106             }
7107             break;
7108 
7109         default:
7110         do_unallocated:
7111             unallocated_encoding(s);
7112             return;
7113         }
7114         break;
7115     }
7116 }
7117 
7118 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
7119  *   31  30  29 28     25 24                          0
7120  * +---+---+---+---------+-----------------------------+
7121  * |   | 0 |   | 1 1 1 1 |                             |
7122  * +---+---+---+---------+-----------------------------+
7123  */
7124 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
7125 {
7126     if (extract32(insn, 24, 1)) {
7127         /* Floating point data-processing (3 source) */
7128         disas_fp_3src(s, insn);
7129     } else if (extract32(insn, 21, 1) == 0) {
7130         /* Floating point to fixed point conversions */
7131         disas_fp_fixed_conv(s, insn);
7132     } else {
7133         switch (extract32(insn, 10, 2)) {
7134         case 1:
7135             /* Floating point conditional compare */
7136             disas_fp_ccomp(s, insn);
7137             break;
7138         case 2:
7139             /* Floating point data-processing (2 source) */
7140             disas_fp_2src(s, insn);
7141             break;
7142         case 3:
7143             /* Floating point conditional select */
7144             disas_fp_csel(s, insn);
7145             break;
7146         case 0:
7147             switch (ctz32(extract32(insn, 12, 4))) {
7148             case 0: /* [15:12] == xxx1 */
7149                 /* Floating point immediate */
7150                 disas_fp_imm(s, insn);
7151                 break;
7152             case 1: /* [15:12] == xx10 */
7153                 /* Floating point compare */
7154                 disas_fp_compare(s, insn);
7155                 break;
7156             case 2: /* [15:12] == x100 */
7157                 /* Floating point data-processing (1 source) */
7158                 disas_fp_1src(s, insn);
7159                 break;
7160             case 3: /* [15:12] == 1000 */
7161                 unallocated_encoding(s);
7162                 break;
7163             default: /* [15:12] == 0000 */
7164                 /* Floating point <-> integer conversions */
7165                 disas_fp_int_conv(s, insn);
7166                 break;
7167             }
7168             break;
7169         }
7170     }
7171 }
7172 
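/*
 * The ctz32() decode above works because bits [15:12] thin out the FP
 * op groups from the least significant end: any xxx1 pattern is an
 * immediate (ctz 0), xx10 is a compare (ctz 1), x100 is 1-source
 * (ctz 2), 1000 is unallocated (ctz 3), and for 0000 ctz32() returns
 * 32, which lands in the default int <-> fp conversion case.
 */
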
7173 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
7174                      int pos)
7175 {
7176     /* Extract 64 bits from the middle of two concatenated 64 bit
7177      * vector register slices left:right. The extracted bits start
7178      * at 'pos' bits into the right (least significant) side.
7179      * We return the result in tcg_right, and guarantee not to
7180      * trash tcg_left.
7181      */
7182     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7183     assert(pos > 0 && pos < 64);
7184 
7185     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
7186     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
7187     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
7188 }
7189 
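/*
 * For example, with pos == 24 the result assembled in tcg_right is
 *   (right >> 24) | (left << 40)
 * i.e. bits [87:24] of the 128-bit concatenation left:right.
 */
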
7190 /* EXT
7191  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
7192  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7193  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
7194  * +---+---+-------------+-----+---+------+---+------+---+------+------+
7195  */
7196 static void disas_simd_ext(DisasContext *s, uint32_t insn)
7197 {
7198     int is_q = extract32(insn, 30, 1);
7199     int op2 = extract32(insn, 22, 2);
7200     int imm4 = extract32(insn, 11, 4);
7201     int rm = extract32(insn, 16, 5);
7202     int rn = extract32(insn, 5, 5);
7203     int rd = extract32(insn, 0, 5);
7204     int pos = imm4 << 3;
7205     TCGv_i64 tcg_resl, tcg_resh;
7206 
7207     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
7208         unallocated_encoding(s);
7209         return;
7210     }
7211 
7212     if (!fp_access_check(s)) {
7213         return;
7214     }
7215 
7216     tcg_resh = tcg_temp_new_i64();
7217     tcg_resl = tcg_temp_new_i64();
7218 
7219     /* Vd gets bits starting at pos bits into Vm:Vn. This is
7220      * either extracting 128 bits from a 128:128 concatenation, or
7221      * extracting 64 bits from a 64:64 concatenation.
7222      */
7223     if (!is_q) {
7224         read_vec_element(s, tcg_resl, rn, 0, MO_64);
7225         if (pos != 0) {
7226             read_vec_element(s, tcg_resh, rm, 0, MO_64);
7227             do_ext64(s, tcg_resh, tcg_resl, pos);
7228         }
7229     } else {
7230         TCGv_i64 tcg_hh;
7231         typedef struct {
7232             int reg;
7233             int elt;
7234         } EltPosns;
7235         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
7236         EltPosns *elt = eltposns;
7237 
7238         if (pos >= 64) {
7239             elt++;
7240             pos -= 64;
7241         }
7242 
7243         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
7244         elt++;
7245         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
7246         elt++;
7247         if (pos != 0) {
7248             do_ext64(s, tcg_resh, tcg_resl, pos);
7249             tcg_hh = tcg_temp_new_i64();
7250             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
7251             do_ext64(s, tcg_hh, tcg_resh, pos);
7252         }
7253     }
7254 
7255     write_vec_element(s, tcg_resl, rd, 0, MO_64);
7256     if (is_q) {
7257         write_vec_element(s, tcg_resh, rd, 1, MO_64);
7258     }
7259     clear_vec_high(s, is_q, rd);
7260 }
7261 
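/*
 * As a worked example of the is_q path above: imm4 = 10 gives
 * pos = 80, so the element pointer is advanced once (pos becomes 16)
 * and the three slices read are Vn[1], Vm[0] and Vm[1], producing
 * Vd = bits [207:80] of the 256-bit concatenation Vm:Vn.
 */
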
7262 /* TBL/TBX
7263  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
7264  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7265  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
7266  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7267  */
7268 static void disas_simd_tb(DisasContext *s, uint32_t insn)
7269 {
7270     int op2 = extract32(insn, 22, 2);
7271     int is_q = extract32(insn, 30, 1);
7272     int rm = extract32(insn, 16, 5);
7273     int rn = extract32(insn, 5, 5);
7274     int rd = extract32(insn, 0, 5);
7275     int is_tbx = extract32(insn, 12, 1);
7276     int len = (extract32(insn, 13, 2) + 1) * 16;
7277 
7278     if (op2 != 0) {
7279         unallocated_encoding(s);
7280         return;
7281     }
7282 
7283     if (!fp_access_check(s)) {
7284         return;
7285     }
7286 
7287     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
7288                        vec_full_reg_offset(s, rm), cpu_env,
7289                        is_q ? 16 : 8, vec_full_reg_size(s),
7290                        (len << 6) | (is_tbx << 5) | rn,
7291                        gen_helper_simd_tblx);
7292 }
7293 
7294 /* ZIP/UZP/TRN
7295  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
7296  * +---+---+-------------+------+---+------+---+-----+-----+------+------+
7297  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
7298  * +---+---+-------------+------+---+------+---+-----+-----+------+------+
7299  */
7300 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
7301 {
7302     int rd = extract32(insn, 0, 5);
7303     int rn = extract32(insn, 5, 5);
7304     int rm = extract32(insn, 16, 5);
7305     int size = extract32(insn, 22, 2);
7306     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
7307      * bit 2 indicates 1 vs 2 variant of the insn.
7308      */
7309     int opcode = extract32(insn, 12, 2);
7310     bool part = extract32(insn, 14, 1);
7311     bool is_q = extract32(insn, 30, 1);
7312     int esize = 8 << size;
7313     int i;
7314     int datasize = is_q ? 128 : 64;
7315     int elements = datasize / esize;
7316     TCGv_i64 tcg_res[2], tcg_ele;
7317 
7318     if (opcode == 0 || (size == 3 && !is_q)) {
7319         unallocated_encoding(s);
7320         return;
7321     }
7322 
7323     if (!fp_access_check(s)) {
7324         return;
7325     }
7326 
7327     tcg_res[0] = tcg_temp_new_i64();
7328     tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
7329     tcg_ele = tcg_temp_new_i64();
7330 
7331     for (i = 0; i < elements; i++) {
7332         int o, w;
7333 
7334         switch (opcode) {
7335         case 1: /* UZP1/2 */
7336         {
7337             int midpoint = elements / 2;
7338             if (i < midpoint) {
7339                 read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
7340             } else {
7341                 read_vec_element(s, tcg_ele, rm,
7342                                  2 * (i - midpoint) + part, size);
7343             }
7344             break;
7345         }
7346         case 2: /* TRN1/2 */
7347             if (i & 1) {
7348                 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
7349             } else {
7350                 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
7351             }
7352             break;
7353         case 3: /* ZIP1/2 */
7354         {
7355             int base = part * elements / 2;
7356             if (i & 1) {
7357                 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
7358             } else {
7359                 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
7360             }
7361             break;
7362         }
7363         default:
7364             g_assert_not_reached();
7365         }
7366 
7367         w = (i * esize) / 64;
7368         o = (i * esize) % 64;
7369         if (o == 0) {
7370             tcg_gen_mov_i64(tcg_res[w], tcg_ele);
7371         } else {
7372             tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
7373             tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
7374         }
7375     }
7376 
7377     for (i = 0; i <= is_q; ++i) {
7378         write_vec_element(s, tcg_res[i], rd, i, MO_64);
7379     }
7380     clear_vec_high(s, is_q, rd);
7381 }
7382 
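/*
 * Lane picture for the "1" variants (part == 0) with four elements,
 * given Vn = [a0, a1, a2, a3] and Vm = [b0, b1, b2, b3]:
 *   UZP1: [a0, a2, b0, b2]   (even-numbered lanes of Vn:Vm)
 *   TRN1: [a0, b0, a2, b2]   (even lane pairs interleaved)
 *   ZIP1: [a0, b0, a1, b1]   (low halves interleaved)
 * The "2" variants (part == 1) use the odd lanes / high halves.
 */
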
7383 /*
7384  * do_reduction_op helper
7385  *
7386  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7387  * important for correct NaN propagation that we do these
7388  * operations in exactly the order specified by the pseudocode.
7389  *
7390  * This is a recursive function; TCG temps should be freed by the
7391  * calling function once it is done with the values.
7392  */
7393 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
7394                                 int esize, int size, int vmap, TCGv_ptr fpst)
7395 {
7396     if (esize == size) {
7397         int element;
7398         MemOp msize = esize == 16 ? MO_16 : MO_32;
7399         TCGv_i32 tcg_elem;
7400 
7401         /* We should have one register left here */
7402         assert(ctpop8(vmap) == 1);
7403         element = ctz32(vmap);
7404         assert(element < 8);
7405 
7406         tcg_elem = tcg_temp_new_i32();
7407         read_vec_element_i32(s, tcg_elem, rn, element, msize);
7408         return tcg_elem;
7409     } else {
7410         int bits = size / 2;
7411         int shift = ctpop8(vmap) / 2;
7412         int vmap_lo = (vmap >> shift) & vmap;
7413         int vmap_hi = (vmap & ~vmap_lo);
7414         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7415 
7416         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
7417         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
7418         tcg_res = tcg_temp_new_i32();
7419 
7420         switch (fpopcode) {
7421         case 0x0c: /* fmaxnmv half-precision */
7422             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7423             break;
7424         case 0x0f: /* fmaxv half-precision */
7425             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
7426             break;
7427         case 0x1c: /* fminnmv half-precision */
7428             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7429             break;
7430         case 0x1f: /* fminv half-precision */
7431             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
7432             break;
7433         case 0x2c: /* fmaxnmv */
7434             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
7435             break;
7436         case 0x2f: /* fmaxv */
7437             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
7438             break;
7439         case 0x3c: /* fminnmv */
7440             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
7441             break;
7442         case 0x3f: /* fminv */
7443             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
7444             break;
7445         default:
7446             g_assert_not_reached();
7447         }
7448         return tcg_res;
7449     }
7450 }
7451 
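/*
 * The vmap parameter tracks which elements are still live at each
 * level of the recursion. For example, with four elements vmap starts
 * as 0b1111; the first split gives vmap_lo = 0b0011 and
 * vmap_hi = 0b1100, so the final result is computed as
 * op(op(e0, e1), op(e2, e3)), matching the pseudocode's pairwise
 * reduction order.
 */
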
7452 /* AdvSIMD across lanes
7453  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7454  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7455  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7456  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7457  */
7458 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
7459 {
7460     int rd = extract32(insn, 0, 5);
7461     int rn = extract32(insn, 5, 5);
7462     int size = extract32(insn, 22, 2);
7463     int opcode = extract32(insn, 12, 5);
7464     bool is_q = extract32(insn, 30, 1);
7465     bool is_u = extract32(insn, 29, 1);
7466     bool is_fp = false;
7467     bool is_min = false;
7468     int esize;
7469     int elements;
7470     int i;
7471     TCGv_i64 tcg_res, tcg_elt;
7472 
7473     switch (opcode) {
7474     case 0x1b: /* ADDV */
7475         if (is_u) {
7476             unallocated_encoding(s);
7477             return;
7478         }
7479         /* fall through */
7480     case 0x3: /* SADDLV, UADDLV */
7481     case 0xa: /* SMAXV, UMAXV */
7482     case 0x1a: /* SMINV, UMINV */
7483         if (size == 3 || (size == 2 && !is_q)) {
7484             unallocated_encoding(s);
7485             return;
7486         }
7487         break;
7488     case 0xc: /* FMAXNMV, FMINNMV */
7489     case 0xf: /* FMAXV, FMINV */
7490         /* Bit 1 of the size field encodes min vs max, and the actual size
7491          * depends on the encoding of the U bit. If U is not set (and FP16
7492          * is enabled) then we use half-precision floats instead of single
7493          * precision.
7494          */
7495         is_min = extract32(size, 1, 1);
7496         is_fp = true;
7497         if (!is_u && dc_isar_feature(aa64_fp16, s)) {
7498             size = 1;
7499         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
7500             unallocated_encoding(s);
7501             return;
7502         } else {
7503             size = 2;
7504         }
7505         break;
7506     default:
7507         unallocated_encoding(s);
7508         return;
7509     }
7510 
7511     if (!fp_access_check(s)) {
7512         return;
7513     }
7514 
7515     esize = 8 << size;
7516     elements = (is_q ? 128 : 64) / esize;
7517 
7518     tcg_res = tcg_temp_new_i64();
7519     tcg_elt = tcg_temp_new_i64();
7520 
7521     /* These instructions operate across all lanes of a vector
7522      * to produce a single result. We can guarantee that a 64
7523      * bit intermediate is sufficient:
7524      *  + for [US]ADDLV the maximum element size is 32 bits, and
7525      *    the result type is 64 bits
7526      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7527      *    same as the element size, which is 32 bits at most
7528      * For the integer operations we can choose to work at 64
7529      * or 32 bits and truncate at the end; for simplicity
7530      * we use 64 bits always. The floating point
7531      * ops do require 32 bit intermediates, though.
7532      */
7533     if (!is_fp) {
7534         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
7535 
7536         for (i = 1; i < elements; i++) {
7537             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
7538 
7539             switch (opcode) {
7540             case 0x03: /* SADDLV / UADDLV */
7541             case 0x1b: /* ADDV */
7542                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
7543                 break;
7544             case 0x0a: /* SMAXV / UMAXV */
7545                 if (is_u) {
7546                     tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
7547                 } else {
7548                     tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
7549                 }
7550                 break;
7551             case 0x1a: /* SMINV / UMINV */
7552                 if (is_u) {
7553                     tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
7554                 } else {
7555                     tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
7556                 }
7557                 break;
7558             default:
7559                 g_assert_not_reached();
7560             }
7561 
7562         }
7563     } else {
7564         /* Floating point vector reduction ops which work across 32
7565          * bit (single) or 16 bit (half-precision) intermediates.
7566          * Note that correct NaN propagation requires that we do these
7567          * operations in exactly the order specified by the pseudocode.
7568          */
7569         TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
7570         int fpopcode = opcode | is_min << 4 | is_u << 5;
7571         int vmap = (1 << elements) - 1;
7572         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
7573                                              (is_q ? 128 : 64), vmap, fpst);
7574         tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
7575     }
7576 
7577     /* Now truncate the result to the width required for the final output */
7578     if (opcode == 0x03) {
7579         /* SADDLV, UADDLV: result is 2*esize */
7580         size++;
7581     }
7582 
7583     switch (size) {
7584     case 0:
7585         tcg_gen_ext8u_i64(tcg_res, tcg_res);
7586         break;
7587     case 1:
7588         tcg_gen_ext16u_i64(tcg_res, tcg_res);
7589         break;
7590     case 2:
7591         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7592         break;
7593     case 3:
7594         break;
7595     default:
7596         g_assert_not_reached();
7597     }
7598 
7599     write_fp_dreg(s, rd, tcg_res);
7600 }
7601 
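/*
 * Example of the final truncation above: SADDLV with eight byte
 * elements (size == 0) accumulates into the 64-bit tcg_res, then size
 * is bumped to 1 because the architectural result is 2*esize wide,
 * so only the low 16 bits are kept.
 */
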
7602 /* DUP (Element, Vector)
7603  *
7604  *  31  30   29              21 20    16 15        10  9    5 4    0
7605  * +---+---+-------------------+--------+-------------+------+------+
7606  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7607  * +---+---+-------------------+--------+-------------+------+------+
7608  *
7609  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7610  */
7611 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7612                              int imm5)
7613 {
7614     int size = ctz32(imm5);
7615     int index;
7616 
7617     if (size > 3 || (size == 3 && !is_q)) {
7618         unallocated_encoding(s);
7619         return;
7620     }
7621 
7622     if (!fp_access_check(s)) {
7623         return;
7624     }
7625 
7626     index = imm5 >> (size + 1);
7627     tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7628                          vec_reg_offset(s, rn, index, size),
7629                          is_q ? 16 : 8, vec_full_reg_size(s));
7630 }
7631 
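/*
 * Example of the imm5 decode above: imm5 = 0b00110 has its lowest set
 * bit at position 1, so size = 1 (16-bit elements) and
 * index = imm5 >> 2 = 1, i.e. DUP of lane 1 of the source.
 */
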
7632 /* DUP (element, scalar)
7633  *  31                   21 20    16 15        10  9    5 4    0
7634  * +-----------------------+--------+-------------+------+------+
7635  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7636  * +-----------------------+--------+-------------+------+------+
7637  */
7638 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7639                               int imm5)
7640 {
7641     int size = ctz32(imm5);
7642     int index;
7643     TCGv_i64 tmp;
7644 
7645     if (size > 3) {
7646         unallocated_encoding(s);
7647         return;
7648     }
7649 
7650     if (!fp_access_check(s)) {
7651         return;
7652     }
7653 
7654     index = imm5 >> (size + 1);
7655 
7656     /* This instruction just extracts the specified element and
7657      * zero-extends it into the bottom of the destination register.
7658      */
7659     tmp = tcg_temp_new_i64();
7660     read_vec_element(s, tmp, rn, index, size);
7661     write_fp_dreg(s, rd, tmp);
7662 }
7663 
7664 /* DUP (General)
7665  *
7666  *  31  30   29              21 20    16 15        10  9    5 4    0
7667  * +---+---+-------------------+--------+-------------+------+------+
7668  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
7669  * +---+---+-------------------+--------+-------------+------+------+
7670  *
7671  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7672  */
7673 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7674                              int imm5)
7675 {
7676     int size = ctz32(imm5);
7677     uint32_t dofs, oprsz, maxsz;
7678 
7679     if (size > 3 || ((size == 3) && !is_q)) {
7680         unallocated_encoding(s);
7681         return;
7682     }
7683 
7684     if (!fp_access_check(s)) {
7685         return;
7686     }
7687 
7688     dofs = vec_full_reg_offset(s, rd);
7689     oprsz = is_q ? 16 : 8;
7690     maxsz = vec_full_reg_size(s);
7691 
7692     tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7693 }
7694 
7695 /* INS (Element)
7696  *
7697  *  31                   21 20    16 15  14    11  10 9    5 4    0
7698  * +-----------------------+--------+------------+---+------+------+
7699  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
7700  * +-----------------------+--------+------------+---+------+------+
7701  *
7702  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7703  * index: encoded in imm5<4:size+1>
7704  */
7705 static void handle_simd_inse(DisasContext *s, int rd, int rn,
7706                              int imm4, int imm5)
7707 {
7708     int size = ctz32(imm5);
7709     int src_index, dst_index;
7710     TCGv_i64 tmp;
7711 
7712     if (size > 3) {
7713         unallocated_encoding(s);
7714         return;
7715     }
7716 
7717     if (!fp_access_check(s)) {
7718         return;
7719     }
7720 
7721     dst_index = extract32(imm5, 1+size, 5);
7722     src_index = extract32(imm4, size, 4);
7723 
7724     tmp = tcg_temp_new_i64();
7725 
7726     read_vec_element(s, tmp, rn, src_index, size);
7727     write_vec_element(s, tmp, rd, dst_index, size);
7728 
7729     /* INS is considered a 128-bit write for SVE. */
7730     clear_vec_high(s, true, rd);
7731 }
7732 
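/*
 * Example of the index extraction above: for byte elements
 * (imm5 = 0b00111, so size = 0), dst_index = imm5<4:1> = 3 and
 * src_index = imm4<3:0> = imm4, giving INS Vd.B[3], Vn.B[imm4].
 */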
7733 
7734 /* INS (General)
7735  *
7736  *  31                   21 20    16 15        10  9    5 4    0
7737  * +-----------------------+--------+-------------+------+------+
7738  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
7739  * +-----------------------+--------+-------------+------+------+
7740  *
7741  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7742  * index: encoded in imm5<4:size+1>
7743  */
7744 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
7745 {
7746     int size = ctz32(imm5);
7747     int idx;
7748 
7749     if (size > 3) {
7750         unallocated_encoding(s);
7751         return;
7752     }
7753 
7754     if (!fp_access_check(s)) {
7755         return;
7756     }
7757 
7758     idx = extract32(imm5, 1 + size, 4 - size);
7759     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
7760 
7761     /* INS is considered a 128-bit write for SVE. */
7762     clear_vec_high(s, true, rd);
7763 }
7764 
7765 /*
7766  * UMOV (General)
7767  * SMOV (General)
7768  *
7769  *  31  30   29              21 20    16 15    12   10 9    5 4    0
7770  * +---+---+-------------------+--------+-------------+------+------+
7771  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
7772  * +---+---+-------------------+--------+-------------+------+------+
7773  *
7774  * U: unsigned when set
7775  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7776  */
7777 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
7778                                   int rn, int rd, int imm5)
7779 {
7780     int size = ctz32(imm5);
7781     int element;
7782     TCGv_i64 tcg_rd;
7783 
7784     /* Check for UnallocatedEncodings */
7785     if (is_signed) {
7786         if (size > 2 || (size == 2 && !is_q)) {
7787             unallocated_encoding(s);
7788             return;
7789         }
7790     } else {
7791         if (size > 3
7792             || (size < 3 && is_q)
7793             || (size == 3 && !is_q)) {
7794             unallocated_encoding(s);
7795             return;
7796         }
7797     }
7798 
7799     if (!fp_access_check(s)) {
7800         return;
7801     }
7802 
7803     element = extract32(imm5, 1+size, 4);
7804 
7805     tcg_rd = cpu_reg(s, rd);
7806     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
7807     if (is_signed && !is_q) {
7808         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7809     }
7810 }
7811 
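/*
 * Example: SMOV Wd, Vn.H[2] has imm5 = 0b01010, so size = 1 and
 * element = 2; the 16-bit lane is sign-extended, and the final ext32u
 * zeroes the upper half of the X register as required for a W
 * destination.
 */
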
7812 /* AdvSIMD copy
7813  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
7814  * +---+---+----+-----------------+------+---+------+---+------+------+
7815  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7816  * +---+---+----+-----------------+------+---+------+---+------+------+
7817  */
7818 static void disas_simd_copy(DisasContext *s, uint32_t insn)
7819 {
7820     int rd = extract32(insn, 0, 5);
7821     int rn = extract32(insn, 5, 5);
7822     int imm4 = extract32(insn, 11, 4);
7823     int op = extract32(insn, 29, 1);
7824     int is_q = extract32(insn, 30, 1);
7825     int imm5 = extract32(insn, 16, 5);
7826 
7827     if (op) {
7828         if (is_q) {
7829             /* INS (element) */
7830             handle_simd_inse(s, rd, rn, imm4, imm5);
7831         } else {
7832             unallocated_encoding(s);
7833         }
7834     } else {
7835         switch (imm4) {
7836         case 0:
7837             /* DUP (element - vector) */
7838             handle_simd_dupe(s, is_q, rd, rn, imm5);
7839             break;
7840         case 1:
7841             /* DUP (general) */
7842             handle_simd_dupg(s, is_q, rd, rn, imm5);
7843             break;
7844         case 3:
7845             if (is_q) {
7846                 /* INS (general) */
7847                 handle_simd_insg(s, rd, rn, imm5);
7848             } else {
7849                 unallocated_encoding(s);
7850             }
7851             break;
7852         case 5:
7853         case 7:
7854             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7855             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
7856             break;
7857         default:
7858             unallocated_encoding(s);
7859             break;
7860         }
7861     }
7862 }
7863 
7864 /* AdvSIMD modified immediate
7865  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
7866  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7867  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
7868  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7869  *
7870  * There are a number of operations that can be carried out here:
7871  *   MOVI - move (shifted) imm into register
7872  *   MVNI - move inverted (shifted) imm into register
7873  *   ORR  - bitwise OR of (shifted) imm with register
7874  *   BIC  - bitwise clear of (shifted) imm with register
7875  * With ARMv8.2 we also have:
7876  *   FMOV half-precision
7877  */
7878 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
7879 {
7880     int rd = extract32(insn, 0, 5);
7881     int cmode = extract32(insn, 12, 4);
7882     int o2 = extract32(insn, 11, 1);
7883     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
7884     bool is_neg = extract32(insn, 29, 1);
7885     bool is_q = extract32(insn, 30, 1);
7886     uint64_t imm = 0;
7887 
7888     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
7889         /* Check for FMOV (vector, immediate) - half-precision */
7890         if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
7891             unallocated_encoding(s);
7892             return;
7893         }
7894     }
7895 
7896     if (!fp_access_check(s)) {
7897         return;
7898     }
7899 
7900     if (cmode == 15 && o2 && !is_neg) {
7901         /* FMOV (vector, immediate) - half-precision */
7902         imm = vfp_expand_imm(MO_16, abcdefgh);
7903         /* now duplicate across the lanes */
7904         imm = dup_const(MO_16, imm);
7905     } else {
7906         imm = asimd_imm_const(abcdefgh, cmode, is_neg);
7907     }
7908 
7909     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
7910         /* MOVI or MVNI, with MVNI negation handled above.  */
7911         tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
7912                              vec_full_reg_size(s), imm);
7913     } else {
7914         /* ORR or BIC, with BIC negation to AND handled above.  */
7915         if (is_neg) {
7916             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
7917         } else {
7918             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
7919         }
7920     }
7921 }
7922 
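/*
 * Worked example of the half-precision FMOV path above:
 * abcdefgh = 0x70 expands via vfp_expand_imm(MO_16, ...) to 0x3c00
 * (+1.0), and dup_const(MO_16, 0x3c00) replicates it to
 * 0x3c003c003c003c00, so every 16-bit lane of Vd is written with 1.0.
 */
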
7923 /* AdvSIMD scalar copy
7924  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
7925  * +-----+----+-----------------+------+---+------+---+------+------+
7926  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7927  * +-----+----+-----------------+------+---+------+---+------+------+
7928  */
7929 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
7930 {
7931     int rd = extract32(insn, 0, 5);
7932     int rn = extract32(insn, 5, 5);
7933     int imm4 = extract32(insn, 11, 4);
7934     int imm5 = extract32(insn, 16, 5);
7935     int op = extract32(insn, 29, 1);
7936 
7937     if (op != 0 || imm4 != 0) {
7938         unallocated_encoding(s);
7939         return;
7940     }
7941 
7942     /* DUP (element, scalar) */
7943     handle_simd_dupes(s, rd, rn, imm5);
7944 }
7945 
7946 /* AdvSIMD scalar pairwise
7947  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7948  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7949  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7950  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7951  */
7952 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
7953 {
7954     int u = extract32(insn, 29, 1);
7955     int size = extract32(insn, 22, 2);
7956     int opcode = extract32(insn, 12, 5);
7957     int rn = extract32(insn, 5, 5);
7958     int rd = extract32(insn, 0, 5);
7959     TCGv_ptr fpst;
7960 
7961     /* For some ops (the FP ones), size[1] is part of the encoding.
7962      * For ADDP it strictly is not, but size[1] is always 1 for valid
7963      * encodings.
7964      */
7965     opcode |= (extract32(size, 1, 1) << 5);
7966 
7967     switch (opcode) {
7968     case 0x3b: /* ADDP */
7969         if (u || size != 3) {
7970             unallocated_encoding(s);
7971             return;
7972         }
7973         if (!fp_access_check(s)) {
7974             return;
7975         }
7976 
7977         fpst = NULL;
7978         break;
7979     case 0xc: /* FMAXNMP */
7980     case 0xd: /* FADDP */
7981     case 0xf: /* FMAXP */
7982     case 0x2c: /* FMINNMP */
7983     case 0x2f: /* FMINP */
7984         /* FP op; size[0] selects 32-bit or 64-bit */
7985         if (!u) {
7986             if (!dc_isar_feature(aa64_fp16, s)) {
7987                 unallocated_encoding(s);
7988                 return;
7989             } else {
7990                 size = MO_16;
7991             }
7992         } else {
7993             size = extract32(size, 0, 1) ? MO_64 : MO_32;
7994         }
7995 
7996         if (!fp_access_check(s)) {
7997             return;
7998         }
7999 
8000         fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8001         break;
8002     default:
8003         unallocated_encoding(s);
8004         return;
8005     }
8006 
8007     if (size == MO_64) {
8008         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8009         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8010         TCGv_i64 tcg_res = tcg_temp_new_i64();
8011 
8012         read_vec_element(s, tcg_op1, rn, 0, MO_64);
8013         read_vec_element(s, tcg_op2, rn, 1, MO_64);
8014 
8015         switch (opcode) {
8016         case 0x3b: /* ADDP */
8017             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
8018             break;
8019         case 0xc: /* FMAXNMP */
8020             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8021             break;
8022         case 0xd: /* FADDP */
8023             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8024             break;
8025         case 0xf: /* FMAXP */
8026             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8027             break;
8028         case 0x2c: /* FMINNMP */
8029             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8030             break;
8031         case 0x2f: /* FMINP */
8032             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8033             break;
8034         default:
8035             g_assert_not_reached();
8036         }
8037 
8038         write_fp_dreg(s, rd, tcg_res);
8039     } else {
8040         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8041         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8042         TCGv_i32 tcg_res = tcg_temp_new_i32();
8043 
8044         read_vec_element_i32(s, tcg_op1, rn, 0, size);
8045         read_vec_element_i32(s, tcg_op2, rn, 1, size);
8046 
8047         if (size == MO_16) {
8048             switch (opcode) {
8049             case 0xc: /* FMAXNMP */
8050                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8051                 break;
8052             case 0xd: /* FADDP */
8053                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
8054                 break;
8055             case 0xf: /* FMAXP */
8056                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
8057                 break;
8058             case 0x2c: /* FMINNMP */
8059                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8060                 break;
8061             case 0x2f: /* FMINP */
8062                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
8063                 break;
8064             default:
8065                 g_assert_not_reached();
8066             }
8067         } else {
8068             switch (opcode) {
8069             case 0xc: /* FMAXNMP */
8070                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8071                 break;
8072             case 0xd: /* FADDP */
8073                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8074                 break;
8075             case 0xf: /* FMAXP */
8076                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8077                 break;
8078             case 0x2c: /* FMINNMP */
8079                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8080                 break;
8081             case 0x2f: /* FMINP */
8082                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8083                 break;
8084             default:
8085                 g_assert_not_reached();
8086             }
8087         }
8088 
8089         write_fp_sreg(s, rd, tcg_res);
8090     }
8091 }
8092 
8093 /*
8094  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
8095  *
8096  * This implements the common shift logic and is used by both
8097  * the vector and scalar code.
8098  */
8099 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
8100                                     TCGv_i64 tcg_rnd, bool accumulate,
8101                                     bool is_u, int size, int shift)
8102 {
8103     bool extended_result = false;
8104     bool round = tcg_rnd != NULL;
8105     int ext_lshift = 0;
8106     TCGv_i64 tcg_src_hi;
8107 
8108     if (round && size == 3) {
8109         extended_result = true;
8110         ext_lshift = 64 - shift;
8111         tcg_src_hi = tcg_temp_new_i64();
8112     } else if (shift == 64) {
8113         if (!accumulate && is_u) {
8114             /* result is zero */
8115             tcg_gen_movi_i64(tcg_res, 0);
8116             return;
8117         }
8118     }
8119 
8120     /* Deal with the rounding step */
8121     if (round) {
8122         if (extended_result) {
8123             TCGv_i64 tcg_zero = tcg_constant_i64(0);
8124             if (!is_u) {
8125                 /* take care of sign extending tcg_res */
8126                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
8127                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8128                                  tcg_src, tcg_src_hi,
8129                                  tcg_rnd, tcg_zero);
8130             } else {
8131                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
8132                                  tcg_src, tcg_zero,
8133                                  tcg_rnd, tcg_zero);
8134             }
8135         } else {
8136             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
8137         }
8138     }
8139 
8140     /* Now do the shift right */
8141     if (round && extended_result) {
8142         /* extended case, >64 bit precision required */
8143         if (ext_lshift == 0) {
8144             /* special case, only high bits matter */
8145             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
8146         } else {
8147             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8148             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
8149             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
8150         }
8151     } else {
8152         if (is_u) {
8153             if (shift == 64) {
8154                 /* essentially shifting in 64 zeros */
8155                 tcg_gen_movi_i64(tcg_src, 0);
8156             } else {
8157                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
8158             }
8159         } else {
8160             if (shift == 64) {
8161                 /* effectively extending the sign-bit */
8162                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
8163             } else {
8164                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
8165             }
8166         }
8167     }
8168 
8169     if (accumulate) {
8170         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
8171     } else {
8172         tcg_gen_mov_i64(tcg_res, tcg_src);
8173     }
8174 }
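
/*
 * Worked example of the extended-precision path above.  A minimal
 * host-side sketch of the same arithmetic for the unsigned case
 * (illustration only, not part of the decoder; that the host compiler
 * supports __int128 is an assumption of the sketch):
 *
 *     static inline uint64_t urshr64_example(uint64_t src, int shift)
 *     {
 *         // 65 bit intermediate, mirroring the tcg_src_hi carry above
 *         unsigned __int128 t = (unsigned __int128)src
 *                             + ((uint64_t)1 << (shift - 1));
 *         return (uint64_t)(t >> shift);
 *     }
 *
 * E.g. src == 0xffffffffffffffff, shift == 8: the rounding constant is
 * 0x80, the add carries into bit 64 (tcg_src == 0x7f, tcg_src_hi == 1),
 * and the shift step computes (tcg_src >> 8) | (tcg_src_hi << 56), i.e.
 * (2^64 + 127) >> 8 == 2^56, a result that a plain 64 bit
 * add-then-shift would have lost to the carry-out.
 */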
8175 
8176 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
8177 static void handle_scalar_simd_shri(DisasContext *s,
8178                                     bool is_u, int immh, int immb,
8179                                     int opcode, int rn, int rd)
8180 {
8181     const int size = 3;
8182     int immhb = immh << 3 | immb;
8183     int shift = 2 * (8 << size) - immhb;
8184     bool accumulate = false;
8185     bool round = false;
8186     bool insert = false;
8187     TCGv_i64 tcg_rn;
8188     TCGv_i64 tcg_rd;
8189     TCGv_i64 tcg_round;
8190 
8191     if (!extract32(immh, 3, 1)) {
8192         unallocated_encoding(s);
8193         return;
8194     }
8195 
8196     if (!fp_access_check(s)) {
8197         return;
8198     }
8199 
8200     switch (opcode) {
8201     case 0x02: /* SSRA / USRA (accumulate) */
8202         accumulate = true;
8203         break;
8204     case 0x04: /* SRSHR / URSHR (rounding) */
8205         round = true;
8206         break;
8207     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8208         accumulate = round = true;
8209         break;
8210     case 0x08: /* SRI */
8211         insert = true;
8212         break;
8213     }
8214 
8215     if (round) {
8216         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8217     } else {
8218         tcg_round = NULL;
8219     }
8220 
8221     tcg_rn = read_fp_dreg(s, rn);
8222     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8223 
8224     if (insert) {
8225         /* A shift count equal to the element size is valid but does
8226          * nothing; special-case it to avoid a potential shift by 64.
8227          */
8228         int esize = 8 << size;
8229         if (shift != esize) {
8230             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
8231             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
8232         }
8233     } else {
8234         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8235                                 accumulate, is_u, size, shift);
8236     }
8237 
8238     write_fp_dreg(s, rd, tcg_rd);
8239 }
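
/*
 * Immediate decode example for the function above: size is fixed at 3,
 * so esize == 64 and shift == 128 - (immh:immb).  Since immh<3> must be
 * set, immh:immb ranges over [64, 127], giving shift in [1, 64]; e.g.
 * immh == 0b1111, immb == 0b111 encodes a shift of 1, while
 * immh == 0b1000, immb == 0b000 encodes the maximum shift of 64.
 * For SRI with shift == 8 the deposit writes (rn >> 8) into rd[55:0]
 * and leaves rd[63:56] untouched.
 */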
8240 
8241 /* SHL/SLI - Scalar shift left */
8242 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
8243                                     int immh, int immb, int opcode,
8244                                     int rn, int rd)
8245 {
8246     int size = 32 - clz32(immh) - 1;
8247     int immhb = immh << 3 | immb;
8248     int shift = immhb - (8 << size);
8249     TCGv_i64 tcg_rn;
8250     TCGv_i64 tcg_rd;
8251 
8252     if (!extract32(immh, 3, 1)) {
8253         unallocated_encoding(s);
8254         return;
8255     }
8256 
8257     if (!fp_access_check(s)) {
8258         return;
8259     }
8260 
8261     tcg_rn = read_fp_dreg(s, rn);
8262     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8263 
8264     if (insert) {
8265         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
8266     } else {
8267         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
8268     }
8269 
8270     write_fp_dreg(s, rd, tcg_rd);
8271 }
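
/*
 * Here the immediate encodes the shift directly: with immh<3> set,
 * size == 3 and shift == (immh:immb) - 64, i.e. [0, 63].  For SLI with
 * shift == 16 the deposit sets rd[63:16] = rn[47:0] and preserves
 * rd[15:0], the "insert" behaviour that distinguishes SLI from SHL.
 */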
8272 
8273 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8274  * (signed/unsigned) narrowing */
8275 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
8276                                    bool is_u_shift, bool is_u_narrow,
8277                                    int immh, int immb, int opcode,
8278                                    int rn, int rd)
8279 {
8280     int immhb = immh << 3 | immb;
8281     int size = 32 - clz32(immh) - 1;
8282     int esize = 8 << size;
8283     int shift = (2 * esize) - immhb;
8284     int elements = is_scalar ? 1 : (64 / esize);
8285     bool round = extract32(opcode, 0, 1);
8286     MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
8287     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
8288     TCGv_i32 tcg_rd_narrowed;
8289     TCGv_i64 tcg_final;
8290 
8291     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
8292         { gen_helper_neon_narrow_sat_s8,
8293           gen_helper_neon_unarrow_sat8 },
8294         { gen_helper_neon_narrow_sat_s16,
8295           gen_helper_neon_unarrow_sat16 },
8296         { gen_helper_neon_narrow_sat_s32,
8297           gen_helper_neon_unarrow_sat32 },
8298         { NULL, NULL },
8299     };
8300     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
8301         gen_helper_neon_narrow_sat_u8,
8302         gen_helper_neon_narrow_sat_u16,
8303         gen_helper_neon_narrow_sat_u32,
8304         NULL
8305     };
8306     NeonGenNarrowEnvFn *narrowfn;
8307 
8308     int i;
8309 
8310     assert(size < 4);
8311 
8312     if (extract32(immh, 3, 1)) {
8313         unallocated_encoding(s);
8314         return;
8315     }
8316 
8317     if (!fp_access_check(s)) {
8318         return;
8319     }
8320 
8321     if (is_u_shift) {
8322         narrowfn = unsigned_narrow_fns[size];
8323     } else {
8324         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
8325     }
8326 
8327     tcg_rn = tcg_temp_new_i64();
8328     tcg_rd = tcg_temp_new_i64();
8329     tcg_rd_narrowed = tcg_temp_new_i32();
8330     tcg_final = tcg_temp_new_i64();
8331 
8332     if (round) {
8333         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
8334     } else {
8335         tcg_round = NULL;
8336     }
8337 
8338     for (i = 0; i < elements; i++) {
8339         read_vec_element(s, tcg_rn, rn, i, ldop);
8340         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8341                                 false, is_u_shift, size + 1, shift);
8342         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
8343         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
8344         if (i == 0) {
8345             tcg_gen_mov_i64(tcg_final, tcg_rd);
8346         } else {
8347             tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8348         }
8349     }
8350 
8351     if (!is_q) {
8352         write_vec_element(s, tcg_final, rd, 0, MO_64);
8353     } else {
8354         write_vec_element(s, tcg_final, rd, 1, MO_64);
8355     }
8356     clear_vec_high(s, is_q, rd);
8357 }
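
/*
 * Narrowing example for the loop above: for SQSHRN with immh == 0b0010,
 * immb == 0b000 we get size == 1, esize == 16, shift == 16 and (for the
 * vector form) elements == 4.  Each 32 bit source element is shifted,
 * saturated to 16 bits and deposited at bit offset 16 * i of tcg_final;
 * the !is_q form writes the packed result to the low half of Vd while
 * the is_q ("...2" suffix) form writes the high half and leaves the low
 * half intact.
 */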
8358 
8359 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8360 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
8361                              bool src_unsigned, bool dst_unsigned,
8362                              int immh, int immb, int rn, int rd)
8363 {
8364     int immhb = immh << 3 | immb;
8365     int size = 32 - clz32(immh) - 1;
8366     int shift = immhb - (8 << size);
8367     int pass;
8368 
8369     assert(immh != 0);
8370     assert(!(scalar && is_q));
8371 
8372     if (!scalar) {
8373         if (!is_q && extract32(immh, 3, 1)) {
8374             unallocated_encoding(s);
8375             return;
8376         }
8377 
8378         /* Since we use the variable-shift helpers we must
8379          * replicate the shift count into each element of
8380          * the tcg_shift value.
8381          */
8382         switch (size) {
8383         case 0:
8384             shift |= shift << 8;
8385             /* fall through */
8386         case 1:
8387             shift |= shift << 16;
8388             break;
8389         case 2:
8390         case 3:
8391             break;
8392         default:
8393             g_assert_not_reached();
8394         }
8395     }
8396 
8397     if (!fp_access_check(s)) {
8398         return;
8399     }
8400 
8401     if (size == 3) {
8402         TCGv_i64 tcg_shift = tcg_constant_i64(shift);
8403         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
8404             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
8405             { NULL, gen_helper_neon_qshl_u64 },
8406         };
8407         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
8408         int maxpass = is_q ? 2 : 1;
8409 
8410         for (pass = 0; pass < maxpass; pass++) {
8411             TCGv_i64 tcg_op = tcg_temp_new_i64();
8412 
8413             read_vec_element(s, tcg_op, rn, pass, MO_64);
8414             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8415             write_vec_element(s, tcg_op, rd, pass, MO_64);
8416         }
8417         clear_vec_high(s, is_q, rd);
8418     } else {
8419         TCGv_i32 tcg_shift = tcg_constant_i32(shift);
8420         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
8421             {
8422                 { gen_helper_neon_qshl_s8,
8423                   gen_helper_neon_qshl_s16,
8424                   gen_helper_neon_qshl_s32 },
8425                 { gen_helper_neon_qshlu_s8,
8426                   gen_helper_neon_qshlu_s16,
8427                   gen_helper_neon_qshlu_s32 }
8428             }, {
8429                 { NULL, NULL, NULL },
8430                 { gen_helper_neon_qshl_u8,
8431                   gen_helper_neon_qshl_u16,
8432                   gen_helper_neon_qshl_u32 }
8433             }
8434         };
8435         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
8436         MemOp memop = scalar ? size : MO_32;
8437         int maxpass = scalar ? 1 : is_q ? 4 : 2;
8438 
8439         for (pass = 0; pass < maxpass; pass++) {
8440             TCGv_i32 tcg_op = tcg_temp_new_i32();
8441 
8442             read_vec_element_i32(s, tcg_op, rn, pass, memop);
8443             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8444             if (scalar) {
8445                 switch (size) {
8446                 case 0:
8447                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
8448                     break;
8449                 case 1:
8450                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
8451                     break;
8452                 case 2:
8453                     break;
8454                 default:
8455                     g_assert_not_reached();
8456                 }
8457                 write_fp_sreg(s, rd, tcg_op);
8458             } else {
8459                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8460             }
8461         }
8462 
8463         if (!scalar) {
8464             clear_vec_high(s, is_q, rd);
8465         }
8466     }
8467 }
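
/*
 * Shift replication example: for size == 0 with shift == 5 the code
 * above builds 0x05050505, so each 8 bit lane of the packed 32 bit
 * value handed to the neon_qshl_* helper sees the same shift count;
 * the helpers operate lane-wise on the packed value.
 */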
8468 
8469 /* Common vector code for handling integer to FP conversion */
8470 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
8471                                    int elements, int is_signed,
8472                                    int fracbits, int size)
8473 {
8474     TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8475     TCGv_i32 tcg_shift = NULL;
8476 
8477     MemOp mop = size | (is_signed ? MO_SIGN : 0);
8478     int pass;
8479 
8480     if (fracbits || size == MO_64) {
8481         tcg_shift = tcg_constant_i32(fracbits);
8482     }
8483 
8484     if (size == MO_64) {
8485         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
8486         TCGv_i64 tcg_double = tcg_temp_new_i64();
8487 
8488         for (pass = 0; pass < elements; pass++) {
8489             read_vec_element(s, tcg_int64, rn, pass, mop);
8490 
8491             if (is_signed) {
8492                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
8493                                      tcg_shift, tcg_fpst);
8494             } else {
8495                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
8496                                      tcg_shift, tcg_fpst);
8497             }
8498             if (elements == 1) {
8499                 write_fp_dreg(s, rd, tcg_double);
8500             } else {
8501                 write_vec_element(s, tcg_double, rd, pass, MO_64);
8502             }
8503         }
8504     } else {
8505         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
8506         TCGv_i32 tcg_float = tcg_temp_new_i32();
8507 
8508         for (pass = 0; pass < elements; pass++) {
8509             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
8510 
8511             switch (size) {
8512             case MO_32:
8513                 if (fracbits) {
8514                     if (is_signed) {
8515                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
8516                                              tcg_shift, tcg_fpst);
8517                     } else {
8518                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
8519                                              tcg_shift, tcg_fpst);
8520                     }
8521                 } else {
8522                     if (is_signed) {
8523                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
8524                     } else {
8525                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
8526                     }
8527                 }
8528                 break;
8529             case MO_16:
8530                 if (fracbits) {
8531                     if (is_signed) {
8532                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
8533                                              tcg_shift, tcg_fpst);
8534                     } else {
8535                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
8536                                              tcg_shift, tcg_fpst);
8537                     }
8538                 } else {
8539                     if (is_signed) {
8540                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
8541                     } else {
8542                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
8543                     }
8544                 }
8545                 break;
8546             default:
8547                 g_assert_not_reached();
8548             }
8549 
8550             if (elements == 1) {
8551                 write_fp_sreg(s, rd, tcg_float);
8552             } else {
8553                 write_vec_element_i32(s, tcg_float, rd, pass, size);
8554             }
8555         }
8556     }
8557 
8558     clear_vec_high(s, elements << size == 16, rd);
8559 }
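
/*
 * Fixed-point example: tcg_shift is the number of fractional bits, and
 * the fixed-point helpers (vfp_sltos and friends) divide by 2^fracbits
 * as part of the conversion.  E.g. SCVTF of the signed 32 bit value 384
 * (0x180) with fracbits == 8 yields 384 / 256 == 1.5.  With
 * fracbits == 0 the plain sitos/uitos style helpers are used instead.
 */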
8560 
8561 /* UCVTF/SCVTF - Integer to FP conversion */
8562 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8563                                          bool is_q, bool is_u,
8564                                          int immh, int immb, int opcode,
8565                                          int rn, int rd)
8566 {
8567     int size, elements, fracbits;
8568     int immhb = immh << 3 | immb;
8569 
8570     if (immh & 8) {
8571         size = MO_64;
8572         if (!is_scalar && !is_q) {
8573             unallocated_encoding(s);
8574             return;
8575         }
8576     } else if (immh & 4) {
8577         size = MO_32;
8578     } else if (immh & 2) {
8579         size = MO_16;
8580         if (!dc_isar_feature(aa64_fp16, s)) {
8581             unallocated_encoding(s);
8582             return;
8583         }
8584     } else {
8585         /* immh == 0 would be a failure of the decode logic */
8586         g_assert(immh == 1);
8587         unallocated_encoding(s);
8588         return;
8589     }
8590 
8591     if (is_scalar) {
8592         elements = 1;
8593     } else {
8594         elements = (8 << is_q) >> size;
8595     }
8596     fracbits = (16 << size) - immhb;
8597 
8598     if (!fp_access_check(s)) {
8599         return;
8600     }
8601 
8602     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8603 }
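
/*
 * Decode example: the position of the leading 1 in immh selects the
 * element size, and fracbits == (16 << size) - (immh:immb).  For
 * size == MO_32 (immh == 01xx) that is 64 - immhb with immhb in
 * [32, 63], i.e. fracbits in [1, 32].
 */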
8604 
8605 /* FCVTZS, FCVTZU - FP to fixed-point conversion */
8606 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8607                                          bool is_q, bool is_u,
8608                                          int immh, int immb, int rn, int rd)
8609 {
8610     int immhb = immh << 3 | immb;
8611     int pass, size, fracbits;
8612     TCGv_ptr tcg_fpstatus;
8613     TCGv_i32 tcg_rmode, tcg_shift;
8614 
8615     if (immh & 0x8) {
8616         size = MO_64;
8617         if (!is_scalar && !is_q) {
8618             unallocated_encoding(s);
8619             return;
8620         }
8621     } else if (immh & 0x4) {
8622         size = MO_32;
8623     } else if (immh & 0x2) {
8624         size = MO_16;
8625         if (!dc_isar_feature(aa64_fp16, s)) {
8626             unallocated_encoding(s);
8627             return;
8628         }
8629     } else {
8630         /* Should have split out AdvSIMD modified immediate earlier.  */
8631         assert(immh == 1);
8632         unallocated_encoding(s);
8633         return;
8634     }
8635 
8636     if (!fp_access_check(s)) {
8637         return;
8638     }
8639 
8640     assert(!(is_scalar && is_q));
8641 
8642     tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8643     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
8644     fracbits = (16 << size) - immhb;
8645     tcg_shift = tcg_constant_i32(fracbits);
8646 
8647     if (size == MO_64) {
8648         int maxpass = is_scalar ? 1 : 2;
8649 
8650         for (pass = 0; pass < maxpass; pass++) {
8651             TCGv_i64 tcg_op = tcg_temp_new_i64();
8652 
8653             read_vec_element(s, tcg_op, rn, pass, MO_64);
8654             if (is_u) {
8655                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8656             } else {
8657                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8658             }
8659             write_vec_element(s, tcg_op, rd, pass, MO_64);
8660         }
8661         clear_vec_high(s, is_q, rd);
8662     } else {
8663         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
8664         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
8665 
8666         switch (size) {
8667         case MO_16:
8668             if (is_u) {
8669                 fn = gen_helper_vfp_touhh;
8670             } else {
8671                 fn = gen_helper_vfp_toshh;
8672             }
8673             break;
8674         case MO_32:
8675             if (is_u) {
8676                 fn = gen_helper_vfp_touls;
8677             } else {
8678                 fn = gen_helper_vfp_tosls;
8679             }
8680             break;
8681         default:
8682             g_assert_not_reached();
8683         }
8684 
8685         for (pass = 0; pass < maxpass; pass++) {
8686             TCGv_i32 tcg_op = tcg_temp_new_i32();
8687 
8688             read_vec_element_i32(s, tcg_op, rn, pass, size);
8689             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8690             if (is_scalar) {
8691                 write_fp_sreg(s, rd, tcg_op);
8692             } else {
8693                 write_vec_element_i32(s, tcg_op, rd, pass, size);
8694             }
8695         }
8696         if (!is_scalar) {
8697             clear_vec_high(s, is_q, rd);
8698         }
8699     }
8700 
8701     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
8702 }
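
/*
 * Note the rounding-mode bracket in the function above: FCVTZ* always
 * truncates, so FPROUNDING_ZERO is forced before the helpers run and
 * the previous mode is restored afterwards.  E.g. FCVTZS of 1.5 with
 * fracbits == 8 scales to 1.5 * 256 == 384 and truncates toward zero,
 * producing 384.
 */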
8703 
8704 /* AdvSIMD scalar shift by immediate
8705  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
8706  * +-----+---+-------------+------+------+--------+---+------+------+
8707  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8708  * +-----+---+-------------+------+------+--------+---+------+------+
8709  *
8710  * This is the scalar version, so it works on fixed-size registers.
8711  */
8712 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8713 {
8714     int rd = extract32(insn, 0, 5);
8715     int rn = extract32(insn, 5, 5);
8716     int opcode = extract32(insn, 11, 5);
8717     int immb = extract32(insn, 16, 3);
8718     int immh = extract32(insn, 19, 4);
8719     bool is_u = extract32(insn, 29, 1);
8720 
8721     if (immh == 0) {
8722         unallocated_encoding(s);
8723         return;
8724     }
8725 
8726     switch (opcode) {
8727     case 0x08: /* SRI */
8728         if (!is_u) {
8729             unallocated_encoding(s);
8730             return;
8731         }
8732         /* fall through */
8733     case 0x00: /* SSHR / USHR */
8734     case 0x02: /* SSRA / USRA */
8735     case 0x04: /* SRSHR / URSHR */
8736     case 0x06: /* SRSRA / URSRA */
8737         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8738         break;
8739     case 0x0a: /* SHL / SLI */
8740         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8741         break;
8742     case 0x1c: /* SCVTF, UCVTF */
8743         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8744                                      opcode, rn, rd);
8745         break;
8746     case 0x10: /* SQSHRUN, SQSHRUN2 */
8747     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8748         if (!is_u) {
8749             unallocated_encoding(s);
8750             return;
8751         }
8752         handle_vec_simd_sqshrn(s, true, false, false, true,
8753                                immh, immb, opcode, rn, rd);
8754         break;
8755     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN, UQSHRN2 */
8756     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8757         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8758                                immh, immb, opcode, rn, rd);
8759         break;
8760     case 0xc: /* SQSHLU */
8761         if (!is_u) {
8762             unallocated_encoding(s);
8763             return;
8764         }
8765         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8766         break;
8767     case 0xe: /* SQSHL, UQSHL */
8768         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8769         break;
8770     case 0x1f: /* FCVTZS, FCVTZU */
8771         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8772         break;
8773     default:
8774         unallocated_encoding(s);
8775         break;
8776     }
8777 }
8778 
8779 /* AdvSIMD scalar three different
8780  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8781  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8782  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8783  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8784  */
8785 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
8786 {
8787     bool is_u = extract32(insn, 29, 1);
8788     int size = extract32(insn, 22, 2);
8789     int opcode = extract32(insn, 12, 4);
8790     int rm = extract32(insn, 16, 5);
8791     int rn = extract32(insn, 5, 5);
8792     int rd = extract32(insn, 0, 5);
8793 
8794     if (is_u) {
8795         unallocated_encoding(s);
8796         return;
8797     }
8798 
8799     switch (opcode) {
8800     case 0x9: /* SQDMLAL, SQDMLAL2 */
8801     case 0xb: /* SQDMLSL, SQDMLSL2 */
8802     case 0xd: /* SQDMULL, SQDMULL2 */
8803         if (size == 0 || size == 3) {
8804             unallocated_encoding(s);
8805             return;
8806         }
8807         break;
8808     default:
8809         unallocated_encoding(s);
8810         return;
8811     }
8812 
8813     if (!fp_access_check(s)) {
8814         return;
8815     }
8816 
8817     if (size == 2) {
8818         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8819         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8820         TCGv_i64 tcg_res = tcg_temp_new_i64();
8821 
8822         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
8823         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
8824 
8825         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
8826         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
8827 
8828         switch (opcode) {
8829         case 0xd: /* SQDMULL, SQDMULL2 */
8830             break;
8831         case 0xb: /* SQDMLSL, SQDMLSL2 */
8832             tcg_gen_neg_i64(tcg_res, tcg_res);
8833             /* fall through */
8834         case 0x9: /* SQDMLAL, SQDMLAL2 */
8835             read_vec_element(s, tcg_op1, rd, 0, MO_64);
8836             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
8837                                               tcg_res, tcg_op1);
8838             break;
8839         default:
8840             g_assert_not_reached();
8841         }
8842 
8843         write_fp_dreg(s, rd, tcg_res);
8844     } else {
8845         TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
8846         TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
8847         TCGv_i64 tcg_res = tcg_temp_new_i64();
8848 
8849         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
8850         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
8851 
8852         switch (opcode) {
8853         case 0xd: /* SQDMULL, SQDMULL2 */
8854             break;
8855         case 0xb: /* SQDMLSL, SQDMLSL2 */
8856             gen_helper_neon_negl_u32(tcg_res, tcg_res);
8857             /* fall through */
8858         case 0x9: /* SQDMLAL, SQDMLAL2 */
8859         {
8860             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
8861             read_vec_element(s, tcg_op3, rd, 0, MO_32);
8862             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
8863                                               tcg_res, tcg_op3);
8864             break;
8865         }
8866         default:
8867             g_assert_not_reached();
8868         }
8869 
8870         tcg_gen_ext32u_i64(tcg_res, tcg_res);
8871         write_fp_dreg(s, rd, tcg_res);
8872     }
8873 }
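
/*
 * Saturation example for the above: SQDMULL of INT32_MIN by INT32_MIN
 * produces the product 2^62; the doubling is done as a saturating add
 * of the product to itself, and 2^62 + 2^62 == 2^63 overflows signed
 * 64 bits, so the result saturates to INT64_MAX (0x7fffffffffffffff)
 * and the helper sets the QC flag.
 */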
8874 
8875 static void handle_3same_64(DisasContext *s, int opcode, bool u,
8876                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
8877 {
8878     /* Handle 64x64->64 opcodes which are shared between the scalar
8879      * and vector 3-same groups. We cover every opcode where size == 3
8880      * is valid in either the three-reg-same (integer, not pairwise)
8881      * or scalar-three-reg-same groups.
8882      */
8883     TCGCond cond;
8884 
8885     switch (opcode) {
8886     case 0x1: /* SQADD */
8887         if (u) {
8888             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8889         } else {
8890             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8891         }
8892         break;
8893     case 0x5: /* SQSUB */
8894         if (u) {
8895             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8896         } else {
8897             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8898         }
8899         break;
8900     case 0x6: /* CMGT, CMHI */
8901         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
8902          * We implement this using setcond (test) and then negating.
8903          */
8904         cond = u ? TCG_COND_GTU : TCG_COND_GT;
8905     do_cmop:
8906         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8907         tcg_gen_neg_i64(tcg_rd, tcg_rd);
8908         break;
8909     case 0x7: /* CMGE, CMHS */
8910         cond = u ? TCG_COND_GEU : TCG_COND_GE;
8911         goto do_cmop;
8912     case 0x11: /* CMTST, CMEQ */
8913         if (u) {
8914             cond = TCG_COND_EQ;
8915             goto do_cmop;
8916         }
8917         gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8918         break;
8919     case 0x8: /* SSHL, USHL */
8920         if (u) {
8921             gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
8922         } else {
8923             gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
8924         }
8925         break;
8926     case 0x9: /* SQSHL, UQSHL */
8927         if (u) {
8928             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8929         } else {
8930             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8931         }
8932         break;
8933     case 0xa: /* SRSHL, URSHL */
8934         if (u) {
8935             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8936         } else {
8937             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8938         }
8939         break;
8940     case 0xb: /* SQRSHL, UQRSHL */
8941         if (u) {
8942             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8943         } else {
8944             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8945         }
8946         break;
8947     case 0x10: /* ADD, SUB */
8948         if (u) {
8949             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8950         } else {
8951             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8952         }
8953         break;
8954     default:
8955         g_assert_not_reached();
8956     }
8957 }
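
/*
 * The do_cmop pattern above relies on setcond producing 0 or 1 and on
 * negation turning 1 into an all-ones mask: e.g. CMGT with rn == 5 and
 * rm == 3 yields setcond == 1, and -1 == 0xffffffffffffffff, the
 * "all bits set on true" result the comparison insns require.
 */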
8958 
8959 /* Handle the 3-same-operands float operations; shared by the scalar
8960  * and vector encodings. The caller must filter out any encodings
8961  * not allocated for the encoding it is dealing with.
8962  */
8963 static void handle_3same_float(DisasContext *s, int size, int elements,
8964                                int fpopcode, int rd, int rn, int rm)
8965 {
8966     int pass;
8967     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
8968 
8969     for (pass = 0; pass < elements; pass++) {
8970         if (size) {
8971             /* Double */
8972             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8973             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8974             TCGv_i64 tcg_res = tcg_temp_new_i64();
8975 
8976             read_vec_element(s, tcg_op1, rn, pass, MO_64);
8977             read_vec_element(s, tcg_op2, rm, pass, MO_64);
8978 
8979             switch (fpopcode) {
8980             case 0x39: /* FMLS */
8981                 /* As usual for ARM, separate negation for fused multiply-add */
8982                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
8983                 /* fall through */
8984             case 0x19: /* FMLA */
8985                 read_vec_element(s, tcg_res, rd, pass, MO_64);
8986                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
8987                                        tcg_res, fpst);
8988                 break;
8989             case 0x18: /* FMAXNM */
8990                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8991                 break;
8992             case 0x1a: /* FADD */
8993                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8994                 break;
8995             case 0x1b: /* FMULX */
8996                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
8997                 break;
8998             case 0x1c: /* FCMEQ */
8999                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9000                 break;
9001             case 0x1e: /* FMAX */
9002                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
9003                 break;
9004             case 0x1f: /* FRECPS */
9005                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9006                 break;
9007             case 0x38: /* FMINNM */
9008                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
9009                 break;
9010             case 0x3a: /* FSUB */
9011                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
9012                 break;
9013             case 0x3e: /* FMIN */
9014                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
9015                 break;
9016             case 0x3f: /* FRSQRTS */
9017                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9018                 break;
9019             case 0x5b: /* FMUL */
9020                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
9021                 break;
9022             case 0x5c: /* FCMGE */
9023                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9024                 break;
9025             case 0x5d: /* FACGE */
9026                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9027                 break;
9028             case 0x5f: /* FDIV */
9029                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
9030                 break;
9031             case 0x7a: /* FABD */
9032                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
9033                 gen_helper_vfp_absd(tcg_res, tcg_res);
9034                 break;
9035             case 0x7c: /* FCMGT */
9036                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9037                 break;
9038             case 0x7d: /* FACGT */
9039                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9040                 break;
9041             default:
9042                 g_assert_not_reached();
9043             }
9044 
9045             write_vec_element(s, tcg_res, rd, pass, MO_64);
9046         } else {
9047             /* Single */
9048             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9049             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9050             TCGv_i32 tcg_res = tcg_temp_new_i32();
9051 
9052             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9053             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9054 
9055             switch (fpopcode) {
9056             case 0x39: /* FMLS */
9057                 /* As usual for ARM, separate negation for fused multiply-add */
9058                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
9059                 /* fall through */
9060             case 0x19: /* FMLA */
9061                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9062                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
9063                                        tcg_res, fpst);
9064                 break;
9065             case 0x1a: /* FADD */
9066                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
9067                 break;
9068             case 0x1b: /* FMULX */
9069                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
9070                 break;
9071             case 0x1c: /* FCMEQ */
9072                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9073                 break;
9074             case 0x1e: /* FMAX */
9075                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
9076                 break;
9077             case 0x1f: /* FRECPS */
9078                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9079                 break;
9080             case 0x18: /* FMAXNM */
9081                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
9082                 break;
9083             case 0x38: /* FMINNM */
9084                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
9085                 break;
9086             case 0x3a: /* FSUB */
9087                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9088                 break;
9089             case 0x3e: /* FMIN */
9090                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
9091                 break;
9092             case 0x3f: /* FRSQRTS */
9093                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9094                 break;
9095             case 0x5b: /* FMUL */
9096                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
9097                 break;
9098             case 0x5c: /* FCMGE */
9099                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9100                 break;
9101             case 0x5d: /* FACGE */
9102                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9103                 break;
9104             case 0x5f: /* FDIV */
9105                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
9106                 break;
9107             case 0x7a: /* FABD */
9108                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9109                 gen_helper_vfp_abss(tcg_res, tcg_res);
9110                 break;
9111             case 0x7c: /* FCMGT */
9112                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9113                 break;
9114             case 0x7d: /* FACGT */
9115                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9116                 break;
9117             default:
9118                 g_assert_not_reached();
9119             }
9120 
9121             if (elements == 1) {
9122                 /* scalar single so clear high part */
9123                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9124 
9125                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
9126                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
9127             } else {
9128                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9129             }
9130         }
9131     }
9132 
9133     clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
9134 }
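
/*
 * Two implementation notes on the float cases above: FMLS negates the
 * multiplicand rather than the product so that the operation remains a
 * single fused multiply-add, rd = (-rn) * rm + rd, with only one
 * rounding step; and FABD is composed as a subtract followed by an
 * absolute value on the result.
 */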
9135 
9136 /* AdvSIMD scalar three same
9137  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
9138  * +-----+---+-----------+------+---+------+--------+---+------+------+
9139  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9140  * +-----+---+-----------+------+---+------+--------+---+------+------+
9141  */
9142 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
9143 {
9144     int rd = extract32(insn, 0, 5);
9145     int rn = extract32(insn, 5, 5);
9146     int opcode = extract32(insn, 11, 5);
9147     int rm = extract32(insn, 16, 5);
9148     int size = extract32(insn, 22, 2);
9149     bool u = extract32(insn, 29, 1);
9150     TCGv_i64 tcg_rd;
9151 
9152     if (opcode >= 0x18) {
9153         /* Floating point: U, size[1] and opcode indicate operation */
9154         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
9155         switch (fpopcode) {
9156         case 0x1b: /* FMULX */
9157         case 0x1f: /* FRECPS */
9158         case 0x3f: /* FRSQRTS */
9159         case 0x5d: /* FACGE */
9160         case 0x7d: /* FACGT */
9161         case 0x1c: /* FCMEQ */
9162         case 0x5c: /* FCMGE */
9163         case 0x7c: /* FCMGT */
9164         case 0x7a: /* FABD */
9165             break;
9166         default:
9167             unallocated_encoding(s);
9168             return;
9169         }
9170 
9171         if (!fp_access_check(s)) {
9172             return;
9173         }
9174 
9175         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
9176         return;
9177     }
9178 
9179     switch (opcode) {
9180     case 0x1: /* SQADD, UQADD */
9181     case 0x5: /* SQSUB, UQSUB */
9182     case 0x9: /* SQSHL, UQSHL */
9183     case 0xb: /* SQRSHL, UQRSHL */
9184         break;
9185     case 0x8: /* SSHL, USHL */
9186     case 0xa: /* SRSHL, URSHL */
9187     case 0x6: /* CMGT, CMHI */
9188     case 0x7: /* CMGE, CMHS */
9189     case 0x11: /* CMTST, CMEQ */
9190     case 0x10: /* ADD, SUB (vector) */
9191         if (size != 3) {
9192             unallocated_encoding(s);
9193             return;
9194         }
9195         break;
9196     case 0x16: /* SQDMULH, SQRDMULH (vector) */
9197         if (size != 1 && size != 2) {
9198             unallocated_encoding(s);
9199             return;
9200         }
9201         break;
9202     default:
9203         unallocated_encoding(s);
9204         return;
9205     }
9206 
9207     if (!fp_access_check(s)) {
9208         return;
9209     }
9210 
9211     tcg_rd = tcg_temp_new_i64();
9212 
9213     if (size == 3) {
9214         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9215         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
9216 
9217         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
9218     } else {
9219         /* Do a single operation on the lowest element in the vector.
9220          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
9221          * no side effects for all these operations.
9222          * OPTME: special-purpose helpers would avoid doing some
9223          * unnecessary work in the helper for the 8 and 16 bit cases.
9224          */
9225         NeonGenTwoOpEnvFn *genenvfn;
9226         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9227         TCGv_i32 tcg_rm = tcg_temp_new_i32();
9228         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
9229 
9230         read_vec_element_i32(s, tcg_rn, rn, 0, size);
9231         read_vec_element_i32(s, tcg_rm, rm, 0, size);
9232 
9233         switch (opcode) {
9234         case 0x1: /* SQADD, UQADD */
9235         {
9236             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9237                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9238                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9239                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9240             };
9241             genenvfn = fns[size][u];
9242             break;
9243         }
9244         case 0x5: /* SQSUB, UQSUB */
9245         {
9246             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9247                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9248                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9249                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9250             };
9251             genenvfn = fns[size][u];
9252             break;
9253         }
9254         case 0x9: /* SQSHL, UQSHL */
9255         {
9256             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9257                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9258                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9259                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9260             };
9261             genenvfn = fns[size][u];
9262             break;
9263         }
9264         case 0xb: /* SQRSHL, UQRSHL */
9265         {
9266             static NeonGenTwoOpEnvFn * const fns[3][2] = {
9267                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9268                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9269                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9270             };
9271             genenvfn = fns[size][u];
9272             break;
9273         }
9274         case 0x16: /* SQDMULH, SQRDMULH */
9275         {
9276             static NeonGenTwoOpEnvFn * const fns[2][2] = {
9277                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9278                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9279             };
9280             assert(size == 1 || size == 2);
9281             genenvfn = fns[size - 1][u];
9282             break;
9283         }
9284         default:
9285             g_assert_not_reached();
9286         }
9287 
9288         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
9289         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
9290     }
9291 
9292     write_fp_dreg(s, rd, tcg_rd);
9293 }
9294 
9295 /* AdvSIMD scalar three same FP16
9296  *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
9297  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9298  * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
9299  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9300  * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9301  * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9302  */
9303 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9304                                                   uint32_t insn)
9305 {
9306     int rd = extract32(insn, 0, 5);
9307     int rn = extract32(insn, 5, 5);
9308     int opcode = extract32(insn, 11, 3);
9309     int rm = extract32(insn, 16, 5);
9310     bool u = extract32(insn, 29, 1);
9311     bool a = extract32(insn, 23, 1);
9312     int fpopcode = opcode | (a << 3) | (u << 4);
9313     TCGv_ptr fpst;
9314     TCGv_i32 tcg_op1;
9315     TCGv_i32 tcg_op2;
9316     TCGv_i32 tcg_res;
9317 
9318     switch (fpopcode) {
9319     case 0x03: /* FMULX */
9320     case 0x04: /* FCMEQ (reg) */
9321     case 0x07: /* FRECPS */
9322     case 0x0f: /* FRSQRTS */
9323     case 0x14: /* FCMGE (reg) */
9324     case 0x15: /* FACGE */
9325     case 0x1a: /* FABD */
9326     case 0x1c: /* FCMGT (reg) */
9327     case 0x1d: /* FACGT */
9328         break;
9329     default:
9330         unallocated_encoding(s);
9331         return;
9332     }
9333 
9334     if (!dc_isar_feature(aa64_fp16, s)) {
9335         unallocated_encoding(s);
             return; /* was missing; don't emit dead code after the exception */
9336     }
9337 
9338     if (!fp_access_check(s)) {
9339         return;
9340     }
9341 
9342     fpst = fpstatus_ptr(FPST_FPCR_F16);
9343 
9344     tcg_op1 = read_fp_hreg(s, rn);
9345     tcg_op2 = read_fp_hreg(s, rm);
9346     tcg_res = tcg_temp_new_i32();
9347 
9348     switch (fpopcode) {
9349     case 0x03: /* FMULX */
9350         gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9351         break;
9352     case 0x04: /* FCMEQ (reg) */
9353         gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9354         break;
9355     case 0x07: /* FRECPS */
9356         gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9357         break;
9358     case 0x0f: /* FRSQRTS */
9359         gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9360         break;
9361     case 0x14: /* FCMGE (reg) */
9362         gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9363         break;
9364     case 0x15: /* FACGE */
9365         gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9366         break;
9367     case 0x1a: /* FABD */
9368         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9369         tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9370         break;
9371     case 0x1c: /* FCMGT (reg) */
9372         gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9373         break;
9374     case 0x1d: /* FACGT */
9375         gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9376         break;
9377     default:
9378         g_assert_not_reached();
9379     }
9380 
9381     write_fp_sreg(s, rd, tcg_res);
9382 }
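
/*
 * The FABD case above computes the half-precision absolute difference
 * by masking the subtraction result with 0x7fff, which clears bit 15
 * (the sign bit of an IEEE binary16 value).
 */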
9383 
9384 /* AdvSIMD scalar three same extra
9385  *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
9386  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9387  * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
9388  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9389  */
9390 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9391                                                    uint32_t insn)
9392 {
9393     int rd = extract32(insn, 0, 5);
9394     int rn = extract32(insn, 5, 5);
9395     int opcode = extract32(insn, 11, 4);
9396     int rm = extract32(insn, 16, 5);
9397     int size = extract32(insn, 22, 2);
9398     bool u = extract32(insn, 29, 1);
9399     TCGv_i32 ele1, ele2, ele3;
9400     TCGv_i64 res;
9401     bool feature;
9402 
9403     switch (u * 16 + opcode) {
9404     case 0x10: /* SQRDMLAH (vector) */
9405     case 0x11: /* SQRDMLSH (vector) */
9406         if (size != 1 && size != 2) {
9407             unallocated_encoding(s);
9408             return;
9409         }
9410         feature = dc_isar_feature(aa64_rdm, s);
9411         break;
9412     default:
9413         unallocated_encoding(s);
9414         return;
9415     }
9416     if (!feature) {
9417         unallocated_encoding(s);
9418         return;
9419     }
9420     if (!fp_access_check(s)) {
9421         return;
9422     }
9423 
9424     /* Do a single operation on the lowest element in the vector.
9425      * We use the standard Neon helpers and rely on 0 OP 0 == 0
9426      * with no side effects for all these operations.
9427      * OPTME: special-purpose helpers would avoid doing some
9428      * unnecessary work in the helper for the 16 bit cases.
9429      */
9430     ele1 = tcg_temp_new_i32();
9431     ele2 = tcg_temp_new_i32();
9432     ele3 = tcg_temp_new_i32();
9433 
9434     read_vec_element_i32(s, ele1, rn, 0, size);
9435     read_vec_element_i32(s, ele2, rm, 0, size);
9436     read_vec_element_i32(s, ele3, rd, 0, size);
9437 
9438     switch (opcode) {
9439     case 0x0: /* SQRDMLAH */
9440         if (size == 1) {
9441             gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
9442         } else {
9443             gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
9444         }
9445         break;
9446     case 0x1: /* SQRDMLSH */
9447         if (size == 1) {
9448             gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
9449         } else {
9450             gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
9451         }
9452         break;
9453     default:
9454         g_assert_not_reached();
9455     }
9456 
9457     res = tcg_temp_new_i64();
9458     tcg_gen_extu_i32_i64(res, ele3);
9459     write_fp_dreg(s, rd, res);
9460 }
9461 
9462 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9463                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9464                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9465 {
9466     /* Handle 64->64 opcodes which are shared between the scalar and
9467      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9468      * is valid in either group and also the double-precision fp ops.
9469      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
9470      * requires them.
9471      */
9472     TCGCond cond;
9473 
9474     switch (opcode) {
9475     case 0x4: /* CLS, CLZ */
9476         if (u) {
9477             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9478         } else {
9479             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9480         }
9481         break;
9482     case 0x5: /* NOT */
9483         /* This opcode is shared with CNT and RBIT but we have earlier
9484          * enforced that size == 3 if and only if this is the NOT insn.
9485          */
9486         tcg_gen_not_i64(tcg_rd, tcg_rn);
9487         break;
9488     case 0x7: /* SQABS, SQNEG */
9489         if (u) {
9490             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
9491         } else {
9492             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
9493         }
9494         break;
9495     case 0xa: /* CMLT */
9496         /* 64 bit integer comparison against zero, result is
9497          * test ? (2^64 - 1) : 0. We implement this using setcond (test)
9498          * and then negating, as for the three-reg-same compares above.
9499          */
9500         cond = TCG_COND_LT;
9501     do_cmop:
9502         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
9503         tcg_gen_neg_i64(tcg_rd, tcg_rd);
9504         break;
9505     case 0x8: /* CMGT, CMGE */
9506         cond = u ? TCG_COND_GE : TCG_COND_GT;
9507         goto do_cmop;
9508     case 0x9: /* CMEQ, CMLE */
9509         cond = u ? TCG_COND_LE : TCG_COND_EQ;
9510         goto do_cmop;
9511     case 0xb: /* ABS, NEG */
9512         if (u) {
9513             tcg_gen_neg_i64(tcg_rd, tcg_rn);
9514         } else {
9515             tcg_gen_abs_i64(tcg_rd, tcg_rn);
9516         }
9517         break;
9518     case 0x2f: /* FABS */
9519         gen_helper_vfp_absd(tcg_rd, tcg_rn);
9520         break;
9521     case 0x6f: /* FNEG */
9522         gen_helper_vfp_negd(tcg_rd, tcg_rn);
9523         break;
9524     case 0x7f: /* FSQRT */
9525         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
9526         break;
9527     case 0x1a: /* FCVTNS */
9528     case 0x1b: /* FCVTMS */
9529     case 0x1c: /* FCVTAS */
9530     case 0x3a: /* FCVTPS */
9531     case 0x3b: /* FCVTZS */
9532         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9533         break;
9534     case 0x5a: /* FCVTNU */
9535     case 0x5b: /* FCVTMU */
9536     case 0x5c: /* FCVTAU */
9537     case 0x7a: /* FCVTPU */
9538     case 0x7b: /* FCVTZU */
9539         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
9540         break;
9541     case 0x18: /* FRINTN */
9542     case 0x19: /* FRINTM */
9543     case 0x38: /* FRINTP */
9544     case 0x39: /* FRINTZ */
9545     case 0x58: /* FRINTA */
9546     case 0x79: /* FRINTI */
9547         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
9548         break;
9549     case 0x59: /* FRINTX */
9550         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
9551         break;
9552     case 0x1e: /* FRINT32Z */
9553     case 0x5e: /* FRINT32X */
9554         gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
9555         break;
9556     case 0x1f: /* FRINT64Z */
9557     case 0x5f: /* FRINT64X */
9558         gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
9559         break;
9560     default:
9561         g_assert_not_reached();
9562     }
9563 }
9564 
9565 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
9566                                    bool is_scalar, bool is_u, bool is_q,
9567                                    int size, int rn, int rd)
9568 {
9569     bool is_double = (size == MO_64);
9570     TCGv_ptr fpst;
9571 
9572     if (!fp_access_check(s)) {
9573         return;
9574     }
9575 
9576     fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9577 
9578     if (is_double) {
9579         TCGv_i64 tcg_op = tcg_temp_new_i64();
9580         TCGv_i64 tcg_zero = tcg_constant_i64(0);
9581         TCGv_i64 tcg_res = tcg_temp_new_i64();
9582         NeonGenTwoDoubleOpFn *genfn;
9583         bool swap = false;
9584         int pass;
9585 
9586         switch (opcode) {
9587         case 0x2e: /* FCMLT (zero) */
9588             swap = true;
9589             /* fall through */
9590         case 0x2c: /* FCMGT (zero) */
9591             genfn = gen_helper_neon_cgt_f64;
9592             break;
9593         case 0x2d: /* FCMEQ (zero) */
9594             genfn = gen_helper_neon_ceq_f64;
9595             break;
9596         case 0x6d: /* FCMLE (zero) */
9597             swap = true;
9598             /* fall through */
9599         case 0x6c: /* FCMGE (zero) */
9600             genfn = gen_helper_neon_cge_f64;
9601             break;
9602         default:
9603             g_assert_not_reached();
9604         }
9605 
9606         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9607             read_vec_element(s, tcg_op, rn, pass, MO_64);
9608             if (swap) {
9609                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9610             } else {
9611                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9612             }
9613             write_vec_element(s, tcg_res, rd, pass, MO_64);
9614         }
9615 
9616         clear_vec_high(s, !is_scalar, rd);
9617     } else {
9618         TCGv_i32 tcg_op = tcg_temp_new_i32();
9619         TCGv_i32 tcg_zero = tcg_constant_i32(0);
9620         TCGv_i32 tcg_res = tcg_temp_new_i32();
9621         NeonGenTwoSingleOpFn *genfn;
9622         bool swap = false;
9623         int pass, maxpasses;
9624 
9625         if (size == MO_16) {
9626             switch (opcode) {
9627             case 0x2e: /* FCMLT (zero) */
9628                 swap = true;
9629                 /* fall through */
9630             case 0x2c: /* FCMGT (zero) */
9631                 genfn = gen_helper_advsimd_cgt_f16;
9632                 break;
9633             case 0x2d: /* FCMEQ (zero) */
9634                 genfn = gen_helper_advsimd_ceq_f16;
9635                 break;
9636             case 0x6d: /* FCMLE (zero) */
9637                 swap = true;
9638                 /* fall through */
9639             case 0x6c: /* FCMGE (zero) */
9640                 genfn = gen_helper_advsimd_cge_f16;
9641                 break;
9642             default:
9643                 g_assert_not_reached();
9644             }
9645         } else {
9646             switch (opcode) {
9647             case 0x2e: /* FCMLT (zero) */
9648                 swap = true;
9649                 /* fall through */
9650             case 0x2c: /* FCMGT (zero) */
9651                 genfn = gen_helper_neon_cgt_f32;
9652                 break;
9653             case 0x2d: /* FCMEQ (zero) */
9654                 genfn = gen_helper_neon_ceq_f32;
9655                 break;
9656             case 0x6d: /* FCMLE (zero) */
9657                 swap = true;
9658                 /* fall through */
9659             case 0x6c: /* FCMGE (zero) */
9660                 genfn = gen_helper_neon_cge_f32;
9661                 break;
9662             default:
9663                 g_assert_not_reached();
9664             }
9665         }
9666 
9667         if (is_scalar) {
9668             maxpasses = 1;
9669         } else {
9670             int vector_size = 8 << is_q;
9671             maxpasses = vector_size >> size;
9672         }
9673 
9674         for (pass = 0; pass < maxpasses; pass++) {
9675             read_vec_element_i32(s, tcg_op, rn, pass, size);
9676             if (swap) {
9677                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
9678             } else {
9679                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
9680             }
9681             if (is_scalar) {
9682                 write_fp_sreg(s, rd, tcg_res);
9683             } else {
9684                 write_vec_element_i32(s, tcg_res, rd, pass, size);
9685             }
9686         }
9687 
9688         if (!is_scalar) {
9689             clear_vec_high(s, is_q, rd);
9690         }
9691     }
9692 }
9693 
9694 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
9695                                     bool is_scalar, bool is_u, bool is_q,
9696                                     int size, int rn, int rd)
9697 {
9698     bool is_double = (size == 3);
9699     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9700 
9701     if (is_double) {
9702         TCGv_i64 tcg_op = tcg_temp_new_i64();
9703         TCGv_i64 tcg_res = tcg_temp_new_i64();
9704         int pass;
9705 
9706         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9707             read_vec_element(s, tcg_op, rn, pass, MO_64);
9708             switch (opcode) {
9709             case 0x3d: /* FRECPE */
9710                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
9711                 break;
9712             case 0x3f: /* FRECPX */
9713                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
9714                 break;
9715             case 0x7d: /* FRSQRTE */
9716                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
9717                 break;
9718             default:
9719                 g_assert_not_reached();
9720             }
9721             write_vec_element(s, tcg_res, rd, pass, MO_64);
9722         }
9723         clear_vec_high(s, !is_scalar, rd);
9724     } else {
9725         TCGv_i32 tcg_op = tcg_temp_new_i32();
9726         TCGv_i32 tcg_res = tcg_temp_new_i32();
9727         int pass, maxpasses;
9728 
9729         if (is_scalar) {
9730             maxpasses = 1;
9731         } else {
9732             maxpasses = is_q ? 4 : 2;
9733         }
9734 
9735         for (pass = 0; pass < maxpasses; pass++) {
9736             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
9737 
9738             switch (opcode) {
9739             case 0x3c: /* URECPE */
9740                 gen_helper_recpe_u32(tcg_res, tcg_op);
9741                 break;
9742             case 0x3d: /* FRECPE */
9743                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
9744                 break;
9745             case 0x3f: /* FRECPX */
9746                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
9747                 break;
9748             case 0x7d: /* FRSQRTE */
9749                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
9750                 break;
9751             default:
9752                 g_assert_not_reached();
9753             }
9754 
9755             if (is_scalar) {
9756                 write_fp_sreg(s, rd, tcg_res);
9757             } else {
9758                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9759             }
9760         }
9761         if (!is_scalar) {
9762             clear_vec_high(s, is_q, rd);
9763         }
9764     }
9765 }
9766 
9767 static void handle_2misc_narrow(DisasContext *s, bool scalar,
9768                                 int opcode, bool u, bool is_q,
9769                                 int size, int rn, int rd)
9770 {
9771     /* Handle 2-reg-misc ops which are narrowing: each source element is
9772      * twice the width of the element it produces in the destination.
9773      */
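    /* For example, XTN2 (is_q set) writes its narrowed results into the
     * high half of Rd (destelt = 2) and leaves the low half untouched.
     */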
9774     int pass;
9775     TCGv_i32 tcg_res[2];
9776     int destelt = is_q ? 2 : 0;
9777     int passes = scalar ? 1 : 2;
9778 
9779     if (scalar) {
9780         tcg_res[1] = tcg_constant_i32(0);
9781     }
9782 
9783     for (pass = 0; pass < passes; pass++) {
9784         TCGv_i64 tcg_op = tcg_temp_new_i64();
9785         NeonGenNarrowFn *genfn = NULL;
9786         NeonGenNarrowEnvFn *genenvfn = NULL;
9787 
9788         if (scalar) {
9789             read_vec_element(s, tcg_op, rn, pass, size + 1);
9790         } else {
9791             read_vec_element(s, tcg_op, rn, pass, MO_64);
9792         }
9793         tcg_res[pass] = tcg_temp_new_i32();
9794 
9795         switch (opcode) {
9796         case 0x12: /* XTN, SQXTUN */
9797         {
9798             static NeonGenNarrowFn * const xtnfns[3] = {
9799                 gen_helper_neon_narrow_u8,
9800                 gen_helper_neon_narrow_u16,
9801                 tcg_gen_extrl_i64_i32,
9802             };
9803             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
9804                 gen_helper_neon_unarrow_sat8,
9805                 gen_helper_neon_unarrow_sat16,
9806                 gen_helper_neon_unarrow_sat32,
9807             };
9808             if (u) {
9809                 genenvfn = sqxtunfns[size];
9810             } else {
9811                 genfn = xtnfns[size];
9812             }
9813             break;
9814         }
9815         case 0x14: /* SQXTN, UQXTN */
9816         {
9817             static NeonGenNarrowEnvFn * const fns[3][2] = {
9818                 { gen_helper_neon_narrow_sat_s8,
9819                   gen_helper_neon_narrow_sat_u8 },
9820                 { gen_helper_neon_narrow_sat_s16,
9821                   gen_helper_neon_narrow_sat_u16 },
9822                 { gen_helper_neon_narrow_sat_s32,
9823                   gen_helper_neon_narrow_sat_u32 },
9824             };
9825             genenvfn = fns[size][u];
9826             break;
9827         }
9828         case 0x16: /* FCVTN, FCVTN2 */
9829             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9830             if (size == 2) {
9831                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
9832             } else {
9833                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9834                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9835                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9836                 TCGv_i32 ahp = get_ahp_flag();
9837 
9838                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
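                /* Split the 64-bit input into two 32-bit floats, convert
                 * each to half precision, then repack them as a 16:16 pair.
                 */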
9839                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9840                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9841                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9842             }
9843             break;
9844         case 0x36: /* BFCVTN, BFCVTN2 */
9845             {
9846                 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9847                 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
9848             }
9849             break;
9850         case 0x56:  /* FCVTXN, FCVTXN2 */
9851             /* 64 bit to 32 bit float conversion
9852              * with von Neumann rounding (round to odd)
9853              */
9854             assert(size == 2);
9855             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
9856             break;
9857         default:
9858             g_assert_not_reached();
9859         }
9860 
9861         if (genfn) {
9862             genfn(tcg_res[pass], tcg_op);
9863         } else if (genenvfn) {
9864             genenvfn(tcg_res[pass], cpu_env, tcg_op);
9865         }
9866     }
9867 
9868     for (pass = 0; pass < 2; pass++) {
9869         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9870     }
9871     clear_vec_high(s, is_q, rd);
9872 }
9873 
9874 /* Remaining saturating accumulating ops */
9875 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9876                                 bool is_q, int size, int rn, int rd)
9877 {
9878     bool is_double = (size == 3);
9879 
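    /* USQADD saturates Rd + Rn as unsigned, with Rn treated as signed;
     * SUQADD saturates as signed, with Rn treated as unsigned.
     */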
9880     if (is_double) {
9881         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9882         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9883         int pass;
9884 
9885         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9886             read_vec_element(s, tcg_rn, rn, pass, MO_64);
9887             read_vec_element(s, tcg_rd, rd, pass, MO_64);
9888 
9889             if (is_u) { /* USQADD */
9890                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9891             } else { /* SUQADD */
9892                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9893             }
9894             write_vec_element(s, tcg_rd, rd, pass, MO_64);
9895         }
9896         clear_vec_high(s, !is_scalar, rd);
9897     } else {
9898         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9899         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9900         int pass, maxpasses;
9901 
9902         if (is_scalar) {
9903             maxpasses = 1;
9904         } else {
9905             maxpasses = is_q ? 4 : 2;
9906         }
9907 
9908         for (pass = 0; pass < maxpasses; pass++) {
9909             if (is_scalar) {
9910                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
9911                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
9912             } else {
9913                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9914                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9915             }
9916 
9917             if (is_u) { /* USQADD */
9918                 switch (size) {
9919                 case 0:
9920                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9921                     break;
9922                 case 1:
9923                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9924                     break;
9925                 case 2:
9926                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9927                     break;
9928                 default:
9929                     g_assert_not_reached();
9930                 }
9931             } else { /* SUQADD */
9932                 switch (size) {
9933                 case 0:
9934                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9935                     break;
9936                 case 1:
9937                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9938                     break;
9939                 case 2:
9940                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9941                     break;
9942                 default:
9943                     g_assert_not_reached();
9944                 }
9945             }
9946 
9947             if (is_scalar) {
9948                 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64);
9949             }
9950             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9951         }
9952         clear_vec_high(s, is_q, rd);
9953     }
9954 }
9955 
9956 /* AdvSIMD scalar two reg misc
9957  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9958  * +-----+---+-----------+------+-----------+--------+-----+------+------+
9959  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9960  * +-----+---+-----------+------+-----------+--------+-----+------+------+
9961  */
9962 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
9963 {
9964     int rd = extract32(insn, 0, 5);
9965     int rn = extract32(insn, 5, 5);
9966     int opcode = extract32(insn, 12, 5);
9967     int size = extract32(insn, 22, 2);
9968     bool u = extract32(insn, 29, 1);
9969     bool is_fcvt = false;
9970     int rmode;
9971     TCGv_i32 tcg_rmode;
9972     TCGv_ptr tcg_fpstatus;
9973 
9974     switch (opcode) {
9975     case 0x3: /* USQADD / SUQADD */
9976         if (!fp_access_check(s)) {
9977             return;
9978         }
9979         handle_2misc_satacc(s, true, u, false, size, rn, rd);
9980         return;
9981     case 0x7: /* SQABS / SQNEG */
9982         break;
9983     case 0xa: /* CMLT */
9984         if (u) {
9985             unallocated_encoding(s);
9986             return;
9987         }
9988         /* fall through */
9989     case 0x8: /* CMGT, CMGE */
9990     case 0x9: /* CMEQ, CMLE */
9991     case 0xb: /* ABS, NEG */
9992         if (size != 3) {
9993             unallocated_encoding(s);
9994             return;
9995         }
9996         break;
9997     case 0x12: /* SQXTUN */
9998         if (!u) {
9999             unallocated_encoding(s);
10000             return;
10001         }
10002         /* fall through */
10003     case 0x14: /* SQXTN, UQXTN */
10004         if (size == 3) {
10005             unallocated_encoding(s);
10006             return;
10007         }
10008         if (!fp_access_check(s)) {
10009             return;
10010         }
10011         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
10012         return;
10013     case 0xc ... 0xf:
10014     case 0x16 ... 0x1d:
10015     case 0x1f:
10016         /* Floating point: U, size[1] and opcode indicate operation;
10017          * size[0] indicates single or double precision.
10018          */
10019         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10020         size = extract32(size, 0, 1) ? 3 : 2;
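        /* For example, scalar FCVTZS on a double (opcode 0x1b, size 0b11)
         * becomes opcode 0x3b with size 3 here.
         */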
10021         switch (opcode) {
10022         case 0x2c: /* FCMGT (zero) */
10023         case 0x2d: /* FCMEQ (zero) */
10024         case 0x2e: /* FCMLT (zero) */
10025         case 0x6c: /* FCMGE (zero) */
10026         case 0x6d: /* FCMLE (zero) */
10027             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
10028             return;
10029         case 0x1d: /* SCVTF */
10030         case 0x5d: /* UCVTF */
10031         {
10032             bool is_signed = (opcode == 0x1d);
10033             if (!fp_access_check(s)) {
10034                 return;
10035             }
10036             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
10037             return;
10038         }
10039         case 0x3d: /* FRECPE */
10040         case 0x3f: /* FRECPX */
10041         case 0x7d: /* FRSQRTE */
10042             if (!fp_access_check(s)) {
10043                 return;
10044             }
10045             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
10046             return;
10047         case 0x1a: /* FCVTNS */
10048         case 0x1b: /* FCVTMS */
10049         case 0x3a: /* FCVTPS */
10050         case 0x3b: /* FCVTZS */
10051         case 0x5a: /* FCVTNU */
10052         case 0x5b: /* FCVTMU */
10053         case 0x7a: /* FCVTPU */
10054         case 0x7b: /* FCVTZU */
10055             is_fcvt = true;
10056             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
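            /* This yields FPROUNDING_TIEEVEN for FCVTN*, FPROUNDING_POSINF
             * for FCVTP*, FPROUNDING_NEGINF for FCVTM* and FPROUNDING_ZERO
             * for FCVTZ*.
             */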
10057             break;
10058         case 0x1c: /* FCVTAS */
10059         case 0x5c: /* FCVTAU */
10060             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
10061             is_fcvt = true;
10062             rmode = FPROUNDING_TIEAWAY;
10063             break;
10064         case 0x56: /* FCVTXN, FCVTXN2 */
10065             if (size == 2) {
10066                 unallocated_encoding(s);
10067                 return;
10068             }
10069             if (!fp_access_check(s)) {
10070                 return;
10071             }
10072             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
10073             return;
10074         default:
10075             unallocated_encoding(s);
10076             return;
10077         }
10078         break;
10079     default:
10080         unallocated_encoding(s);
10081         return;
10082     }
10083 
10084     if (!fp_access_check(s)) {
10085         return;
10086     }
10087 
10088     if (is_fcvt) {
10089         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
10090         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
10091     } else {
10092         tcg_fpstatus = NULL;
10093         tcg_rmode = NULL;
10094     }
10095 
10096     if (size == 3) {
10097         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
10098         TCGv_i64 tcg_rd = tcg_temp_new_i64();
10099 
10100         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
10101         write_fp_dreg(s, rd, tcg_rd);
10102     } else {
10103         TCGv_i32 tcg_rn = tcg_temp_new_i32();
10104         TCGv_i32 tcg_rd = tcg_temp_new_i32();
10105 
10106         read_vec_element_i32(s, tcg_rn, rn, 0, size);
10107 
10108         switch (opcode) {
10109         case 0x7: /* SQABS, SQNEG */
10110         {
10111             NeonGenOneOpEnvFn *genfn;
10112             static NeonGenOneOpEnvFn * const fns[3][2] = {
10113                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10114                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10115                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
10116             };
10117             genfn = fns[size][u];
10118             genfn(tcg_rd, cpu_env, tcg_rn);
10119             break;
10120         }
10121         case 0x1a: /* FCVTNS */
10122         case 0x1b: /* FCVTMS */
10123         case 0x1c: /* FCVTAS */
10124         case 0x3a: /* FCVTPS */
10125         case 0x3b: /* FCVTZS */
10126             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10127                                  tcg_fpstatus);
10128             break;
10129         case 0x5a: /* FCVTNU */
10130         case 0x5b: /* FCVTMU */
10131         case 0x5c: /* FCVTAU */
10132         case 0x7a: /* FCVTPU */
10133         case 0x7b: /* FCVTZU */
10134             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10135                                  tcg_fpstatus);
10136             break;
10137         default:
10138             g_assert_not_reached();
10139         }
10140 
10141         write_fp_sreg(s, rd, tcg_rd);
10142     }
10143 
10144     if (is_fcvt) {
10145         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
10146     }
10147 }
10148 
10149 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
10150 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
10151                                  int immh, int immb, int opcode, int rn, int rd)
10152 {
10153     int size = 32 - clz32(immh) - 1;
10154     int immhb = immh << 3 | immb;
10155     int shift = 2 * (8 << size) - immhb;
10156     GVecGen2iFn *gvec_fn;
10157 
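    /* For example, immh:immb = 0001:001 gives size 0 (8-bit elements)
     * and shift = 16 - 9 = 7; immh = 1xxx selects 64-bit elements.
     */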
10158     if (extract32(immh, 3, 1) && !is_q) {
10159         unallocated_encoding(s);
10160         return;
10161     }
10162     tcg_debug_assert(size <= 3);
10163 
10164     if (!fp_access_check(s)) {
10165         return;
10166     }
10167 
10168     switch (opcode) {
10169     case 0x02: /* SSRA / USRA (accumulate) */
10170         gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
10171         break;
10172 
10173     case 0x08: /* SRI */
10174         gvec_fn = gen_gvec_sri;
10175         break;
10176 
10177     case 0x00: /* SSHR / USHR */
10178         if (is_u) {
10179             if (shift == 8 << size) {
10180                 /* A shift count equal to the element size produces zero. */
10181                 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
10182                                      is_q ? 16 : 8, vec_full_reg_size(s), 0);
10183                 return;
10184             }
10185             gvec_fn = tcg_gen_gvec_shri;
10186         } else {
10187             /* A shift count equal to the element size produces all sign bits. */
10188             if (shift == 8 << size) {
10189                 shift -= 1;
10190             }
10191             gvec_fn = tcg_gen_gvec_sari;
10192         }
10193         break;
10194 
10195     case 0x04: /* SRSHR / URSHR (rounding) */
10196         gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
10197         break;
10198 
10199     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10200         gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
10201         break;
10202 
10203     default:
10204         g_assert_not_reached();
10205     }
10206 
10207     gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
10208 }
10209 
10210 /* SHL/SLI - Vector shift left */
10211 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10212                                  int immh, int immb, int opcode, int rn, int rd)
10213 {
10214     int size = 32 - clz32(immh) - 1;
10215     int immhb = immh << 3 | immb;
10216     int shift = immhb - (8 << size);
10217 
10218     /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10219     assert(size >= 0 && size <= 3);
10220 
10221     if (extract32(immh, 3, 1) && !is_q) {
10222         unallocated_encoding(s);
10223         return;
10224     }
10225 
10226     if (!fp_access_check(s)) {
10227         return;
10228     }
10229 
10230     if (insert) {
10231         gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
10232     } else {
10233         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10234     }
10235 }
10236 
10237 /* USHLL/SHLL - Vector shift left with widening */
10238 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10239                                   int immh, int immb, int opcode, int rn, int rd)
10240 {
10241     int size = 32 - clz32(immh) - 1;
10242     int immhb = immh << 3 | immb;
10243     int shift = immhb - (8 << size);
10244     int dsize = 64;
10245     int esize = 8 << size;
10246     int elements = dsize / esize;
10247     TCGv_i64 tcg_rn = tcg_temp_new_i64();
10248     TCGv_i64 tcg_rd = tcg_temp_new_i64();
10249     int i;
10250 
10251     if (size >= 3) {
10252         unallocated_encoding(s);
10253         return;
10254     }
10255 
10256     if (!fp_access_check(s)) {
10257         return;
10258     }
10259 
10260     /* For the LL variants the store is larger than the load, so if
10261      * rd == rn we would overwrite parts of our input.  Load the whole
10262      * input up front and extract each element with shifts in the loop.
10263      */
10264     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10265 
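    /* For example, SSHLL with immh:immb = 0010:010 has size 1 (16-bit
     * elements) and shift 2: each of the four 16-bit inputs is
     * sign-extended, shifted left by 2 and written as a 32-bit element.
     */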
10266     for (i = 0; i < elements; i++) {
10267         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
10268         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10269         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10270         write_vec_element(s, tcg_rd, rd, i, size + 1);
10271     }
10272 }
10273 
10274 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10275 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10276                                  int immh, int immb, int opcode, int rn, int rd)
10277 {
10278     int immhb = immh << 3 | immb;
10279     int size = 32 - clz32(immh) - 1;
10280     int dsize = 64;
10281     int esize = 8 << size;
10282     int elements = dsize / esize;
10283     int shift = (2 * esize) - immhb;
10284     bool round = extract32(opcode, 0, 1);
10285     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10286     TCGv_i64 tcg_round;
10287     int i;
10288 
10289     if (extract32(immh, 3, 1)) {
10290         unallocated_encoding(s);
10291         return;
10292     }
10293 
10294     if (!fp_access_check(s)) {
10295         return;
10296     }
10297 
10298     tcg_rn = tcg_temp_new_i64();
10299     tcg_rd = tcg_temp_new_i64();
10300     tcg_final = tcg_temp_new_i64();
10301     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10302 
10303     if (round) {
10304         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
10305     } else {
10306         tcg_round = NULL;
10307     }
10308 
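    /* The bias of 1 << (shift - 1) rounds to nearest before the shift:
     * e.g. with shift = 3 an input of 12 becomes (12 + 4) >> 3 = 2.
     */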
10309     for (i = 0; i < elements; i++) {
10310         read_vec_element(s, tcg_rn, rn, i, size + 1);
10311         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10312                                 false, true, size + 1, shift);
10313 
10314         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10315     }
10316 
10317     write_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10322 
10323     clear_vec_high(s, is_q, rd);
10324 }
10325 
10326 
10327 /* AdvSIMD shift by immediate
10328  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10329  * +---+---+---+-------------+------+------+--------+---+------+------+
10330  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10331  * +---+---+---+-------------+------+------+--------+---+------+------+
10332  */
10333 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10334 {
10335     int rd = extract32(insn, 0, 5);
10336     int rn = extract32(insn, 5, 5);
10337     int opcode = extract32(insn, 11, 5);
10338     int immb = extract32(insn, 16, 3);
10339     int immh = extract32(insn, 19, 4);
10340     bool is_u = extract32(insn, 29, 1);
10341     bool is_q = extract32(insn, 30, 1);
10342 
10343     /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
10344     assert(immh != 0);
10345 
10346     switch (opcode) {
10347     case 0x08: /* SRI */
10348         if (!is_u) {
10349             unallocated_encoding(s);
10350             return;
10351         }
10352         /* fall through */
10353     case 0x00: /* SSHR / USHR */
10354     case 0x02: /* SSRA / USRA (accumulate) */
10355     case 0x04: /* SRSHR / URSHR (rounding) */
10356     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10357         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10358         break;
10359     case 0x0a: /* SHL / SLI */
10360         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10361         break;
10362     case 0x10: /* SHRN / SQSHRUN */
10363     case 0x11: /* RSHRN / SQRSHRUN */
10364         if (is_u) {
10365             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10366                                    opcode, rn, rd);
10367         } else {
10368             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10369         }
10370         break;
10371     case 0x12: /* SQSHRN / UQSHRN */
10372     case 0x13: /* SQRSHRN / UQRSHRN */
10373         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10374                                opcode, rn, rd);
10375         break;
10376     case 0x14: /* SSHLL / USHLL */
10377         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10378         break;
10379     case 0x1c: /* SCVTF / UCVTF */
10380         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10381                                      opcode, rn, rd);
10382         break;
10383     case 0xc: /* SQSHLU */
10384         if (!is_u) {
10385             unallocated_encoding(s);
10386             return;
10387         }
10388         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10389         break;
10390     case 0xe: /* SQSHL, UQSHL */
10391         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10392         break;
10393     case 0x1f: /* FCVTZS / FCVTZU */
10394         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10395         return;
10396     default:
10397         unallocated_encoding(s);
10398         return;
10399     }
10400 }
10401 
10402 /* Generate code to do a "long" addition or subtraction, i.e. one done in
10403  * TCGv_i64 on vector lanes twice the width specified by size.
10404  */
10405 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10406                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10407 {
10408     static NeonGenTwo64OpFn * const fns[3][2] = {
10409         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10410         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10411         { tcg_gen_add_i64, tcg_gen_sub_i64 },
10412     };
10413     NeonGenTwo64OpFn *genfn;
10414     assert(size < 3);
10415 
10416     genfn = fns[size][is_sub];
10417     genfn(tcg_res, tcg_op1, tcg_op2);
10418 }
10419 
10420 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
10421                                 int opcode, int rd, int rn, int rm)
10422 {
10423     /* 3-reg-different widening insns: 64 x 64 -> 128 */
10424     TCGv_i64 tcg_res[2];
10425     int pass, accop;
10426 
10427     tcg_res[0] = tcg_temp_new_i64();
10428     tcg_res[1] = tcg_temp_new_i64();
10429 
10430     /* Does this op do an adding accumulate, a subtracting accumulate,
10431      * or no accumulate at all?
10432      */
10433     switch (opcode) {
10434     case 5:
10435     case 8:
10436     case 9:
10437         accop = 1;
10438         break;
10439     case 10:
10440     case 11:
10441         accop = -1;
10442         break;
10443     default:
10444         accop = 0;
10445         break;
10446     }
10447 
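    /* For example, SMLAL (opcode 8) accumulates with an add and SMLSL
     * (opcode 10) with a subtract, while SQDMLAL/SQDMLSL (9/11) use the
     * saturating accumulate helpers below.
     */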
10448     if (accop != 0) {
10449         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
10450         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
10451     }
10452 
10453     /* size == 2 means two 32x32->64 operations; this is worth special
10454      * casing because we can generally handle it inline.
10455      */
10456     if (size == 2) {
10457         for (pass = 0; pass < 2; pass++) {
10458             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10459             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10460             TCGv_i64 tcg_passres;
10461             MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
10462 
10463             int elt = pass + is_q * 2;
10464 
10465             read_vec_element(s, tcg_op1, rn, elt, memop);
10466             read_vec_element(s, tcg_op2, rm, elt, memop);
10467 
10468             if (accop == 0) {
10469                 tcg_passres = tcg_res[pass];
10470             } else {
10471                 tcg_passres = tcg_temp_new_i64();
10472             }
10473 
10474             switch (opcode) {
10475             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10476                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
10477                 break;
10478             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10479                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
10480                 break;
10481             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10482             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10483             {
10484                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
10485                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
10486 
10487                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
10488                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
10489                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
10490                                     tcg_passres,
10491                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
10492                 break;
10493             }
10494             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10495             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10496             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10497                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10498                 break;
10499             case 9: /* SQDMLAL, SQDMLAL2 */
10500             case 11: /* SQDMLSL, SQDMLSL2 */
10501             case 13: /* SQDMULL, SQDMULL2 */
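                /* The doubling in SQDMULL is implemented as a saturating
                 * add of the product to itself.
                 */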
10502                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10503                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10504                                                   tcg_passres, tcg_passres);
10505                 break;
10506             default:
10507                 g_assert_not_reached();
10508             }
10509 
10510             if (opcode == 9 || opcode == 11) {
10511                 /* saturating accumulate ops */
10512                 if (accop < 0) {
10513                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10514                 }
10515                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10516                                                   tcg_res[pass], tcg_passres);
10517             } else if (accop > 0) {
10518                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10519             } else if (accop < 0) {
10520                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10521             }
10522         }
10523     } else {
10524         /* size 0 or 1, generally helper functions */
10525         for (pass = 0; pass < 2; pass++) {
10526             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10527             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10528             TCGv_i64 tcg_passres;
10529             int elt = pass + is_q * 2;
10530 
10531             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
10532             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
10533 
10534             if (accop == 0) {
10535                 tcg_passres = tcg_res[pass];
10536             } else {
10537                 tcg_passres = tcg_temp_new_i64();
10538             }
10539 
10540             switch (opcode) {
10541             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10542             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10543             {
10544                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
10545                 static NeonGenWidenFn * const widenfns[2][2] = {
10546                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10547                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10548                 };
10549                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
10550 
10551                 widenfn(tcg_op2_64, tcg_op2);
10552                 widenfn(tcg_passres, tcg_op1);
10553                 gen_neon_addl(size, (opcode == 2), tcg_passres,
10554                               tcg_passres, tcg_op2_64);
10555                 break;
10556             }
10557             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10558             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10559                 if (size == 0) {
10560                     if (is_u) {
10561                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
10562                     } else {
10563                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
10564                     }
10565                 } else {
10566                     if (is_u) {
10567                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
10568                     } else {
10569                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
10570                     }
10571                 }
10572                 break;
10573             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10574             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10575             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10576                 if (size == 0) {
10577                     if (is_u) {
10578                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
10579                     } else {
10580                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
10581                     }
10582                 } else {
10583                     if (is_u) {
10584                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
10585                     } else {
10586                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10587                     }
10588                 }
10589                 break;
10590             case 9: /* SQDMLAL, SQDMLAL2 */
10591             case 11: /* SQDMLSL, SQDMLSL2 */
10592             case 13: /* SQDMULL, SQDMULL2 */
10593                 assert(size == 1);
10594                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10595                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10596                                                   tcg_passres, tcg_passres);
10597                 break;
10598             default:
10599                 g_assert_not_reached();
10600             }
10601 
10602             if (accop != 0) {
10603                 if (opcode == 9 || opcode == 11) {
10604                     /* saturating accumulate ops */
10605                     if (accop < 0) {
10606                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10607                     }
10608                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10609                                                       tcg_res[pass],
10610                                                       tcg_passres);
10611                 } else {
10612                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
10613                                   tcg_res[pass], tcg_passres);
10614                 }
10615             }
10616         }
10617     }
10618 
10619     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
10620     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
10621 }
10622 
10623 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10624                             int opcode, int rd, int rn, int rm)
10625 {
10626     TCGv_i64 tcg_res[2];
10627     int part = is_q ? 2 : 0;
10628     int pass;
10629 
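    /* For example, SADDW with 32-bit source elements sign-extends each
     * element from the selected half of Rm to 64 bits and adds it to the
     * corresponding 64-bit element of Rn; opcode 3 (SUBW) subtracts.
     */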
10630     for (pass = 0; pass < 2; pass++) {
10631         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10632         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10633         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10634         static NeonGenWidenFn * const widenfns[3][2] = {
10635             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10636             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10637             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10638         };
10639         NeonGenWidenFn *widenfn = widenfns[size][is_u];
10640 
10641         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10642         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10643         widenfn(tcg_op2_wide, tcg_op2);
10644         tcg_res[pass] = tcg_temp_new_i64();
10645         gen_neon_addl(size, (opcode == 3),
10646                       tcg_res[pass], tcg_op1, tcg_op2_wide);
10647     }
10648 
10649     for (pass = 0; pass < 2; pass++) {
10650         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10651     }
10652 }
10653 
10654 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
10655 {
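    /* Round to nearest by adding half of the discarded low 32 bits
     * before taking the high half.
     */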
10656     tcg_gen_addi_i64(in, in, 1U << 31);
10657     tcg_gen_extrh_i64_i32(res, in);
10658 }
10659 
10660 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10661                                  int opcode, int rd, int rn, int rm)
10662 {
10663     TCGv_i32 tcg_res[2];
10664     int part = is_q ? 2 : 0;
10665     int pass;
10666 
10667     for (pass = 0; pass < 2; pass++) {
10668         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10669         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10670         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10671         static NeonGenNarrowFn * const narrowfns[3][2] = {
10672             { gen_helper_neon_narrow_high_u8,
10673               gen_helper_neon_narrow_round_high_u8 },
10674             { gen_helper_neon_narrow_high_u16,
10675               gen_helper_neon_narrow_round_high_u16 },
10676             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10677         };
10678         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10679 
10680         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10681         read_vec_element(s, tcg_op2, rm, pass, MO_64);
10682 
10683         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10684 
10685         tcg_res[pass] = tcg_temp_new_i32();
10686         gennarrow(tcg_res[pass], tcg_wideres);
10687     }
10688 
10689     for (pass = 0; pass < 2; pass++) {
10690         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10691     }
10692     clear_vec_high(s, is_q, rd);
10693 }
10694 
10695 /* AdvSIMD three different
10696  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
10697  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10698  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
10699  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10700  */
10701 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10702 {
10703     /* Instructions in this group fall into three basic classes
10704      * (in each case with the operation working on each element in
10705      * the input vectors):
10706      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10707      *     128 bit input)
10708      * (2) wide 64 x 128 -> 128
10709      * (3) narrowing 128 x 128 -> 64
10710      * Here we do initial decode, catch unallocated cases and
10711      * dispatch to separate functions for each class.
10712      */
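    /* For example, SADDL is class (1), SADDW class (2) and ADDHN
     * class (3).
     */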
10713     int is_q = extract32(insn, 30, 1);
10714     int is_u = extract32(insn, 29, 1);
10715     int size = extract32(insn, 22, 2);
10716     int opcode = extract32(insn, 12, 4);
10717     int rm = extract32(insn, 16, 5);
10718     int rn = extract32(insn, 5, 5);
10719     int rd = extract32(insn, 0, 5);
10720 
10721     switch (opcode) {
10722     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10723     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10724         /* 64 x 128 -> 128 */
10725         if (size == 3) {
10726             unallocated_encoding(s);
10727             return;
10728         }
10729         if (!fp_access_check(s)) {
10730             return;
10731         }
10732         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10733         break;
10734     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10735     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10736         /* 128 x 128 -> 64 */
10737         if (size == 3) {
10738             unallocated_encoding(s);
10739             return;
10740         }
10741         if (!fp_access_check(s)) {
10742             return;
10743         }
10744         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10745         break;
10746     case 14: /* PMULL, PMULL2 */
10747         if (is_u) {
10748             unallocated_encoding(s);
10749             return;
10750         }
10751         switch (size) {
10752         case 0: /* PMULL.P8 */
10753             if (!fp_access_check(s)) {
10754                 return;
10755             }
10756             /* The Q field specifies lo/hi half input for this insn.  */
10757             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10758                              gen_helper_neon_pmull_h);
10759             break;
10760 
10761         case 3: /* PMULL.P64 */
10762             if (!dc_isar_feature(aa64_pmull, s)) {
10763                 unallocated_encoding(s);
10764                 return;
10765             }
10766             if (!fp_access_check(s)) {
10767                 return;
10768             }
10769             /* The Q field specifies lo/hi half input for this insn.  */
10770             gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
10771                              gen_helper_gvec_pmull_q);
10772             break;
10773 
10774         default:
10775             unallocated_encoding(s);
10776             break;
10777         }
10778         return;
10779     case 9: /* SQDMLAL, SQDMLAL2 */
10780     case 11: /* SQDMLSL, SQDMLSL2 */
10781     case 13: /* SQDMULL, SQDMULL2 */
10782         if (is_u || size == 0) {
10783             unallocated_encoding(s);
10784             return;
10785         }
10786         /* fall through */
10787     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10788     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10789     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10790     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10791     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10792     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10793     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10794         /* 64 x 64 -> 128 */
10795         if (size == 3) {
10796             unallocated_encoding(s);
10797             return;
10798         }
10799         if (!fp_access_check(s)) {
10800             return;
10801         }
10802 
10803         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10804         break;
10805     default:
10806         /* opcode 15 not allocated */
10807         unallocated_encoding(s);
10808         break;
10809     }
10810 }
10811 
10812 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10813 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10814 {
10815     int rd = extract32(insn, 0, 5);
10816     int rn = extract32(insn, 5, 5);
10817     int rm = extract32(insn, 16, 5);
10818     int size = extract32(insn, 22, 2);
10819     bool is_u = extract32(insn, 29, 1);
10820     bool is_q = extract32(insn, 30, 1);
10821 
10822     if (!fp_access_check(s)) {
10823         return;
10824     }
10825 
10826     switch (size + 4 * is_u) {
10827     case 0: /* AND */
10828         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10829         return;
10830     case 1: /* BIC */
10831         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10832         return;
10833     case 2: /* ORR */
10834         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10835         return;
10836     case 3: /* ORN */
10837         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10838         return;
10839     case 4: /* EOR */
10840         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10841         return;
10842 
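    /* BSL, BIT and BIF all map onto the same bitwise select,
     * d = (mask & if_true) | (~mask & if_false); only the operand
     * order passed to tcg_gen_gvec_bitsel differs.
     */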
10843     case 5: /* BSL bitwise select */
10844         gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
10845         return;
10846     case 6: /* BIT, bitwise insert if true */
10847         gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
10848         return;
10849     case 7: /* BIF, bitwise insert if false */
10850         gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
10851         return;
10852 
10853     default:
10854         g_assert_not_reached();
10855     }
10856 }
10857 
10858 /* Pairwise op subgroup of C3.6.16.
10859  *
10860  * This is called directly, and from disas_simd_3same_float for the float
10861  * pairwise operations, where the opcode and size are decoded differently.
10862  */
10863 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
10864                                    int size, int rn, int rm, int rd)
10865 {
10866     TCGv_ptr fpst;
10867     int pass;
10868 
10869     /* Floating point operations need fpst */
10870     if (opcode >= 0x58) {
10871         fpst = fpstatus_ptr(FPST_FPCR);
10872     } else {
10873         fpst = NULL;
10874     }
10875 
10876     if (!fp_access_check(s)) {
10877         return;
10878     }
10879 
10880     /* These operations work on the concatenated rm:rn, with each pair of
10881      * adjacent elements being operated on to produce an element in the result.
10882      */
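    /* For example, ADDP with two 64-bit lanes computes
     * Rd[0] = Rn[0] + Rn[1] and Rd[1] = Rm[0] + Rm[1].
     */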
10883     if (size == 3) {
10884         TCGv_i64 tcg_res[2];
10885 
10886         for (pass = 0; pass < 2; pass++) {
10887             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10888             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10889             int passreg = (pass == 0) ? rn : rm;
10890 
10891             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
10892             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
10893             tcg_res[pass] = tcg_temp_new_i64();
10894 
10895             switch (opcode) {
10896             case 0x17: /* ADDP */
10897                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10898                 break;
10899             case 0x58: /* FMAXNMP */
10900                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10901                 break;
10902             case 0x5a: /* FADDP */
10903                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10904                 break;
10905             case 0x5e: /* FMAXP */
10906                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10907                 break;
10908             case 0x78: /* FMINNMP */
10909                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10910                 break;
10911             case 0x7e: /* FMINP */
10912                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10913                 break;
10914             default:
10915                 g_assert_not_reached();
10916             }
10917         }
10918 
10919         for (pass = 0; pass < 2; pass++) {
10920             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10921         }
10922     } else {
10923         int maxpass = is_q ? 4 : 2;
10924         TCGv_i32 tcg_res[4];
10925 
10926         for (pass = 0; pass < maxpass; pass++) {
10927             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10928             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10929             NeonGenTwoOpFn *genfn = NULL;
10930             int passreg = pass < (maxpass / 2) ? rn : rm;
10931             int passelt = (is_q && (pass & 1)) ? 2 : 0;
10932 
10933             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10934             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10935             tcg_res[pass] = tcg_temp_new_i32();
10936 
10937             switch (opcode) {
10938             case 0x17: /* ADDP */
10939             {
10940                 static NeonGenTwoOpFn * const fns[3] = {
10941                     gen_helper_neon_padd_u8,
10942                     gen_helper_neon_padd_u16,
10943                     tcg_gen_add_i32,
10944                 };
10945                 genfn = fns[size];
10946                 break;
10947             }
10948             case 0x14: /* SMAXP, UMAXP */
10949             {
10950                 static NeonGenTwoOpFn * const fns[3][2] = {
10951                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10952                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10953                     { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10954                 };
10955                 genfn = fns[size][u];
10956                 break;
10957             }
10958             case 0x15: /* SMINP, UMINP */
10959             {
10960                 static NeonGenTwoOpFn * const fns[3][2] = {
10961                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10962                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10963                     { tcg_gen_smin_i32, tcg_gen_umin_i32 },
10964                 };
10965                 genfn = fns[size][u];
10966                 break;
10967             }
10968             /* The FP operations are all on single floats (32 bit) */
10969             case 0x58: /* FMAXNMP */
10970                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10971                 break;
10972             case 0x5a: /* FADDP */
10973                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10974                 break;
10975             case 0x5e: /* FMAXP */
10976                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10977                 break;
10978             case 0x78: /* FMINNMP */
10979                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10980                 break;
10981             case 0x7e: /* FMINP */
10982                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10983                 break;
10984             default:
10985                 g_assert_not_reached();
10986             }
10987 
10988             /* The FP ops were emitted above; for the integer ops, call genfn now */
10989             if (genfn) {
10990                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
10991             }
10992         }
10993 
10994         for (pass = 0; pass < maxpass; pass++) {
10995             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10996         }
10997         clear_vec_high(s, is_q, rd);
10998     }
10999 }
11000 
11001 /* Floating point op subgroup of C3.6.16. */
11002 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
11003 {
11004     /* For floating point ops, the U, size[1] and opcode bits
11005      * together indicate the operation. size[0] indicates single
11006      * or double.
11007      */
11008     int fpopcode = extract32(insn, 11, 5)
11009         | (extract32(insn, 23, 1) << 5)
11010         | (extract32(insn, 29, 1) << 6);
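    /* For example, FADDP: opcode 0x1a with U = 1 and size[1] = 0
     * yields fpopcode 0x5a.
     */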
11011     int is_q = extract32(insn, 30, 1);
11012     int size = extract32(insn, 22, 1);
11013     int rm = extract32(insn, 16, 5);
11014     int rn = extract32(insn, 5, 5);
11015     int rd = extract32(insn, 0, 5);
11016 
11017     int datasize = is_q ? 128 : 64;
11018     int esize = 32 << size;
11019     int elements = datasize / esize;
11020 
11021     if (size == 1 && !is_q) {
11022         unallocated_encoding(s);
11023         return;
11024     }
11025 
11026     switch (fpopcode) {
11027     case 0x58: /* FMAXNMP */
11028     case 0x5a: /* FADDP */
11029     case 0x5e: /* FMAXP */
11030     case 0x78: /* FMINNMP */
11031     case 0x7e: /* FMINP */
11032         if (size && !is_q) {
11033             unallocated_encoding(s);
11034             return;
11035         }
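        /*
         * Re-dispatch through the shared pairwise handler; it keys on
         * the full fpopcode (0x58 etc) rather than the 5-bit opcode.
         */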
11036         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
11037                                rn, rm, rd);
11038         return;
11039     case 0x1b: /* FMULX */
11040     case 0x1f: /* FRECPS */
11041     case 0x3f: /* FRSQRTS */
11042     case 0x5d: /* FACGE */
11043     case 0x7d: /* FACGT */
11044     case 0x19: /* FMLA */
11045     case 0x39: /* FMLS */
11046     case 0x18: /* FMAXNM */
11047     case 0x1a: /* FADD */
11048     case 0x1c: /* FCMEQ */
11049     case 0x1e: /* FMAX */
11050     case 0x38: /* FMINNM */
11051     case 0x3a: /* FSUB */
11052     case 0x3e: /* FMIN */
11053     case 0x5b: /* FMUL */
11054     case 0x5c: /* FCMGE */
11055     case 0x5f: /* FDIV */
11056     case 0x7a: /* FABD */
11057     case 0x7c: /* FCMGT */
11058         if (!fp_access_check(s)) {
11059             return;
11060         }
11061         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
11062         return;
11063 
11064     case 0x1d: /* FMLAL  */
11065     case 0x3d: /* FMLSL  */
11066     case 0x59: /* FMLAL2 */
11067     case 0x79: /* FMLSL2 */
11068         if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
11069             unallocated_encoding(s);
11070             return;
11071         }
11072         if (fp_access_check(s)) {
11073             int is_s = extract32(insn, 23, 1);
11074             int is_2 = extract32(insn, 29, 1);
11075             int data = (is_2 << 1) | is_s;
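            /*
             * data bit 0 is S (selecting the FMLSL forms) and bit 1
             * the '2' (second/high half) forms, as decoded by the
             * fmlal helper.
             */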
11076             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
11077                                vec_full_reg_offset(s, rn),
11078                                vec_full_reg_offset(s, rm), cpu_env,
11079                                is_q ? 16 : 8, vec_full_reg_size(s),
11080                                data, gen_helper_gvec_fmlal_a64);
11081         }
11082         return;
11083 
11084     default:
11085         unallocated_encoding(s);
11086         return;
11087     }
11088 }
11089 
11090 /* Integer op subgroup of C3.6.16. */
11091 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
11092 {
11093     int is_q = extract32(insn, 30, 1);
11094     int u = extract32(insn, 29, 1);
11095     int size = extract32(insn, 22, 2);
11096     int opcode = extract32(insn, 11, 5);
11097     int rm = extract32(insn, 16, 5);
11098     int rn = extract32(insn, 5, 5);
11099     int rd = extract32(insn, 0, 5);
11100     int pass;
11101     TCGCond cond;
11102 
11103     switch (opcode) {
11104     case 0x13: /* MUL, PMUL */
11105         if (u && size != 0) {
11106             unallocated_encoding(s);
11107             return;
11108         }
11109         /* fall through */
11110     case 0x0: /* SHADD, UHADD */
11111     case 0x2: /* SRHADD, URHADD */
11112     case 0x4: /* SHSUB, UHSUB */
11113     case 0xc: /* SMAX, UMAX */
11114     case 0xd: /* SMIN, UMIN */
11115     case 0xe: /* SABD, UABD */
11116     case 0xf: /* SABA, UABA */
11117     case 0x12: /* MLA, MLS */
11118         if (size == 3) {
11119             unallocated_encoding(s);
11120             return;
11121         }
11122         break;
11123     case 0x16: /* SQDMULH, SQRDMULH */
11124         if (size == 0 || size == 3) {
11125             unallocated_encoding(s);
11126             return;
11127         }
11128         break;
11129     default:
11130         if (size == 3 && !is_q) {
11131             unallocated_encoding(s);
11132             return;
11133         }
11134         break;
11135     }
11136 
11137     if (!fp_access_check(s)) {
11138         return;
11139     }
11140 
11141     switch (opcode) {
11142     case 0x01: /* SQADD, UQADD */
11143         if (u) {
11144             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
11145         } else {
11146             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
11147         }
11148         return;
11149     case 0x05: /* SQSUB, UQSUB */
11150         if (u) {
11151             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
11152         } else {
11153             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
11154         }
11155         return;
11156     case 0x08: /* SSHL, USHL */
11157         if (u) {
11158             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
11159         } else {
11160             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
11161         }
11162         return;
11163     case 0x0c: /* SMAX, UMAX */
11164         if (u) {
11165             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
11166         } else {
11167             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
11168         }
11169         return;
11170     case 0x0d: /* SMIN, UMIN */
11171         if (u) {
11172             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
11173         } else {
11174             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
11175         }
11176         return;
11177     case 0xe: /* SABD, UABD */
11178         if (u) {
11179             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
11180         } else {
11181             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
11182         }
11183         return;
11184     case 0xf: /* SABA, UABA */
11185         if (u) {
11186             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
11187         } else {
11188             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
11189         }
11190         return;
11191     case 0x10: /* ADD, SUB */
11192         if (u) {
11193             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
11194         } else {
11195             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
11196         }
11197         return;
11198     case 0x13: /* MUL, PMUL */
11199         if (!u) { /* MUL */
11200             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
11201         } else {  /* PMUL */
11202             gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
11203         }
11204         return;
11205     case 0x12: /* MLA, MLS */
11206         if (u) {
11207             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
11208         } else {
11209             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
11210         }
11211         return;
11212     case 0x16: /* SQDMULH, SQRDMULH */
11213         {
11214             static gen_helper_gvec_3_ptr * const fns[2][2] = {
11215                 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
11216                 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
11217             };
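            /*
             * size 0 and 3 were rejected above, so size - 1 selects
             * the _h or _s helper here.
             */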
11218             gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
11219         }
11220         return;
11221     case 0x11:
11222         if (!u) { /* CMTST */
11223             gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
11224             return;
11225         }
11226         /* else CMEQ */
11227         cond = TCG_COND_EQ;
11228         goto do_gvec_cmp;
11229     case 0x06: /* CMGT, CMHI */
11230         cond = u ? TCG_COND_GTU : TCG_COND_GT;
11231         goto do_gvec_cmp;
11232     case 0x07: /* CMGE, CMHS */
11233         cond = u ? TCG_COND_GEU : TCG_COND_GE;
11234     do_gvec_cmp:
11235         tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
11236                          vec_full_reg_offset(s, rn),
11237                          vec_full_reg_offset(s, rm),
11238                          is_q ? 16 : 8, vec_full_reg_size(s));
11239         return;
11240     }
11241 
11242     if (size == 3) {
11243         assert(is_q);
11244         for (pass = 0; pass < 2; pass++) {
11245             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11246             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11247             TCGv_i64 tcg_res = tcg_temp_new_i64();
11248 
11249             read_vec_element(s, tcg_op1, rn, pass, MO_64);
11250             read_vec_element(s, tcg_op2, rm, pass, MO_64);
11251 
11252             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
11253 
11254             write_vec_element(s, tcg_res, rd, pass, MO_64);
11255         }
11256     } else {
11257         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11258             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11259             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11260             TCGv_i32 tcg_res = tcg_temp_new_i32();
11261             NeonGenTwoOpFn *genfn = NULL;
11262             NeonGenTwoOpEnvFn *genenvfn = NULL;
11263 
11264             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
11265             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
11266 
11267             switch (opcode) {
11268             case 0x0: /* SHADD, UHADD */
11269             {
11270                 static NeonGenTwoOpFn * const fns[3][2] = {
11271                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
11272                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
11273                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
11274                 };
11275                 genfn = fns[size][u];
11276                 break;
11277             }
11278             case 0x2: /* SRHADD, URHADD */
11279             {
11280                 static NeonGenTwoOpFn * const fns[3][2] = {
11281                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
11282                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
11283                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
11284                 };
11285                 genfn = fns[size][u];
11286                 break;
11287             }
11288             case 0x4: /* SHSUB, UHSUB */
11289             {
11290                 static NeonGenTwoOpFn * const fns[3][2] = {
11291                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
11292                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
11293                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
11294                 };
11295                 genfn = fns[size][u];
11296                 break;
11297             }
11298             case 0x9: /* SQSHL, UQSHL */
11299             {
11300                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11301                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
11302                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
11303                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
11304                 };
11305                 genenvfn = fns[size][u];
11306                 break;
11307             }
11308             case 0xa: /* SRSHL, URSHL */
11309             {
11310                 static NeonGenTwoOpFn * const fns[3][2] = {
11311                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
11312                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
11313                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
11314                 };
11315                 genfn = fns[size][u];
11316                 break;
11317             }
11318             case 0xb: /* SQRSHL, UQRSHL */
11319             {
11320                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
11321                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
11322                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
11323                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
11324                 };
11325                 genenvfn = fns[size][u];
11326                 break;
11327             }
11328             default:
11329                 g_assert_not_reached();
11330             }
11331 
11332             if (genenvfn) {
11333                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
11334             } else {
11335                 genfn(tcg_res, tcg_op1, tcg_op2);
11336             }
11337 
11338             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11339         }
11340     }
11341     clear_vec_high(s, is_q, rd);
11342 }
11343 
11344 /* AdvSIMD three same
11345  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11346  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11347  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11348  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11349  */
11350 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11351 {
11352     int opcode = extract32(insn, 11, 5);
11353 
11354     switch (opcode) {
11355     case 0x3: /* logic ops */
11356         disas_simd_3same_logic(s, insn);
11357         break;
11358     case 0x17: /* ADDP */
11359     case 0x14: /* SMAXP, UMAXP */
11360     case 0x15: /* SMINP, UMINP */
11361     {
11362         /* Pairwise operations */
11363         int is_q = extract32(insn, 30, 1);
11364         int u = extract32(insn, 29, 1);
11365         int size = extract32(insn, 22, 2);
11366         int rm = extract32(insn, 16, 5);
11367         int rn = extract32(insn, 5, 5);
11368         int rd = extract32(insn, 0, 5);
11369         if (opcode == 0x17) {
11370             if (u || (size == 3 && !is_q)) {
11371                 unallocated_encoding(s);
11372                 return;
11373             }
11374         } else {
11375             if (size == 3) {
11376                 unallocated_encoding(s);
11377                 return;
11378             }
11379         }
11380         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11381         break;
11382     }
11383     case 0x18 ... 0x31:
11384         /* floating point ops: size[1] and U are part of the opcode */
11385         disas_simd_3same_float(s, insn);
11386         break;
11387     default:
11388         disas_simd_3same_int(s, insn);
11389         break;
11390     }
11391 }
11392 
11393 /*
11394  * Advanced SIMD three same (ARMv8.2 FP16 variants)
11395  *
11396  *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11397  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11398  * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11399  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11400  *
11401  * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11402  * (register), FACGE, FABD, FCMGT (register) and FACGT.
11403  *
11404  */
11405 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11406 {
11407     int opcode = extract32(insn, 11, 3);
11408     int u = extract32(insn, 29, 1);
11409     int a = extract32(insn, 23, 1);
11410     int is_q = extract32(insn, 30, 1);
11411     int rm = extract32(insn, 16, 5);
11412     int rn = extract32(insn, 5, 5);
11413     int rd = extract32(insn, 0, 5);
11414     /*
11415      * For these floating point ops, the U, a and opcode bits
11416      * together indicate the operation.
11417      */
11418     int fpopcode = opcode | (a << 3) | (u << 4);
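    /*
     * e.g. FADDP has U=1, a=0 and opcode 0b010, giving fpopcode 0x12;
     * FACGT has U=1, a=1 and opcode 0b101, giving 0x1d.
     */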
11419     int datasize = is_q ? 128 : 64;
11420     int elements = datasize / 16;
11421     bool pairwise;
11422     TCGv_ptr fpst;
11423     int pass;
11424 
11425     switch (fpopcode) {
11426     case 0x0: /* FMAXNM */
11427     case 0x1: /* FMLA */
11428     case 0x2: /* FADD */
11429     case 0x3: /* FMULX */
11430     case 0x4: /* FCMEQ */
11431     case 0x6: /* FMAX */
11432     case 0x7: /* FRECPS */
11433     case 0x8: /* FMINNM */
11434     case 0x9: /* FMLS */
11435     case 0xa: /* FSUB */
11436     case 0xe: /* FMIN */
11437     case 0xf: /* FRSQRTS */
11438     case 0x13: /* FMUL */
11439     case 0x14: /* FCMGE */
11440     case 0x15: /* FACGE */
11441     case 0x17: /* FDIV */
11442     case 0x1a: /* FABD */
11443     case 0x1c: /* FCMGT */
11444     case 0x1d: /* FACGT */
11445         pairwise = false;
11446         break;
11447     case 0x10: /* FMAXNMP */
11448     case 0x12: /* FADDP */
11449     case 0x16: /* FMAXP */
11450     case 0x18: /* FMINNMP */
11451     case 0x1e: /* FMINP */
11452         pairwise = true;
11453         break;
11454     default:
11455         unallocated_encoding(s);
11456         return;
11457     }
11458 
11459     if (!dc_isar_feature(aa64_fp16, s)) {
11460         unallocated_encoding(s);
11461         return;
11462     }
11463 
11464     if (!fp_access_check(s)) {
11465         return;
11466     }
11467 
11468     fpst = fpstatus_ptr(FPST_FPCR_F16);
11469 
11470     if (pairwise) {
11471         int maxpass = is_q ? 8 : 4;
11472         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11473         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11474         TCGv_i32 tcg_res[8];
11475 
11476         for (pass = 0; pass < maxpass; pass++) {
11477             int passreg = pass < (maxpass / 2) ? rn : rm;
11478             int passelt = (pass << 1) & (maxpass - 1);
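            /*
             * The first maxpass/2 passes pair up adjacent elements of
             * Rn and the rest pair up Rm: e.g. with Q=1, pass 5 reads
             * Rm elements 2 and 3.
             */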
11479 
11480             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11481             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11482             tcg_res[pass] = tcg_temp_new_i32();
11483 
11484             switch (fpopcode) {
11485             case 0x10: /* FMAXNMP */
11486                 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11487                                            fpst);
11488                 break;
11489             case 0x12: /* FADDP */
11490                 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11491                 break;
11492             case 0x16: /* FMAXP */
11493                 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11494                 break;
11495             case 0x18: /* FMINNMP */
11496                 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11497                                            fpst);
11498                 break;
11499             case 0x1e: /* FMINP */
11500                 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11501                 break;
11502             default:
11503                 g_assert_not_reached();
11504             }
11505         }
11506 
11507         for (pass = 0; pass < maxpass; pass++) {
11508             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11509         }
11510     } else {
11511         for (pass = 0; pass < elements; pass++) {
11512             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11513             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11514             TCGv_i32 tcg_res = tcg_temp_new_i32();
11515 
11516             read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11517             read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11518 
11519             switch (fpopcode) {
11520             case 0x0: /* FMAXNM */
11521                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11522                 break;
11523             case 0x1: /* FMLA */
11524                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11525                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11526                                            fpst);
11527                 break;
11528             case 0x2: /* FADD */
11529                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11530                 break;
11531             case 0x3: /* FMULX */
11532                 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11533                 break;
11534             case 0x4: /* FCMEQ */
11535                 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11536                 break;
11537             case 0x6: /* FMAX */
11538                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11539                 break;
11540             case 0x7: /* FRECPS */
11541                 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11542                 break;
11543             case 0x8: /* FMINNM */
11544                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11545                 break;
11546             case 0x9: /* FMLS */
11547                 /* As usual for ARM, separate negation for fused multiply-add */
11548                 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11549                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11550                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11551                                            fpst);
11552                 break;
11553             case 0xa: /* FSUB */
11554                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11555                 break;
11556             case 0xe: /* FMIN */
11557                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11558                 break;
11559             case 0xf: /* FRSQRTS */
11560                 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11561                 break;
11562             case 0x13: /* FMUL */
11563                 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11564                 break;
11565             case 0x14: /* FCMGE */
11566                 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11567                 break;
11568             case 0x15: /* FACGE */
11569                 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11570                 break;
11571             case 0x17: /* FDIV */
11572                 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11573                 break;
11574             case 0x1a: /* FABD */
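                /* abs(a - b): subtract, then clear the f16 sign bit */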
11575                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11576                 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
11577                 break;
11578             case 0x1c: /* FCMGT */
11579                 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11580                 break;
11581             case 0x1d: /* FACGT */
11582                 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11583                 break;
11584             default:
11585                 g_assert_not_reached();
11586             }
11587 
11588             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11589         }
11590     }
11591 
11592     clear_vec_high(s, is_q, rd);
11593 }
11594 
11595 /* AdvSIMD three same extra
11596  *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
11597  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11598  * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
11599  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11600  */
11601 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
11602 {
11603     int rd = extract32(insn, 0, 5);
11604     int rn = extract32(insn, 5, 5);
11605     int opcode = extract32(insn, 11, 4);
11606     int rm = extract32(insn, 16, 5);
11607     int size = extract32(insn, 22, 2);
11608     bool u = extract32(insn, 29, 1);
11609     bool is_q = extract32(insn, 30, 1);
11610     bool feature;
11611     int rot;
11612 
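    /*
     * The switch key is U:opcode, so e.g. SDOT (U=0, opcode 0x2) is
     * case 0x02 while UDOT (U=1) is case 0x12.
     */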
11613     switch (u * 16 + opcode) {
11614     case 0x10: /* SQRDMLAH (vector) */
11615     case 0x11: /* SQRDMLSH (vector) */
11616         if (size != 1 && size != 2) {
11617             unallocated_encoding(s);
11618             return;
11619         }
11620         feature = dc_isar_feature(aa64_rdm, s);
11621         break;
11622     case 0x02: /* SDOT (vector) */
11623     case 0x12: /* UDOT (vector) */
11624         if (size != MO_32) {
11625             unallocated_encoding(s);
11626             return;
11627         }
11628         feature = dc_isar_feature(aa64_dp, s);
11629         break;
11630     case 0x03: /* USDOT */
11631         if (size != MO_32) {
11632             unallocated_encoding(s);
11633             return;
11634         }
11635         feature = dc_isar_feature(aa64_i8mm, s);
11636         break;
11637     case 0x04: /* SMMLA */
11638     case 0x14: /* UMMLA */
11639     case 0x05: /* USMMLA */
11640         if (!is_q || size != MO_32) {
11641             unallocated_encoding(s);
11642             return;
11643         }
11644         feature = dc_isar_feature(aa64_i8mm, s);
11645         break;
11646     case 0x18: /* FCMLA, #0 */
11647     case 0x19: /* FCMLA, #90 */
11648     case 0x1a: /* FCMLA, #180 */
11649     case 0x1b: /* FCMLA, #270 */
11650     case 0x1c: /* FCADD, #90 */
11651     case 0x1e: /* FCADD, #270 */
11652         if (size == 0
11653             || (size == 1 && !dc_isar_feature(aa64_fp16, s))
11654             || (size == 3 && !is_q)) {
11655             unallocated_encoding(s);
11656             return;
11657         }
11658         feature = dc_isar_feature(aa64_fcma, s);
11659         break;
11660     case 0x1d: /* BFMMLA */
11661         if (size != MO_16 || !is_q) {
11662             unallocated_encoding(s);
11663             return;
11664         }
11665         feature = dc_isar_feature(aa64_bf16, s);
11666         break;
11667     case 0x1f:
11668         switch (size) {
11669         case 1: /* BFDOT */
11670         case 3: /* BFMLAL{B,T} */
11671             feature = dc_isar_feature(aa64_bf16, s);
11672             break;
11673         default:
11674             unallocated_encoding(s);
11675             return;
11676         }
11677         break;
11678     default:
11679         unallocated_encoding(s);
11680         return;
11681     }
11682     if (!feature) {
11683         unallocated_encoding(s);
11684         return;
11685     }
11686     if (!fp_access_check(s)) {
11687         return;
11688     }
11689 
11690     switch (opcode) {
11691     case 0x0: /* SQRDMLAH (vector) */
11692         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
11693         return;
11694 
11695     case 0x1: /* SQRDMLSH (vector) */
11696         gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
11697         return;
11698 
11699     case 0x2: /* SDOT / UDOT */
11700         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
11701                          u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
11702         return;
11703 
11704     case 0x3: /* USDOT */
11705         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
11706         return;
11707 
11708     case 0x04: /* SMMLA, UMMLA */
11709         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
11710                          u ? gen_helper_gvec_ummla_b
11711                          : gen_helper_gvec_smmla_b);
11712         return;
11713     case 0x05: /* USMMLA */
11714         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
11715         return;
11716 
11717     case 0x8: /* FCMLA, #0 */
11718     case 0x9: /* FCMLA, #90 */
11719     case 0xa: /* FCMLA, #180 */
11720     case 0xb: /* FCMLA, #270 */
11721         rot = extract32(opcode, 0, 2);
11722         switch (size) {
11723         case 1:
11724             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
11725                               gen_helper_gvec_fcmlah);
11726             break;
11727         case 2:
11728             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11729                               gen_helper_gvec_fcmlas);
11730             break;
11731         case 3:
11732             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
11733                               gen_helper_gvec_fcmlad);
11734             break;
11735         default:
11736             g_assert_not_reached();
11737         }
11738         return;
11739 
11740     case 0xc: /* FCADD, #90 */
11741     case 0xe: /* FCADD, #270 */
11742         rot = extract32(opcode, 1, 1);
11743         switch (size) {
11744         case 1:
11745             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11746                               gen_helper_gvec_fcaddh);
11747             break;
11748         case 2:
11749             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11750                               gen_helper_gvec_fcadds);
11751             break;
11752         case 3:
11753             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11754                               gen_helper_gvec_fcaddd);
11755             break;
11756         default:
11757             g_assert_not_reached();
11758         }
11759         return;
11760 
11761     case 0xd: /* BFMMLA */
11762         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
11763         return;
11764     case 0xf:
11765         switch (size) {
11766         case 1: /* BFDOT */
11767             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
11768             break;
11769         case 3: /* BFMLAL{B,T} */
11770             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
11771                               gen_helper_gvec_bfmlal);
11772             break;
11773         default:
11774             g_assert_not_reached();
11775         }
11776         return;
11777 
11778     default:
11779         g_assert_not_reached();
11780     }
11781 }
11782 
11783 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11784                                   int size, int rn, int rd)
11785 {
11786     /* Handle 2-reg-misc ops which are widening (so each size element
11787      * in the source becomes a 2*size element in the destination.
11788      * The only instruction like this is FCVTL.
11789      */
11790     int pass;
11791 
11792     if (size == 3) {
11793         /* 32 -> 64 bit fp conversion */
11794         TCGv_i64 tcg_res[2];
11795         int srcelt = is_q ? 2 : 0;
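        /* FCVTL converts the low two S elements; FCVTL2 (Q=1) the high two */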
11796 
11797         for (pass = 0; pass < 2; pass++) {
11798             TCGv_i32 tcg_op = tcg_temp_new_i32();
11799             tcg_res[pass] = tcg_temp_new_i64();
11800 
11801             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11802             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
11803         }
11804         for (pass = 0; pass < 2; pass++) {
11805             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11806         }
11807     } else {
11808         /* 16 -> 32 bit fp conversion */
11809         int srcelt = is_q ? 4 : 0;
11810         TCGv_i32 tcg_res[4];
11811         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
11812         TCGv_i32 ahp = get_ahp_flag();
11813 
11814         for (pass = 0; pass < 4; pass++) {
11815             tcg_res[pass] = tcg_temp_new_i32();
11816 
11817             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11818             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11819                                            fpst, ahp);
11820         }
11821         for (pass = 0; pass < 4; pass++) {
11822             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11823         }
11824     }
11825 }
11826 
11827 static void handle_rev(DisasContext *s, int opcode, bool u,
11828                        bool is_q, int size, int rn, int rd)
11829 {
11830     int op = (opcode << 1) | u;
11831     int opsz = op + size;
11832     int grp_size = 3 - opsz;
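    /*
     * grp_size is log2 of the reversal group in bytes: e.g. REV64
     * (op 0) of byte elements (size 0) gives grp_size 3, i.e. byte
     * reversal within each 8-byte group.
     */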
11833     int dsize = is_q ? 128 : 64;
11834     int i;
11835 
11836     if (opsz >= 3) {
11837         unallocated_encoding(s);
11838         return;
11839     }
11840 
11841     if (!fp_access_check(s)) {
11842         return;
11843     }
11844 
11845     if (size == 0) {
11846         /* Special case bytes, use bswap op on each group of elements */
11847         int groups = dsize / (8 << grp_size);
11848 
11849         for (i = 0; i < groups; i++) {
11850             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11851 
11852             read_vec_element(s, tcg_tmp, rn, i, grp_size);
11853             switch (grp_size) {
11854             case MO_16:
11855                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11856                 break;
11857             case MO_32:
11858                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
11859                 break;
11860             case MO_64:
11861                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11862                 break;
11863             default:
11864                 g_assert_not_reached();
11865             }
11866             write_vec_element(s, tcg_tmp, rd, i, grp_size);
11867         }
11868         clear_vec_high(s, is_q, rd);
11869     } else {
11870         int revmask = (1 << grp_size) - 1;
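        /*
         * e.g. REV32 of 16-bit elements: grp_size 1, revmask 1, so each
         * element swaps with its neighbour within a 32-bit group.
         */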
11871         int esize = 8 << size;
11872         int elements = dsize / esize;
11873         TCGv_i64 tcg_rn = tcg_temp_new_i64();
11874         TCGv_i64 tcg_rd[2];
11875 
11876         for (i = 0; i < 2; i++) {
11877             tcg_rd[i] = tcg_temp_new_i64();
11878             tcg_gen_movi_i64(tcg_rd[i], 0);
11879         }
11880 
11881         for (i = 0; i < elements; i++) {
11882             int e_rev = (i & 0xf) ^ revmask;
11883             int w = (e_rev * esize) / 64;
11884             int o = (e_rev * esize) % 64;
11885 
11886             read_vec_element(s, tcg_rn, rn, i, size);
11887             tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
11888         }
11889 
11890         for (i = 0; i < 2; i++) {
11891             write_vec_element(s, tcg_rd[i], rd, i, MO_64);
11892         }
11893         clear_vec_high(s, true, rd);
11894     }
11895 }
11896 
11897 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11898                                   bool is_q, int size, int rn, int rd)
11899 {
11900     /* Implement the pairwise operations from 2-misc:
11901      * SADDLP, UADDLP, SADALP, UADALP.
11902      * These all add pairs of elements in the input to produce a
11903      * double-width result element in the output (possibly accumulating).
11904      */
11905     bool accum = (opcode == 0x6);
11906     int maxpass = is_q ? 2 : 1;
11907     int pass;
11908     TCGv_i64 tcg_res[2];
11909 
11910     if (size == 2) {
11911         /* 32 + 32 -> 64 op */
11912         MemOp memop = size + (u ? 0 : MO_SIGN);
11913 
11914         for (pass = 0; pass < maxpass; pass++) {
11915             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11916             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11917 
11918             tcg_res[pass] = tcg_temp_new_i64();
11919 
11920             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11921             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11922             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11923             if (accum) {
11924                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
11925                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11926             }
11927         }
11928     } else {
11929         for (pass = 0; pass < maxpass; pass++) {
11930             TCGv_i64 tcg_op = tcg_temp_new_i64();
11931             NeonGenOne64OpFn *genfn;
11932             static NeonGenOne64OpFn * const fns[2][2] = {
11933                 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
11934                 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
11935             };
11936 
11937             genfn = fns[size][u];
11938 
11939             tcg_res[pass] = tcg_temp_new_i64();
11940 
11941             read_vec_element(s, tcg_op, rn, pass, MO_64);
11942             genfn(tcg_res[pass], tcg_op);
11943 
11944             if (accum) {
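                /*
                 * The result lanes stay packed in the i64, so
                 * accumulate with the lane-wise packed add helpers.
                 */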
11945                 read_vec_element(s, tcg_op, rd, pass, MO_64);
11946                 if (size == 0) {
11947                     gen_helper_neon_addl_u16(tcg_res[pass],
11948                                              tcg_res[pass], tcg_op);
11949                 } else {
11950                     gen_helper_neon_addl_u32(tcg_res[pass],
11951                                              tcg_res[pass], tcg_op);
11952                 }
11953             }
11954         }
11955     }
11956     if (!is_q) {
11957         tcg_res[1] = tcg_constant_i64(0);
11958     }
11959     for (pass = 0; pass < 2; pass++) {
11960         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11961     }
11962 }
11963 
11964 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11965 {
11966     /* Implement SHLL and SHLL2 */
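    /*
     * The shift count is implicitly the source element width, e.g.
     * SHLL V0.8H, V1.8B, #8 (the "8 << size" below).
     */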
11967     int pass;
11968     int part = is_q ? 2 : 0;
11969     TCGv_i64 tcg_res[2];
11970 
11971     for (pass = 0; pass < 2; pass++) {
11972         static NeonGenWidenFn * const widenfns[3] = {
11973             gen_helper_neon_widen_u8,
11974             gen_helper_neon_widen_u16,
11975             tcg_gen_extu_i32_i64,
11976         };
11977         NeonGenWidenFn *widenfn = widenfns[size];
11978         TCGv_i32 tcg_op = tcg_temp_new_i32();
11979 
11980         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11981         tcg_res[pass] = tcg_temp_new_i64();
11982         widenfn(tcg_res[pass], tcg_op);
11983         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11984     }
11985 
11986     for (pass = 0; pass < 2; pass++) {
11987         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11988     }
11989 }
11990 
11991 /* AdvSIMD two reg misc
11992  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
11993  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11994  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11995  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11996  */
11997 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11998 {
11999     int size = extract32(insn, 22, 2);
12000     int opcode = extract32(insn, 12, 5);
12001     bool u = extract32(insn, 29, 1);
12002     bool is_q = extract32(insn, 30, 1);
12003     int rn = extract32(insn, 5, 5);
12004     int rd = extract32(insn, 0, 5);
12005     bool need_fpstatus = false;
12006     int rmode = -1;
12007     TCGv_i32 tcg_rmode;
12008     TCGv_ptr tcg_fpstatus;
12009 
12010     switch (opcode) {
12011     case 0x0: /* REV64, REV32 */
12012     case 0x1: /* REV16 */
12013         handle_rev(s, opcode, u, is_q, size, rn, rd);
12014         return;
12015     case 0x5: /* CNT, NOT, RBIT */
12016         if (u && size == 0) {
12017             /* NOT */
12018             break;
12019         } else if (u && size == 1) {
12020             /* RBIT */
12021             break;
12022         } else if (!u && size == 0) {
12023             /* CNT */
12024             break;
12025         }
12026         unallocated_encoding(s);
12027         return;
12028     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
12029     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
12030         if (size == 3) {
12031             unallocated_encoding(s);
12032             return;
12033         }
12034         if (!fp_access_check(s)) {
12035             return;
12036         }
12037 
12038         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
12039         return;
12040     case 0x4: /* CLS, CLZ */
12041         if (size == 3) {
12042             unallocated_encoding(s);
12043             return;
12044         }
12045         break;
12046     case 0x2: /* SADDLP, UADDLP */
12047     case 0x6: /* SADALP, UADALP */
12048         if (size == 3) {
12049             unallocated_encoding(s);
12050             return;
12051         }
12052         if (!fp_access_check(s)) {
12053             return;
12054         }
12055         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
12056         return;
12057     case 0x13: /* SHLL, SHLL2 */
12058         if (u == 0 || size == 3) {
12059             unallocated_encoding(s);
12060             return;
12061         }
12062         if (!fp_access_check(s)) {
12063             return;
12064         }
12065         handle_shll(s, is_q, size, rn, rd);
12066         return;
12067     case 0xa: /* CMLT */
12068         if (u == 1) {
12069             unallocated_encoding(s);
12070             return;
12071         }
12072         /* fall through */
12073     case 0x8: /* CMGT, CMGE */
12074     case 0x9: /* CMEQ, CMLE */
12075     case 0xb: /* ABS, NEG */
12076         if (size == 3 && !is_q) {
12077             unallocated_encoding(s);
12078             return;
12079         }
12080         break;
12081     case 0x3: /* SUQADD, USQADD */
12082         if (size == 3 && !is_q) {
12083             unallocated_encoding(s);
12084             return;
12085         }
12086         if (!fp_access_check(s)) {
12087             return;
12088         }
12089         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
12090         return;
12091     case 0x7: /* SQABS, SQNEG */
12092         if (size == 3 && !is_q) {
12093             unallocated_encoding(s);
12094             return;
12095         }
12096         break;
12097     case 0xc ... 0xf:
12098     case 0x16 ... 0x1f:
12099     {
12100         /* Floating point: U, size[1] and opcode indicate operation;
12101          * size[0] indicates single or double precision.
12102          */
12103         int is_double = extract32(size, 0, 1);
12104         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
12105         size = is_double ? 3 : 2;
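        /* e.g. FNEG: U=1, size<1>=1, opcode 0b01111, i.e. 0x6f below */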
12106         switch (opcode) {
12107         case 0x2f: /* FABS */
12108         case 0x6f: /* FNEG */
12109             if (size == 3 && !is_q) {
12110                 unallocated_encoding(s);
12111                 return;
12112             }
12113             break;
12114         case 0x1d: /* SCVTF */
12115         case 0x5d: /* UCVTF */
12116         {
12117             bool is_signed = (opcode == 0x1d);
12118             int elements = is_double ? 2 : is_q ? 4 : 2;
12119             if (is_double && !is_q) {
12120                 unallocated_encoding(s);
12121                 return;
12122             }
12123             if (!fp_access_check(s)) {
12124                 return;
12125             }
12126             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12127             return;
12128         }
12129         case 0x2c: /* FCMGT (zero) */
12130         case 0x2d: /* FCMEQ (zero) */
12131         case 0x2e: /* FCMLT (zero) */
12132         case 0x6c: /* FCMGE (zero) */
12133         case 0x6d: /* FCMLE (zero) */
12134             if (size == 3 && !is_q) {
12135                 unallocated_encoding(s);
12136                 return;
12137             }
12138             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12139             return;
12140         case 0x7f: /* FSQRT */
12141             if (size == 3 && !is_q) {
12142                 unallocated_encoding(s);
12143                 return;
12144             }
12145             break;
12146         case 0x1a: /* FCVTNS */
12147         case 0x1b: /* FCVTMS */
12148         case 0x3a: /* FCVTPS */
12149         case 0x3b: /* FCVTZS */
12150         case 0x5a: /* FCVTNU */
12151         case 0x5b: /* FCVTMU */
12152         case 0x7a: /* FCVTPU */
12153         case 0x7b: /* FCVTZU */
12154             need_fpstatus = true;
12155             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
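            /*
             * Bits <5,0> of the merged opcode select the rounding mode:
             * e.g. FCVTMS (0x1b) yields FPROUNDING_NEGINF and FCVTZU
             * (0x7b) yields FPROUNDING_ZERO.
             */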
12156             if (size == 3 && !is_q) {
12157                 unallocated_encoding(s);
12158                 return;
12159             }
12160             break;
12161         case 0x5c: /* FCVTAU */
12162         case 0x1c: /* FCVTAS */
12163             need_fpstatus = true;
12164             rmode = FPROUNDING_TIEAWAY;
12165             if (size == 3 && !is_q) {
12166                 unallocated_encoding(s);
12167                 return;
12168             }
12169             break;
12170         case 0x3c: /* URECPE */
12171             if (size == 3) {
12172                 unallocated_encoding(s);
12173                 return;
12174             }
12175             /* fall through */
12176         case 0x3d: /* FRECPE */
12177         case 0x7d: /* FRSQRTE */
12178             if (size == 3 && !is_q) {
12179                 unallocated_encoding(s);
12180                 return;
12181             }
12182             if (!fp_access_check(s)) {
12183                 return;
12184             }
12185             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12186             return;
12187         case 0x56: /* FCVTXN, FCVTXN2 */
12188             if (size == 2) {
12189                 unallocated_encoding(s);
12190                 return;
12191             }
12192             /* fall through */
12193         case 0x16: /* FCVTN, FCVTN2 */
12194             /* handle_2misc_narrow does a 2*size -> size operation, but these
12195              * instructions encode the source size rather than dest size.
12196              */
12197             if (!fp_access_check(s)) {
12198                 return;
12199             }
12200             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12201             return;
12202         case 0x36: /* BFCVTN, BFCVTN2 */
12203             if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
12204                 unallocated_encoding(s);
12205                 return;
12206             }
12207             if (!fp_access_check(s)) {
12208                 return;
12209             }
12210             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12211             return;
12212         case 0x17: /* FCVTL, FCVTL2 */
12213             if (!fp_access_check(s)) {
12214                 return;
12215             }
12216             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12217             return;
12218         case 0x18: /* FRINTN */
12219         case 0x19: /* FRINTM */
12220         case 0x38: /* FRINTP */
12221         case 0x39: /* FRINTZ */
12222             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12223             /* fall through */
12224         case 0x59: /* FRINTX */
12225         case 0x79: /* FRINTI */
12226             need_fpstatus = true;
12227             if (size == 3 && !is_q) {
12228                 unallocated_encoding(s);
12229                 return;
12230             }
12231             break;
12232         case 0x58: /* FRINTA */
12233             rmode = FPROUNDING_TIEAWAY;
12234             need_fpstatus = true;
12235             if (size == 3 && !is_q) {
12236                 unallocated_encoding(s);
12237                 return;
12238             }
12239             break;
12240         case 0x7c: /* URSQRTE */
12241             if (size == 3) {
12242                 unallocated_encoding(s);
12243                 return;
12244             }
12245             break;
12246         case 0x1e: /* FRINT32Z */
12247         case 0x1f: /* FRINT64Z */
12248             rmode = FPROUNDING_ZERO;
12249             /* fall through */
12250         case 0x5e: /* FRINT32X */
12251         case 0x5f: /* FRINT64X */
12252             need_fpstatus = true;
12253             if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12254                 unallocated_encoding(s);
12255                 return;
12256             }
12257             break;
12258         default:
12259             unallocated_encoding(s);
12260             return;
12261         }
12262         break;
12263     }
12264     default:
12265         unallocated_encoding(s);
12266         return;
12267     }
12268 
12269     if (!fp_access_check(s)) {
12270         return;
12271     }
12272 
12273     if (need_fpstatus || rmode >= 0) {
12274         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
12275     } else {
12276         tcg_fpstatus = NULL;
12277     }
12278     if (rmode >= 0) {
12279         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12280     } else {
12281         tcg_rmode = NULL;
12282     }
12283 
12284     switch (opcode) {
12285     case 0x5:
12286         if (u && size == 0) { /* NOT */
12287             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12288             return;
12289         }
12290         break;
12291     case 0x8: /* CMGT, CMGE */
12292         if (u) {
12293             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
12294         } else {
12295             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
12296         }
12297         return;
12298     case 0x9: /* CMEQ, CMLE */
12299         if (u) {
12300             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
12301         } else {
12302             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
12303         }
12304         return;
12305     case 0xa: /* CMLT */
12306         gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
12307         return;
12308     case 0xb: /* ABS, NEG */
12309         if (u) {
12310             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12311         } else {
12312             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12313         }
12314         return;
12315     }
12316 
12317     if (size == 3) {
12318         /* All 64-bit element operations can be shared with scalar 2misc */
12319         int pass;
12320 
12321         /* Coverity claims (size == 3 && !is_q) has been eliminated
12322          * from all paths leading to here.
12323          */
12324         tcg_debug_assert(is_q);
12325         for (pass = 0; pass < 2; pass++) {
12326             TCGv_i64 tcg_op = tcg_temp_new_i64();
12327             TCGv_i64 tcg_res = tcg_temp_new_i64();
12328 
12329             read_vec_element(s, tcg_op, rn, pass, MO_64);
12330 
12331             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12332                             tcg_rmode, tcg_fpstatus);
12333 
12334             write_vec_element(s, tcg_res, rd, pass, MO_64);
12335         }
12336     } else {
12337         int pass;
12338 
12339         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12340             TCGv_i32 tcg_op = tcg_temp_new_i32();
12341             TCGv_i32 tcg_res = tcg_temp_new_i32();
12342 
12343             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12344 
12345             if (size == 2) {
12346                 /* Special cases for 32 bit elements */
12347                 switch (opcode) {
12348                 case 0x4: /* CLS */
12349                     if (u) {
12350                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12351                     } else {
12352                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
12353                     }
12354                     break;
12355                 case 0x7: /* SQABS, SQNEG */
12356                     if (u) {
12357                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12358                     } else {
12359                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12360                     }
12361                     break;
12362                 case 0x2f: /* FABS */
12363                     gen_helper_vfp_abss(tcg_res, tcg_op);
12364                     break;
12365                 case 0x6f: /* FNEG */
12366                     gen_helper_vfp_negs(tcg_res, tcg_op);
12367                     break;
12368                 case 0x7f: /* FSQRT */
12369                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12370                     break;
12371                 case 0x1a: /* FCVTNS */
12372                 case 0x1b: /* FCVTMS */
12373                 case 0x1c: /* FCVTAS */
12374                 case 0x3a: /* FCVTPS */
12375                 case 0x3b: /* FCVTZS */
12376                     gen_helper_vfp_tosls(tcg_res, tcg_op,
12377                                          tcg_constant_i32(0), tcg_fpstatus);
12378                     break;
12379                 case 0x5a: /* FCVTNU */
12380                 case 0x5b: /* FCVTMU */
12381                 case 0x5c: /* FCVTAU */
12382                 case 0x7a: /* FCVTPU */
12383                 case 0x7b: /* FCVTZU */
12384                     gen_helper_vfp_touls(tcg_res, tcg_op,
12385                                          tcg_constant_i32(0), tcg_fpstatus);
12386                     break;
12387                 case 0x18: /* FRINTN */
12388                 case 0x19: /* FRINTM */
12389                 case 0x38: /* FRINTP */
12390                 case 0x39: /* FRINTZ */
12391                 case 0x58: /* FRINTA */
12392                 case 0x79: /* FRINTI */
12393                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12394                     break;
12395                 case 0x59: /* FRINTX */
12396                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12397                     break;
12398                 case 0x7c: /* URSQRTE */
12399                     gen_helper_rsqrte_u32(tcg_res, tcg_op);
12400                     break;
12401                 case 0x1e: /* FRINT32Z */
12402                 case 0x5e: /* FRINT32X */
12403                     gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12404                     break;
12405                 case 0x1f: /* FRINT64Z */
12406                 case 0x5f: /* FRINT64X */
12407                     gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12408                     break;
12409                 default:
12410                     g_assert_not_reached();
12411                 }
12412             } else {
12413                 /* Use helpers for 8 and 16 bit elements */
12414                 switch (opcode) {
12415                 case 0x5: /* CNT, RBIT */
12416                     /* For these two insns size is part of the opcode specifier
12417                      * (handled earlier); they always operate on byte elements.
12418                      */
12419                     if (u) {
12420                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12421                     } else {
12422                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12423                     }
12424                     break;
12425                 case 0x7: /* SQABS, SQNEG */
12426                 {
12427                     NeonGenOneOpEnvFn *genfn;
12428                     static NeonGenOneOpEnvFn * const fns[2][2] = {
12429                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12430                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12431                     };
12432                     genfn = fns[size][u];
12433                     genfn(tcg_res, cpu_env, tcg_op);
12434                     break;
12435                 }
12436                 case 0x4: /* CLS, CLZ */
12437                     if (u) {
12438                         if (size == 0) {
12439                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
12440                         } else {
12441                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
12442                         }
12443                     } else {
12444                         if (size == 0) {
12445                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
12446                         } else {
12447                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
12448                         }
12449                     }
12450                     break;
12451                 default:
12452                     g_assert_not_reached();
12453                 }
12454             }
12455 
12456             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12457         }
12458     }
12459     clear_vec_high(s, is_q, rd);
12460 
12461     if (tcg_rmode) {
12462         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12463     }
12464 }
12465 
12466 /* AdvSIMD [scalar] two register miscellaneous (FP16)
12467  *
12468  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
12469  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12470  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12471  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12472  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12473  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12474  *
12475  * This actually covers two groups where scalar access is governed by
12476  * bit 28. A bunch of the instructions (float to integral) only exist
12477  * in the vector form and are un-allocated for the scalar decode. Also
12478  * in the scalar decode Q is always 1.
12479  */
12480 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12481 {
12482     int fpop, opcode, a, u;
12483     int rn, rd;
12484     bool is_q;
12485     bool is_scalar;
12486     bool only_in_vector = false;
12487 
12488     int pass;
12489     TCGv_i32 tcg_rmode = NULL;
12490     TCGv_ptr tcg_fpstatus = NULL;
12491     bool need_fpst = true;
12492     int rmode = -1;
12493 
12494     if (!dc_isar_feature(aa64_fp16, s)) {
12495         unallocated_encoding(s);
12496         return;
12497     }
12498 
12499     rd = extract32(insn, 0, 5);
12500     rn = extract32(insn, 5, 5);
12501 
12502     a = extract32(insn, 23, 1);
12503     u = extract32(insn, 29, 1);
12504     is_scalar = extract32(insn, 28, 1);
12505     is_q = extract32(insn, 30, 1);
12506 
12507     opcode = extract32(insn, 12, 5);
12508     fpop = deposit32(opcode, 5, 1, a);
12509     fpop = deposit32(fpop, 6, 1, u);
12510 
12511     switch (fpop) {
12512     case 0x1d: /* SCVTF */
12513     case 0x5d: /* UCVTF */
12514     {
12515         int elements;
12516 
12517         if (is_scalar) {
12518             elements = 1;
12519         } else {
12520             elements = (is_q ? 8 : 4);
12521         }
12522 
12523         if (!fp_access_check(s)) {
12524             return;
12525         }
12526         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12527         return;
12528     }
12530     case 0x2c: /* FCMGT (zero) */
12531     case 0x2d: /* FCMEQ (zero) */
12532     case 0x2e: /* FCMLT (zero) */
12533     case 0x6c: /* FCMGE (zero) */
12534     case 0x6d: /* FCMLE (zero) */
12535         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12536         return;
12537     case 0x3d: /* FRECPE */
12538     case 0x3f: /* FRECPX */
12539         break;
12540     case 0x18: /* FRINTN */
12541         only_in_vector = true;
12542         rmode = FPROUNDING_TIEEVEN;
12543         break;
12544     case 0x19: /* FRINTM */
12545         only_in_vector = true;
12546         rmode = FPROUNDING_NEGINF;
12547         break;
12548     case 0x38: /* FRINTP */
12549         only_in_vector = true;
12550         rmode = FPROUNDING_POSINF;
12551         break;
12552     case 0x39: /* FRINTZ */
12553         only_in_vector = true;
12554         rmode = FPROUNDING_ZERO;
12555         break;
12556     case 0x58: /* FRINTA */
12557         only_in_vector = true;
12558         rmode = FPROUNDING_TIEAWAY;
12559         break;
12560     case 0x59: /* FRINTX */
12561     case 0x79: /* FRINTI */
12562         only_in_vector = true;
12563         /* current rounding mode */
12564         break;
12565     case 0x1a: /* FCVTNS */
12566         rmode = FPROUNDING_TIEEVEN;
12567         break;
12568     case 0x1b: /* FCVTMS */
12569         rmode = FPROUNDING_NEGINF;
12570         break;
12571     case 0x1c: /* FCVTAS */
12572         rmode = FPROUNDING_TIEAWAY;
12573         break;
12574     case 0x3a: /* FCVTPS */
12575         rmode = FPROUNDING_POSINF;
12576         break;
12577     case 0x3b: /* FCVTZS */
12578         rmode = FPROUNDING_ZERO;
12579         break;
12580     case 0x5a: /* FCVTNU */
12581         rmode = FPROUNDING_TIEEVEN;
12582         break;
12583     case 0x5b: /* FCVTMU */
12584         rmode = FPROUNDING_NEGINF;
12585         break;
12586     case 0x5c: /* FCVTAU */
12587         rmode = FPROUNDING_TIEAWAY;
12588         break;
12589     case 0x7a: /* FCVTPU */
12590         rmode = FPROUNDING_POSINF;
12591         break;
12592     case 0x7b: /* FCVTZU */
12593         rmode = FPROUNDING_ZERO;
12594         break;
12595     case 0x2f: /* FABS */
12596     case 0x6f: /* FNEG */
12597         need_fpst = false;
12598         break;
12599     case 0x7d: /* FRSQRTE */
12600     case 0x7f: /* FSQRT (vector) */
12601         break;
12602     default:
12603         unallocated_encoding(s);
12604         return;
12605     }
12606 
12608     /* Check additional constraints for the scalar encoding */
12609     if (is_scalar) {
12610         if (!is_q) {
12611             unallocated_encoding(s);
12612             return;
12613         }
12614         /* FRINTxx is only in the vector form */
12615         if (only_in_vector) {
12616             unallocated_encoding(s);
12617             return;
12618         }
12619     }
12620 
12621     if (!fp_access_check(s)) {
12622         return;
12623     }
12624 
12625     if (rmode >= 0 || need_fpst) {
12626         tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
12627     }
12628 
12629     if (rmode >= 0) {
12630         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12631     }
12632 
12633     if (is_scalar) {
12634         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12635         TCGv_i32 tcg_res = tcg_temp_new_i32();
12636 
12637         switch (fpop) {
12638         case 0x1a: /* FCVTNS */
12639         case 0x1b: /* FCVTMS */
12640         case 0x1c: /* FCVTAS */
12641         case 0x3a: /* FCVTPS */
12642         case 0x3b: /* FCVTZS */
12643             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12644             break;
12645         case 0x3d: /* FRECPE */
12646             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12647             break;
12648         case 0x3f: /* FRECPX */
12649             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12650             break;
12651         case 0x5a: /* FCVTNU */
12652         case 0x5b: /* FCVTMU */
12653         case 0x5c: /* FCVTAU */
12654         case 0x7a: /* FCVTPU */
12655         case 0x7b: /* FCVTZU */
12656             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12657             break;
12658         case 0x6f: /* FNEG */
12659             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12660             break;
12661         case 0x7d: /* FRSQRTE */
12662             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12663             break;
12664         default:
12665             g_assert_not_reached();
12666         }
12667 
12668         /* mask off any sign extension from the helper's result */
12669         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12670         write_fp_sreg(s, rd, tcg_res);
12671     } else {
12672         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12673             TCGv_i32 tcg_op = tcg_temp_new_i32();
12674             TCGv_i32 tcg_res = tcg_temp_new_i32();
12675 
12676             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12677 
12678             switch (fpop) {
12679             case 0x1a: /* FCVTNS */
12680             case 0x1b: /* FCVTMS */
12681             case 0x1c: /* FCVTAS */
12682             case 0x3a: /* FCVTPS */
12683             case 0x3b: /* FCVTZS */
12684                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12685                 break;
12686             case 0x3d: /* FRECPE */
12687                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12688                 break;
12689             case 0x5a: /* FCVTNU */
12690             case 0x5b: /* FCVTMU */
12691             case 0x5c: /* FCVTAU */
12692             case 0x7a: /* FCVTPU */
12693             case 0x7b: /* FCVTZU */
12694                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12695                 break;
12696             case 0x18: /* FRINTN */
12697             case 0x19: /* FRINTM */
12698             case 0x38: /* FRINTP */
12699             case 0x39: /* FRINTZ */
12700             case 0x58: /* FRINTA */
12701             case 0x79: /* FRINTI */
12702                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12703                 break;
12704             case 0x59: /* FRINTX */
12705                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12706                 break;
12707             case 0x2f: /* FABS */
12708                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12709                 break;
12710             case 0x6f: /* FNEG */
12711                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12712                 break;
12713             case 0x7d: /* FRSQRTE */
12714                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12715                 break;
12716             case 0x7f: /* FSQRT */
12717                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12718                 break;
12719             default:
12720                 g_assert_not_reached();
12721             }
12722 
12723             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12724         }
12725 
12726         clear_vec_high(s, is_q, rd);
12727     }
12728 
12729     if (tcg_rmode) {
12730         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12731     }
12732 }
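
/*
 * Illustrative sketch, not part of the decoder: how the fpop index used
 * by disas_simd_two_reg_misc_fp16() is composed from the raw fields.
 * The function name is hypothetical.
 */
static inline int example_fp16_fpop(uint32_t insn)
{
    int opcode = extract32(insn, 12, 5);
    int a = extract32(insn, 23, 1);
    int u = extract32(insn, 29, 1);

    /*
     * fpop is u:a:opcode; e.g. FCVTZS has u = 0, a = 1, opcode = 0x1b,
     * giving fpop == 0x3b, matching the case label above.
     */
    return (u << 6) | (a << 5) | opcode;
}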
12733 
12734 /* AdvSIMD scalar x indexed element
12735  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12736  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12737  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12738  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12739  * AdvSIMD vector x indexed element
12740  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12741  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12742  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12743  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12744  */
12745 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12746 {
12747     /* This encoding has two kinds of instruction:
12748      *  normal, where we perform elt x idxelt => elt for each
12749      *     element in the vector
12750      *  long, where we perform elt x idxelt and generate a result of
12751      *     double the width of the input element
12752      * The long ops have a 'part' specifier (i.e. come in INSN, INSN2 pairs).
12753      */
12754     bool is_scalar = extract32(insn, 28, 1);
12755     bool is_q = extract32(insn, 30, 1);
12756     bool u = extract32(insn, 29, 1);
12757     int size = extract32(insn, 22, 2);
12758     int l = extract32(insn, 21, 1);
12759     int m = extract32(insn, 20, 1);
12760     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12761     int rm = extract32(insn, 16, 4);
12762     int opcode = extract32(insn, 12, 4);
12763     int h = extract32(insn, 11, 1);
12764     int rn = extract32(insn, 5, 5);
12765     int rd = extract32(insn, 0, 5);
12766     bool is_long = false;
12767     int is_fp = 0;
12768     bool is_fp16 = false;
12769     int index;
12770     TCGv_ptr fpst;
12771 
12772     switch (16 * u + opcode) {
12773     case 0x08: /* MUL */
12774     case 0x10: /* MLA */
12775     case 0x14: /* MLS */
12776         if (is_scalar) {
12777             unallocated_encoding(s);
12778             return;
12779         }
12780         break;
12781     case 0x02: /* SMLAL, SMLAL2 */
12782     case 0x12: /* UMLAL, UMLAL2 */
12783     case 0x06: /* SMLSL, SMLSL2 */
12784     case 0x16: /* UMLSL, UMLSL2 */
12785     case 0x0a: /* SMULL, SMULL2 */
12786     case 0x1a: /* UMULL, UMULL2 */
12787         if (is_scalar) {
12788             unallocated_encoding(s);
12789             return;
12790         }
12791         is_long = true;
12792         break;
12793     case 0x03: /* SQDMLAL, SQDMLAL2 */
12794     case 0x07: /* SQDMLSL, SQDMLSL2 */
12795     case 0x0b: /* SQDMULL, SQDMULL2 */
12796         is_long = true;
12797         break;
12798     case 0x0c: /* SQDMULH */
12799     case 0x0d: /* SQRDMULH */
12800         break;
12801     case 0x01: /* FMLA */
12802     case 0x05: /* FMLS */
12803     case 0x09: /* FMUL */
12804     case 0x19: /* FMULX */
12805         is_fp = 1;
12806         break;
12807     case 0x1d: /* SQRDMLAH */
12808     case 0x1f: /* SQRDMLSH */
12809         if (!dc_isar_feature(aa64_rdm, s)) {
12810             unallocated_encoding(s);
12811             return;
12812         }
12813         break;
12814     case 0x0e: /* SDOT */
12815     case 0x1e: /* UDOT */
12816         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12817             unallocated_encoding(s);
12818             return;
12819         }
12820         break;
12821     case 0x0f:
12822         switch (size) {
12823         case 0: /* SUDOT */
12824         case 2: /* USDOT */
12825             if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
12826                 unallocated_encoding(s);
12827                 return;
12828             }
12829             size = MO_32;
12830             break;
12831         case 1: /* BFDOT */
12832             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12833                 unallocated_encoding(s);
12834                 return;
12835             }
12836             size = MO_32;
12837             break;
12838         case 3: /* BFMLAL{B,T} */
12839             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12840                 unallocated_encoding(s);
12841                 return;
12842             }
12843             /* Can't set is_fp: the fp size conversion below is wrong here. */
12844             size = MO_16;
12845             break;
12846         default:
12847             unallocated_encoding(s);
12848             return;
12849         }
12850         break;
12851     case 0x11: /* FCMLA #0 */
12852     case 0x13: /* FCMLA #90 */
12853     case 0x15: /* FCMLA #180 */
12854     case 0x17: /* FCMLA #270 */
12855         if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
12856             unallocated_encoding(s);
12857             return;
12858         }
12859         is_fp = 2;
12860         break;
12861     case 0x00: /* FMLAL */
12862     case 0x04: /* FMLSL */
12863     case 0x18: /* FMLAL2 */
12864     case 0x1c: /* FMLSL2 */
12865         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
12866             unallocated_encoding(s);
12867             return;
12868         }
12869         size = MO_16;
12870         /* These are fp ops, but we pass cpu_env rather than fp_status.  */
12871         break;
12872     default:
12873         unallocated_encoding(s);
12874         return;
12875     }
12876 
12877     switch (is_fp) {
12878     case 1: /* normal fp */
12879         /* convert insn encoded size to MemOp size */
12880         switch (size) {
12881         case 0: /* half-precision */
12882             size = MO_16;
12883             is_fp16 = true;
12884             break;
12885         case MO_32: /* single precision */
12886         case MO_64: /* double precision */
12887             break;
12888         default:
12889             unallocated_encoding(s);
12890             return;
12891         }
12892         break;
12893 
12894     case 2: /* complex fp */
12895         /* Each indexable element is a complex pair.  */
12896         size += 1;
12897         switch (size) {
12898         case MO_32:
12899             if (h && !is_q) {
12900                 unallocated_encoding(s);
12901                 return;
12902             }
12903             is_fp16 = true;
12904             break;
12905         case MO_64:
12906             break;
12907         default:
12908             unallocated_encoding(s);
12909             return;
12910         }
12911         break;
12912 
12913     default: /* integer */
12914         switch (size) {
12915         case MO_8:
12916         case MO_64:
12917             unallocated_encoding(s);
12918             return;
12919         }
12920         break;
12921     }
12922     if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
12923         unallocated_encoding(s);
12924         return;
12925     }
12926 
12927     /* Given MemOp size, adjust register and indexing.  */
12928     switch (size) {
12929     case MO_16:
12930         index = h << 2 | l << 1 | m;
12931         break;
12932     case MO_32:
12933         index = h << 1 | l;
12934         rm |= m << 4;
12935         break;
12936     case MO_64:
12937         if (l || !is_q) {
12938             unallocated_encoding(s);
12939             return;
12940         }
12941         index = h;
12942         rm |= m << 4;
12943         break;
12944     default:
12945         g_assert_not_reached();
12946     }
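
    /*
     * Worked example, for illustration: with MO_16 the element index is
     * H:L:M, so h = 1, l = 0, m = 1 selects element 5 of Vm; with MO_32
     * the index is H:L and M instead becomes bit 4 of the register
     * number.
     */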
12947 
12948     if (!fp_access_check(s)) {
12949         return;
12950     }
12951 
12952     if (is_fp) {
12953         fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
12954     } else {
12955         fpst = NULL;
12956     }
12957 
12958     switch (16 * u + opcode) {
12959     case 0x0e: /* SDOT */
12960     case 0x1e: /* UDOT */
12961         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12962                          u ? gen_helper_gvec_udot_idx_b
12963                          : gen_helper_gvec_sdot_idx_b);
12964         return;
12965     case 0x0f:
12966         switch (extract32(insn, 22, 2)) {
12967         case 0: /* SUDOT */
12968             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12969                              gen_helper_gvec_sudot_idx_b);
12970             return;
12971         case 1: /* BFDOT */
12972             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12973                              gen_helper_gvec_bfdot_idx);
12974             return;
12975         case 2: /* USDOT */
12976             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12977                              gen_helper_gvec_usdot_idx_b);
12978             return;
12979         case 3: /* BFMLAL{B,T} */
12980             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
12981                               gen_helper_gvec_bfmlal_idx);
12982             return;
12983         }
12984         g_assert_not_reached();
12985     case 0x11: /* FCMLA #0 */
12986     case 0x13: /* FCMLA #90 */
12987     case 0x15: /* FCMLA #180 */
12988     case 0x17: /* FCMLA #270 */
12989         {
12990             int rot = extract32(insn, 13, 2);
12991             int data = (index << 2) | rot;
12992             tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
12993                                vec_full_reg_offset(s, rn),
12994                                vec_full_reg_offset(s, rm),
12995                                vec_full_reg_offset(s, rd), fpst,
12996                                is_q ? 16 : 8, vec_full_reg_size(s), data,
12997                                size == MO_64
12998                                ? gen_helper_gvec_fcmlas_idx
12999                                : gen_helper_gvec_fcmlah_idx);
13000         }
13001         return;
13002 
13003     case 0x00: /* FMLAL */
13004     case 0x04: /* FMLSL */
13005     case 0x18: /* FMLAL2 */
13006     case 0x1c: /* FMLSL2 */
13007         {
13008             int is_s = extract32(opcode, 2, 1);
13009             int is_2 = u;
13010             int data = (index << 2) | (is_2 << 1) | is_s;
13011             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
13012                                vec_full_reg_offset(s, rn),
13013                                vec_full_reg_offset(s, rm), cpu_env,
13014                                is_q ? 16 : 8, vec_full_reg_size(s),
13015                                data, gen_helper_gvec_fmlal_idx_a64);
13016         }
13017         return;
13018 
13019     case 0x08: /* MUL */
13020         if (!is_long && !is_scalar) {
13021             static gen_helper_gvec_3 * const fns[3] = {
13022                 gen_helper_gvec_mul_idx_h,
13023                 gen_helper_gvec_mul_idx_s,
13024                 gen_helper_gvec_mul_idx_d,
13025             };
13026             tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
13027                                vec_full_reg_offset(s, rn),
13028                                vec_full_reg_offset(s, rm),
13029                                is_q ? 16 : 8, vec_full_reg_size(s),
13030                                index, fns[size - 1]);
13031             return;
13032         }
13033         break;
13034 
13035     case 0x10: /* MLA */
13036         if (!is_long && !is_scalar) {
13037             static gen_helper_gvec_4 * const fns[3] = {
13038                 gen_helper_gvec_mla_idx_h,
13039                 gen_helper_gvec_mla_idx_s,
13040                 gen_helper_gvec_mla_idx_d,
13041             };
13042             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13043                                vec_full_reg_offset(s, rn),
13044                                vec_full_reg_offset(s, rm),
13045                                vec_full_reg_offset(s, rd),
13046                                is_q ? 16 : 8, vec_full_reg_size(s),
13047                                index, fns[size - 1]);
13048             return;
13049         }
13050         break;
13051 
13052     case 0x14: /* MLS */
13053         if (!is_long && !is_scalar) {
13054             static gen_helper_gvec_4 * const fns[3] = {
13055                 gen_helper_gvec_mls_idx_h,
13056                 gen_helper_gvec_mls_idx_s,
13057                 gen_helper_gvec_mls_idx_d,
13058             };
13059             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13060                                vec_full_reg_offset(s, rn),
13061                                vec_full_reg_offset(s, rm),
13062                                vec_full_reg_offset(s, rd),
13063                                is_q ? 16 : 8, vec_full_reg_size(s),
13064                                index, fns[size - 1]);
13065             return;
13066         }
13067         break;
13068     }
13069 
13070     if (size == 3) {
13071         TCGv_i64 tcg_idx = tcg_temp_new_i64();
13072         int pass;
13073 
13074         assert(is_fp && is_q && !is_long);
13075 
13076         read_vec_element(s, tcg_idx, rm, index, MO_64);
13077 
13078         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13079             TCGv_i64 tcg_op = tcg_temp_new_i64();
13080             TCGv_i64 tcg_res = tcg_temp_new_i64();
13081 
13082             read_vec_element(s, tcg_op, rn, pass, MO_64);
13083 
13084             switch (16 * u + opcode) {
13085             case 0x05: /* FMLS */
13086                 /* As usual for ARM, separate negation for fused multiply-add */
13087                 gen_helper_vfp_negd(tcg_op, tcg_op);
13088                 /* fall through */
13089             case 0x01: /* FMLA */
13090                 read_vec_element(s, tcg_res, rd, pass, MO_64);
13091                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
13092                 break;
13093             case 0x09: /* FMUL */
13094                 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
13095                 break;
13096             case 0x19: /* FMULX */
13097                 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
13098                 break;
13099             default:
13100                 g_assert_not_reached();
13101             }
13102 
13103             write_vec_element(s, tcg_res, rd, pass, MO_64);
13104         }
13105 
13106         clear_vec_high(s, !is_scalar, rd);
13107     } else if (!is_long) {
13108         /* 32 bit floating point, or 16 or 32 bit integer.
13109          * For the 16 bit scalar case we use the usual Neon helpers and
13110          * rely on the fact that 0 op 0 == 0 with no side effects.
13111          */
13112         TCGv_i32 tcg_idx = tcg_temp_new_i32();
13113         int pass, maxpasses;
13114 
13115         if (is_scalar) {
13116             maxpasses = 1;
13117         } else {
13118             maxpasses = is_q ? 4 : 2;
13119         }
13120 
13121         read_vec_element_i32(s, tcg_idx, rm, index, size);
13122 
13123         if (size == 1 && !is_scalar) {
13124             /* The simplest way to handle the 16x16 indexed ops is to duplicate
13125              * the index into both halves of the 32 bit tcg_idx and then use
13126              * the usual Neon helpers.
13127              */
13128             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13129         }
13130 
13131         for (pass = 0; pass < maxpasses; pass++) {
13132             TCGv_i32 tcg_op = tcg_temp_new_i32();
13133             TCGv_i32 tcg_res = tcg_temp_new_i32();
13134 
13135             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13136 
13137             switch (16 * u + opcode) {
13138             case 0x08: /* MUL */
13139             case 0x10: /* MLA */
13140             case 0x14: /* MLS */
13141             {
13142                 static NeonGenTwoOpFn * const fns[2][2] = {
13143                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13144                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
13145                 };
13146                 NeonGenTwoOpFn *genfn;
13147                 bool is_sub = opcode == 0x4;
13148 
13149                 if (size == 1) {
13150                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13151                 } else {
13152                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13153                 }
13154                 if (opcode == 0x8) {
13155                     break;
13156                 }
13157                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13158                 genfn = fns[size - 1][is_sub];
13159                 genfn(tcg_res, tcg_op, tcg_res);
13160                 break;
13161             }
13162             case 0x05: /* FMLS */
13163             case 0x01: /* FMLA */
13164                 read_vec_element_i32(s, tcg_res, rd, pass,
13165                                      is_scalar ? size : MO_32);
13166                 switch (size) {
13167                 case 1:
13168                     if (opcode == 0x5) {
13169                         /* As usual for ARM, separate negation for fused
13170                          * multiply-add */
13171                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13172                     }
13173                     if (is_scalar) {
13174                         gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13175                                                    tcg_res, fpst);
13176                     } else {
13177                         gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13178                                                     tcg_res, fpst);
13179                     }
13180                     break;
13181                 case 2:
13182                     if (opcode == 0x5) {
13183                         /* As usual for ARM, separate negation for
13184                          * fused multiply-add */
13185                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13186                     }
13187                     gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13188                                            tcg_res, fpst);
13189                     break;
13190                 default:
13191                     g_assert_not_reached();
13192                 }
13193                 break;
13194             case 0x09: /* FMUL */
13195                 switch (size) {
13196                 case 1:
13197                     if (is_scalar) {
13198                         gen_helper_advsimd_mulh(tcg_res, tcg_op,
13199                                                 tcg_idx, fpst);
13200                     } else {
13201                         gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13202                                                  tcg_idx, fpst);
13203                     }
13204                     break;
13205                 case 2:
13206                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13207                     break;
13208                 default:
13209                     g_assert_not_reached();
13210                 }
13211                 break;
13212             case 0x19: /* FMULX */
13213                 switch (size) {
13214                 case 1:
13215                     if (is_scalar) {
13216                         gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13217                                                  tcg_idx, fpst);
13218                     } else {
13219                         gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13220                                                   tcg_idx, fpst);
13221                     }
13222                     break;
13223                 case 2:
13224                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13225                     break;
13226                 default:
13227                     g_assert_not_reached();
13228                 }
13229                 break;
13230             case 0x0c: /* SQDMULH */
13231                 if (size == 1) {
13232                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
13233                                                tcg_op, tcg_idx);
13234                 } else {
13235                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
13236                                                tcg_op, tcg_idx);
13237                 }
13238                 break;
13239             case 0x0d: /* SQRDMULH */
13240                 if (size == 1) {
13241                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
13242                                                 tcg_op, tcg_idx);
13243                 } else {
13244                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
13245                                                 tcg_op, tcg_idx);
13246                 }
13247                 break;
13248             case 0x1d: /* SQRDMLAH */
13249                 read_vec_element_i32(s, tcg_res, rd, pass,
13250                                      is_scalar ? size : MO_32);
13251                 if (size == 1) {
13252                     gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
13253                                                 tcg_op, tcg_idx, tcg_res);
13254                 } else {
13255                     gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
13256                                                 tcg_op, tcg_idx, tcg_res);
13257                 }
13258                 break;
13259             case 0x1f: /* SQRDMLSH */
13260                 read_vec_element_i32(s, tcg_res, rd, pass,
13261                                      is_scalar ? size : MO_32);
13262                 if (size == 1) {
13263                     gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
13264                                                 tcg_op, tcg_idx, tcg_res);
13265                 } else {
13266                     gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
13267                                                 tcg_op, tcg_idx, tcg_res);
13268                 }
13269                 break;
13270             default:
13271                 g_assert_not_reached();
13272             }
13273 
13274             if (is_scalar) {
13275                 write_fp_sreg(s, rd, tcg_res);
13276             } else {
13277                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13278             }
13279         }
13280 
13281         clear_vec_high(s, is_q, rd);
13282     } else {
13283         /* long ops: 16x16->32 or 32x32->64 */
13284         TCGv_i64 tcg_res[2];
13285         int pass;
13286         bool satop = extract32(opcode, 0, 1);
13287         MemOp memop = MO_32;
13288 
13289         if (satop || !u) {
13290             memop |= MO_SIGN;
13291         }
13292 
13293         if (size == 2) {
13294             TCGv_i64 tcg_idx = tcg_temp_new_i64();
13295 
13296             read_vec_element(s, tcg_idx, rm, index, memop);
13297 
13298             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13299                 TCGv_i64 tcg_op = tcg_temp_new_i64();
13300                 TCGv_i64 tcg_passres;
13301                 int passelt;
13302 
13303                 if (is_scalar) {
13304                     passelt = 0;
13305                 } else {
13306                     passelt = pass + (is_q * 2);
13307                 }
13308 
13309                 read_vec_element(s, tcg_op, rn, passelt, memop);
13310 
13311                 tcg_res[pass] = tcg_temp_new_i64();
13312 
13313                 if (opcode == 0xa || opcode == 0xb) {
13314                     /* Non-accumulating ops */
13315                     tcg_passres = tcg_res[pass];
13316                 } else {
13317                     tcg_passres = tcg_temp_new_i64();
13318                 }
13319 
13320                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13321 
13322                 if (satop) {
13323                     /* saturating doubling: add the product to itself */
13324                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
13325                                                       tcg_passres, tcg_passres);
13326                 }
13327 
13328                 if (opcode == 0xa || opcode == 0xb) {
13329                     continue;
13330                 }
13331 
13332                 /* Accumulating op: handle accumulate step */
13333                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13334 
13335                 switch (opcode) {
13336                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13337                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13338                     break;
13339                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13340                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13341                     break;
13342                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13343                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
13344                     /* fall through */
13345                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13346                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
13347                                                       tcg_res[pass],
13348                                                       tcg_passres);
13349                     break;
13350                 default:
13351                     g_assert_not_reached();
13352                 }
13353             }
13354 
13355             clear_vec_high(s, !is_scalar, rd);
13356         } else {
13357             TCGv_i32 tcg_idx = tcg_temp_new_i32();
13358 
13359             assert(size == 1);
13360             read_vec_element_i32(s, tcg_idx, rm, index, size);
13361 
13362             if (!is_scalar) {
13363                 /* The simplest way to handle the 16x16 indexed ops is to
13364                  * duplicate the index into both halves of the 32 bit tcg_idx
13365                  * and then use the usual Neon helpers.
13366                  */
13367                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13368             }
13369 
13370             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13371                 TCGv_i32 tcg_op = tcg_temp_new_i32();
13372                 TCGv_i64 tcg_passres;
13373 
13374                 if (is_scalar) {
13375                     read_vec_element_i32(s, tcg_op, rn, pass, size);
13376                 } else {
13377                     read_vec_element_i32(s, tcg_op, rn,
13378                                          pass + (is_q * 2), MO_32);
13379                 }
13380 
13381                 tcg_res[pass] = tcg_temp_new_i64();
13382 
13383                 if (opcode == 0xa || opcode == 0xb) {
13384                     /* Non-accumulating ops */
13385                     tcg_passres = tcg_res[pass];
13386                 } else {
13387                     tcg_passres = tcg_temp_new_i64();
13388                 }
13389 
13390                 if (memop & MO_SIGN) {
13391                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13392                 } else {
13393                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13394                 }
13395                 if (satop) {
13396                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13397                                                       tcg_passres, tcg_passres);
13398                 }
13399 
13400                 if (opcode == 0xa || opcode == 0xb) {
13401                     continue;
13402                 }
13403 
13404                 /* Accumulating op: handle accumulate step */
13405                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13406 
13407                 switch (opcode) {
13408                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13409                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13410                                              tcg_passres);
13411                     break;
13412                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13413                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13414                                              tcg_passres);
13415                     break;
13416                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13417                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13418                     /* fall through */
13419                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13420                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13421                                                       tcg_res[pass],
13422                                                       tcg_passres);
13423                     break;
13424                 default:
13425                     g_assert_not_reached();
13426                 }
13427             }
13428 
13429             if (is_scalar) {
13430                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13431             }
13432         }
13433 
13434         if (is_scalar) {
13435             tcg_res[1] = tcg_constant_i64(0);
13436         }
13437 
13438         for (pass = 0; pass < 2; pass++) {
13439             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13440         }
13441     }
13442 }
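
/*
 * Illustrative sketch of the per-element arithmetic for the "long"
 * indexed ops above, shown for SMLAL with 32-bit elements. The function
 * name is hypothetical; the TCG code above emits the equivalent ops
 * inline.
 */
static inline int64_t example_smlal_elt(int64_t acc, int32_t elt,
                                        int32_t idxelt)
{
    /* Widen both sources, multiply, then accumulate at double width. */
    return acc + (int64_t)elt * (int64_t)idxelt;
}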
13443 
13444 /* Crypto AES
13445  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13446  * +-----------------+------+-----------+--------+-----+------+------+
13447  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13448  * +-----------------+------+-----------+--------+-----+------+------+
13449  */
13450 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13451 {
13452     int size = extract32(insn, 22, 2);
13453     int opcode = extract32(insn, 12, 5);
13454     int rn = extract32(insn, 5, 5);
13455     int rd = extract32(insn, 0, 5);
13456     int decrypt;
13457     gen_helper_gvec_2 *genfn2 = NULL;
13458     gen_helper_gvec_3 *genfn3 = NULL;
13459 
13460     if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13461         unallocated_encoding(s);
13462         return;
13463     }
13464 
13465     switch (opcode) {
13466     case 0x4: /* AESE */
13467         decrypt = 0;
13468         genfn3 = gen_helper_crypto_aese;
13469         break;
13470     case 0x6: /* AESMC */
13471         decrypt = 0;
13472         genfn2 = gen_helper_crypto_aesmc;
13473         break;
13474     case 0x5: /* AESD */
13475         decrypt = 1;
13476         genfn3 = gen_helper_crypto_aese;
13477         break;
13478     case 0x7: /* AESIMC */
13479         decrypt = 1;
13480         genfn2 = gen_helper_crypto_aesmc;
13481         break;
13482     default:
13483         unallocated_encoding(s);
13484         return;
13485     }
13486 
13487     if (!fp_access_check(s)) {
13488         return;
13489     }
13490     if (genfn2) {
13491         gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2);
13492     } else {
13493         gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3);
13494     }
13495 }
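
/*
 * For clarity: AESE/AESD share gen_helper_crypto_aese and AESMC/AESIMC
 * share gen_helper_crypto_aesmc; the 'decrypt' immediate passed as the
 * data argument selects the forward or inverse transform.
 */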
13496 
13497 /* Crypto three-reg SHA
13498  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
13499  * +-----------------+------+---+------+---+--------+-----+------+------+
13500  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
13501  * +-----------------+------+---+------+---+--------+-----+------+------+
13502  */
13503 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13504 {
13505     int size = extract32(insn, 22, 2);
13506     int opcode = extract32(insn, 12, 3);
13507     int rm = extract32(insn, 16, 5);
13508     int rn = extract32(insn, 5, 5);
13509     int rd = extract32(insn, 0, 5);
13510     gen_helper_gvec_3 *genfn;
13511     bool feature;
13512 
13513     if (size != 0) {
13514         unallocated_encoding(s);
13515         return;
13516     }
13517 
13518     switch (opcode) {
13519     case 0: /* SHA1C */
13520         genfn = gen_helper_crypto_sha1c;
13521         feature = dc_isar_feature(aa64_sha1, s);
13522         break;
13523     case 1: /* SHA1P */
13524         genfn = gen_helper_crypto_sha1p;
13525         feature = dc_isar_feature(aa64_sha1, s);
13526         break;
13527     case 2: /* SHA1M */
13528         genfn = gen_helper_crypto_sha1m;
13529         feature = dc_isar_feature(aa64_sha1, s);
13530         break;
13531     case 3: /* SHA1SU0 */
13532         genfn = gen_helper_crypto_sha1su0;
13533         feature = dc_isar_feature(aa64_sha1, s);
13534         break;
13535     case 4: /* SHA256H */
13536         genfn = gen_helper_crypto_sha256h;
13537         feature = dc_isar_feature(aa64_sha256, s);
13538         break;
13539     case 5: /* SHA256H2 */
13540         genfn = gen_helper_crypto_sha256h2;
13541         feature = dc_isar_feature(aa64_sha256, s);
13542         break;
13543     case 6: /* SHA256SU1 */
13544         genfn = gen_helper_crypto_sha256su1;
13545         feature = dc_isar_feature(aa64_sha256, s);
13546         break;
13547     default:
13548         unallocated_encoding(s);
13549         return;
13550     }
13551 
13552     if (!feature) {
13553         unallocated_encoding(s);
13554         return;
13555     }
13556 
13557     if (!fp_access_check(s)) {
13558         return;
13559     }
13560     gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
13561 }
13562 
13563 /* Crypto two-reg SHA
13564  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13565  * +-----------------+------+-----------+--------+-----+------+------+
13566  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13567  * +-----------------+------+-----------+--------+-----+------+------+
13568  */
13569 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13570 {
13571     int size = extract32(insn, 22, 2);
13572     int opcode = extract32(insn, 12, 5);
13573     int rn = extract32(insn, 5, 5);
13574     int rd = extract32(insn, 0, 5);
13575     gen_helper_gvec_2 *genfn;
13576     bool feature;
13577 
13578     if (size != 0) {
13579         unallocated_encoding(s);
13580         return;
13581     }
13582 
13583     switch (opcode) {
13584     case 0: /* SHA1H */
13585         feature = dc_isar_feature(aa64_sha1, s);
13586         genfn = gen_helper_crypto_sha1h;
13587         break;
13588     case 1: /* SHA1SU1 */
13589         feature = dc_isar_feature(aa64_sha1, s);
13590         genfn = gen_helper_crypto_sha1su1;
13591         break;
13592     case 2: /* SHA256SU0 */
13593         feature = dc_isar_feature(aa64_sha256, s);
13594         genfn = gen_helper_crypto_sha256su0;
13595         break;
13596     default:
13597         unallocated_encoding(s);
13598         return;
13599     }
13600 
13601     if (!feature) {
13602         unallocated_encoding(s);
13603         return;
13604     }
13605 
13606     if (!fp_access_check(s)) {
13607         return;
13608     }
13609     gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
13610 }
13611 
13612 static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
13613 {
13614     tcg_gen_rotli_i64(d, m, 1);
13615     tcg_gen_xor_i64(d, d, n);
13616 }
13617 
13618 static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
13619 {
13620     tcg_gen_rotli_vec(vece, d, m, 1);
13621     tcg_gen_xor_vec(vece, d, d, n);
13622 }
13623 
13624 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
13625                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
13626 {
13627     static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
13628     static const GVecGen3 op = {
13629         .fni8 = gen_rax1_i64,
13630         .fniv = gen_rax1_vec,
13631         .opt_opc = vecop_list,
13632         .fno = gen_helper_crypto_rax1,
13633         .vece = MO_64,
13634     };
13635     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
13636 }
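
/*
 * Illustrative sketch of the scalar semantics implemented by
 * gen_rax1_i64() above, i.e. d = n ^ rol64(m, 1). The function name is
 * hypothetical.
 */
static inline uint64_t example_rax1(uint64_t n, uint64_t m)
{
    /* Rotate m left by one, then exclusive-or with n. */
    return n ^ ((m << 1) | (m >> 63));
}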
13637 
13638 /* Crypto three-reg SHA512
13639  *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
13640  * +-----------------------+------+---+---+-----+--------+------+------+
13641  * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
13642  * +-----------------------+------+---+---+-----+--------+------+------+
13643  */
13644 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13645 {
13646     int opcode = extract32(insn, 10, 2);
13647     int o = extract32(insn, 14, 1);
13648     int rm = extract32(insn, 16, 5);
13649     int rn = extract32(insn, 5, 5);
13650     int rd = extract32(insn, 0, 5);
13651     bool feature;
13652     gen_helper_gvec_3 *oolfn = NULL;
13653     GVecGen3Fn *gvecfn = NULL;
13654 
13655     if (o == 0) {
13656         switch (opcode) {
13657         case 0: /* SHA512H */
13658             feature = dc_isar_feature(aa64_sha512, s);
13659             oolfn = gen_helper_crypto_sha512h;
13660             break;
13661         case 1: /* SHA512H2 */
13662             feature = dc_isar_feature(aa64_sha512, s);
13663             oolfn = gen_helper_crypto_sha512h2;
13664             break;
13665         case 2: /* SHA512SU1 */
13666             feature = dc_isar_feature(aa64_sha512, s);
13667             oolfn = gen_helper_crypto_sha512su1;
13668             break;
13669         case 3: /* RAX1 */
13670             feature = dc_isar_feature(aa64_sha3, s);
13671             gvecfn = gen_gvec_rax1;
13672             break;
13673         default:
13674             g_assert_not_reached();
13675         }
13676     } else {
13677         switch (opcode) {
13678         case 0: /* SM3PARTW1 */
13679             feature = dc_isar_feature(aa64_sm3, s);
13680             oolfn = gen_helper_crypto_sm3partw1;
13681             break;
13682         case 1: /* SM3PARTW2 */
13683             feature = dc_isar_feature(aa64_sm3, s);
13684             oolfn = gen_helper_crypto_sm3partw2;
13685             break;
13686         case 2: /* SM4EKEY */
13687             feature = dc_isar_feature(aa64_sm4, s);
13688             oolfn = gen_helper_crypto_sm4ekey;
13689             break;
13690         default:
13691             unallocated_encoding(s);
13692             return;
13693         }
13694     }
13695 
13696     if (!feature) {
13697         unallocated_encoding(s);
13698         return;
13699     }
13700 
13701     if (!fp_access_check(s)) {
13702         return;
13703     }
13704 
13705     if (oolfn) {
13706         gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
13707     } else {
13708         gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
13709     }
13710 }
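
/*
 * For clarity: RAX1 is the only op above expanded with a GVecGen3
 * (gen_gvec_rax1), letting it use inline vector rotates where the host
 * supports them; the others always go out of line via helpers.
 */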
13711 
13712 /* Crypto two-reg SHA512
13713  *  31                                     12  11  10  9    5 4    0
13714  * +-----------------------------------------+--------+------+------+
13715  * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
13716  * +-----------------------------------------+--------+------+------+
13717  */
13718 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13719 {
13720     int opcode = extract32(insn, 10, 2);
13721     int rn = extract32(insn, 5, 5);
13722     int rd = extract32(insn, 0, 5);
13723     bool feature;
13724 
13725     switch (opcode) {
13726     case 0: /* SHA512SU0 */
13727         feature = dc_isar_feature(aa64_sha512, s);
13728         break;
13729     case 1: /* SM4E */
13730         feature = dc_isar_feature(aa64_sm4, s);
13731         break;
13732     default:
13733         unallocated_encoding(s);
13734         return;
13735     }
13736 
13737     if (!feature) {
13738         unallocated_encoding(s);
13739         return;
13740     }
13741 
13742     if (!fp_access_check(s)) {
13743         return;
13744     }
13745 
13746     switch (opcode) {
13747     case 0: /* SHA512SU0 */
13748         gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
13749         break;
13750     case 1: /* SM4E */
13751         gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
13752         break;
13753     default:
13754         g_assert_not_reached();
13755     }
13756 }
13757 
13758 /* Crypto four-register
13759  *  31               23 22 21 20  16 15  14  10 9    5 4    0
13760  * +-------------------+-----+------+---+------+------+------+
13761  * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
13762  * +-------------------+-----+------+---+------+------+------+
13763  */
13764 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13765 {
13766     int op0 = extract32(insn, 21, 2);
13767     int rm = extract32(insn, 16, 5);
13768     int ra = extract32(insn, 10, 5);
13769     int rn = extract32(insn, 5, 5);
13770     int rd = extract32(insn, 0, 5);
13771     bool feature;
13772 
13773     switch (op0) {
13774     case 0: /* EOR3 */
13775     case 1: /* BCAX */
13776         feature = dc_isar_feature(aa64_sha3, s);
13777         break;
13778     case 2: /* SM3SS1 */
13779         feature = dc_isar_feature(aa64_sm3, s);
13780         break;
13781     default:
13782         unallocated_encoding(s);
13783         return;
13784     }
13785 
13786     if (!feature) {
13787         unallocated_encoding(s);
13788         return;
13789     }
13790 
13791     if (!fp_access_check(s)) {
13792         return;
13793     }
13794 
13795     if (op0 < 2) {
13796         TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13797         int pass;
13798 
13799         tcg_op1 = tcg_temp_new_i64();
13800         tcg_op2 = tcg_temp_new_i64();
13801         tcg_op3 = tcg_temp_new_i64();
13802         tcg_res[0] = tcg_temp_new_i64();
13803         tcg_res[1] = tcg_temp_new_i64();
13804 
13805         for (pass = 0; pass < 2; pass++) {
13806             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13807             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13808             read_vec_element(s, tcg_op3, ra, pass, MO_64);
13809 
13810             if (op0 == 0) {
13811                 /* EOR3 */
13812                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13813             } else {
13814                 /* BCAX */
13815                 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13816             }
13817             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13818         }
13819         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13820         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13821     } else {
13822         TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13823 
13824         tcg_op1 = tcg_temp_new_i32();
13825         tcg_op2 = tcg_temp_new_i32();
13826         tcg_op3 = tcg_temp_new_i32();
13827         tcg_res = tcg_temp_new_i32();
13828         tcg_zero = tcg_constant_i32(0);
13829 
13830         read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13831         read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13832         read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13833 
13834         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13835         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13836         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13837         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13838 
13839         write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13840         write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13841         write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13842         write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13843     }
13844 }
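
/*
 * Illustrative sketch of the per-64-bit-lane semantics generated above
 * for EOR3 and BCAX; the function names are hypothetical.
 */
static inline uint64_t example_eor3(uint64_t n, uint64_t m, uint64_t a)
{
    /* EOR3: three-way exclusive-or. */
    return n ^ m ^ a;
}

static inline uint64_t example_bcax(uint64_t n, uint64_t m, uint64_t a)
{
    /* BCAX: exclusive-or of n with (m AND NOT a). */
    return n ^ (m & ~a);
}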
13845 
13846 /* Crypto XAR
13847  *  31                   21 20  16 15    10 9    5 4    0
13848  * +-----------------------+------+--------+------+------+
13849  * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13850  * +-----------------------+------+--------+------+------+
13851  */
13852 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13853 {
13854     int rm = extract32(insn, 16, 5);
13855     int imm6 = extract32(insn, 10, 6);
13856     int rn = extract32(insn, 5, 5);
13857     int rd = extract32(insn, 0, 5);
13858 
13859     if (!dc_isar_feature(aa64_sha3, s)) {
13860         unallocated_encoding(s);
13861         return;
13862     }
13863 
13864     if (!fp_access_check(s)) {
13865         return;
13866     }
13867 
13868     gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
13869                  vec_full_reg_offset(s, rn),
13870                  vec_full_reg_offset(s, rm), imm6, 16,
13871                  vec_full_reg_size(s));
13872 }
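
/*
 * Illustrative sketch of XAR's per-64-bit-lane semantics: exclusive-or,
 * then rotate right by imm6. The function name is hypothetical.
 */
static inline uint64_t example_xar(uint64_t n, uint64_t m, unsigned imm6)
{
    uint64_t t = n ^ m;

    /* Guard the rotate: shifting a 64-bit value by 64 is undefined. */
    return imm6 ? (t >> imm6) | (t << (64 - imm6)) : t;
}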
13873 
13874 /* Crypto three-reg imm2
13875  *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
13876  * +-----------------------+------+-----+------+--------+------+------+
13877  * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
13878  * +-----------------------+------+-----+------+--------+------+------+
13879  */
13880 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13881 {
13882     static gen_helper_gvec_3 * const fns[4] = {
13883         gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
13884         gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
13885     };
13886     int opcode = extract32(insn, 10, 2);
13887     int imm2 = extract32(insn, 12, 2);
13888     int rm = extract32(insn, 16, 5);
13889     int rn = extract32(insn, 5, 5);
13890     int rd = extract32(insn, 0, 5);
13891 
13892     if (!dc_isar_feature(aa64_sm3, s)) {
13893         unallocated_encoding(s);
13894         return;
13895     }
13896 
13897     if (!fp_access_check(s)) {
13898         return;
13899     }
13900 
13901     gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
13902 }
13903 
13904 /* C3.6 Data processing - SIMD, inc Crypto
13905  *
13906  * As the decode gets a little complex we are using a table based
13907  * approach for this part of the decode.
13908  */
13909 static const AArch64DecodeTable data_proc_simd[] = {
13910     /* pattern  ,  mask     ,  fn                        */
13911     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
13912     { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
13913     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
13914     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
13915     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
13916     { 0x0e000400, 0x9fe08400, disas_simd_copy },
13917     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
13918     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
13919     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
13920     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
13921     { 0x0e000000, 0xbf208c00, disas_simd_tb },
13922     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
13923     { 0x2e000000, 0xbf208400, disas_simd_ext },
13924     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
13925     { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
13926     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
13927     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
13928     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
13929     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
13930     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
13931     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
13932     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
13933     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
13934     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
13935     { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
13936     { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
13937     { 0xce000000, 0xff808000, disas_crypto_four_reg },
13938     { 0xce800000, 0xffe00000, disas_crypto_xar },
13939     { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
13940     { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
13941     { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
13942     { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
13943     { 0x00000000, 0x00000000, NULL }
13944 };
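
/*
 * Illustrative sketch of the match rule that lookup_disas_fn() applies
 * to each entry of a table like the one above; the function name is
 * hypothetical.
 */
static inline bool example_table_match(uint32_t insn,
                                       const AArch64DecodeTable *tb)
{
    /* An entry matches when the insn's fixed bits equal the pattern. */
    return (insn & tb->mask) == tb->pattern;
}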
13945 
13946 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13947 {
13948     /* Note that this is called with all non-FP cases from
13949      * table C3-6, so it must UNDEF for entries not specifically
13950      * allocated to instructions in that table.
13951      */
13952     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13953     if (fn) {
13954         fn(s, insn);
13955     } else {
13956         unallocated_encoding(s);
13957     }
13958 }
13959 
13960 /* C3.6 Data processing - SIMD and floating point */
13961 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13962 {
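    /*
     * Within the SIMD & FP space, bit 28 set with bit 30 clear selects
     * the scalar floating point group; every other combination,
     * including the crypto extensions, goes to the SIMD decoder.
     */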
13963     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13964         disas_data_proc_fp(s, insn);
13965     } else {
13966         /* SIMD, including crypto */
13967         disas_data_proc_simd(s, insn);
13968     }
13969 }
13970 
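/*
 * These are the trans functions for the generated SME FA64 decoder
 * (decode-sme-fa64.c.inc): encodings it accepts as OK are legal in
 * streaming SVE mode, while FAIL marks the insn as non-streaming so
 * that a later FP/SVE access check can raise the SME trap.
 */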
13971 static bool trans_OK(DisasContext *s, arg_OK *a)
13972 {
13973     return true;
13974 }
13975 
13976 static bool trans_FAIL(DisasContext *s, arg_OK *a)
13977 {
13978     s->is_nonstreaming = true;
13979     return true;
13980 }
13981 
13982 /**
13983  * is_guarded_page:
13984  * @env: The cpu environment
13985  * @s: The DisasContext
13986  *
13987  * Return true if the page containing the TB's first insn is guarded.
13988  */
13989 static bool is_guarded_page(CPUARMState *env, DisasContext *s)
13990 {
13991     uint64_t addr = s->base.pc_first;
13992 #ifdef CONFIG_USER_ONLY
13993     return page_get_flags(addr) & PAGE_BTI;
13994 #else
13995     CPUTLBEntryFull *full;
13996     void *host;
13997     int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
13998     int flags;
13999 
14000     /*
14001      * We test this immediately after reading an insn, which means
14002      * that the TLB entry must be present and valid, and thus this
14003      * access will never raise an exception.
14004      */
14005     flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
14006                               false, &host, &full, 0);
14007     assert(!(flags & TLB_INVALID_MASK));
14008 
14009     return full->guarded;
14010 #endif
14011 }
14012 
14013 /**
14014  * btype_destination_ok:
14015  * @insn: The instruction at the branch destination
14016  * @bt: SCTLR_ELx.BT
14017  * @btype: PSTATE.BTYPE, which is known to be non-zero
14018  *
14019  * On a guarded page, there are a limited number of insns
14020  * that may be present at the branch target:
14021  *   - branch target identifiers,
14022  *   - PACIASP, PACIBSP,
14023  *   - the BRK insn,
14024  *   - the HLT insn.
14025  * Anything else causes a Branch Target Exception.
14026  *
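 * For example, reading off the logic below: with PSTATE.BTYPE == 3 the
 * target may be a PACIASP/PACIBSP (when SCTLR_ELx.BT is clear) or a
 * BTI jc, but a plain BTI or a BTI c raises the exception.
 *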
14027  * Return true if the branch is compatible, false to raise BTITRAP.
14028  */
14029 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
14030 {
14031     if ((insn & 0xfffff01fu) == 0xd503201fu) {
14032         /* HINT space */
14033         switch (extract32(insn, 5, 7)) {
14034         case 0b011001: /* PACIASP */
14035         case 0b011011: /* PACIBSP */
14036             /*
14037              * If SCTLR_ELx.BT, then PACI*SP are not compatible
14038              * with btype == 3.  Otherwise all btype are ok.
14039              */
14040             return !bt || btype != 3;
14041         case 0b100000: /* BTI */
14042             /* Not compatible with any btype.  */
14043             return false;
14044         case 0b100010: /* BTI c */
14045             /* Not compatible with btype == 3 */
14046             return btype != 3;
14047         case 0b100100: /* BTI j */
14048             /* Not compatible with btype == 2 */
14049             return btype != 2;
14050         case 0b100110: /* BTI jc */
14051             /* Compatible with any btype.  */
14052             return true;
14053         }
14054     } else {
14055         switch (insn & 0xffe0001fu) {
14056         case 0xd4200000u: /* BRK */
14057         case 0xd4400000u: /* HLT */
14058             /* Give priority to the breakpoint exception.  */
14059             return true;
14060         }
14061     }
14062     return false;
14063 }
14064 
14065 /* C3.1 A64 instruction index by encoding */
14066 static void disas_a64_legacy(DisasContext *s, uint32_t insn)
14067 {
14068     switch (extract32(insn, 25, 4)) {
14069     case 0xa: case 0xb: /* Branch, exception generation and system insns */
14070         disas_b_exc_sys(s, insn);
14071         break;
14072     case 0x4:
14073     case 0x6:
14074     case 0xc:
14075     case 0xe:      /* Loads and stores */
14076         disas_ldst(s, insn);
14077         break;
14078     case 0x5:
14079     case 0xd:      /* Data processing - register */
14080         disas_data_proc_reg(s, insn);
14081         break;
14082     case 0x7:
14083     case 0xf:      /* Data processing - SIMD and floating point */
14084         disas_data_proc_simd_fp(s, insn);
14085         break;
14086     default:
14087         unallocated_encoding(s);
14088         break;
14089     }
14090 }
14091 
14092 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
14093                                           CPUState *cpu)
14094 {
14095     DisasContext *dc = container_of(dcbase, DisasContext, base);
14096     CPUARMState *env = cpu->env_ptr;
14097     ARMCPU *arm_cpu = env_archcpu(env);
14098     CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
14099     int bound, core_mmu_idx;
14100 
14101     dc->isar = &arm_cpu->isar;
14102     dc->condjmp = 0;
14103     dc->pc_save = dc->base.pc_first;
14104     dc->aarch64 = true;
14105     dc->thumb = false;
14106     dc->sctlr_b = 0;
14107     dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
14108     dc->condexec_mask = 0;
14109     dc->condexec_cond = 0;
14110     core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
14111     dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
14112     dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
14113     dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
14114     dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
14115     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
14116 #if !defined(CONFIG_USER_ONLY)
14117     dc->user = (dc->current_el == 0);
14118 #endif
14119     dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
14120     dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
14121     dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
14122     dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
14123     dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
14124     dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET);
14125     dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
14126     dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
14127     dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
14128     dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
14129     dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
14130     dc->bt = EX_TBFLAG_A64(tb_flags, BT);
14131     dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
14132     dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
14133     dc->ata = EX_TBFLAG_A64(tb_flags, ATA);
14134     dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
14135     dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
14136     dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
14137     dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
14138     dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
14139     dc->vec_len = 0;
14140     dc->vec_stride = 0;
14141     dc->cp_regs = arm_cpu->cp_regs;
14142     dc->features = env->features;
14143     dc->dcz_blocksize = arm_cpu->dcz_blocksize;
14144 
14145 #ifdef CONFIG_USER_ONLY
14146     /* In sve_probe_page, we assume TBI is enabled. */
14147     tcg_debug_assert(dc->tbid & 1);
14148 #endif
14149 
14150     dc->lse2 = dc_isar_feature(aa64_lse2, dc);
14151 
14152     /* Single step state. The code-generation logic here is:
14153      *  SS_ACTIVE == 0:
14154      *   generate code with no special handling for single-stepping (except
14155      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
14156      *   this happens anyway because those changes are all system register or
14157      *   PSTATE writes).
14158      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
14159      *   emit code for one insn
14160      *   emit code to clear PSTATE.SS
14161      *   emit code to generate software step exception for completed step
14162      *   end TB (as usual for having generated an exception)
14163      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
14164      *   emit code to generate a software step exception
14165      *   end the TB
14166      */
14167     dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
14168     dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
14169     dc->is_ldex = false;
14170 
14171     /* Bound the number of insns to execute to those left on the page.  */
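    /*
     * TARGET_PAGE_MASK is sign-extended, i.e. ~(page_size - 1) as a
     * negative number, so pc_first | TARGET_PAGE_MASK is minus the
     * number of bytes left on the page.  E.g. with 4 KiB pages and
     * pc_first == 0x401ff0, -(0x401ff0 | 0xffff...f000) == 0x10,
     * leaving room for 4 more insns before the page boundary.
     */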
14172     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
14173 
14174     /* If architectural single step active, limit to 1.  */
14175     if (dc->ss_active) {
14176         bound = 1;
14177     }
14178     dc->base.max_insns = MIN(dc->base.max_insns, bound);
14179 }
14180 
14181 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
14182 {
14183 }
14184 
14185 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
14186 {
14187     DisasContext *dc = container_of(dcbase, DisasContext, base);
14188     target_ulong pc_arg = dc->base.pc_next;
14189 
14190     if (tb_cflags(dcbase->tb) & CF_PCREL) {
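    /*
     * With CF_PCREL the TB may execute at any virtual address, so only
     * the page offset of the PC is recorded here; the page-aligned part
     * is reconstructed from the CPU state when the TB is found again.
     */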
14191         pc_arg &= ~TARGET_PAGE_MASK;
14192     }
14193     tcg_gen_insn_start(pc_arg, 0, 0);
14194     dc->insn_start = tcg_last_op();
14195 }
14196 
14197 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
14198 {
14199     DisasContext *s = container_of(dcbase, DisasContext, base);
14200     CPUARMState *env = cpu->env_ptr;
14201     uint64_t pc = s->base.pc_next;
14202     uint32_t insn;
14203 
14204     /* Singlestep exceptions have the highest priority. */
14205     if (s->ss_active && !s->pstate_ss) {
14206         /* Singlestep state is Active-pending.
14207          * If we're in this state at the start of a TB then either
14208          *  a) we just took an exception to an EL which is being debugged
14209          *     and this is the first insn in the exception handler
14210          *  b) debug exceptions were masked and we just unmasked them
14211      *     without changing EL (e.g. by clearing PSTATE.D)
14212          * In either case we're going to take a swstep exception in the
14213          * "did not step an insn" case, and so the syndrome ISV and EX
14214          * bits should be zero.
14215          */
14216         assert(s->base.num_insns == 1);
14217         gen_swstep_exception(s, 0, 0);
14218         s->base.is_jmp = DISAS_NORETURN;
14219         s->base.pc_next = pc + 4;
14220         return;
14221     }
14222 
14223     if (pc & 3) {
14224         /*
14225          * PC alignment fault.  This has priority over the instruction abort
14226          * that we would receive from a translation fault via arm_ldl_code.
14227          * This should only be possible after an indirect branch, at the
14228          * start of the TB.
14229          */
14230         assert(s->base.num_insns == 1);
14231         gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
14232         s->base.is_jmp = DISAS_NORETURN;
14233         s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
14234         return;
14235     }
14236 
14237     s->pc_curr = pc;
14238     insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
14239     s->insn = insn;
14240     s->base.pc_next = pc + 4;
14241 
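    /*
     * Reset the per-insn access-check bookkeeping; the FP/SVE access
     * check helpers use these flags to spot an insn emitting the same
     * check twice.
     */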
14242     s->fp_access_checked = false;
14243     s->sve_access_checked = false;
14244 
14245     if (s->pstate_il) {
14246         /*
14247          * Illegal execution state. This has priority over BTI
14248          * exceptions, but comes after instruction abort exceptions.
14249          */
14250         gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
14251         return;
14252     }
14253 
14254     if (dc_isar_feature(aa64_bti, s)) {
14255         if (s->base.num_insns == 1) {
14256             /*
14257              * At the first insn of the TB, compute s->guarded_page.
14258              * We delayed computing this until successfully reading
14259              * the first insn of the TB, above.  This (mostly) ensures
14260              * that the softmmu tlb entry has been populated, and the
14261              * page table GP bit is available.
14262              *
14263              * Note that we need to compute this even if btype == 0,
14264      * because this value is used for BR instructions later,
14265      * where env is not available.
14266              */
14267             s->guarded_page = is_guarded_page(env, s);
14268 
14269             /* First insn can have btype set to non-zero.  */
14270             tcg_debug_assert(s->btype >= 0);
14271 
14272             /*
14273              * Note that the Branch Target Exception has fairly high
14274      * priority -- below debugging exceptions but above almost
14275      * everything else.  This allows us to handle it now
14276              * instead of waiting until the insn is otherwise decoded.
14277              */
14278             if (s->btype != 0
14279                 && s->guarded_page
14280                 && !btype_destination_ok(insn, s->bt, s->btype)) {
14281                 gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
14282                 return;
14283             }
14284         } else {
14285             /* Not the first insn: btype must be 0.  */
14286             tcg_debug_assert(s->btype == 0);
14287         }
14288     }
14289 
14290     s->is_nonstreaming = false;
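    /*
     * When non-streaming insns must trap, run the SME FA64 decoder
     * first: via trans_FAIL above, it sets is_nonstreaming for any
     * insn that is illegal in streaming SVE mode.
     */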
14291     if (s->sme_trap_nonstreaming) {
14292         disas_sme_fa64(s, insn);
14293     }
14294 
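    /*
     * The generated decoders return true once they have claimed the
     * encoding; anything left unclaimed falls through to the legacy
     * hand-written decoder.
     */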
14295     if (!disas_a64(s, insn) &&
14296         !disas_sme(s, insn) &&
14297         !disas_sve(s, insn)) {
14298         disas_a64_legacy(s, insn);
14299     }
14300 
14301     /*
14302      * After execution of most insns, btype is reset to 0.
14303      * Note that we set btype == -1 when the insn sets btype.
14304      */
14305     if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
14306         reset_btype(s);
14307     }
14308 }
14309 
14310 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
14311 {
14312     DisasContext *dc = container_of(dcbase, DisasContext, base);
14313 
14314     if (unlikely(dc->ss_active)) {
14315         /* Note that this means single-stepping a WFI doesn't halt the CPU.
14316          * For conditional branch insns this is harmless unreachable code as
14317          * gen_goto_tb() has already handled emitting the debug exception
14318          * (and thus a tb-jump is not possible when singlestepping).
14319          */
14320         switch (dc->base.is_jmp) {
14321         default:
14322             gen_a64_update_pc(dc, 4);
14323             /* fall through */
14324         case DISAS_EXIT:
14325         case DISAS_JUMP:
14326             gen_step_complete_exception(dc);
14327             break;
14328         case DISAS_NORETURN:
14329             break;
14330         }
14331     } else {
14332         switch (dc->base.is_jmp) {
14333         case DISAS_NEXT:
14334         case DISAS_TOO_MANY:
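            /* Execution continues sequentially: chain straight to the
             * TB for the next insn.
             */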
14335             gen_goto_tb(dc, 1, 4);
14336             break;
14337         default:
14338         case DISAS_UPDATE_EXIT:
14339             gen_a64_update_pc(dc, 4);
14340             /* fall through */
14341         case DISAS_EXIT:
14342             tcg_gen_exit_tb(NULL, 0);
14343             break;
14344         case DISAS_UPDATE_NOCHAIN:
14345             gen_a64_update_pc(dc, 4);
14346             /* fall through */
14347         case DISAS_JUMP:
14348             tcg_gen_lookup_and_goto_ptr();
14349             break;
14350         case DISAS_NORETURN:
14351         case DISAS_SWI:
14352             break;
14353         case DISAS_WFE:
14354             gen_a64_update_pc(dc, 4);
14355             gen_helper_wfe(cpu_env);
14356             break;
14357         case DISAS_YIELD:
14358             gen_a64_update_pc(dc, 4);
14359             gen_helper_yield(cpu_env);
14360             break;
14361         case DISAS_WFI:
14362             /*
14363              * This is a special case because we don't want to just halt
14364              * the CPU if trying to debug across a WFI.
14365              */
14366             gen_a64_update_pc(dc, 4);
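            /*
             * The argument is the encoded insn length (4 bytes), which
             * the helper can use to back the PC up if the WFI needs to
             * be trapped to a higher exception level instead.
             */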
14367             gen_helper_wfi(cpu_env, tcg_constant_i32(4));
14368             /*
14369              * The helper doesn't necessarily throw an exception, but we
14370              * must go back to the main loop to check for interrupts anyway.
14371              */
14372             tcg_gen_exit_tb(NULL, 0);
14373             break;
14374         }
14375     }
14376 }
14377 
14378 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
14379                                  CPUState *cpu, FILE *logfile)
14380 {
14381     DisasContext *dc = container_of(dcbase, DisasContext, base);
14382 
14383     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
14384     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
14385 }
14386 
14387 const TranslatorOps aarch64_translator_ops = {
14388     .init_disas_context = aarch64_tr_init_disas_context,
14389     .tb_start           = aarch64_tr_tb_start,
14390     .insn_start         = aarch64_tr_insn_start,
14391     .translate_insn     = aarch64_tr_translate_insn,
14392     .tb_stop            = aarch64_tr_tb_stop,
14393     .disas_log          = aarch64_tr_disas_log,
14394 };
14395