xref: /openbmc/qemu/target/arm/tcg/translate-a64.c (revision c74cc082)
1 /*
2  *  AArch64 translation
3  *
4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 
21 #include "translate.h"
22 #include "translate-a64.h"
23 #include "qemu/log.h"
24 #include "disas/disas.h"
25 #include "arm_ldst.h"
26 #include "semihosting/semihost.h"
27 #include "cpregs.h"
28 
29 static TCGv_i64 cpu_X[32];
30 static TCGv_i64 cpu_pc;
31 
32 /* Load/store exclusive handling */
33 static TCGv_i64 cpu_exclusive_high;
34 
35 static const char *regnames[] = {
36     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
37     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
38     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
39     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
40 };
41 
42 enum a64_shift_type {
43     A64_SHIFT_TYPE_LSL = 0,
44     A64_SHIFT_TYPE_LSR = 1,
45     A64_SHIFT_TYPE_ASR = 2,
46     A64_SHIFT_TYPE_ROR = 3
47 };
48 
49 /*
50  * Include the generated decoders.
51  */
52 
53 #include "decode-sme-fa64.c.inc"
54 #include "decode-a64.c.inc"
55 
/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */

/* Handler invoked once a table entry matches; decodes and translates insn. */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/* One table entry: an insn matches when (insn & mask) == pattern. */
typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;
66 
67 /* initialize TCG globals.  */
/*
 * Initialize the TCG globals for A64 translation: the PC, the 32
 * X-register slots, and the high half of the exclusive-monitor value.
 * Called once at CPU translation setup.
 */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}
84 
/*
 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
 * (LDTR/STTR family).  When s->unpriv is clear the access behaves like a
 * normal one, so the current translation-time mmu_idx is used unchanged.
 */
static int get_a64_user_mem_index(DisasContext *s)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}
117 
/* Emit a store of the literal value 'val' to env->btype. */
static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), cpu_env,
                   offsetof(CPUARMState, btype));
}
123 
/*
 * Set env->btype to a non-zero value and mark the translation-time
 * cached btype as unknown (-1) so later code does not assume it.
 */
static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}
131 
132 static void reset_btype(DisasContext *s)
133 {
134     if (s->btype != 0) {
135         set_btype_raw(0);
136         s->btype = 0;
137     }
138 }
139 
/*
 * Set 'dest' to the address of the current insn plus 'diff'.
 * With CF_PCREL the absolute PC is not known at translation time, so
 * the value is formed relative to cpu_pc, whose translation-time
 * contents are tracked in s->pc_save; otherwise a constant is emitted.
 */
static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    /* pc_save == -1 would mean cpu_pc's contents are unknown here. */
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}
149 
/*
 * Write pc_curr + diff into cpu_pc and record the new known PC value
 * in s->pc_save for subsequent pc-relative emission.
 */
void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}
155 
156 /*
157  * Handle Top Byte Ignore (TBI) bits.
158  *
159  * If address tagging is enabled via the TCR TBI bits:
160  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
161  *    then the address is zero-extended, clearing bits [63:56]
162  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
163  *    and TBI1 controls addresses with bit 55 == 1.
164  *    If the appropriate TBI bit is set for the address then
165  *    the address is sign-extended from bit 55 into bits [63:56]
166  *
167  * Here We have concatenated TBI{1,0} into tbi.
168  */
/*
 * Apply the TBI address transformation described above: copy 'src' to
 * 'dst' with the top byte cleared, sign-extended from bit 55, or left
 * unmodified, according to the 2-bit 'tbi' field ({TBI1,TBI0}).
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /*
             * tbi0 but !tbi1: only use the extension if positive.
             * AND keeps src's original top byte when bit 55 is set.
             */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /*
             * !tbi0 but tbi1: only use the extension if negative.
             * OR keeps src's original top byte when bit 55 is clear.
             */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}
199 
/*
 * Load an arbitrary (runtime) value into the PC, applying the
 * instruction-side TBI transformation.  After this the translation-time
 * PC value is unknown, so pc_save is invalidated.
 */
static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}
209 
210 /*
211  * Handle MTE and/or TBI.
212  *
213  * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
214  * for the tag to be present in the FAR_ELx register.  But for user-only
215  * mode we do not have a TLB with which to implement this, so we must
216  * remove the top byte now.
217  *
218  * Always return a fresh temporary that we can increment independently
219  * of the write-back address.
220  */
221 
/*
 * Return a fresh temporary holding 'addr' with data-side TBI applied
 * (user-only) or copied unchanged (system, where the TLB handles tags).
 * See the block comment above for the rationale.
 */
TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}
232 
/* Insert a zero tag into src, with the result at dst.
 * Clears the 4-bit MTE allocation tag field, bits [59:56].
 */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}
238 
/*
 * Emit a runtime probe of 1 << log2_size bytes at 'ptr' for access type
 * 'acc', so that any fault is raised before state is modified.
 */
static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(cpu_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}
247 
248 /*
249  * For MTE, check a single logical or atomic access.  This probes a single
250  * address, the exact one specified.  The size and alignment of the access
251  * is not relevant to MTE, per se, but watchpoints do require the size,
252  * and we want to recognize those before making any other changes to state.
253  */
/*
 * Emit the MTE check for a single access of 1 << log2_size bytes at
 * 'addr' using mmu index 'core_idx'.  Returns the address to actually
 * use for the access (tag-checked or merely TBI-cleaned).
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      int log2_size, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        /* Pack the check parameters into an MTEDESC descriptor word. */
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    /* No MTE check needed; still strip the tag byte where required. */
    return clean_data_tbi(s, addr);
}
276 
/* As gen_mte_check1_mmuidx, for a normal (non-unpriv) access using the
 * current memory index.
 */
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int log2_size)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
                                 false, get_mem_index(s));
}
283 
284 /*
285  * For MTE, check multiple logical sequential accesses.
286  */
/*
 * For MTE, check multiple logical sequential accesses.
 * 'size' is the total byte count; returns the address to use.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int size)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        /* Pack the check parameters into an MTEDESC descriptor word. */
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    /* No MTE check needed; still strip the tag byte where required. */
    return clean_data_tbi(s, addr);
}
307 
/* A condition expressed as a TCG comparison against a 64-bit value. */
typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;
312 
/*
 * Translate an A64 condition code 'cc' into a 64-bit DisasCompare,
 * by widening the 32-bit comparison produced by arm_test_cc().
 */
static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}
327 
/* Emit a runtime recomputation of the cached A64 hflags for the current EL. */
static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(cpu_env, tcg_constant_i32(s->current_el));
}
332 
/* Emit a raise of QEMU-internal exception 'excp' (never a guest one). */
static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
}
338 
/*
 * Sync the PC to the current insn, raise internal exception 'excp',
 * and end the translation block.
 */
static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}
345 
/*
 * Sync the PC and raise a BKPT exception with the given syndrome,
 * then end the translation block.
 */
static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}
352 
/* Emit the software-step exception taken after completing one insn. */
static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}
368 
369 static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
370 {
371     if (s->ss_active) {
372         return false;
373     }
374     return translator_use_goto_tb(&s->base, dest);
375 }
376 
/*
 * End the TB with a (possibly linked) jump to pc_curr + diff, using
 * goto_tb slot 'n' when chaining is permitted, otherwise an indirect
 * lookup or, when single-stepping, the step-complete exception.
 */
static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments.  For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path.  A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            /* Single-step: take the swstep exception instead of jumping. */
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}
407 
408 /*
409  * Register access functions
410  *
411  * These functions are used for directly accessing a register in where
412  * changes to the final register value are likely to be made. If you
413  * need to use a register for temporary calculation (e.g. index type
414  * operations) use the read_* form.
415  *
416  * B1.2.1 Register mappings
417  *
418  * In instruction register encoding 31 can refer to ZR (zero register) or
419  * the SP (stack pointer) depending on context. In QEMU's case we map SP
420  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
421  * This is the point of the _sp forms.
422  */
423 TCGv_i64 cpu_reg(DisasContext *s, int reg)
424 {
425     if (reg == 31) {
426         TCGv_i64 t = tcg_temp_new_i64();
427         tcg_gen_movi_i64(t, 0);
428         return t;
429     } else {
430         return cpu_X[reg];
431     }
432 }
433 
/* register access for when 31 == SP: slot 31 of cpu_X holds SP. */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
439 
440 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
441  * representing the register contents. This TCGv is an auto-freed
442  * temporary so it need not be explicitly freed, and may be modified.
443  */
444 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
445 {
446     TCGv_i64 v = tcg_temp_new_i64();
447     if (reg != 31) {
448         if (sf) {
449             tcg_gen_mov_i64(v, cpu_X[reg]);
450         } else {
451             tcg_gen_ext32u_i64(v, cpu_X[reg]);
452         }
453     } else {
454         tcg_gen_movi_i64(v, 0);
455     }
456     return v;
457 }
458 
459 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
460 {
461     TCGv_i64 v = tcg_temp_new_i64();
462     if (sf) {
463         tcg_gen_mov_i64(v, cpu_X[reg]);
464     } else {
465         tcg_gen_ext32u_i64(v, cpu_X[reg]);
466     }
467     return v;
468 }
469 
/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    /* Element 0 of the vector register at the given element size. */
    return vec_reg_offset(s, regno, 0, size);
}
479 
/* Offset of the high half of the 128 bit vector Qn (bits [127:64]). */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}
485 
486 /* Convenience accessors for reading and writing single and double
487  * FP registers. Writing clears the upper parts of the associated
488  * 128 bit vector register, as required by the architecture.
489  * Note that unlike the GP register accessors, the values returned
490  * by the read functions must be manually freed.
491  */
/* Return a new i64 temporary holding Dn (low 64 bits of Qn). */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}
499 
/* Return a new i32 temporary holding Sn (low 32 bits of Qn). */
static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}
507 
/* Return a new i32 temporary holding Hn zero-extended to 32 bits. */
static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
    return v;
}
515 
/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}
527 
/* Write v to Dn and zero the upper 64+ bits of Qn, per the architecture. */
void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, cpu_env, ofs);
    clear_vec_high(s, false, reg);
}
535 
/* Write v to Sn, zero-extending to 64 bits and clearing the rest of Qn. */
static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}
543 
/* Expand a 2-operand AdvSIMD vector operation using an expander function.
 * Operand size is 8 or 16 bytes per is_q; vece selects the element size.
 */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}
551 
/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}
561 
/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}
569 
/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}
578 
/* Expand a 2-operand operation using an out-of-line helper.
 * 'data' is the immediate simd_data value passed to the helper.
 */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
587 
/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
597 
/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.  is_fp16 selects the FP16 float_status.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
611 
/* Expand a 3-operand + qc + operation using an out-of-line helper.
 * The helper receives a pointer to the QC (saturation) flag.
 */
static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
                            int rm, gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), qc_ptr,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
}
624 
/* Expand a 4-operand operation using an out-of-line helper.  */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
635 
/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.  is_fp16 selects the FP16 float_status.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
651 
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    /* NF = high half; ZF = OR of both halves (nonzero iff result != 0). */
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
660 
/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        /* 32-bit: NF and ZF are both just the low 32 bits of the result. */
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}
673 
/* dest = T0 + T1; compute C, N, V and Z flags (64-bit operands) */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 result, flag, tmp;
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    /* 128-bit add; the carry-out lands in 'flag'. */
    tcg_gen_movi_i64(tmp, 0);
    tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    gen_set_NZ64(result);

    /* V = (result ^ t0) & ~(t0 ^ t1): overflow iff operands had the
     * same sign and the result's sign differs; bit 63 holds the flag. */
    tcg_gen_xor_i64(flag, result, t0);
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_andc_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);

    /* Write result last so dest may alias t0/t1. */
    tcg_gen_mov_i64(dest, result);
}
696 
/* dest = T0 + T1 in 32-bit mode; compute C, N, V and Z flags and
 * zero-extend the 32-bit result into dest.
 */
static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp = tcg_temp_new_i32();

    /* 64-bit add via add2; the carry-out lands directly in CF. */
    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    /* V = (result ^ t0) & ~(t0 ^ t1), sign bit of cpu_VF. */
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}
713 
714 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
715 {
716     if (sf) {
717         gen_add64_CC(dest, t0, t1);
718     } else {
719         gen_add32_CC(dest, t0, t1);
720     }
721 }
722 
/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 64 bit arithmetic */
    TCGv_i64 result, flag, tmp;

    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tcg_gen_sub_i64(result, t0, t1);

    gen_set_NZ64(result);

    /* ARM subtraction carry: C set when no borrow, i.e. t0 >= t1 unsigned. */
    tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    /* V = (result ^ t0) & (t0 ^ t1): overflow iff operand signs differ
     * and the result's sign differs from t0; bit 63 holds the flag. */
    tcg_gen_xor_i64(flag, result, t0);
    tmp = tcg_temp_new_i64();
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_and_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);
    /* Write result last so dest may alias t0/t1. */
    tcg_gen_mov_i64(dest, result);
}
745 
/* dest = T0 - T1 in 32-bit mode; compute C, N, V and Z flags and
 * zero-extend the 32-bit result into dest.
 */
static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 32 bit arithmetic */
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp;

    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    /* ARM subtraction carry: C set when no borrow (t0 >= t1 unsigned). */
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    /* V = (result ^ t0) & (t0 ^ t1), sign bit of cpu_VF. */
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}
764 
765 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
766 {
767     if (sf) {
768         gen_sub64_CC(dest, t0, t1);
769     } else {
770         gen_sub32_CC(dest, t0, t1);
771     }
772 }
773 
/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        /* 32-bit mode: truncate the result to the low 32 bits. */
        tcg_gen_ext32u_i64(dest, dest);
    }
}
786 
/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64-bit: chain two add2 ops to accumulate the carry-out. */
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        /* result = t0 + CF, then result += t1; cf_64 collects carries. */
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        /* V = (result ^ t0) & ~(t0 ^ t1); bit 63 holds the flag. */
        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        /* Write result last so dest may alias t0/t1. */
        tcg_gen_mov_i64(dest, result);
    } else {
        /* 32-bit: same scheme on 32-bit values, result zero-extended. */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        /* V = (result ^ t0) & ~(t0 ^ t1), sign bit of cpu_VF. */
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
827 
828 /*
829  * Load/Store generators
830  */
831 
/*
 * Store from GPR register to memory.
 * Emits the store with the given memidx and, when iss_valid, records
 * the ISS syndrome (register, sign, size) for a possible data abort.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    /* Apply endianness/alignment adjustments before emitting the store. */
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}
857 
/* As do_gpr_st_memidx, using the current memory index. */
static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}
867 
/*
 * Load from memory to GPR register.
 * 'extend' requests zero-extension to 64 bits of a sign-extended
 * sub-64-bit load; iss_* record the syndrome for a possible data abort.
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        /* Signed sub-word load into a W register: clear the high half. */
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}
897 
/* As do_gpr_ld_memidx, using the current memory index. */
static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}
906 
907 /*
908  * Store from FP register to memory
909  */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    MemOp mop;

    tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));

    if (size < 4) {
        /* Up to 64 bits: a single store of 2^size bytes. */
        mop = finalize_memop(s, size);
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        /*
         * 128 bits: two 64-bit stores.  For a big-endian target the
         * architecturally-high half goes to the lower address.  Only
         * the first access carries the 16-byte alignment requirement.
         */
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
        TCGv_i64 tmphi = tcg_temp_new_i64();

        tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));

        mop = s->be_data | MO_UQ;
        tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr,
                            get_mem_index(s), mop);
    }
}
936 
937 /*
938  * Load from memory to FP register
939  */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;
    MemOp mop;

    if (size < 4) {
        /* Up to 64 bits: a single load; tmphi stays NULL. */
        mop = finalize_memop(s, size);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        /*
         * 128 bits: two 64-bit loads.  For a big-endian target the
         * architecturally-high half comes from the lower address.  Only
         * the first access carries the 16-byte alignment requirement.
         */
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        mop = s->be_data | MO_UQ;
        tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr,
                            get_mem_index(s), mop);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
    }
    /* Zero the parts of the vector register that were not written. */
    clear_vec_high(s, tmphi != NULL, destidx);
}
972 
973 /*
974  * Vector load/store helpers.
975  *
976  * The principal difference between this and a FP load is that we don't
977  * zero extend as we are filling a partial chunk of the vector register.
978  * These functions don't support 128 bit loads/stores, which would be
979  * normal load/store operations.
980  *
981  * The _i32 versions are useful when operating on 32 bit quantities
982  * (eg for floating point single or using Neon helper functions).
983  */
984 
985 /* Get value of an element within a vector register */
986 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
987                              int element, MemOp memop)
988 {
989     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
990     switch ((unsigned)memop) {
991     case MO_8:
992         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
993         break;
994     case MO_16:
995         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
996         break;
997     case MO_32:
998         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
999         break;
1000     case MO_8|MO_SIGN:
1001         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
1002         break;
1003     case MO_16|MO_SIGN:
1004         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
1005         break;
1006     case MO_32|MO_SIGN:
1007         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
1008         break;
1009     case MO_64:
1010     case MO_64|MO_SIGN:
1011         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
1012         break;
1013     default:
1014         g_assert_not_reached();
1015     }
1016 }
1017 
1018 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1019                                  int element, MemOp memop)
1020 {
1021     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1022     switch (memop) {
1023     case MO_8:
1024         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
1025         break;
1026     case MO_16:
1027         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
1028         break;
1029     case MO_8|MO_SIGN:
1030         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
1031         break;
1032     case MO_16|MO_SIGN:
1033         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
1034         break;
1035     case MO_32:
1036     case MO_32|MO_SIGN:
1037         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
1038         break;
1039     default:
1040         g_assert_not_reached();
1041     }
1042 }
1043 
1044 /* Set value of an element within a vector register */
1045 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1046                               int element, MemOp memop)
1047 {
1048     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1049     switch (memop) {
1050     case MO_8:
1051         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1052         break;
1053     case MO_16:
1054         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1055         break;
1056     case MO_32:
1057         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1058         break;
1059     case MO_64:
1060         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1061         break;
1062     default:
1063         g_assert_not_reached();
1064     }
1065 }
1066 
1067 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1068                                   int destidx, int element, MemOp memop)
1069 {
1070     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1071     switch (memop) {
1072     case MO_8:
1073         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1074         break;
1075     case MO_16:
1076         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1077         break;
1078     case MO_32:
1079         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1080         break;
1081     default:
1082         g_assert_not_reached();
1083     }
1084 }
1085 
/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    /* Extract the element (zero-extended, MO_SIGN stripped), then store. */
    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}
1095 
/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    /* Load, then write just the selected element (MO_SIGN stripped). */
    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}
1105 
1106 /* Check that FP/Neon access is enabled. If it is, return
1107  * true. If not, emit code to generate an appropriate exception,
1108  * and return false; the caller should not emit any code for
1109  * the instruction. Note that this check must happen after all
1110  * unallocated-encoding checks (otherwise the syndrome information
1111  * for the resulting exception will be incorrect).
1112  */
1113 static bool fp_access_check_only(DisasContext *s)
1114 {
1115     if (s->fp_excp_el) {
1116         assert(!s->fp_access_checked);
1117         s->fp_access_checked = true;
1118 
1119         gen_exception_insn_el(s, 0, EXCP_UDEF,
1120                               syn_fp_access_trap(1, 0xe, false, 0),
1121                               s->fp_excp_el);
1122         return false;
1123     }
1124     s->fp_access_checked = true;
1125     return true;
1126 }
1127 
1128 static bool fp_access_check(DisasContext *s)
1129 {
1130     if (!fp_access_check_only(s)) {
1131         return false;
1132     }
1133     if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
1134         gen_exception_insn(s, 0, EXCP_UDEF,
1135                            syn_smetrap(SME_ET_Streaming, false));
1136         return false;
1137     }
1138     return true;
1139 }
1140 
1141 /*
1142  * Check that SVE access is enabled.  If it is, return true.
1143  * If not, emit code to generate an appropriate exception and return false.
1144  * This function corresponds to CheckSVEEnabled().
1145  */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        /*
         * In streaming mode, or on an SME-without-SVE implementation,
         * SVE insns are instead governed by the SME enable checks.
         */
        assert(dc_isar_feature(aa64_sme, s));
        if (!sme_sm_enabled_check(s)) {
            goto fail_exit;
        }
    } else if (s->sve_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        goto fail_exit;
    }
    s->sve_access_checked = true;
    /* The FP access check may still raise its own exception. */
    return fp_access_check(s);

 fail_exit:
    /* Assert that we only raise one exception per instruction. */
    assert(!s->sve_access_checked);
    s->sve_access_checked = true;
    return false;
}
1167 
1168 /*
1169  * Check that SME access is enabled, raise an exception if not.
1170  * Note that this function corresponds to CheckSMEAccess and is
1171  * only used directly for cpregs.
1172  */
1173 static bool sme_access_check(DisasContext *s)
1174 {
1175     if (s->sme_excp_el) {
1176         gen_exception_insn_el(s, 0, EXCP_UDEF,
1177                               syn_smetrap(SME_ET_AccessTrap, false),
1178                               s->sme_excp_el);
1179         return false;
1180     }
1181     return true;
1182 }
1183 
1184 /* This function corresponds to CheckSMEEnabled. */
1185 bool sme_enabled_check(DisasContext *s)
1186 {
1187     /*
1188      * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1189      * to be zero when fp_excp_el has priority.  This is because we need
1190      * sme_excp_el by itself for cpregs access checks.
1191      */
1192     if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
1193         s->fp_access_checked = true;
1194         return sme_access_check(s);
1195     }
1196     return fp_access_check_only(s);
1197 }
1198 
1199 /* Common subroutine for CheckSMEAnd*Enabled. */
1200 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
1201 {
1202     if (!sme_enabled_check(s)) {
1203         return false;
1204     }
1205     if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
1206         gen_exception_insn(s, 0, EXCP_UDEF,
1207                            syn_smetrap(SME_ET_NotStreaming, false));
1208         return false;
1209     }
1210     if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
1211         gen_exception_insn(s, 0, EXCP_UDEF,
1212                            syn_smetrap(SME_ET_InactiveZA, false));
1213         return false;
1214     }
1215     return true;
1216 }
1217 
1218 /*
1219  * This utility function is for doing register extension with an
1220  * optional shift. You will likely want to pass a temporary for the
1221  * destination register. See DecodeRegExtend() in the ARM ARM.
1222  */
1223 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1224                               int option, unsigned int shift)
1225 {
1226     int extsize = extract32(option, 0, 2);
1227     bool is_signed = extract32(option, 2, 1);
1228 
1229     if (is_signed) {
1230         switch (extsize) {
1231         case 0:
1232             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1233             break;
1234         case 1:
1235             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1236             break;
1237         case 2:
1238             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1239             break;
1240         case 3:
1241             tcg_gen_mov_i64(tcg_out, tcg_in);
1242             break;
1243         }
1244     } else {
1245         switch (extsize) {
1246         case 0:
1247             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1248             break;
1249         case 1:
1250             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1251             break;
1252         case 2:
1253             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1254             break;
1255         case 3:
1256             tcg_gen_mov_i64(tcg_out, tcg_in);
1257             break;
1258         }
1259     }
1260 
1261     if (shift) {
1262         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1263     }
1264 }
1265 
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     *
     * Deliberately empty: callers invoke it at every SP-relative
     * access site so that the hook point already exists.
     */
}
1278 
1279 /*
1280  * This provides a simple table based table lookup decoder. It is
1281  * intended to be used when the relevant bits for decode are too
1282  * awkwardly placed and switch/if based logic would be confusing and
1283  * deeply nested. Since it's a linear search through the table, tables
1284  * should be kept small.
1285  *
1286  * It returns the first handler where insn & mask == pattern, or
1287  * NULL if there is no match.
1288  * The table is terminated by an empty mask (i.e. 0)
1289  */
1290 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1291                                                uint32_t insn)
1292 {
1293     const AArch64DecodeTable *tptr = table;
1294 
1295     while (tptr->mask) {
1296         if ((insn & tptr->mask) == tptr->pattern) {
1297             return tptr->disas_fn;
1298         }
1299         tptr++;
1300     }
1301     return NULL;
1302 }
1303 
1304 /*
1305  * The instruction disassembly implemented here matches
1306  * the instruction encoding classifications in chapter C4
1307  * of the ARM Architecture Reference Manual (DDI0487B_a);
1308  * classification names and decode diagrams here should generally
1309  * match up with those in the manual.
1310  */
1311 
/* B: unconditional PC-relative branch */
static bool trans_B(DisasContext *s, arg_i *a)
{
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}
1318 
/* BL: branch with link; x30 receives the address of the next insn */
static bool trans_BL(DisasContext *s, arg_i *a)
{
    gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}
1326 
1327 
1328 static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1329 {
1330     DisasLabel match;
1331     TCGv_i64 tcg_cmp;
1332 
1333     tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1334     reset_btype(s);
1335 
1336     match = gen_disas_label(s);
1337     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1338                         tcg_cmp, 0, match.label);
1339     gen_goto_tb(s, 0, 4);
1340     set_disas_label(s, match);
1341     gen_goto_tb(s, 1, a->imm);
1342     return true;
1343 }
1344 
1345 static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1346 {
1347     DisasLabel match;
1348     TCGv_i64 tcg_cmp;
1349 
1350     tcg_cmp = tcg_temp_new_i64();
1351     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1352 
1353     reset_btype(s);
1354 
1355     match = gen_disas_label(s);
1356     tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1357                         tcg_cmp, 0, match.label);
1358     gen_goto_tb(s, 0, 4);
1359     set_disas_label(s, match);
1360     gen_goto_tb(s, 1, a->imm);
1361     return true;
1362 }
1363 
1364 static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1365 {
1366     reset_btype(s);
1367     if (a->cond < 0x0e) {
1368         /* genuinely conditional branches */
1369         DisasLabel match = gen_disas_label(s);
1370         arm_gen_test_cc(a->cond, match.label);
1371         gen_goto_tb(s, 0, 4);
1372         set_disas_label(s, match);
1373         gen_goto_tb(s, 1, a->imm);
1374     } else {
1375         /* 0xe and 0xf are both "always" conditions */
1376         gen_goto_tb(s, 0, a->imm);
1377     }
1378     return true;
1379 }
1380 
1381 static void set_btype_for_br(DisasContext *s, int rn)
1382 {
1383     if (dc_isar_feature(aa64_bti, s)) {
1384         /* BR to {x16,x17} or !guard -> 1, else 3.  */
1385         set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
1386     }
1387 }
1388 
1389 static void set_btype_for_blr(DisasContext *s)
1390 {
1391     if (dc_isar_feature(aa64_bti, s)) {
1392         /* BLR sets BTYPE to 2, regardless of source guarded page.  */
1393         set_btype(s, 2);
1394     }
1395 }
1396 
/* BR: branch to register */
static bool trans_BR(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    set_btype_for_br(s, a->rn);
    /* PC is no longer statically known: end the TB with a jump. */
    s->base.is_jmp = DISAS_JUMP;
    return true;
}
1404 
1405 static bool trans_BLR(DisasContext *s, arg_r *a)
1406 {
1407     TCGv_i64 dst = cpu_reg(s, a->rn);
1408     TCGv_i64 lr = cpu_reg(s, 30);
1409     if (dst == lr) {
1410         TCGv_i64 tmp = tcg_temp_new_i64();
1411         tcg_gen_mov_i64(tmp, dst);
1412         dst = tmp;
1413     }
1414     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1415     gen_a64_set_pc(s, dst);
1416     set_btype_for_blr(s);
1417     s->base.is_jmp = DISAS_JUMP;
1418     return true;
1419 }
1420 
/* RET: return from subroutine, branching to Xn (usually x30) */
static bool trans_RET(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}
1427 
1428 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1429                                    TCGv_i64 modifier, bool use_key_a)
1430 {
1431     TCGv_i64 truedst;
1432     /*
1433      * Return the branch target for a BRAA/RETA/etc, which is either
1434      * just the destination dst, or that value with the pauth check
1435      * done and the code removed from the high bits.
1436      */
1437     if (!s->pauth_active) {
1438         return dst;
1439     }
1440 
1441     truedst = tcg_temp_new_i64();
1442     if (use_key_a) {
1443         gen_helper_autia(truedst, cpu_env, dst, modifier);
1444     } else {
1445         gen_helper_autib(truedst, cpu_env, dst, modifier);
1446     }
1447     return truedst;
1448 }
1449 
/* BRAAZ/BRABZ: authenticated branch to register, zero modifier */
static bool trans_BRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    /* a->m selects the B key; otherwise the A key is used. */
    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    gen_a64_set_pc(s, dst);
    set_btype_for_br(s, a->rn);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}
1464 
1465 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1466 {
1467     TCGv_i64 dst, lr;
1468 
1469     if (!dc_isar_feature(aa64_pauth, s)) {
1470         return false;
1471     }
1472 
1473     dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1474     lr = cpu_reg(s, 30);
1475     if (dst == lr) {
1476         TCGv_i64 tmp = tcg_temp_new_i64();
1477         tcg_gen_mov_i64(tmp, dst);
1478         dst = tmp;
1479     }
1480     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1481     gen_a64_set_pc(s, dst);
1482     set_btype_for_blr(s);
1483     s->base.is_jmp = DISAS_JUMP;
1484     return true;
1485 }
1486 
/* RETAA/RETAB: authenticated return via x30, with SP as the modifier */
static bool trans_RETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    /*
     * NOTE(review): unlike the BRAZ/BLRAZ handlers there is no
     * dc_isar_feature(aa64_pauth) gate here; with PAuth inactive,
     * auth_branch_target returns x30 unchanged, so this behaves like
     * a plain RET -- confirm this is intentional.
     */
    dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}
1496 
1497 static bool trans_BRA(DisasContext *s, arg_bra *a)
1498 {
1499     TCGv_i64 dst;
1500 
1501     if (!dc_isar_feature(aa64_pauth, s)) {
1502         return false;
1503     }
1504     dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m);
1505     gen_a64_set_pc(s, dst);
1506     set_btype_for_br(s, a->rn);
1507     s->base.is_jmp = DISAS_JUMP;
1508     return true;
1509 }
1510 
1511 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1512 {
1513     TCGv_i64 dst, lr;
1514 
1515     if (!dc_isar_feature(aa64_pauth, s)) {
1516         return false;
1517     }
1518     dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1519     lr = cpu_reg(s, 30);
1520     if (dst == lr) {
1521         TCGv_i64 tmp = tcg_temp_new_i64();
1522         tcg_gen_mov_i64(tmp, dst);
1523         dst = tmp;
1524     }
1525     gen_pc_plus_diff(s, lr, curr_insn_len(s));
1526     gen_a64_set_pc(s, dst);
1527     set_btype_for_blr(s);
1528     s->base.is_jmp = DISAS_JUMP;
1529     return true;
1530 }
1531 
1532 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1533 {
1534     TCGv_i64 dst;
1535 
1536     if (s->current_el == 0) {
1537         return false;
1538     }
1539     if (s->fgt_eret) {
1540         gen_exception_insn_el(s, 0, EXCP_UDEF, 0, 2);
1541         return true;
1542     }
1543     dst = tcg_temp_new_i64();
1544     tcg_gen_ld_i64(dst, cpu_env,
1545                    offsetof(CPUARMState, elr_el[s->current_el]));
1546 
1547     translator_io_start(&s->base);
1548 
1549     gen_helper_exception_return(cpu_env, dst);
1550     /* Must exit loop to check un-masked IRQs */
1551     s->base.is_jmp = DISAS_EXIT;
1552     return true;
1553 }
1554 
1555 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1556 {
1557     TCGv_i64 dst;
1558 
1559     if (!dc_isar_feature(aa64_pauth, s)) {
1560         return false;
1561     }
1562     if (s->current_el == 0) {
1563         return false;
1564     }
1565     /* The FGT trap takes precedence over an auth trap. */
1566     if (s->fgt_eret) {
1567         gen_exception_insn_el(s, 0, EXCP_UDEF, a->m ? 3 : 2, 2);
1568         return true;
1569     }
1570     dst = tcg_temp_new_i64();
1571     tcg_gen_ld_i64(dst, cpu_env,
1572                    offsetof(CPUARMState, elr_el[s->current_el]));
1573 
1574     dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1575 
1576     translator_io_start(&s->base);
1577 
1578     gen_helper_exception_return(cpu_env, dst);
1579     /* Must exit loop to check un-masked IRQs */
1580     s->base.is_jmp = DISAS_EXIT;
1581     return true;
1582 }
1583 
1584 /* HINT instruction group, including various allocated HINTs */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    /* The specific hint is selected by CRm:op2; op1 must be 0b011. */
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0b00000: /* NOP */
        break;
    case 0b00011: /* WFI */
        s->base.is_jmp = DISAS_WFI;
        break;
    case 0b00001: /* YIELD */
        /* When running in MTTCG we don't generate jumps to the yield and
         * WFE helpers as it won't affect the scheduling of other vCPUs.
         * If we wanted to more completely model WFE/SEV so we don't busy
         * spin unnecessarily we would need to do something more involved.
         */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_YIELD;
        }
        break;
    case 0b00010: /* WFE */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_WFE;
        }
        break;
    case 0b00100: /* SEV */
    case 0b00101: /* SEVL */
    case 0b00110: /* DGH */
        /* we treat all as NOP at least for now */
        break;
    /*
     * The pointer-authentication hints below are NOPs unless
     * s->pauth_active is set.
     */
    case 0b00111: /* XPACLRI */
        if (s->pauth_active) {
            gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
        }
        break;
    case 0b01000: /* PACIA1716 */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01010: /* PACIB1716 */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01100: /* AUTIA1716 */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01110: /* AUTIB1716 */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b10000: /* ESB */
        /* Without RAS, we must implement this as NOP. */
        if (dc_isar_feature(aa64_ras, s)) {
            /*
             * QEMU does not have a source of physical SErrors,
             * so we are only concerned with virtual SErrors.
             * The pseudocode in the ARM for this case is
             *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
             *      AArch64.vESBOperation();
             * Most of the condition can be evaluated at translation time.
             * Test for EL2 present, and defer test for SEL2 to runtime.
             */
            if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
                gen_helper_vesb(cpu_env);
            }
        }
        break;
    case 0b11000: /* PACIAZ */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11001: /* PACIASP */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11010: /* PACIBZ */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11011: /* PACIBSP */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11100: /* AUTIAZ */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11101: /* AUTIASP */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11110: /* AUTIBZ */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11111: /* AUTIBSP */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    default:
        /* default specified as NOP equivalent */
        break;
    }
}
1712 
/* CLREX: clear the local exclusive monitor */
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    /* An exclusive address of -1 marks the monitor as holding nothing. */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
1717 
/* CLREX, DSB, DMB, ISB: barrier and exclusive-monitor instructions */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGBar bar;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
        /* CRm<1:0> encodes which access types the barrier orders. */
        switch (crm & 3) {
        case 1: /* MBReqTypes_Reads */
            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
            break;
        case 2: /* MBReqTypes_Writes */
            bar = TCG_BAR_SC | TCG_MO_ST_ST;
            break;
        default: /* MBReqTypes_All */
            bar = TCG_BAR_SC | TCG_MO_ALL;
            break;
        }
        tcg_gen_mb(bar);
        return;
    case 6: /* ISB */
        /* We need to break the TB after this insn to execute
         * a self-modified code correctly and also to take
         * any pending interrupts immediately.
         */
        reset_btype(s);
        gen_goto_tb(s, 0, 4);
        return;

    case 7: /* SB */
        if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
            goto do_unallocated;
        }
        /*
         * TODO: There is no speculation barrier opcode for TCG;
         * MB and end the TB instead.
         */
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
        gen_goto_tb(s, 0, 4);
        return;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }
}
1775 
static void gen_xaflag(void)
{
    /*
     * Emit code for the XAFlag operation (see handle_msr_i, selector
     * 0x01, gated on aa64_condm_5), rewriting NZCV with pure flag
     * arithmetic.  "z" holds the boolean value of the Z flag; note
     * that cpu_ZF stores Z inverted (ZF == 0 means Z is set).
     */
    TCGv_i32 z = tcg_temp_new_i32();

    tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);

    /*
     * (!C & !Z) << 31
     * (!(C | Z)) << 31
     * ~((C | Z) << 31)
     * ~-(C | Z)
     * (C | Z) - 1
     */
    tcg_gen_or_i32(cpu_NF, cpu_CF, z);
    tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);

    /* !(Z & C) */
    tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
    tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);

    /* (!C & Z) << 31 -> -(Z & ~C) */
    tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);

    /* C | Z */
    tcg_gen_or_i32(cpu_CF, cpu_CF, z);
}
1803 
static void gen_axflag(void)
{
    /*
     * Emit code for the AXFlag operation (see handle_msr_i, selector
     * 0x02, gated on aa64_condm_5).  N and V are cleared; C and Z are
     * folded with V first.  cpu_VF keeps V in bit 31.
     */
    tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
    tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */

    /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
    tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);

    tcg_gen_movi_i32(cpu_NF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}
1815 
/* MSR (immediate) - move immediate to processor state field.
 *
 * op1:op2 select the PSTATE field being written and crm carries the
 * 4-bit immediate operand. By default the TB is ended (with chaining
 * allowed); individual cases downgrade that to DISAS_NEXT when the
 * write cannot affect state the translator depends on, or upgrade it
 * when a full exit is required. Unallocated encodings, and fields not
 * implemented by this cpu, UNDEF.
 */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    int op = op1 << 3 | op2;

    /* End the TB by default, chaining is ok.  */
    s->base.is_jmp = DISAS_TOO_MANY;

    switch (op) {
    case 0x00: /* CFINV */
        if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
            goto do_unallocated;
        }
        /* Only the carry flag changes; no need to end the TB. */
        tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x01: /* XAFlag */
        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
            goto do_unallocated;
        }
        gen_xaflag();
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x02: /* AXFlag */
        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
            goto do_unallocated;
        }
        gen_axflag();
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x03: /* UAO */
        if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
            goto do_unallocated;
        }
        /* CRm<0> is the value written to the PSTATE bit. */
        if (crm & 1) {
            set_pstate_bits(PSTATE_UAO);
        } else {
            clear_pstate_bits(PSTATE_UAO);
        }
        gen_rebuild_hflags(s);
        break;

    case 0x04: /* PAN */
        if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_PAN);
        } else {
            clear_pstate_bits(PSTATE_PAN);
        }
        gen_rebuild_hflags(s);
        break;

    case 0x05: /* SPSel */
        if (s->current_el == 0) {
            goto do_unallocated;
        }
        gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(crm & PSTATE_SP));
        break;

    case 0x19: /* SSBS */
        if (!dc_isar_feature(aa64_ssbs, s)) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_SSBS);
        } else {
            clear_pstate_bits(PSTATE_SSBS);
        }
        /* Don't need to rebuild hflags since SSBS is a nop */
        break;

    case 0x1a: /* DIT */
        if (!dc_isar_feature(aa64_dit, s)) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_DIT);
        } else {
            clear_pstate_bits(PSTATE_DIT);
        }
        /* There's no need to rebuild hflags because DIT is a nop */
        break;

    case 0x1e: /* DAIFSet */
        gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(crm));
        break;

    case 0x1f: /* DAIFClear */
        gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(crm));
        /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
        s->base.is_jmp = DISAS_UPDATE_EXIT;
        break;

    case 0x1c: /* TCO */
        if (dc_isar_feature(aa64_mte, s)) {
            /* Full MTE is enabled -- set the TCO bit as directed. */
            if (crm & 1) {
                set_pstate_bits(PSTATE_TCO);
            } else {
                clear_pstate_bits(PSTATE_TCO);
            }
            gen_rebuild_hflags(s);
            /* Many factors, including TCO, go into MTE_ACTIVE. */
            s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
        } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
            /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
            s->base.is_jmp = DISAS_NEXT;
        } else {
            goto do_unallocated;
        }
        break;

    case 0x1b: /* SVCR* */
        if (!dc_isar_feature(aa64_sme, s) || crm < 2 || crm > 7) {
            goto do_unallocated;
        }
        if (sme_access_check(s)) {
            /* Current PSTATE.{SM,ZA} packed as a 2-bit value. */
            int old = s->pstate_sm | (s->pstate_za << 1);
            /* CRm<0> replicated to both bits is the value to write. */
            int new = (crm & 1) * 3;
            /* CRm<2:1> selects which of SM/ZA to write. */
            int msk = (crm >> 1) & 3;

            if ((old ^ new) & msk) {
                /* At least one bit changes. */
                gen_helper_set_svcr(cpu_env, tcg_constant_i32(new),
                                    tcg_constant_i32(msk));
            } else {
                /* No change to SVCR state; keep translating. */
                s->base.is_jmp = DISAS_NEXT;
            }
        }
        break;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }
}
1959 
1960 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1961 {
1962     TCGv_i32 tmp = tcg_temp_new_i32();
1963     TCGv_i32 nzcv = tcg_temp_new_i32();
1964 
1965     /* build bit 31, N */
1966     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1967     /* build bit 30, Z */
1968     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1969     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1970     /* build bit 29, C */
1971     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1972     /* build bit 28, V */
1973     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1974     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1975     /* generate result */
1976     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1977 }
1978 
1979 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1980 {
1981     TCGv_i32 nzcv = tcg_temp_new_i32();
1982 
1983     /* take NZCV from R[t] */
1984     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1985 
1986     /* bit 31, N */
1987     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1988     /* bit 30, Z */
1989     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1990     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1991     /* bit 29, C */
1992     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1993     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1994     /* bit 28, V */
1995     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1996     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1997 }
1998 
1999 static void gen_sysreg_undef(DisasContext *s, bool isread,
2000                              uint8_t op0, uint8_t op1, uint8_t op2,
2001                              uint8_t crn, uint8_t crm, uint8_t rt)
2002 {
2003     /*
2004      * Generate code to emit an UNDEF with correct syndrome
2005      * information for a failed system register access.
2006      * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2007      * but if FEAT_IDST is implemented then read accesses to registers
2008      * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2009      * syndrome.
2010      */
2011     uint32_t syndrome;
2012 
2013     if (isread && dc_isar_feature(aa64_ids, s) &&
2014         arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2015         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2016     } else {
2017         syndrome = syn_uncategorized();
2018     }
2019     gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2020 }
2021 
2022 /* MRS - move from system register
2023  * MSR (register) - move to system register
2024  * SYS
2025  * SYSL
2026  * These are all essentially the same insn in 'read' and 'write'
2027  * versions, with varying op0 fields.
2028  */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    /* Look the register up by its encoded key in the cpregs table. */
    uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                      crn, crm, op0, op1, op2);
    const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
    bool need_exit_tb = false;
    TCGv_ptr tcg_ri = NULL;
    TCGv_i64 tcg_rt;

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
        return;
    }

    /* Check access permissions (static, translation-time check) */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
        return;
    }

    if (ri->accessfn || (ri->fgt && s->fgt_active)) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        uint32_t syndrome;

        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        gen_a64_update_pc(s, 0);
        tcg_ri = tcg_temp_new_ptr();
        gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
                                       tcg_constant_i32(key),
                                       tcg_constant_i32(syndrome),
                                       tcg_constant_i32(isread));
    } else if (ri->type & ARM_CP_RAISES_EXC) {
        /*
         * The readfn or writefn might raise an exception;
         * synchronize the CPU state in case it does.
         */
        gen_a64_update_pc(s, 0);
    }

    /* Handle special cases first */
    switch (ri->type & ARM_CP_SPECIAL_MASK) {
    case 0:
        /* Not special: fall through to the generic read/write below. */
        break;
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        /* Reads/writes of NZCV go via the split flag representation. */
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        return;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        if (s->mte_active[0]) {
            /* MTE enabled: build a descriptor and check tags first. */
            int desc = 0;

            desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
            desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
            desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);

            tcg_rt = tcg_temp_new_i64();
            gen_helper_mte_check_zva(tcg_rt, cpu_env,
                                     tcg_constant_i32(desc), cpu_reg(s, rt));
        } else {
            tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
        }
        gen_helper_dc_zva(cpu_env, tcg_rt);
        return;
    case ARM_CP_DC_GVA:
        {
            TCGv_i64 clean_addr, tag;

            /*
             * DC_GVA, like DC_ZVA, requires that we supply the original
             * pointer for an invalid page.  Probe that address first.
             */
            tcg_rt = cpu_reg(s, rt);
            clean_addr = clean_data_tbi(s, tcg_rt);
            gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);

            if (s->ata) {
                /* Extract the tag from the register to match STZGM.  */
                tag = tcg_temp_new_i64();
                tcg_gen_shri_i64(tag, tcg_rt, 56);
                gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
            }
        }
        return;
    case ARM_CP_DC_GZVA:
        {
            TCGv_i64 clean_addr, tag;

            /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
            tcg_rt = cpu_reg(s, rt);
            clean_addr = clean_data_tbi(s, tcg_rt);
            gen_helper_dc_zva(cpu_env, clean_addr);

            if (s->ata) {
                /* Extract the tag from the register to match STZGM.  */
                tag = tcg_temp_new_i64();
                tcg_gen_shri_i64(tag, tcg_rt, 56);
                gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
            }
        }
        return;
    default:
        g_assert_not_reached();
    }
    /*
     * NOTE(review): the *_access_check helpers appear to emit any
     * required exception themselves; on failure we just skip emitting
     * the register access -- confirm against their definitions.
     */
    if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
        return;
    } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
        return;
    } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
        return;
    }

    if (ri->type & ARM_CP_IO) {
        /* I/O operations must end the TB here (whether read or write) */
        need_exit_tb = translator_io_start(&s->base);
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            /* Reuse the reginfo pointer from the runtime access check
             * above if we already have it, else look it up now.
             */
            if (!tcg_ri) {
                tcg_ri = gen_lookup_cp_reg(key);
            }
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tcg_ri);
        } else {
            /* No readfn: the value lives at a fixed CPUARMState offset. */
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            if (!tcg_ri) {
                tcg_ri = gen_lookup_cp_reg(key);
            }
            gen_helper_set_cp_reg64(cpu_env, tcg_ri, tcg_rt);
        } else {
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /*
         * A write to any coprocessor register that ends a TB
         * must rebuild the hflags for the next TB.
         */
        gen_rebuild_hflags(s);
        /*
         * We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        need_exit_tb = true;
    }
    if (need_exit_tb) {
        s->base.is_jmp = DISAS_UPDATE_EXIT;
    }
}
2213 
2214 /* System
2215  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
2216  * +---------------------+---+-----+-----+-------+-------+-----+------+
2217  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
2218  * +---------------------+---+-----+-----+-------+-------+-----+------+
2219  */
2220 static void disas_system(DisasContext *s, uint32_t insn)
2221 {
2222     unsigned int l, op0, op1, crn, crm, op2, rt;
2223     l = extract32(insn, 21, 1);
2224     op0 = extract32(insn, 19, 2);
2225     op1 = extract32(insn, 16, 3);
2226     crn = extract32(insn, 12, 4);
2227     crm = extract32(insn, 8, 4);
2228     op2 = extract32(insn, 5, 3);
2229     rt = extract32(insn, 0, 5);
2230 
2231     if (op0 == 0) {
2232         if (l || rt != 31) {
2233             unallocated_encoding(s);
2234             return;
2235         }
2236         switch (crn) {
2237         case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
2238             handle_hint(s, insn, op1, op2, crm);
2239             break;
2240         case 3: /* CLREX, DSB, DMB, ISB */
2241             handle_sync(s, insn, op1, op2, crm);
2242             break;
2243         case 4: /* MSR (immediate) */
2244             handle_msr_i(s, insn, op1, op2, crm);
2245             break;
2246         default:
2247             unallocated_encoding(s);
2248             break;
2249         }
2250         return;
2251     }
2252     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
2253 }
2254 
2255 /* Exception generation
2256  *
2257  *  31             24 23 21 20                     5 4   2 1  0
2258  * +-----------------+-----+------------------------+-----+----+
2259  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
2260  * +-----------------------+------------------------+----------+
2261  */
static void disas_exc(DisasContext *s, uint32_t insn)
{
    int opc = extract32(insn, 21, 3);
    int op2_ll = extract32(insn, 0, 5);
    int imm16 = extract32(insn, 5, 16);
    uint32_t syndrome;

    switch (opc) {
    case 0:
        /* For SVC, HVC and SMC we advance the single-step state
         * machine before taking the exception. This is architecturally
         * mandated, to ensure that single-stepping a system call
         * instruction works properly.
         */
        switch (op2_ll) {
        case 1:                                                     /* SVC */
            syndrome = syn_aa64_svc(imm16);
            if (s->fgt_svc) {
                /* Fine-grained trap on SVC: raise at EL2 instead. */
                gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
                break;
            }
            gen_ss_advance(s);
            gen_exception_insn(s, 4, EXCP_SWI, syndrome);
            break;
        case 2:                                                     /* HVC */
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            /* The pre HVC helper handles cases when HVC gets trapped
             * as an undefined insn by runtime configuration.
             */
            gen_a64_update_pc(s, 0);
            gen_helper_pre_hvc(cpu_env);
            gen_ss_advance(s);
            gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(imm16), 2);
            break;
        case 3:                                                     /* SMC */
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            /* Sync PC first: the pre-SMC helper may raise an exception. */
            gen_a64_update_pc(s, 0);
            gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(imm16)));
            gen_ss_advance(s);
            gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(imm16), 3);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        break;
    case 1:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* BRK */
        gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
        break;
    case 2:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* HLT. This has two purposes.
         * Architecturally, it is an external halting debug instruction.
         * Since QEMU doesn't implement external debug, we treat this as
         * it is required for halting debug disabled: it will UNDEF.
         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
         */
        if (semihosting_enabled(s->current_el == 0) && imm16 == 0xf000) {
            gen_exception_internal_insn(s, EXCP_SEMIHOST);
        } else {
            unallocated_encoding(s);
        }
        break;
    case 5:
        if (op2_ll < 1 || op2_ll > 3) {
            unallocated_encoding(s);
            break;
        }
        /* DCPS1, DCPS2, DCPS3 */
        /* External debug is not implemented, so these UNDEF as well. */
        unallocated_encoding(s);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
2352 
2353 /* Branches, exception generating and system instructions */
2354 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2355 {
2356     switch (extract32(insn, 25, 7)) {
2357     case 0x6a: /* Exception generation / System */
2358         if (insn & (1 << 24)) {
2359             if (extract32(insn, 22, 2) == 0) {
2360                 disas_system(s, insn);
2361             } else {
2362                 unallocated_encoding(s);
2363             }
2364         } else {
2365             disas_exc(s, insn);
2366         }
2367         break;
2368     default:
2369         unallocated_encoding(s);
2370         break;
2371     }
2372 }
2373 
2374 /*
2375  * Load/Store exclusive instructions are implemented by remembering
2376  * the value/address loaded, and seeing if these are the same
2377  * when the store is performed. This is not actually the architecturally
2378  * mandated semantics, but it works for typical guest code sequences
2379  * and avoids having to monitor regular stores.
2380  *
2381  * The store exclusive uses the atomic cmpxchg primitives to avoid
2382  * races in multi-threaded linux-user and when MTTCG softmmu is
2383  * enabled.
2384  */
/*
 * Emit a load-exclusive: load [addr] into R[rt] (and R[rt2] for pairs),
 * recording the address and loaded value(s) in cpu_exclusive_addr,
 * cpu_exclusive_val and (for quad pairs) cpu_exclusive_high, so that a
 * later store-exclusive can compare against them.
 */
static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
                               TCGv_i64 addr, int size, bool is_pair)
{
    int idx = get_mem_index(s);
    MemOp memop;

    g_assert(size <= 3);
    if (is_pair) {
        g_assert(size >= 2);
        if (size == 2) {
            /* The pair must be single-copy atomic for the doubleword.  */
            memop = finalize_memop(s, MO_64 | MO_ALIGN);
            tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
            /* Split the 64-bit value into the two 32-bit registers. */
            if (s->be_data == MO_LE) {
                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
            } else {
                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
            }
        } else {
            /*
             * The pair must be single-copy atomic for *each* doubleword, not
             * the entire quadword, however it must be quadword aligned.
             * Expose the complete load to tcg, for ease of tlb lookup,
             * but indicate that only 8-byte atomicity is required.
             */
            TCGv_i128 t16 = tcg_temp_new_i128();

            memop = finalize_memop_atom(s, MO_128 | MO_ALIGN_16,
                                        MO_ATOM_IFALIGN_PAIR);
            tcg_gen_qemu_ld_i128(t16, addr, idx, memop);

            if (s->be_data == MO_LE) {
                tcg_gen_extr_i128_i64(cpu_exclusive_val,
                                      cpu_exclusive_high, t16);
            } else {
                tcg_gen_extr_i128_i64(cpu_exclusive_high,
                                      cpu_exclusive_val, t16);
            }
            tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
            tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
        }
    } else {
        /* Single register: load at the requested size, aligned. */
        memop = finalize_memop(s, size | MO_ALIGN);
        tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
        tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
    }
    /* Record the monitored address last, after the load has succeeded. */
    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
}
2435 
/*
 * Emit a store-exclusive: store R[rt] (and R[rt2] for pairs) to [addr]
 * only if the exclusive monitor set up by a prior load-exclusive still
 * matches; R[rd] receives 0 on success, 1 on failure. Implemented with
 * an atomic cmpxchg against the recorded exclusive value.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, int is_pair)
{
    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
     *     [addr] = {Rt};
     *     if (is_pair) {
     *         [addr + datasize] = {Rt2};
     *     }
     *     {Rd} = 0;
     * } else {
     *     {Rd} = 1;
     * }
     * env->exclusive_addr = -1;
     */
    TCGLabel *fail_label = gen_new_label();
    TCGLabel *done_label = gen_new_label();
    TCGv_i64 tmp;

    /* Address mismatch with the monitor fails immediately. */
    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);

    tmp = tcg_temp_new_i64();
    if (is_pair) {
        if (size == 2) {
            /* 32-bit pair: pack Rt/Rt2 into one 64-bit cmpxchg. */
            if (s->be_data == MO_LE) {
                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
            } else {
                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
            }
            tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
                                       cpu_exclusive_val, tmp,
                                       get_mem_index(s),
                                       MO_64 | MO_ALIGN | s->be_data);
            /* cmpxchg yields the old memory value; mismatch => failure. */
            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
        } else {
            /* 64-bit pair: a single 128-bit cmpxchg. */
            TCGv_i128 t16 = tcg_temp_new_i128();
            TCGv_i128 c16 = tcg_temp_new_i128();
            TCGv_i64 a, b;

            if (s->be_data == MO_LE) {
                tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
                tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
                                        cpu_exclusive_high);
            } else {
                tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
                tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
                                        cpu_exclusive_val);
            }

            tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
                                        get_mem_index(s),
                                        MO_128 | MO_ALIGN | s->be_data);

            /* Compare both halves of the old value against the monitor. */
            a = tcg_temp_new_i64();
            b = tcg_temp_new_i64();
            if (s->be_data == MO_LE) {
                tcg_gen_extr_i128_i64(a, b, t16);
            } else {
                tcg_gen_extr_i128_i64(b, a, t16);
            }

            tcg_gen_xor_i64(a, a, cpu_exclusive_val);
            tcg_gen_xor_i64(b, b, cpu_exclusive_high);
            tcg_gen_or_i64(tmp, a, b);

            /* tmp != 0 iff either half mismatched. */
            tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
        }
    } else {
        tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
                                   cpu_reg(s, rt), get_mem_index(s),
                                   size | MO_ALIGN | s->be_data);
        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
    }
    /* Success path: Rd = 0/1 result from the comparison above. */
    tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
    tcg_gen_br(done_label);

    gen_set_label(fail_label);
    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
    gen_set_label(done_label);
    /* The monitor is always cleared, whether the store succeeded or not. */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
2517 
2518 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2519                                  int rn, int size)
2520 {
2521     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2522     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2523     int memidx = get_mem_index(s);
2524     TCGv_i64 clean_addr;
2525 
2526     if (rn == 31) {
2527         gen_check_sp_alignment(s);
2528     }
2529     clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size);
2530     tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
2531                                size | MO_ALIGN | s->be_data);
2532 }
2533 
/*
 * CASP: compare-and-swap pair. R[s],R[s+1] are compared against the
 * double-width memory value at [Xn|SP] as one atomic access; on match
 * R[t],R[t+1] are stored. R[s],R[s+1] always receive the old value.
 */
static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
                                      int rn, int size)
{
    TCGv_i64 s1 = cpu_reg(s, rs);
    TCGv_i64 s2 = cpu_reg(s, rs + 1);
    TCGv_i64 t1 = cpu_reg(s, rt);
    TCGv_i64 t2 = cpu_reg(s, rt + 1);
    TCGv_i64 clean_addr;
    int memidx = get_mem_index(s);

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    /* This is a single atomic access, despite the "pair". */
    clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1);

    if (size == 2) {
        /* 32-bit pair: pack into a single 64-bit cmpxchg. */
        TCGv_i64 cmp = tcg_temp_new_i64();
        TCGv_i64 val = tcg_temp_new_i64();

        if (s->be_data == MO_LE) {
            tcg_gen_concat32_i64(val, t1, t2);
            tcg_gen_concat32_i64(cmp, s1, s2);
        } else {
            tcg_gen_concat32_i64(val, t2, t1);
            tcg_gen_concat32_i64(cmp, s2, s1);
        }

        tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
                                   MO_64 | MO_ALIGN | s->be_data);

        /* Unpack the old memory value back into R[s],R[s+1]. */
        if (s->be_data == MO_LE) {
            tcg_gen_extr32_i64(s1, s2, cmp);
        } else {
            tcg_gen_extr32_i64(s2, s1, cmp);
        }
    } else {
        /* 64-bit pair: a single 128-bit cmpxchg. */
        TCGv_i128 cmp = tcg_temp_new_i128();
        TCGv_i128 val = tcg_temp_new_i128();

        if (s->be_data == MO_LE) {
            tcg_gen_concat_i64_i128(val, t1, t2);
            tcg_gen_concat_i64_i128(cmp, s1, s2);
        } else {
            tcg_gen_concat_i64_i128(val, t2, t1);
            tcg_gen_concat_i64_i128(cmp, s2, s1);
        }

        tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx,
                                    MO_128 | MO_ALIGN | s->be_data);

        if (s->be_data == MO_LE) {
            tcg_gen_extr_i128_i64(s1, s2, cmp);
        } else {
            tcg_gen_extr_i128_i64(s2, s1, cmp);
        }
    }
}
2593 
/*
 * Compute the SF ("sixty-four bit register") field for a load/store
 * ISS. Derived from the ARMv8 shared decode for LDR: sign-extending
 * loads pick the register width from opc bit 0 (1 -> 32-bit,
 * 0 -> 64-bit); otherwise the width follows the access size, and only
 * size == 3 (doubleword) uses a 64-bit register.
 */
static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
{
    if (is_signed) {
        return (opc & 1) == 0;
    }
    return size == 3;
}
2609 
2610 /* Load/store exclusive
2611  *
2612  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2613  * +-----+-------------+----+---+----+------+----+-------+------+------+
2614  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2615  * +-----+-------------+----+---+----+------+----+-------+------+------+
2616  *
2617  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2618  *   L: 0 -> store, 1 -> load
2619  *  o2: 0 -> exclusive, 1 -> not
2620  *  o1: 0 -> single register, 1 -> register pair
2621  *  o0: 1 -> load-acquire/store-release, 0 -> not
2622  */
static void disas_ldst_excl(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rt2 = extract32(insn, 10, 5);
    int rs = extract32(insn, 16, 5);
    int is_lasr = extract32(insn, 15, 1);
    /* Pack o2:L:o1 (insn[23:21]) with o0 (insn[15]) into a 4-bit selector. */
    int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
    int size = extract32(insn, 30, 2);
    TCGv_i64 clean_addr;

    switch (o2_L_o1_o0) {
    case 0x0: /* STXR */
    case 0x1: /* STLXR */
        if (rn == 31) {
            gen_check_sp_alignment(s);
        }
        if (is_lasr) {
            /* Store-release: order all prior accesses before the store. */
            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
        }
        /* The access is MTE tag-checked unless the base register is SP. */
        clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
                                    true, rn != 31, size);
        gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
        return;

    case 0x4: /* LDXR */
    case 0x5: /* LDAXR */
        if (rn == 31) {
            gen_check_sp_alignment(s);
        }
        clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
                                    false, rn != 31, size);
        /*
         * NOTE(review): marks this insn as a load-exclusive for later
         * consumers (presumably syndrome/ISS generation) -- confirm in
         * DisasContext.
         */
        s->is_ldex = true;
        gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
        if (is_lasr) {
            /* Load-acquire: order subsequent accesses after the load. */
            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
        }
        return;

    case 0x8: /* STLLR */
        if (!dc_isar_feature(aa64_lor, s)) {
            break;
        }
        /* StoreLORelease is the same as Store-Release for QEMU.  */
        /* fall through */
    case 0x9: /* STLR */
        /* Generate ISS for non-exclusive accesses including LASR.  */
        if (rn == 31) {
            gen_check_sp_alignment(s);
        }
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
        clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
                                    true, rn != 31, size);
        /* TODO: ARMv8.4-LSE SCTLR.nAA */
        do_gpr_st(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, true, rt,
                  disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
        return;

    case 0xc: /* LDLAR */
        if (!dc_isar_feature(aa64_lor, s)) {
            break;
        }
        /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
        /* fall through */
    case 0xd: /* LDAR */
        /* Generate ISS for non-exclusive accesses including LASR.  */
        if (rn == 31) {
            gen_check_sp_alignment(s);
        }
        clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
                                    false, rn != 31, size);
        /* TODO: ARMv8.4-LSE SCTLR.nAA */
        do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, false, true,
                  rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
        return;

    case 0x2: case 0x3: /* CASP / STXP */
        if (size & 2) { /* STXP / STLXP */
            if (rn == 31) {
                gen_check_sp_alignment(s);
            }
            if (is_lasr) {
                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
            }
            clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
                                        true, rn != 31, size);
            gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
            return;
        }
        /* CASP requires an even rt/rs register pair and FEAT_LSE. */
        if (rt2 == 31
            && ((rt | rs) & 1) == 0
            && dc_isar_feature(aa64_atomics, s)) {
            /* CASP / CASPL */
            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
            return;
        }
        break;

    case 0x6: case 0x7: /* CASPA / LDXP */
        if (size & 2) { /* LDXP / LDAXP */
            if (rn == 31) {
                gen_check_sp_alignment(s);
            }
            clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
                                        false, rn != 31, size);
            s->is_ldex = true;
            gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
            if (is_lasr) {
                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
            }
            return;
        }
        /* As above: CASPA needs an even rt/rs pair and FEAT_LSE. */
        if (rt2 == 31
            && ((rt | rs) & 1) == 0
            && dc_isar_feature(aa64_atomics, s)) {
            /* CASPA / CASPAL */
            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
            return;
        }
        break;

    case 0xa: /* CAS */
    case 0xb: /* CASL */
    case 0xe: /* CASA */
    case 0xf: /* CASAL */
        if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
            gen_compare_and_swap(s, rs, rt, rn, size);
            return;
        }
        break;
    }
    /* Anything that falls out of the switch above is unallocated. */
    unallocated_encoding(s);
}
2757 
2758 /*
2759  * Load register (literal)
2760  *
2761  *  31 30 29   27  26 25 24 23                5 4     0
2762  * +-----+-------+---+-----+-------------------+-------+
2763  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2764  * +-----+-------+---+-----+-------------------+-------+
2765  *
2766  * V: 1 -> vector (simd/fp)
2767  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2768  *                   10-> 32 bit signed, 11 -> prefetch
2769  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2770  */
2771 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2772 {
2773     int rt = extract32(insn, 0, 5);
2774     int64_t imm = sextract32(insn, 5, 19) << 2;
2775     bool is_vector = extract32(insn, 26, 1);
2776     int opc = extract32(insn, 30, 2);
2777     bool is_signed = false;
2778     int size = 2;
2779     TCGv_i64 tcg_rt, clean_addr;
2780 
2781     if (is_vector) {
2782         if (opc == 3) {
2783             unallocated_encoding(s);
2784             return;
2785         }
2786         size = 2 + opc;
2787         if (!fp_access_check(s)) {
2788             return;
2789         }
2790     } else {
2791         if (opc == 3) {
2792             /* PRFM (literal) : prefetch */
2793             return;
2794         }
2795         size = 2 + extract32(opc, 0, 1);
2796         is_signed = extract32(opc, 1, 1);
2797     }
2798 
2799     tcg_rt = cpu_reg(s, rt);
2800 
2801     clean_addr = tcg_temp_new_i64();
2802     gen_pc_plus_diff(s, clean_addr, imm);
2803     if (is_vector) {
2804         do_fp_ld(s, rt, clean_addr, size);
2805     } else {
2806         /* Only unsigned 32bit loads target 32bit registers.  */
2807         bool iss_sf = opc != 0;
2808 
2809         do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
2810                   false, true, rt, iss_sf, false);
2811     }
2812 }
2813 
2814 /*
2815  * LDNP (Load Pair - non-temporal hint)
2816  * LDP (Load Pair - non vector)
2817  * LDPSW (Load Pair Signed Word - non vector)
2818  * STNP (Store Pair - non-temporal hint)
2819  * STP (Store Pair - non vector)
2820  * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2821  * LDP (Load Pair of SIMD&FP)
2822  * STNP (Store Pair of SIMD&FP - non-temporal hint)
2823  * STP (Store Pair of SIMD&FP)
2824  *
2825  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2826  * +-----+-------+---+---+-------+---+-----------------------------+
2827  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2828  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2829  *
2830  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2831  *      LDPSW/STGP               01
2832  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2833  *   V: 0 -> GPR, 1 -> Vector
2834  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2835  *      10 -> signed offset, 11 -> pre-index
2836  *   L: 0 -> Store 1 -> Load
2837  *
2838  * Rt, Rt2 = GPR or SIMD registers to be stored
2839  * Rn = general purpose register containing address
2840  * imm7 = signed offset (multiple of 4 or 8 depending on size)
2841  */
static void disas_ldst_pair(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rt2 = extract32(insn, 10, 5);
    /* imm7 is scaled by the access size (or tag granule for STGP) below. */
    uint64_t offset = sextract64(insn, 15, 7);
    int index = extract32(insn, 23, 2);
    bool is_vector = extract32(insn, 26, 1);
    bool is_load = extract32(insn, 22, 1);
    int opc = extract32(insn, 30, 2);

    bool is_signed = false;
    bool postindex = false;
    bool wback = false;
    bool set_tag = false;

    TCGv_i64 clean_addr, dirty_addr;

    int size;

    if (opc == 3) {
        unallocated_encoding(s);
        return;
    }

    if (is_vector) {
        size = 2 + opc;
    } else if (opc == 1 && !is_load) {
        /* STGP */
        if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) {
            unallocated_encoding(s);
            return;
        }
        size = 3;
        set_tag = true;
    } else {
        size = 2 + extract32(opc, 1, 1);
        is_signed = extract32(opc, 0, 1);
        if (!is_load && is_signed) {
            /* There is no store-pair signed-word instruction. */
            unallocated_encoding(s);
            return;
        }
    }

    switch (index) {
    case 1: /* post-index */
        postindex = true;
        wback = true;
        break;
    case 0:
        /* signed offset with "non-temporal" hint. Since we don't emulate
         * caches we don't care about hints to the cache system about
         * data access patterns, and handle this identically to plain
         * signed offset.
         */
        if (is_signed) {
            /* There is no non-temporal-hint version of LDPSW */
            unallocated_encoding(s);
            return;
        }
        postindex = false;
        break;
    case 2: /* signed offset, rn not updated */
        postindex = false;
        break;
    case 3: /* pre-index */
        postindex = false;
        wback = true;
        break;
    }

    if (is_vector && !fp_access_check(s)) {
        return;
    }

    /* Scale imm7: by the tag granule for STGP, else by the access size. */
    offset <<= (set_tag ? LOG2_TAG_GRANULE : size);

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    dirty_addr = read_cpu_reg_sp(s, rn, 1);
    if (!postindex) {
        tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
    }

    if (set_tag) {
        /* STGP: store the allocation tag before the data pair. */
        if (!s->ata) {
            /*
             * TODO: We could rely on the stores below, at least for
             * system mode, if we arrange to add MO_ALIGN_16.
             */
            gen_helper_stg_stub(cpu_env, dirty_addr);
        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
            gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
        } else {
            gen_helper_stg(cpu_env, dirty_addr, dirty_addr);
        }
    }

    /* MTE-check the whole pair (2 << size bytes) in one operation. */
    clean_addr = gen_mte_checkN(s, dirty_addr, !is_load,
                                (wback || rn != 31) && !set_tag, 2 << size);

    if (is_vector) {
        if (is_load) {
            do_fp_ld(s, rt, clean_addr, size);
        } else {
            do_fp_st(s, rt, clean_addr, size);
        }
        /* Advance to the second element of the pair. */
        tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
        if (is_load) {
            do_fp_ld(s, rt2, clean_addr, size);
        } else {
            do_fp_st(s, rt2, clean_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);

        if (is_load) {
            TCGv_i64 tmp = tcg_temp_new_i64();

            /* Do not modify tcg_rt before recognizing any exception
             * from the second load.
             */
            do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN,
                      false, false, 0, false, false);
            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
            do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN,
                      false, false, 0, false, false);

            tcg_gen_mov_i64(tcg_rt, tmp);
        } else {
            do_gpr_st(s, tcg_rt, clean_addr, size,
                      false, 0, false, false);
            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
            do_gpr_st(s, tcg_rt2, clean_addr, size,
                      false, 0, false, false);
        }
    }

    if (wback) {
        /* Pre/post-index writeback of the (possibly updated) base. */
        if (postindex) {
            tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
        }
        tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
    }
}
2990 
2991 /*
2992  * Load/store (immediate post-indexed)
2993  * Load/store (immediate pre-indexed)
2994  * Load/store (unscaled immediate)
2995  *
2996  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2997  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2998  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2999  * +----+-------+---+-----+-----+---+--------+-----+------+------+
3000  *
3001  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
 *       10 -> unprivileged
3003  * V = 0 -> non-vector
3004  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
3005  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3006  */
static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
                                int opc,
                                int size,
                                int rt,
                                bool is_vector)
{
    int rn = extract32(insn, 5, 5);
    int imm9 = sextract32(insn, 12, 9);
    int idx = extract32(insn, 10, 2);
    bool is_signed = false;
    bool is_store = false;
    bool is_extended = false;
    /* idx == 2 is the unprivileged (LDTR/STTR) form. */
    bool is_unpriv = (idx == 2);
    bool iss_valid;
    bool post_index;
    bool writeback;
    int memidx;

    TCGv_i64 clean_addr, dirty_addr;

    if (is_vector) {
        /* Vector element size is opc<1>:size<1:0>; > 128 bit or
         * unprivileged forms are unallocated.
         */
        size |= (opc & 2) << 1;
        if (size > 4 || is_unpriv) {
            unallocated_encoding(s);
            return;
        }
        is_store = ((opc & 1) == 0);
        if (!fp_access_check(s)) {
            return;
        }
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch */
            if (idx != 0) {
                unallocated_encoding(s);
                return;
            }
            return;
        }
        if (opc == 3 && size > 1) {
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = extract32(opc, 1, 1);
        is_extended = (size < 3) && extract32(opc, 0, 1);
    }

    switch (idx) {
    case 0:
    case 2:
        post_index = false;
        writeback = false;
        break;
    case 1:
        post_index = true;
        writeback = true;
        break;
    case 3:
        post_index = false;
        writeback = true;
        break;
    default:
        g_assert_not_reached();
    }

    /* ISS is only reported for plain GPR accesses without writeback. */
    iss_valid = !is_vector && !writeback;

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    dirty_addr = read_cpu_reg_sp(s, rn, 1);
    if (!post_index) {
        tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
    }

    /* Unprivileged accesses are performed with the "user" mem index. */
    memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
    clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store,
                                       writeback || rn != 31,
                                       size, is_unpriv, memidx);

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, clean_addr, size);
        } else {
            do_fp_ld(s, rt, clean_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);

        if (is_store) {
            do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
                             iss_valid, rt, iss_sf, false);
        } else {
            do_gpr_ld_memidx(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
                             is_extended, memidx,
                             iss_valid, rt, iss_sf, false);
        }
    }

    if (writeback) {
        /* Pre/post-indexed forms write the updated address back to Rn. */
        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
        if (post_index) {
            tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
        }
        tcg_gen_mov_i64(tcg_rn, dirty_addr);
    }
}
3117 
3118 /*
3119  * Load/store (register offset)
3120  *
3121  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
3122  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3123  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
3124  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3125  *
3126  * For non-vector:
3127  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3128  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3129  * For vector:
3130  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3131  *   opc<0>: 0 -> store, 1 -> load
3132  * V: 1 -> vector/simd
3133  * opt: extend encoding (see DecodeRegExtend)
3134  * S: if S=1 then scale (essentially index by sizeof(size))
3135  * Rt: register to transfer into/out of
3136  * Rn: address register or SP for base
3137  * Rm: offset register or ZR for offset
3138  */
3139 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
3140                                    int opc,
3141                                    int size,
3142                                    int rt,
3143                                    bool is_vector)
3144 {
3145     int rn = extract32(insn, 5, 5);
3146     int shift = extract32(insn, 12, 1);
3147     int rm = extract32(insn, 16, 5);
3148     int opt = extract32(insn, 13, 3);
3149     bool is_signed = false;
3150     bool is_store = false;
3151     bool is_extended = false;
3152 
3153     TCGv_i64 tcg_rm, clean_addr, dirty_addr;
3154 
3155     if (extract32(opt, 1, 1) == 0) {
3156         unallocated_encoding(s);
3157         return;
3158     }
3159 
3160     if (is_vector) {
3161         size |= (opc & 2) << 1;
3162         if (size > 4) {
3163             unallocated_encoding(s);
3164             return;
3165         }
3166         is_store = !extract32(opc, 0, 1);
3167         if (!fp_access_check(s)) {
3168             return;
3169         }
3170     } else {
3171         if (size == 3 && opc == 2) {
3172             /* PRFM - prefetch */
3173             return;
3174         }
3175         if (opc == 3 && size > 1) {
3176             unallocated_encoding(s);
3177             return;
3178         }
3179         is_store = (opc == 0);
3180         is_signed = extract32(opc, 1, 1);
3181         is_extended = (size < 3) && extract32(opc, 0, 1);
3182     }
3183 
3184     if (rn == 31) {
3185         gen_check_sp_alignment(s);
3186     }
3187     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3188 
3189     tcg_rm = read_cpu_reg(s, rm, 1);
3190     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
3191 
3192     tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
3193     clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size);
3194 
3195     if (is_vector) {
3196         if (is_store) {
3197             do_fp_st(s, rt, clean_addr, size);
3198         } else {
3199             do_fp_ld(s, rt, clean_addr, size);
3200         }
3201     } else {
3202         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3203         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3204         if (is_store) {
3205             do_gpr_st(s, tcg_rt, clean_addr, size,
3206                       true, rt, iss_sf, false);
3207         } else {
3208             do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
3209                       is_extended, true, rt, iss_sf, false);
3210         }
3211     }
3212 }
3213 
3214 /*
3215  * Load/store (unsigned immediate)
3216  *
 * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
3218  * +----+-------+---+-----+-----+------------+-------+------+
3219  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
3220  * +----+-------+---+-----+-----+------------+-------+------+
3221  *
3222  * For non-vector:
3223  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3224  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3225  * For vector:
3226  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3227  *   opc<0>: 0 -> store, 1 -> load
3228  * Rn: base address register (inc SP)
3229  * Rt: target register
3230  */
3231 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
3232                                         int opc,
3233                                         int size,
3234                                         int rt,
3235                                         bool is_vector)
3236 {
3237     int rn = extract32(insn, 5, 5);
3238     unsigned int imm12 = extract32(insn, 10, 12);
3239     unsigned int offset;
3240 
3241     TCGv_i64 clean_addr, dirty_addr;
3242 
3243     bool is_store;
3244     bool is_signed = false;
3245     bool is_extended = false;
3246 
3247     if (is_vector) {
3248         size |= (opc & 2) << 1;
3249         if (size > 4) {
3250             unallocated_encoding(s);
3251             return;
3252         }
3253         is_store = !extract32(opc, 0, 1);
3254         if (!fp_access_check(s)) {
3255             return;
3256         }
3257     } else {
3258         if (size == 3 && opc == 2) {
3259             /* PRFM - prefetch */
3260             return;
3261         }
3262         if (opc == 3 && size > 1) {
3263             unallocated_encoding(s);
3264             return;
3265         }
3266         is_store = (opc == 0);
3267         is_signed = extract32(opc, 1, 1);
3268         is_extended = (size < 3) && extract32(opc, 0, 1);
3269     }
3270 
3271     if (rn == 31) {
3272         gen_check_sp_alignment(s);
3273     }
3274     dirty_addr = read_cpu_reg_sp(s, rn, 1);
3275     offset = imm12 << size;
3276     tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3277     clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size);
3278 
3279     if (is_vector) {
3280         if (is_store) {
3281             do_fp_st(s, rt, clean_addr, size);
3282         } else {
3283             do_fp_ld(s, rt, clean_addr, size);
3284         }
3285     } else {
3286         TCGv_i64 tcg_rt = cpu_reg(s, rt);
3287         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3288         if (is_store) {
3289             do_gpr_st(s, tcg_rt, clean_addr, size,
3290                       true, rt, iss_sf, false);
3291         } else {
3292             do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
3293                       is_extended, true, rt, iss_sf, false);
3294         }
3295     }
3296 }
3297 
3298 /* Atomic memory operations
3299  *
3300  *  31  30      27  26    24    22  21   16   15    12    10    5     0
3301  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3302  * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn |  Rt |
3303  * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+
3304  *
3305  * Rt: the result register
3306  * Rn: base address or SP
3307  * Rs: the source register for the operation
3308  * V: vector flag (always 0 as of v8.3)
3309  * A: acquire flag
3310  * R: release flag
3311  */
static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
                              int size, int rt, bool is_vector)
{
    int rs = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int o3_opc = extract32(insn, 12, 4);
    bool r = extract32(insn, 22, 1);
    bool a = extract32(insn, 23, 1);
    TCGv_i64 tcg_rs, tcg_rt, clean_addr;
    AtomicThreeOpFn *fn = NULL;
    /* All of these operations require natural alignment (MO_ALIGN). */
    MemOp mop = s->be_data | size | MO_ALIGN;

    if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
        unallocated_encoding(s);
        return;
    }
    /* Note: the case labels below are octal, matching the o3:opc field. */
    switch (o3_opc) {
    case 000: /* LDADD */
        fn = tcg_gen_atomic_fetch_add_i64;
        break;
    case 001: /* LDCLR */
        fn = tcg_gen_atomic_fetch_and_i64;
        break;
    case 002: /* LDEOR */
        fn = tcg_gen_atomic_fetch_xor_i64;
        break;
    case 003: /* LDSET */
        fn = tcg_gen_atomic_fetch_or_i64;
        break;
    case 004: /* LDSMAX */
        fn = tcg_gen_atomic_fetch_smax_i64;
        mop |= MO_SIGN;
        break;
    case 005: /* LDSMIN */
        fn = tcg_gen_atomic_fetch_smin_i64;
        mop |= MO_SIGN;
        break;
    case 006: /* LDUMAX */
        fn = tcg_gen_atomic_fetch_umax_i64;
        break;
    case 007: /* LDUMIN */
        fn = tcg_gen_atomic_fetch_umin_i64;
        break;
    case 010: /* SWP */
        fn = tcg_gen_atomic_xchg_i64;
        break;
    case 014: /* LDAPR, LDAPRH, LDAPRB */
        /* Only the A=1, R=0, Rs=31 encoding is LDAPR*. */
        if (!dc_isar_feature(aa64_rcpc_8_3, s) ||
            rs != 31 || a != 1 || r != 0) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size);

    if (o3_opc == 014) {
        /*
         * LDAPR* are a special case because they are a simple load, not a
         * fetch-and-do-something op.
         * The architectural consistency requirements here are weaker than
         * full load-acquire (we only need "load-acquire processor consistent"),
         * but we choose to implement them as full LDAQ.
         */
        do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false,
                  true, rt, disas_ldst_compute_iss_sf(size, false, 0), true);
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
        return;
    }

    tcg_rs = read_cpu_reg(s, rs, true);
    tcg_rt = cpu_reg(s, rt);

    if (o3_opc == 1) { /* LDCLR */
        /* Implement bit-clear as an atomic AND with inverted Rs. */
        tcg_gen_not_i64(tcg_rs, tcg_rs);
    }

    /* The tcg atomic primitives are all full barriers.  Therefore we
     * can ignore the Acquire and Release bits of this instruction.
     */
    fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);

    if ((mop & MO_SIGN) && size != MO_64) {
        /*
         * The memory op sign-extended the result to 64 bits; a sub-64-bit
         * destination write must zero bits [63:32] of the X register.
         */
        tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
    }
}
3405 
3406 /*
3407  * PAC memory operations
3408  *
3409  *  31  30      27  26    24    22  21       12  11  10    5     0
3410  * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3411  * | size | 1 1 1 | V | 0 0 | M S | 1 |  imm9  | W | 1 | Rn |  Rt |
3412  * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3413  *
3414  * Rt: the result register
3415  * Rn: base address or SP
3416  * V: vector flag (always 0 as of v8.3)
3417  * M: clear for key DA, set for key DB
3418  * W: pre-indexing flag
3419  * S: sign for imm9.
3420  */
static void disas_ldst_pac(DisasContext *s, uint32_t insn,
                           int size, int rt, bool is_vector)
{
    int rn = extract32(insn, 5, 5);
    bool is_wback = extract32(insn, 11, 1);
    /* M bit: clear -> key DA, set -> key DB. */
    bool use_key_a = !extract32(insn, 23, 1);
    int offset;
    TCGv_i64 clean_addr, dirty_addr, tcg_rt;

    /* Only the 64-bit non-vector form exists, and requires FEAT_PAuth. */
    if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
        unallocated_encoding(s);
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    dirty_addr = read_cpu_reg_sp(s, rn, 1);

    if (s->pauth_active) {
        /* Authenticate the base address with the selected data key. */
        if (use_key_a) {
            gen_helper_autda(dirty_addr, cpu_env, dirty_addr,
                             tcg_constant_i64(0));
        } else {
            gen_helper_autdb(dirty_addr, cpu_env, dirty_addr,
                             tcg_constant_i64(0));
        }
    }

    /* Form the 10-bit signed, scaled offset.  */
    offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
    offset = sextract32(offset << size, 0, 10 + size);
    tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);

    /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
    clean_addr = gen_mte_check1(s, dirty_addr, false,
                                is_wback || rn != 31, size);

    tcg_rt = cpu_reg(s, rt);
    do_gpr_ld(s, tcg_rt, clean_addr, size,
              /* extend */ false, /* iss_valid */ !is_wback,
              /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);

    if (is_wback) {
        /* Write the authenticated + offset address back to the base reg. */
        tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
    }
}
3468 
3469 /*
3470  * LDAPR/STLR (unscaled immediate)
3471  *
3472  *  31  30            24    22  21       12    10    5     0
3473  * +------+-------------+-----+---+--------+-----+----+-----+
3474  * | size | 0 1 1 0 0 1 | opc | 0 |  imm9  | 0 0 | Rn |  Rt |
3475  * +------+-------------+-----+---+--------+-----+----+-----+
3476  *
3477  * Rt: source or destination register
3478  * Rn: base register
3479  * imm9: unscaled immediate offset
3480  * opc: 00: STLUR*, 01/10/11: various LDAPUR*
3481  * size: size of load/store
3482  */
static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int offset = sextract32(insn, 12, 9);
    int opc = extract32(insn, 22, 2);
    int size = extract32(insn, 30, 2);
    TCGv_i64 clean_addr, dirty_addr;
    bool is_store = false;
    bool extend = false;
    bool iss_sf;
    MemOp mop;

    /* This entire encoding space requires the v8.4 RCpc extension. */
    if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
        unallocated_encoding(s);
        return;
    }

    /* TODO: ARMv8.4-LSE SCTLR.nAA */
    mop = size | MO_ALIGN;

    switch (opc) {
    case 0: /* STLURB */
        is_store = true;
        break;
    case 1: /* LDAPUR* */
        break;
    case 2: /* LDAPURS* 64-bit variant */
        if (size == 3) {
            /* There is no signed 64-bit load of a 64-bit value. */
            unallocated_encoding(s);
            return;
        }
        mop |= MO_SIGN;
        break;
    case 3: /* LDAPURS* 32-bit variant */
        if (size > 1) {
            unallocated_encoding(s);
            return;
        }
        mop |= MO_SIGN;
        extend = true; /* zero-extend 32->64 after signed load */
        break;
    default:
        g_assert_not_reached();
    }

    iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc);

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    dirty_addr = read_cpu_reg_sp(s, rn, 1);
    tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
    /* Clean the address of TBI tag bits. */
    clean_addr = clean_data_tbi(s, dirty_addr);

    if (is_store) {
        /* Store-Release semantics: barrier before the store. */
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
        do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true);
    } else {
        /*
         * Load-AcquirePC semantics; we implement as the slightly more
         * restrictive Load-Acquire.
         */
        do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop,
                  extend, true, rt, iss_sf, true);
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
    }
}
3553 
3554 /* Load/store register (all forms) */
3555 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
3556 {
3557     int rt = extract32(insn, 0, 5);
3558     int opc = extract32(insn, 22, 2);
3559     bool is_vector = extract32(insn, 26, 1);
3560     int size = extract32(insn, 30, 2);
3561 
3562     switch (extract32(insn, 24, 2)) {
3563     case 0:
3564         if (extract32(insn, 21, 1) == 0) {
3565             /* Load/store register (unscaled immediate)
3566              * Load/store immediate pre/post-indexed
3567              * Load/store register unprivileged
3568              */
3569             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
3570             return;
3571         }
3572         switch (extract32(insn, 10, 2)) {
3573         case 0:
3574             disas_ldst_atomic(s, insn, size, rt, is_vector);
3575             return;
3576         case 2:
3577             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
3578             return;
3579         default:
3580             disas_ldst_pac(s, insn, size, rt, is_vector);
3581             return;
3582         }
3583         break;
3584     case 1:
3585         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
3586         return;
3587     }
3588     unallocated_encoding(s);
3589 }
3590 
3591 /* AdvSIMD load/store multiple structures
3592  *
3593  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
3594  * +---+---+---------------+---+-------------+--------+------+------+------+
3595  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
3596  * +---+---+---------------+---+-------------+--------+------+------+------+
3597  *
3598  * AdvSIMD load/store multiple structures (post-indexed)
3599  *
3600  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
3601  * +---+---+---------------+---+---+---------+--------+------+------+------+
3602  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
3603  * +---+---+---------------+---+---+---------+--------+------+------+------+
3604  *
3605  * Rt: first (or only) SIMD&FP register to be transferred
3606  * Rn: base address or SP
3607  * Rm (post-index only): post-index register (when !31) or size dependent #imm
3608  */
static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 10, 2);
    int opcode = extract32(insn, 12, 4);
    bool is_store = !extract32(insn, 22, 1);  /* L bit: 0 means store */
    bool is_postidx = extract32(insn, 23, 1);
    bool is_q = extract32(insn, 30, 1);       /* Q: 128-bit registers */
    TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
    MemOp endian, align, mop;

    int total;    /* total bytes */
    int elements; /* elements per vector */
    int rpt;    /* num iterations */
    int selem;  /* structure elements */
    int r;

    /* Bit 31 must be 0, and bit 21 is 0 in all allocated encodings. */
    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
        unallocated_encoding(s);
        return;
    }

    /* Without post-indexing, the Rm field must read as zero. */
    if (!is_postidx && rm != 0) {
        unallocated_encoding(s);
        return;
    }

    /* From the shared decode logic */
    switch (opcode) {
    case 0x0:
        /* one group of 4-element structures (LD4/ST4) */
        rpt = 1;
        selem = 4;
        break;
    case 0x2:
        /* four single-element register transfers (LD1/ST1, 4 regs) */
        rpt = 4;
        selem = 1;
        break;
    case 0x4:
        /* one group of 3-element structures (LD3/ST3) */
        rpt = 1;
        selem = 3;
        break;
    case 0x6:
        /* three single-element register transfers (LD1/ST1, 3 regs) */
        rpt = 3;
        selem = 1;
        break;
    case 0x7:
        /* one single-element register transfer (LD1/ST1, 1 reg) */
        rpt = 1;
        selem = 1;
        break;
    case 0x8:
        /* one group of 2-element structures (LD2/ST2) */
        rpt = 1;
        selem = 2;
        break;
    case 0xa:
        /* two single-element register transfers (LD1/ST1, 2 regs) */
        rpt = 2;
        selem = 1;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    /* 64-bit elements with 64-bit registers and multi-element
     * structures is a reserved combination.
     */
    if (size == 3 && !is_q && selem != 1) {
        /* reserved */
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    /* For our purposes, bytes are always little-endian.  */
    endian = s->be_data;
    if (size == 0) {
        endian = MO_LE;
    }

    total = rpt * selem * (is_q ? 16 : 8);
    tcg_rn = cpu_reg_sp(s, rn);

    /*
     * Issue the MTE check vs the logical repeat count, before we
     * promote consecutive little-endian elements below.
     */
    clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31,
                                total);

    /*
     * Consecutive little-endian elements from a single register
     * can be promoted to a larger little-endian operation.
     */
    align = MO_ALIGN;
    if (selem == 1 && endian == MO_LE) {
        align = pow2_align(size);
        size = 3;
    }
    if (!s->align_mem) {
        align = 0;
    }
    mop = endian | size | align;

    elements = (is_q ? 16 : 8) >> size;
    tcg_ebytes = tcg_constant_i64(1 << size);
    for (r = 0; r < rpt; r++) {
        int e;
        for (e = 0; e < elements; e++) {
            int xs;
            for (xs = 0; xs < selem; xs++) {
                /* Register numbers wrap modulo 32. */
                int tt = (rt + r + xs) % 32;
                if (is_store) {
                    do_vec_st(s, tt, e, clean_addr, mop);
                } else {
                    do_vec_ld(s, tt, e, clean_addr, mop);
                }
                tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
            }
        }
    }

    if (!is_store) {
        /* For non-quad operations, setting a slice of the low
         * 64 bits of the register clears the high 64 bits (in
         * the ARM ARM pseudocode this is implicit in the fact
         * that 'rval' is a 64 bit wide variable).
         * For quad operations, we might still need to zero the
         * high bits of SVE.
         */
        for (r = 0; r < rpt * selem; r++) {
            int tt = (rt + r) % 32;
            clear_vec_high(s, is_q, tt);
        }
    }

    if (is_postidx) {
        if (rm == 31) {
            /* Rm == 31 selects the fixed transfer-size immediate. */
            tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
        } else {
            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
}
3757 
3758 /* AdvSIMD load/store single structure
3759  *
3760  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3761  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3762  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
3763  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3764  *
3765  * AdvSIMD load/store single structure (post-indexed)
3766  *
3767  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3768  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3769  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
3770  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3771  *
3772  * Rt: first (or only) SIMD&FP register to be transferred
3773  * Rn: base address or SP
3774  * Rm (post-index only): post-index register (when !31) or size dependent #imm
3775  * index = encoded in Q:S:size dependent on size
3776  *
3777  * lane_size = encoded in R, opc
3778  * transfer width = encoded in opc, S, size
3779  */
static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 10, 2);
    int S = extract32(insn, 12, 1);
    int opc = extract32(insn, 13, 3);
    int R = extract32(insn, 21, 1);
    int is_load = extract32(insn, 22, 1);
    int is_postidx = extract32(insn, 23, 1);
    int is_q = extract32(insn, 30, 1);

    int scale = extract32(opc, 1, 2);   /* log2(lane bytes); adjusted below */
    int selem = (extract32(opc, 0, 1) << 1 | R) + 1;  /* structure elems 1..4 */
    bool replicate = false;             /* load-and-replicate (LD*R) form */
    int index = is_q << 3 | S << 2 | size;  /* raw lane index, Q:S:size */
    int xs, total;
    TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
    MemOp mop;

    if (extract32(insn, 31, 1)) {
        unallocated_encoding(s);
        return;
    }
    /* Without post-indexing, the Rm field must read as zero. */
    if (!is_postidx && rm != 0) {
        unallocated_encoding(s);
        return;
    }

    /*
     * Refine scale and index from the raw fields: the low bits of
     * 'index' that encode the lane size are shifted out, and the
     * load-and-replicate forms take their lane size from 'size'.
     */
    switch (scale) {
    case 3:
        /* Load and replicate: loads only, and S must be zero. */
        if (!is_load || S) {
            unallocated_encoding(s);
            return;
        }
        scale = size;
        replicate = true;
        break;
    case 0:
        /* 8-bit lanes: index is Q:S:size unchanged */
        break;
    case 1:
        /* 16-bit lanes: size<0> must be zero */
        if (extract32(size, 0, 1)) {
            unallocated_encoding(s);
            return;
        }
        index >>= 1;
        break;
    case 2:
        /* 32-bit lanes (size == 00) or 64-bit lanes (size == 01) */
        if (extract32(size, 1, 1)) {
            unallocated_encoding(s);
            return;
        }
        if (!extract32(size, 0, 1)) {
            index >>= 2;
        } else {
            if (S) {
                unallocated_encoding(s);
                return;
            }
            index >>= 3;
            scale = 3;
        }
        break;
    default:
        g_assert_not_reached();
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    total = selem << scale;   /* total bytes transferred */
    tcg_rn = cpu_reg_sp(s, rn);

    /* One MTE check covers the whole transfer. */
    clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31,
                                total);
    mop = finalize_memop(s, scale);

    tcg_ebytes = tcg_constant_i64(1 << scale);
    for (xs = 0; xs < selem; xs++) {
        if (replicate) {
            /* Load and replicate to all elements */
            TCGv_i64 tcg_tmp = tcg_temp_new_i64();

            tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
            tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
                                 (is_q + 1) * 8, vec_full_reg_size(s),
                                 tcg_tmp);
        } else {
            /* Load/store one element per register */
            if (is_load) {
                do_vec_ld(s, rt, index, clean_addr, mop);
            } else {
                do_vec_st(s, rt, index, clean_addr, mop);
            }
        }
        tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
        rt = (rt + 1) % 32;   /* register numbers wrap modulo 32 */
    }

    if (is_postidx) {
        if (rm == 31) {
            /* Rm == 31 selects the fixed transfer-size immediate. */
            tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
        } else {
            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
}
3893 
3894 /*
3895  * Load/Store memory tags
3896  *
3897  *  31 30 29         24     22  21     12    10      5      0
3898  * +-----+-------------+-----+---+------+-----+------+------+
3899  * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 |  Rn  |  Rt  |
3900  * +-----+-------------+-----+---+------+-----+------+------+
3901  */
static void disas_ldst_tag(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    /* imm9 is scaled by the 16-byte tag granule */
    uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
    int op2 = extract32(insn, 10, 2);
    int op1 = extract32(insn, 22, 2);
    bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
    int index = 0;
    TCGv_i64 addr, clean_addr, tcg_rt;

    /* We checked insn bits [29:24,21] in the caller.  */
    if (extract32(insn, 30, 2) != 3) {
        goto do_unallocated;
    }

    /*
     * @index is a tri-state variable which has 3 states:
     * < 0 : post-index, writeback
     * = 0 : signed offset
     * > 0 : pre-index, writeback
     */
    switch (op1) {
    case 0:
        if (op2 != 0) {
            /* STG */
            index = op2 - 2;
        } else {
            /* STZGM */
            if (s->current_el == 0 || offset != 0) {
                goto do_unallocated;
            }
            is_mult = is_zero = true;
        }
        break;
    case 1:
        if (op2 != 0) {
            /* STZG */
            is_zero = true;
            index = op2 - 2;
        } else {
            /* LDG */
            is_load = true;
        }
        break;
    case 2:
        if (op2 != 0) {
            /* ST2G */
            is_pair = true;
            index = op2 - 2;
        } else {
            /* STGM */
            if (s->current_el == 0 || offset != 0) {
                goto do_unallocated;
            }
            is_mult = true;
        }
        break;
    case 3:
        if (op2 != 0) {
            /* STZ2G */
            is_pair = is_zero = true;
            index = op2 - 2;
        } else {
            /* LDGM */
            if (s->current_el == 0 || offset != 0) {
                goto do_unallocated;
            }
            is_mult = is_load = true;
        }
        break;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }

    /*
     * The bulk forms (STGM/STZGM/LDGM) require full MTE; the
     * single-granule forms only require the instruction subset.
     */
    if (is_mult
        ? !dc_isar_feature(aa64_mte, s)
        : !dc_isar_feature(aa64_mte_insn_reg, s)) {
        goto do_unallocated;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    addr = read_cpu_reg_sp(s, rn, true);
    if (index >= 0) {
        /* pre-index or signed offset */
        tcg_gen_addi_i64(addr, addr, offset);
    }

    if (is_mult) {
        tcg_rt = cpu_reg(s, rt);

        if (is_zero) {
            int size = 4 << s->dcz_blocksize;

            /* Only store tags when tag access is enabled. */
            if (s->ata) {
                gen_helper_stzgm_tags(cpu_env, addr, tcg_rt);
            }
            /*
             * The non-tags portion of STZGM is mostly like DC_ZVA,
             * except the alignment happens before the access.
             */
            clean_addr = clean_data_tbi(s, addr);
            tcg_gen_andi_i64(clean_addr, clean_addr, -size);
            gen_helper_dc_zva(cpu_env, clean_addr);
        } else if (s->ata) {
            if (is_load) {
                gen_helper_ldgm(tcg_rt, cpu_env, addr);
            } else {
                gen_helper_stgm(cpu_env, addr, tcg_rt);
            }
        } else {
            /*
             * Tag access disabled: still probe the memory so faults
             * are raised, but perform no actual tag transfer.
             */
            MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
            int size = 4 << GMID_EL1_BS;

            clean_addr = clean_data_tbi(s, addr);
            tcg_gen_andi_i64(clean_addr, clean_addr, -size);
            gen_probe_access(s, clean_addr, acc, size);

            if (is_load) {
                /* The result tags are zeros.  */
                tcg_gen_movi_i64(tcg_rt, 0);
            }
        }
        return;
    }

    if (is_load) {
        /* LDG: read the tag for the granule-aligned address into Rt. */
        tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
        tcg_rt = cpu_reg(s, rt);
        if (s->ata) {
            gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt);
        } else {
            /* Tag access disabled: probe, then return address with tag 0. */
            clean_addr = clean_data_tbi(s, addr);
            gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
            gen_address_with_allocation_tag0(tcg_rt, addr);
        }
    } else {
        tcg_rt = cpu_reg_sp(s, rt);
        if (!s->ata) {
            /*
             * For STG and ST2G, we need to check alignment and probe memory.
             * TODO: For STZG and STZ2G, we could rely on the stores below,
             * at least for system mode; user-only won't enforce alignment.
             */
            if (is_pair) {
                gen_helper_st2g_stub(cpu_env, addr);
            } else {
                gen_helper_stg_stub(cpu_env, addr);
            }
        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
            if (is_pair) {
                gen_helper_st2g_parallel(cpu_env, addr, tcg_rt);
            } else {
                gen_helper_stg_parallel(cpu_env, addr, tcg_rt);
            }
        } else {
            if (is_pair) {
                gen_helper_st2g(cpu_env, addr, tcg_rt);
            } else {
                gen_helper_stg(cpu_env, addr, tcg_rt);
            }
        }
    }

    if (is_zero) {
        /* STZG/STZ2G: zero the data of one (or two) tag granules. */
        TCGv_i64 clean_addr = clean_data_tbi(s, addr);
        TCGv_i64 tcg_zero = tcg_constant_i64(0);
        int mem_index = get_mem_index(s);
        int i, n = (1 + is_pair) << LOG2_TAG_GRANULE;

        tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index,
                            MO_UQ | MO_ALIGN_16);
        for (i = 8; i < n; i += 8) {
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
            tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_UQ);
        }
    }

    if (index != 0) {
        /* pre-index or post-index */
        if (index < 0) {
            /* post-index */
            tcg_gen_addi_i64(addr, addr, offset);
        }
        tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr);
    }
}
4095 
4096 /* Loads and stores */
4097 static void disas_ldst(DisasContext *s, uint32_t insn)
4098 {
4099     switch (extract32(insn, 24, 6)) {
4100     case 0x08: /* Load/store exclusive */
4101         disas_ldst_excl(s, insn);
4102         break;
4103     case 0x18: case 0x1c: /* Load register (literal) */
4104         disas_ld_lit(s, insn);
4105         break;
4106     case 0x28: case 0x29:
4107     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
4108         disas_ldst_pair(s, insn);
4109         break;
4110     case 0x38: case 0x39:
4111     case 0x3c: case 0x3d: /* Load/store register (all forms) */
4112         disas_ldst_reg(s, insn);
4113         break;
4114     case 0x0c: /* AdvSIMD load/store multiple structures */
4115         disas_ldst_multiple_struct(s, insn);
4116         break;
4117     case 0x0d: /* AdvSIMD load/store single structure */
4118         disas_ldst_single_struct(s, insn);
4119         break;
4120     case 0x19:
4121         if (extract32(insn, 21, 1) != 0) {
4122             disas_ldst_tag(s, insn);
4123         } else if (extract32(insn, 10, 2) == 0) {
4124             disas_ldst_ldapr_stlr(s, insn);
4125         } else {
4126             unallocated_encoding(s);
4127         }
4128         break;
4129     default:
4130         unallocated_encoding(s);
4131         break;
4132     }
4133 }
4134 
4135 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4136 
4137 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4138                     bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4139 {
4140     TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4141     TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4142     TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4143 
4144     fn(tcg_rd, tcg_rn, tcg_imm);
4145     if (!a->sf) {
4146         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4147     }
4148     return true;
4149 }
4150 
4151 /*
4152  * PC-rel. addressing
4153  */
4154 
/* ADR: Rd = PC + imm (imm is the signed offset assembled by the decoder). */
static bool trans_ADR(DisasContext *s, arg_ri *a)
{
    gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
    return true;
}
4160 
4161 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4162 {
4163     int64_t offset = (int64_t)a->imm << 12;
4164 
4165     /* The page offset is ok for CF_PCREL. */
4166     offset -= s->pc_curr & 0xfff;
4167     gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4168     return true;
4169 }
4170 
4171 /*
4172  * Add/subtract (immediate)
4173  */
/*
 * ADD/SUB write Rd|SP (rd_sp = 1); the flag-setting ADDS/SUBS write
 * Rd|XZR and use the CC helpers to compute NZCV.  All read Rn|SP.
 */
TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4178 
4179 /*
4180  * Add/subtract (immediate, with tags)
4181  */
4182 
4183 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4184                                       bool sub_op)
4185 {
4186     TCGv_i64 tcg_rn, tcg_rd;
4187     int imm;
4188 
4189     imm = a->uimm6 << LOG2_TAG_GRANULE;
4190     if (sub_op) {
4191         imm = -imm;
4192     }
4193 
4194     tcg_rn = cpu_reg_sp(s, a->rn);
4195     tcg_rd = cpu_reg_sp(s, a->rd);
4196 
4197     if (s->ata) {
4198         gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn,
4199                            tcg_constant_i32(imm),
4200                            tcg_constant_i32(a->uimm4));
4201     } else {
4202         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4203         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4204     }
4205     return true;
4206 }
4207 
/* ADDG/SUBG are gated on the MTE instruction subset being present. */
TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4210 
/*
 * Replicate a value across a 64-bit word.  @mask holds a value in its
 * low @e bits (all higher bits zero); the result repeats that value
 * in every element of size @e within the 64-bit return value.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    for (width = e; width < 64; width *= 2) {
        mask |= mask << width;
    }
    return mask;
}
4224 
4225 /*
4226  * Logical (immediate)
4227  */
4228 
4229 /*
4230  * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4231  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4232  * value (ie should cause a guest UNDEF exception), and true if they are
4233  * valid, in which case the decoded bit pattern is written to result.
4234  */
4235 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4236                             unsigned int imms, unsigned int immr)
4237 {
4238     uint64_t mask;
4239     unsigned e, levels, s, r;
4240     int len;
4241 
4242     assert(immn < 2 && imms < 64 && immr < 64);
4243 
4244     /* The bit patterns we create here are 64 bit patterns which
4245      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4246      * 64 bits each. Each element contains the same value: a run
4247      * of between 1 and e-1 non-zero bits, rotated within the
4248      * element by between 0 and e-1 bits.
4249      *
4250      * The element size and run length are encoded into immn (1 bit)
4251      * and imms (6 bits) as follows:
4252      * 64 bit elements: immn = 1, imms = <length of run - 1>
4253      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4254      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4255      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4256      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4257      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4258      * Notice that immn = 0, imms = 11111x is the only combination
4259      * not covered by one of the above options; this is reserved.
4260      * Further, <length of run - 1> all-ones is a reserved pattern.
4261      *
4262      * In all cases the rotation is by immr % e (and immr is 6 bits).
4263      */
4264 
4265     /* First determine the element size */
4266     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4267     if (len < 1) {
4268         /* This is the immn == 0, imms == 0x11111x case */
4269         return false;
4270     }
4271     e = 1 << len;
4272 
4273     levels = e - 1;
4274     s = imms & levels;
4275     r = immr & levels;
4276 
4277     if (s == levels) {
4278         /* <length of run - 1> mustn't be all-ones. */
4279         return false;
4280     }
4281 
4282     /* Create the value of one element: s+1 set bits rotated
4283      * by r within the element (which is e bits wide)...
4284      */
4285     mask = MAKE_64BIT_MASK(0, s + 1);
4286     if (r) {
4287         mask = (mask >> r) | (mask << (e - r));
4288         mask &= MAKE_64BIT_MASK(0, e);
4289     }
4290     /* ...then replicate the element over the whole 64 bit value */
4291     mask = bitfield_replicate(mask, e);
4292     *result = mask;
4293     return true;
4294 }
4295 
4296 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4297                         void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4298 {
4299     TCGv_i64 tcg_rd, tcg_rn;
4300     uint64_t imm;
4301 
4302     /* Some immediate field values are reserved. */
4303     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4304                                 extract32(a->dbm, 0, 6),
4305                                 extract32(a->dbm, 6, 6))) {
4306         return false;
4307     }
4308     if (!a->sf) {
4309         imm &= 0xffffffffull;
4310     }
4311 
4312     tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4313     tcg_rn = cpu_reg(s, a->rn);
4314 
4315     fn(tcg_rd, tcg_rn, imm);
4316     if (set_cc) {
4317         gen_logic_CC(a->sf, tcg_rd);
4318     }
4319     if (!a->sf) {
4320         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4321     }
4322     return true;
4323 }
4324 
/* Only ANDS sets flags; AND/ORR/EOR may also write SP (via gen_rri_log). */
TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4329 
4330 /*
4331  * Move wide (immediate)
4332  */
4333 
/* MOVZ: Rd = imm16 << (hw * 16), all other bits zeroed. */
static bool trans_MOVZ(DisasContext *s, arg_movw *a)
{
    int pos = a->hw << 4;
    tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
    return true;
}
4340 
4341 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4342 {
4343     int pos = a->hw << 4;
4344     uint64_t imm = a->imm;
4345 
4346     imm = ~(imm << pos);
4347     if (!a->sf) {
4348         imm = (uint32_t)imm;
4349     }
4350     tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4351     return true;
4352 }
4353 
4354 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4355 {
4356     int pos = a->hw << 4;
4357     TCGv_i64 tcg_rd, tcg_im;
4358 
4359     tcg_rd = cpu_reg(s, a->rd);
4360     tcg_im = tcg_constant_i64(a->imm);
4361     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4362     if (!a->sf) {
4363         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4364     }
4365     return true;
4366 }
4367 
4368 /*
4369  * Bitfield
4370  */
4371 
/*
 * SBFM: signed bitfield move.  Extracts a bitfield from Rn,
 * sign-extends it, and places it in Rd.
 */
static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
{
    TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
    TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
    unsigned int bitsize = a->sf ? 64 : 32;
    unsigned int ri = a->immr;
    unsigned int si = a->imms;
    unsigned int pos, len;

    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        len = (si - ri) + 1;
        tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
        if (!a->sf) {
            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
        }
    } else {
        /* Wd<32+s-r,32-r> = Wn<s:0> */
        len = si + 1;
        pos = (bitsize - ri) & (bitsize - 1);

        if (len < ri) {
            /*
             * Sign extend the destination field from len to fill the
             * balance of the word.  Let the deposit below insert all
             * of those sign bits.
             */
            tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
            len = ri;
        }

        /*
         * We start with zero, and we haven't modified any bits outside
         * bitsize, therefore no final zero-extension is needed for !sf.
         */
        tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
    }
    return true;
}
4411 
4412 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4413 {
4414     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4415     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4416     unsigned int bitsize = a->sf ? 64 : 32;
4417     unsigned int ri = a->immr;
4418     unsigned int si = a->imms;
4419     unsigned int pos, len;
4420 
4421     tcg_rd = cpu_reg(s, a->rd);
4422     tcg_tmp = read_cpu_reg(s, a->rn, 1);
4423 
4424     if (si >= ri) {
4425         /* Wd<s-r:0> = Wn<s:r> */
4426         len = (si - ri) + 1;
4427         tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4428     } else {
4429         /* Wd<32+s-r,32-r> = Wn<s:0> */
4430         len = si + 1;
4431         pos = (bitsize - ri) & (bitsize - 1);
4432         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4433     }
4434     return true;
4435 }
4436 
4437 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4438 {
4439     TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4440     TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4441     unsigned int bitsize = a->sf ? 64 : 32;
4442     unsigned int ri = a->immr;
4443     unsigned int si = a->imms;
4444     unsigned int pos, len;
4445 
4446     tcg_rd = cpu_reg(s, a->rd);
4447     tcg_tmp = read_cpu_reg(s, a->rn, 1);
4448 
4449     if (si >= ri) {
4450         /* Wd<s-r:0> = Wn<s:r> */
4451         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4452         len = (si - ri) + 1;
4453         pos = 0;
4454     } else {
4455         /* Wd<32+s-r,32-r> = Wn<s:0> */
4456         len = si + 1;
4457         pos = (bitsize - ri) & (bitsize - 1);
4458     }
4459 
4460     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4461     if (!a->sf) {
4462         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4463     }
4464     return true;
4465 }
4466 
4467 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4468 {
4469     TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4470 
4471     tcg_rd = cpu_reg(s, a->rd);
4472 
4473     if (unlikely(a->imm == 0)) {
4474         /*
4475          * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4476          * so an extract from bit 0 is a special case.
4477          */
4478         if (a->sf) {
4479             tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4480         } else {
4481             tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4482         }
4483     } else {
4484         tcg_rm = cpu_reg(s, a->rm);
4485         tcg_rn = cpu_reg(s, a->rn);
4486 
4487         if (a->sf) {
4488             /* Specialization to ROR happens in EXTRACT2.  */
4489             tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4490         } else {
4491             TCGv_i32 t0 = tcg_temp_new_i32();
4492 
4493             tcg_gen_extrl_i64_i32(t0, tcg_rm);
4494             if (a->rm == a->rn) {
4495                 tcg_gen_rotri_i32(t0, t0, a->imm);
4496             } else {
4497                 TCGv_i32 t1 = tcg_temp_new_i32();
4498                 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4499                 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4500             }
4501             tcg_gen_extu_i32_i64(tcg_rd, t0);
4502         }
4503     }
4504     return true;
4505 }
4506 
4507 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
4508  * Note that it is the caller's responsibility to ensure that the
4509  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4510  * mandated semantics for out of range shifts.
4511  */
static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
{
    switch (shift_type) {
    case A64_SHIFT_TYPE_LSL:
        tcg_gen_shl_i64(dst, src, shift_amount);
        break;
    case A64_SHIFT_TYPE_LSR:
        tcg_gen_shr_i64(dst, src, shift_amount);
        break;
    case A64_SHIFT_TYPE_ASR:
        /* For 32-bit ASR, sign-extend first so the i64 arithmetic
         * shift produces the correct sign bits in the low half.
         */
        if (!sf) {
            tcg_gen_ext32s_i64(dst, src);
        }
        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
        break;
    case A64_SHIFT_TYPE_ROR:
        if (sf) {
            tcg_gen_rotr_i64(dst, src, shift_amount);
        } else {
            /* 32-bit rotate must wrap within 32 bits, so do it in i32.  */
            TCGv_i32 t0, t1;
            t0 = tcg_temp_new_i32();
            t1 = tcg_temp_new_i32();
            tcg_gen_extrl_i64_i32(t0, src);
            tcg_gen_extrl_i64_i32(t1, shift_amount);
            tcg_gen_rotr_i32(t0, t0, t1);
            tcg_gen_extu_i32_i64(dst, t0);
        }
        break;
    default:
        assert(FALSE); /* all shift types should be handled */
        break;
    }

    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(dst, dst);
    }
}
4550 
4551 /* Shift a TCGv src by immediate, put result in dst.
4552  * The shift amount must be in range (this should always be true as the
4553  * relevant instructions will UNDEF on bad shift immediates).
4554  */
4555 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4556                           enum a64_shift_type shift_type, unsigned int shift_i)
4557 {
4558     assert(shift_i < (sf ? 64 : 32));
4559 
4560     if (shift_i == 0) {
4561         tcg_gen_mov_i64(dst, src);
4562     } else {
4563         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
4564     }
4565 }
4566 
4567 /* Logical (shifted register)
4568  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4569  * +----+-----+-----------+-------+---+------+--------+------+------+
4570  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4571  * +----+-----+-----------+-------+---+------+--------+------+------+
4572  */
static void disas_logic_reg(DisasContext *s, uint32_t insn)
{
    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    shift_type = extract32(insn, 22, 2);
    invert = extract32(insn, 21, 1);
    rm = extract32(insn, 16, 5);
    shift_amount = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    /* A shift amount of 32 or more is unallocated in the 32-bit form.  */
    if (!sf && (shift_amount & (1 << 5))) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd = cpu_reg(s, rd);

    if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
        /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
         * register-register MOV and MVN, so it is worth special casing.
         */
        tcg_rm = cpu_reg(s, rm);
        if (invert) {
            tcg_gen_not_i64(tcg_rd, tcg_rm);
            if (!sf) {
                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
            }
        } else {
            if (sf) {
                tcg_gen_mov_i64(tcg_rd, tcg_rm);
            } else {
                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
            }
        }
        return;
    }

    tcg_rm = read_cpu_reg(s, rm, sf);

    if (shift_amount) {
        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
    }

    tcg_rn = cpu_reg(s, rn);

    /* Dispatch on opc combined with the N (invert) bit.  */
    switch (opc | (invert << 2)) {
    case 0: /* AND */
    case 3: /* ANDS */
        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 1: /* ORR */
        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 2: /* EOR */
        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 4: /* BIC */
    case 7: /* BICS */
        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 5: /* ORN */
        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 6: /* EON */
        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    default:
        assert(FALSE);
        break;
    }

    if (!sf) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }

    /* opc == 3 is ANDS/BICS: set NZCV from the result.  */
    if (opc == 3) {
        gen_logic_CC(sf, tcg_rd);
    }
}
4656 
4657 /*
4658  * Add/subtract (extended register)
4659  *
4660  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4661  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4662  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4663  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4664  *
4665  *  sf: 0 -> 32bit, 1 -> 64bit
4666  *  op: 0 -> add  , 1 -> sub
4667  *   S: 1 -> set flags
4668  * opt: 00
4669  * option: extension type (see DecodeRegExtend)
4670  * imm3: optional shift to Rm
4671  *
4672  * Rd = Rn + LSL(extend(Rm), amount)
4673  */
static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm3 = extract32(insn, 10, 3);
    int option = extract32(insn, 13, 3);
    int rm = extract32(insn, 16, 5);
    int opt = extract32(insn, 22, 2);
    bool setflags = extract32(insn, 29, 1);
    bool sub_op = extract32(insn, 30, 1);
    bool sf = extract32(insn, 31, 1);

    TCGv_i64 tcg_rm, tcg_rn; /* temps */
    TCGv_i64 tcg_rd;
    TCGv_i64 tcg_result;

    /* Shift amounts above 4 and non-zero opt are unallocated.  */
    if (imm3 > 4 || opt != 0) {
        unallocated_encoding(s);
        return;
    }

    /* non-flag setting ops may use SP */
    if (!setflags) {
        tcg_rd = cpu_reg_sp(s, rd);
    } else {
        tcg_rd = cpu_reg(s, rd);
    }
    tcg_rn = read_cpu_reg_sp(s, rn, sf);

    /* Rm is extended per 'option' and shifted left by imm3.  */
    tcg_rm = read_cpu_reg(s, rm, sf);
    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);

    tcg_result = tcg_temp_new_i64();

    if (!setflags) {
        if (sub_op) {
            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
        }
    } else {
        if (sub_op) {
            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
        } else {
            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
        }
    }

    /* 32-bit results are written back zero-extended.  */
    if (sf) {
        tcg_gen_mov_i64(tcg_rd, tcg_result);
    } else {
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
    }
}
4728 
4729 /*
4730  * Add/subtract (shifted register)
4731  *
4732  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4733  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4734  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4735  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4736  *
4737  *    sf: 0 -> 32bit, 1 -> 64bit
4738  *    op: 0 -> add  , 1 -> sub
4739  *     S: 1 -> set flags
4740  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4741  *  imm6: Shift amount to apply to Rm before the add/sub
4742  */
static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm6 = extract32(insn, 10, 6);
    int rm = extract32(insn, 16, 5);
    int shift_type = extract32(insn, 22, 2);
    bool setflags = extract32(insn, 29, 1);
    bool sub_op = extract32(insn, 30, 1);
    bool sf = extract32(insn, 31, 1);

    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_rn, tcg_rm;
    TCGv_i64 tcg_result;

    /* ROR shift type, and amounts >= 32 in the 32-bit form, are reserved.  */
    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
        unallocated_encoding(s);
        return;
    }

    tcg_rn = read_cpu_reg(s, rn, sf);
    tcg_rm = read_cpu_reg(s, rm, sf);

    /* Apply the optional shift to Rm before the add/sub.  */
    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);

    tcg_result = tcg_temp_new_i64();

    if (!setflags) {
        if (sub_op) {
            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
        }
    } else {
        if (sub_op) {
            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
        } else {
            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
        }
    }

    /* 32-bit results are written back zero-extended.  */
    if (sf) {
        tcg_gen_mov_i64(tcg_rd, tcg_result);
    } else {
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
    }
}
4790 
4791 /* Data-processing (3 source)
4792  *
4793  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
4794  *  +--+------+-----------+------+------+----+------+------+------+
4795  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4796  *  +--+------+-----------+------+------+----+------+------+------+
4797  */
static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int ra = extract32(insn, 10, 5);
    int rm = extract32(insn, 16, 5);
    /* op_id packs sf:op54:op31:o0 into one value for the switch below.  */
    int op_id = (extract32(insn, 29, 3) << 4) |
        (extract32(insn, 21, 3) << 1) |
        extract32(insn, 15, 1);
    bool sf = extract32(insn, 31, 1);
    bool is_sub = extract32(op_id, 0, 1);
    bool is_high = extract32(op_id, 2, 1);
    bool is_signed = false;
    TCGv_i64 tcg_op1;
    TCGv_i64 tcg_op2;
    TCGv_i64 tcg_tmp;

    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
    switch (op_id) {
    case 0x42: /* SMADDL */
    case 0x43: /* SMSUBL */
    case 0x44: /* SMULH */
        is_signed = true;
        break;
    case 0x0: /* MADD (32bit) */
    case 0x1: /* MSUB (32bit) */
    case 0x40: /* MADD (64bit) */
    case 0x41: /* MSUB (64bit) */
    case 0x4a: /* UMADDL */
    case 0x4b: /* UMSUBL */
    case 0x4c: /* UMULH */
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    /* SMULH/UMULH: write the high 64 bits of the 128-bit product.  */
    if (is_high) {
        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
        TCGv_i64 tcg_rd = cpu_reg(s, rd);
        TCGv_i64 tcg_rn = cpu_reg(s, rn);
        TCGv_i64 tcg_rm = cpu_reg(s, rm);

        if (is_signed) {
            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        } else {
            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        }
        return;
    }

    tcg_op1 = tcg_temp_new_i64();
    tcg_op2 = tcg_temp_new_i64();
    tcg_tmp = tcg_temp_new_i64();

    /* op_id < 0x42 covers MADD/MSUB/UMADDL/UMSUBL... the widening forms
     * (>= 0x42 and the 0x4a/0x4b cases fall into the else) extend the
     * 32-bit source operands before the 64-bit multiply.
     */
    if (op_id < 0x42) {
        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
    } else {
        if (is_signed) {
            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
        } else {
            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
        }
    }

    if (ra == 31 && !is_sub) {
        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
    } else {
        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
        if (is_sub) {
            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        } else {
            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        }
    }

    /* 32-bit forms write back zero-extended.  */
    if (!sf) {
        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
    }
}
4882 
4883 /* Add/subtract (with carry)
4884  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
4885  * +--+--+--+------------------------+------+-------------+------+-----+
4886  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
4887  * +--+--+--+------------------------+------+-------------+------+-----+
4888  */
4889 
4890 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4891 {
4892     unsigned int sf, op, setflags, rm, rn, rd;
4893     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4894 
4895     sf = extract32(insn, 31, 1);
4896     op = extract32(insn, 30, 1);
4897     setflags = extract32(insn, 29, 1);
4898     rm = extract32(insn, 16, 5);
4899     rn = extract32(insn, 5, 5);
4900     rd = extract32(insn, 0, 5);
4901 
4902     tcg_rd = cpu_reg(s, rd);
4903     tcg_rn = cpu_reg(s, rn);
4904 
4905     if (op) {
4906         tcg_y = tcg_temp_new_i64();
4907         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4908     } else {
4909         tcg_y = cpu_reg(s, rm);
4910     }
4911 
4912     if (setflags) {
4913         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4914     } else {
4915         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4916     }
4917 }
4918 
4919 /*
4920  * Rotate right into flags
4921  *  31 30 29                21       15          10      5  4      0
4922  * +--+--+--+-----------------+--------+-----------+------+--+------+
4923  * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
4924  * +--+--+--+-----------------+--------+-----------+------+--+------+
4925  */
static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
{
    int mask = extract32(insn, 0, 4);
    int o2 = extract32(insn, 4, 1);
    int rn = extract32(insn, 5, 5);
    int imm6 = extract32(insn, 15, 6);
    int sf_op_s = extract32(insn, 29, 3);
    TCGv_i64 tcg_rn;
    TCGv_i32 nzcv;

    /* RMIF requires sf:op:S == 1:0:1 and the FEAT_FlagM feature.  */
    if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
        unallocated_encoding(s);
        return;
    }

    tcg_rn = read_cpu_reg(s, rn, 1);
    tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);

    /* The low 4 bits of the rotated value are the candidate N:Z:C:V.  */
    nzcv = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(nzcv, tcg_rn);

    /* Only the flags selected by 'mask' are updated.  Each flag variable
     * encodes its value in a different position (NF/VF in bit 31, ZF as
     * zero/non-zero, CF in bit 0), hence the differing shifts.
     */
    if (mask & 8) { /* N */
        tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
    }
    if (mask & 4) { /* Z */
        tcg_gen_not_i32(cpu_ZF, nzcv);
        tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
    }
    if (mask & 2) { /* C */
        tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
    }
    if (mask & 1) { /* V */
        tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
    }
}
4961 
4962 /*
4963  * Evaluate into flags
4964  *  31 30 29                21        15   14        10      5  4      0
4965  * +--+--+--+-----------------+---------+----+---------+------+--+------+
4966  * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
4967  * +--+--+--+-----------------+---------+----+---------+------+--+------+
4968  */
static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
{
    int o3_mask = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int o2 = extract32(insn, 15, 6);
    int sz = extract32(insn, 14, 1);
    int sf_op_s = extract32(insn, 29, 3);
    TCGv_i32 tmp;
    int shift;

    /* SETF8/SETF16 require sf:op:S == 0:0:1, o3:mask == 0:1101,
     * and the FEAT_FlagM feature.
     */
    if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
        !dc_isar_feature(aa64_condm_4, s)) {
        unallocated_encoding(s);
        return;
    }
    shift = sz ? 16 : 24;  /* SETF16 or SETF8 */

    tmp = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
    /* Shift the operand's sign bit (bit 7 or 15) into bit 31 for NF;
     * VF is set from bit (shift) XOR bit (shift-1), i.e. overflow of
     * the truncation; ZF mirrors NF's source so Z tests the same bits.
     */
    tcg_gen_shli_i32(cpu_NF, tmp, shift);
    tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
}
4993 
4994 /* Conditional compare (immediate / register)
4995  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
4996  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4997  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
4998  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4999  *        [1]                             y                [0]       [0]
5000  */
static void disas_cc(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
    DisasCompare c;

    /* S must be 1 for CCMN/CCMP.  */
    if (!extract32(insn, 29, 1)) {
        unallocated_encoding(s);
        return;
    }
    /* o2 and o3 must both be 0.  */
    if (insn & (1 << 10 | 1 << 4)) {
        unallocated_encoding(s);
        return;
    }
    sf = extract32(insn, 31, 1);
    op = extract32(insn, 30, 1);
    is_imm = extract32(insn, 11, 1);
    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    nzcv = extract32(insn, 0, 4);

    /* Set T0 = !COND.  */
    tcg_t0 = tcg_temp_new_i32();
    arm_test_cc(&c, cond);
    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);

    /* Load the arguments for the new comparison.  */
    if (is_imm) {
        tcg_y = tcg_temp_new_i64();
        tcg_gen_movi_i64(tcg_y, y);
    } else {
        tcg_y = cpu_reg(s, y);
    }
    tcg_rn = cpu_reg(s, rn);

    /* Set the flags for the new comparison.  */
    tcg_tmp = tcg_temp_new_i64();
    if (op) {
        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    } else {
        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    }

    /* If COND was false, force the flags to #nzcv.  Compute two masks
     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
     * For tcg hosts that support ANDC, we can make do with just T1.
     * In either case, allow the tcg optimizer to delete any unused mask.
     */
    tcg_t1 = tcg_temp_new_i32();
    tcg_t2 = tcg_temp_new_i32();
    tcg_gen_neg_i32(tcg_t1, tcg_t0);
    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);

    /* For each flag, either force it toward the #nzcv bit when COND is
     * false (OR with a mask) or leave it alone (AND with all-ones).
     */
    if (nzcv & 8) { /* N */
        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
        }
    }
    if (nzcv & 4) { /* Z */
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
        }
    } else {
        /* ZF is "zero means Z set", so OR in T0 (=1) to clear Z.  */
        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
    }
    if (nzcv & 2) { /* C */
        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
        }
    }
    if (nzcv & 1) { /* V */
        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
        }
    }
}
5093 
5094 /* Conditional select
5095  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
5096  * +----+----+---+-----------------+------+------+-----+------+------+
5097  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
5098  * +----+----+---+-----------------+------+------+-----+------+------+
5099  */
static void disas_cond_select(DisasContext *s, uint32_t insn)
{
    unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
    TCGv_i64 tcg_rd, zero;
    DisasCompare64 c;

    if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
        /* S == 1 or op2<1> == 1 */
        unallocated_encoding(s);
        return;
    }
    sf = extract32(insn, 31, 1);
    else_inv = extract32(insn, 30, 1);
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    else_inc = extract32(insn, 10, 1);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    tcg_rd = cpu_reg(s, rd);

    a64_test_cc(&c, cond);
    zero = tcg_constant_i64(0);

    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
        /* CSET & CSETM.  */
        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
        if (else_inv) {
            /* CSETM: turn the 0/1 setcond result into 0/-1.  */
            tcg_gen_neg_i64(tcg_rd, tcg_rd);
        }
    } else {
        TCGv_i64 t_true = cpu_reg(s, rn);
        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
        /* Pre-transform the false operand: CSNEG, CSINV or CSINC.  */
        if (else_inv && else_inc) {
            tcg_gen_neg_i64(t_false, t_false);
        } else if (else_inv) {
            tcg_gen_not_i64(t_false, t_false);
        } else if (else_inc) {
            tcg_gen_addi_i64(t_false, t_false, 1);
        }
        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
    }

    if (!sf) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}
5147 
5148 static void handle_clz(DisasContext *s, unsigned int sf,
5149                        unsigned int rn, unsigned int rd)
5150 {
5151     TCGv_i64 tcg_rd, tcg_rn;
5152     tcg_rd = cpu_reg(s, rd);
5153     tcg_rn = cpu_reg(s, rn);
5154 
5155     if (sf) {
5156         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
5157     } else {
5158         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5159         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5160         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
5161         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5162     }
5163 }
5164 
5165 static void handle_cls(DisasContext *s, unsigned int sf,
5166                        unsigned int rn, unsigned int rd)
5167 {
5168     TCGv_i64 tcg_rd, tcg_rn;
5169     tcg_rd = cpu_reg(s, rd);
5170     tcg_rn = cpu_reg(s, rn);
5171 
5172     if (sf) {
5173         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
5174     } else {
5175         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5176         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5177         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
5178         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5179     }
5180 }
5181 
5182 static void handle_rbit(DisasContext *s, unsigned int sf,
5183                         unsigned int rn, unsigned int rd)
5184 {
5185     TCGv_i64 tcg_rd, tcg_rn;
5186     tcg_rd = cpu_reg(s, rd);
5187     tcg_rn = cpu_reg(s, rn);
5188 
5189     if (sf) {
5190         gen_helper_rbit64(tcg_rd, tcg_rn);
5191     } else {
5192         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5193         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5194         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
5195         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5196     }
5197 }
5198 
5199 /* REV with sf==1, opcode==3 ("REV64") */
5200 static void handle_rev64(DisasContext *s, unsigned int sf,
5201                          unsigned int rn, unsigned int rd)
5202 {
5203     if (!sf) {
5204         unallocated_encoding(s);
5205         return;
5206     }
5207     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
5208 }
5209 
5210 /* REV with sf==0, opcode==2
5211  * REV32 (sf==1, opcode==2)
5212  */
5213 static void handle_rev32(DisasContext *s, unsigned int sf,
5214                          unsigned int rn, unsigned int rd)
5215 {
5216     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5217     TCGv_i64 tcg_rn = cpu_reg(s, rn);
5218 
5219     if (sf) {
5220         tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
5221         tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
5222     } else {
5223         tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
5224     }
5225 }
5226 
/* REV16 (opcode==1): byte-reverse each 16-bit halfword of Rn.
 * Implemented branch-free: mask out the even and odd bytes and
 * recombine them after shifting each set by 8 in opposite directions.
 */
static void handle_rev16(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
    /* Mask selecting the even (low) byte of every halfword.  */
    TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);

    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
    tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
    tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
    tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
    tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
}
5242 
5243 /* Data-processing (1 source)
5244  *   31  30  29  28             21 20     16 15    10 9    5 4    0
5245  * +----+---+---+-----------------+---------+--------+------+------+
5246  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
5247  * +----+---+---+-----------------+---------+--------+------+------+
5248  */
static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
{
    unsigned int sf, opcode, opcode2, rn, rd;
    TCGv_i64 tcg_rd;

    /* S must be 0 for this group.  */
    if (extract32(insn, 29, 1)) {
        unallocated_encoding(s);
        return;
    }

    sf = extract32(insn, 31, 1);
    opcode = extract32(insn, 10, 6);
    opcode2 = extract32(insn, 16, 5);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    /* Pack sf, opcode2 and opcode into a single case value.  */
#define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))

    /* Note for the pointer-auth cases below: when the instruction is in
     * the architected NOP space (feature present but pauth not active)
     * we emit nothing; only when pauth is inactive AND the feature is
     * absent is the encoding unallocated.
     */
    switch (MAP(sf, opcode2, opcode)) {
    case MAP(0, 0x00, 0x00): /* RBIT */
    case MAP(1, 0x00, 0x00):
        handle_rbit(s, sf, rn, rd);
        break;
    case MAP(0, 0x00, 0x01): /* REV16 */
    case MAP(1, 0x00, 0x01):
        handle_rev16(s, sf, rn, rd);
        break;
    case MAP(0, 0x00, 0x02): /* REV/REV32 */
    case MAP(1, 0x00, 0x02):
        handle_rev32(s, sf, rn, rd);
        break;
    case MAP(1, 0x00, 0x03): /* REV64 */
        handle_rev64(s, sf, rn, rd);
        break;
    case MAP(0, 0x00, 0x04): /* CLZ */
    case MAP(1, 0x00, 0x04):
        handle_clz(s, sf, rn, rd);
        break;
    case MAP(0, 0x00, 0x05): /* CLS */
    case MAP(1, 0x00, 0x05):
        handle_cls(s, sf, rn, rd);
        break;
    case MAP(1, 0x01, 0x00): /* PACIA */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x01): /* PACIB */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x02): /* PACDA */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x03): /* PACDB */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x04): /* AUTIA */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x05): /* AUTIB */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x06): /* AUTDA */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x07): /* AUTDB */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    /* The Z-suffixed forms require Rn == 31 and use a zero modifier.  */
    case MAP(1, 0x01, 0x08): /* PACIZA */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
        }
        break;
    case MAP(1, 0x01, 0x09): /* PACIZB */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
        }
        break;
    case MAP(1, 0x01, 0x0a): /* PACDZA */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
        }
        break;
    case MAP(1, 0x01, 0x0b): /* PACDZB */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
        }
        break;
    case MAP(1, 0x01, 0x0c): /* AUTIZA */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
        }
        break;
    case MAP(1, 0x01, 0x0d): /* AUTIZB */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
        }
        break;
    case MAP(1, 0x01, 0x0e): /* AUTDZA */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
        }
        break;
    case MAP(1, 0x01, 0x0f): /* AUTDZB */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0));
        }
        break;
    case MAP(1, 0x01, 0x10): /* XPACI */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
        }
        break;
    case MAP(1, 0x01, 0x11): /* XPACD */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
        }
        break;
    default:
    do_unallocated:
        unallocated_encoding(s);
        break;
    }

#undef MAP
}
5443 
5444 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5445                        unsigned int rm, unsigned int rn, unsigned int rd)
5446 {
5447     TCGv_i64 tcg_n, tcg_m, tcg_rd;
5448     tcg_rd = cpu_reg(s, rd);
5449 
5450     if (!sf && is_signed) {
5451         tcg_n = tcg_temp_new_i64();
5452         tcg_m = tcg_temp_new_i64();
5453         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5454         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5455     } else {
5456         tcg_n = read_cpu_reg(s, rn, sf);
5457         tcg_m = read_cpu_reg(s, rm, sf);
5458     }
5459 
5460     if (is_signed) {
5461         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5462     } else {
5463         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5464     }
5465 
5466     if (!sf) { /* zero extend final result */
5467         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5468     }
5469 }
5470 
5471 /* LSLV, LSRV, ASRV, RORV */
5472 static void handle_shift_reg(DisasContext *s,
5473                              enum a64_shift_type shift_type, unsigned int sf,
5474                              unsigned int rm, unsigned int rn, unsigned int rd)
5475 {
5476     TCGv_i64 tcg_shift = tcg_temp_new_i64();
5477     TCGv_i64 tcg_rd = cpu_reg(s, rd);
5478     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5479 
5480     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5481     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5482 }
5483 
5484 /* CRC32[BHWX], CRC32C[BHWX] */
5485 static void handle_crc32(DisasContext *s,
5486                          unsigned int sf, unsigned int sz, bool crc32c,
5487                          unsigned int rm, unsigned int rn, unsigned int rd)
5488 {
5489     TCGv_i64 tcg_acc, tcg_val;
5490     TCGv_i32 tcg_bytes;
5491 
5492     if (!dc_isar_feature(aa64_crc32, s)
5493         || (sf == 1 && sz != 3)
5494         || (sf == 0 && sz == 3)) {
5495         unallocated_encoding(s);
5496         return;
5497     }
5498 
5499     if (sz == 3) {
5500         tcg_val = cpu_reg(s, rm);
5501     } else {
5502         uint64_t mask;
5503         switch (sz) {
5504         case 0:
5505             mask = 0xFF;
5506             break;
5507         case 1:
5508             mask = 0xFFFF;
5509             break;
5510         case 2:
5511             mask = 0xFFFFFFFF;
5512             break;
5513         default:
5514             g_assert_not_reached();
5515         }
5516         tcg_val = tcg_temp_new_i64();
5517         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5518     }
5519 
5520     tcg_acc = cpu_reg(s, rn);
5521     tcg_bytes = tcg_constant_i32(1 << sz);
5522 
5523     if (crc32c) {
5524         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5525     } else {
5526         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5527     }
5528 }
5529 
/* Data-processing (2 source)
 *   31   30  29 28             21 20  16 15    10 9    5 4    0
 * +----+---+---+-----------------+------+--------+------+------+
 * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
 * +----+---+---+-----------------+------+--------+------+------+
 */
static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
{
    unsigned int sf, rm, opcode, rn, rd, setflag;
    sf = extract32(insn, 31, 1);
    setflag = extract32(insn, 29, 1);
    rm = extract32(insn, 16, 5);
    opcode = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    /* The S bit is only allocated for SUBPS (opcode 0) */
    if (setflag && opcode != 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0: /* SUBP(S) */
        if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
            goto do_unallocated;
        } else {
            TCGv_i64 tcg_n, tcg_m, tcg_d;

            /*
             * Sign-extract from bit 55 so that the top byte of each
             * pointer (the MTE tag) does not affect the subtraction.
             */
            tcg_n = read_cpu_reg_sp(s, rn, true);
            tcg_m = read_cpu_reg_sp(s, rm, true);
            tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
            tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
            tcg_d = cpu_reg(s, rd);

            if (setflag) {
                gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
            } else {
                tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
            }
        }
        break;
    case 2: /* UDIV */
        handle_div(s, false, sf, rm, rn, rd);
        break;
    case 3: /* SDIV */
        handle_div(s, true, sf, rm, rn, rd);
        break;
    case 4: /* IRG */
        if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
            goto do_unallocated;
        }
        if (s->ata) {
            gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
                           cpu_reg_sp(s, rn), cpu_reg(s, rm));
        } else {
            /* Tags not enabled: just clear the tag field of the address */
            gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
                                             cpu_reg_sp(s, rn));
        }
        break;
    case 5: /* GMI */
        if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
            goto do_unallocated;
        } else {
            TCGv_i64 t = tcg_temp_new_i64();

            /* Xd = Xm | (1 << tag of Xn|SP), tag being bits [59:56] */
            tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
            tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
            tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
        }
        break;
    case 8: /* LSLV */
        handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
        break;
    case 9: /* LSRV */
        handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
        break;
    case 10: /* ASRV */
        handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
        break;
    case 11: /* RORV */
        handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
        break;
    case 12: /* PACGA */
        if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        gen_helper_pacga(cpu_reg(s, rd), cpu_env,
                         cpu_reg(s, rn), cpu_reg_sp(s, rm));
        break;
    case 16:
    case 17:
    case 18:
    case 19:
    case 20:
    case 21:
    case 22:
    case 23: /* CRC32 */
    {
        /* opcode[1:0] is the operand size, opcode[2] selects CRC32C */
        int sz = extract32(opcode, 0, 2);
        bool crc32c = extract32(opcode, 2, 1);
        handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
        break;
    }
    default:
    do_unallocated:
        unallocated_encoding(s);
        break;
    }
}
5639 
5640 /*
5641  * Data processing - register
5642  *  31  30 29  28      25    21  20  16      10         0
5643  * +--+---+--+---+-------+-----+-------+-------+---------+
5644  * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
5645  * +--+---+--+---+-------+-----+-------+-------+---------+
5646  */
5647 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5648 {
5649     int op0 = extract32(insn, 30, 1);
5650     int op1 = extract32(insn, 28, 1);
5651     int op2 = extract32(insn, 21, 4);
5652     int op3 = extract32(insn, 10, 6);
5653 
5654     if (!op1) {
5655         if (op2 & 8) {
5656             if (op2 & 1) {
5657                 /* Add/sub (extended register) */
5658                 disas_add_sub_ext_reg(s, insn);
5659             } else {
5660                 /* Add/sub (shifted register) */
5661                 disas_add_sub_reg(s, insn);
5662             }
5663         } else {
5664             /* Logical (shifted register) */
5665             disas_logic_reg(s, insn);
5666         }
5667         return;
5668     }
5669 
5670     switch (op2) {
5671     case 0x0:
5672         switch (op3) {
5673         case 0x00: /* Add/subtract (with carry) */
5674             disas_adc_sbc(s, insn);
5675             break;
5676 
5677         case 0x01: /* Rotate right into flags */
5678         case 0x21:
5679             disas_rotate_right_into_flags(s, insn);
5680             break;
5681 
5682         case 0x02: /* Evaluate into flags */
5683         case 0x12:
5684         case 0x22:
5685         case 0x32:
5686             disas_evaluate_into_flags(s, insn);
5687             break;
5688 
5689         default:
5690             goto do_unallocated;
5691         }
5692         break;
5693 
5694     case 0x2: /* Conditional compare */
5695         disas_cc(s, insn); /* both imm and reg forms */
5696         break;
5697 
5698     case 0x4: /* Conditional select */
5699         disas_cond_select(s, insn);
5700         break;
5701 
5702     case 0x6: /* Data-processing */
5703         if (op0) {    /* (1 source) */
5704             disas_data_proc_1src(s, insn);
5705         } else {      /* (2 source) */
5706             disas_data_proc_2src(s, insn);
5707         }
5708         break;
5709     case 0x8 ... 0xf: /* (3 source) */
5710         disas_data_proc_3src(s, insn);
5711         break;
5712 
5713     default:
5714     do_unallocated:
5715         unallocated_encoding(s);
5716         break;
5717     }
5718 }
5719 
/*
 * Emit code for a scalar FP comparison of register rn against either
 * register rm or +0.0 (cmp_with_zero).  size is the element width
 * (MO_16/MO_32/MO_64); signal_all_nans selects the signalling (cmpe*)
 * helpers.  The helper deposits the flag value in tcg_flags, which is
 * then written to PSTATE.NZCV via gen_set_nzcv().
 */
static void handle_fp_compare(DisasContext *s, int size,
                              unsigned int rn, unsigned int rm,
                              bool cmp_with_zero, bool signal_all_nans)
{
    TCGv_i64 tcg_flags = tcg_temp_new_i64();
    /* Half precision uses its own FP status field */
    TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    if (size == MO_64) {
        TCGv_i64 tcg_vn, tcg_vm;

        tcg_vn = read_fp_dreg(s, rn);
        if (cmp_with_zero) {
            tcg_vm = tcg_constant_i64(0);
        } else {
            tcg_vm = read_fp_dreg(s, rm);
        }
        if (signal_all_nans) {
            gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        } else {
            gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        }
    } else {
        /* 16- or 32-bit operand: read element 0 of the vector register */
        TCGv_i32 tcg_vn = tcg_temp_new_i32();
        TCGv_i32 tcg_vm = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_vn, rn, 0, size);
        if (cmp_with_zero) {
            tcg_gen_movi_i32(tcg_vm, 0);
        } else {
            read_vec_element_i32(s, tcg_vm, rm, 0, size);
        }

        switch (size) {
        case MO_32:
            if (signal_all_nans) {
                gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
            } else {
                gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
            }
            break;
        case MO_16:
            if (signal_all_nans) {
                gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
            } else {
                gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
            }
            break;
        default:
            g_assert_not_reached();
        }
    }

    gen_set_nzcv(tcg_flags);
}
5774 
5775 /* Floating point compare
5776  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
5777  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5778  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
5779  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5780  */
5781 static void disas_fp_compare(DisasContext *s, uint32_t insn)
5782 {
5783     unsigned int mos, type, rm, op, rn, opc, op2r;
5784     int size;
5785 
5786     mos = extract32(insn, 29, 3);
5787     type = extract32(insn, 22, 2);
5788     rm = extract32(insn, 16, 5);
5789     op = extract32(insn, 14, 2);
5790     rn = extract32(insn, 5, 5);
5791     opc = extract32(insn, 3, 2);
5792     op2r = extract32(insn, 0, 3);
5793 
5794     if (mos || op || op2r) {
5795         unallocated_encoding(s);
5796         return;
5797     }
5798 
5799     switch (type) {
5800     case 0:
5801         size = MO_32;
5802         break;
5803     case 1:
5804         size = MO_64;
5805         break;
5806     case 3:
5807         size = MO_16;
5808         if (dc_isar_feature(aa64_fp16, s)) {
5809             break;
5810         }
5811         /* fallthru */
5812     default:
5813         unallocated_encoding(s);
5814         return;
5815     }
5816 
5817     if (!fp_access_check(s)) {
5818         return;
5819     }
5820 
5821     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
5822 }
5823 
/* Floating point conditional compare
 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
 */
static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, cond, rn, op, nzcv;
    TCGLabel *label_continue = NULL;
    int size;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2);
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    op = extract32(insn, 4, 1);
    nzcv = extract32(insn, 0, 4);

    /* M and S must be zero */
    if (mos) {
        unallocated_encoding(s);
        return;
    }

    switch (type) {
    case 0:
        size = MO_32;
        break;
    case 1:
        size = MO_64;
        break;
    case 3:
        /* Half precision, only valid with FEAT_FP16 */
        size = MO_16;
        if (dc_isar_feature(aa64_fp16, s)) {
            break;
        }
        /* fallthru */
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (cond < 0x0e) { /* not always */
        /*
         * If the condition fails, load NZCV straight from the nzcv
         * immediate (shifted into flag position, bits [31:28]) and
         * branch around the compare; if it holds, fall through to it.
         */
        TCGLabel *label_match = gen_new_label();
        label_continue = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        /* nomatch: */
        gen_set_nzcv(tcg_constant_i64(nzcv << 28));
        tcg_gen_br(label_continue);
        gen_set_label(label_match);
    }

    /* op selects the signalling form of the compare */
    handle_fp_compare(s, size, rn, rm, false, op);

    if (cond < 0x0e) {
        gen_set_label(label_continue);
    }
}
5887 
/* Floating point conditional select
 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+------+-----+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+------+-----+------+------+
 */
static void disas_fp_csel(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, cond, rn, rd;
    TCGv_i64 t_true, t_false;
    DisasCompare64 c;
    MemOp sz;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2);
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    /* M and S must be zero */
    if (mos) {
        unallocated_encoding(s);
        return;
    }

    switch (type) {
    case 0:
        sz = MO_32;
        break;
    case 1:
        sz = MO_64;
        break;
    case 3:
        /* Half precision, only valid with FEAT_FP16 */
        sz = MO_16;
        if (dc_isar_feature(aa64_fp16, s)) {
            break;
        }
        /* fallthru */
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* Zero extend sreg & hreg inputs to 64 bits now.  */
    t_true = tcg_temp_new_i64();
    t_false = tcg_temp_new_i64();
    read_vec_element(s, t_true, rn, 0, sz);
    read_vec_element(s, t_false, rm, 0, sz);

    /* t_true = condition holds ? t_true : t_false */
    a64_test_cc(&c, cond);
    tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
                        t_true, t_false);

    /* Note that sregs & hregs write back zeros to the high bits,
       and we've already done the zero-extension.  */
    write_fp_dreg(s, rd, t_true);
}
5949 
/* Floating-point data-processing (1 source) - half precision */
static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
{
    TCGv_ptr fpst = NULL;
    TCGv_i32 tcg_op = read_fp_hreg(s, rn);
    TCGv_i32 tcg_res = tcg_temp_new_i32();

    switch (opcode) {
    case 0x0: /* FMOV */
        tcg_gen_mov_i32(tcg_res, tcg_op);
        break;
    case 0x1: /* FABS */
        /* Pure bit operation: clear the f16 sign bit; no FP status used */
        tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
        break;
    case 0x2: /* FNEG */
        /* Pure bit operation: flip the f16 sign bit; no FP status used */
        tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
        break;
    case 0x3: /* FSQRT */
        fpst = fpstatus_ptr(FPST_FPCR_F16);
        gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
        break;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
    case 0xa: /* FRINTM */
    case 0xb: /* FRINTZ */
    case 0xc: /* FRINTA */
    {
        TCGv_i32 tcg_rmode;

        /* Install the rounding mode from opcode[2:0], then restore it */
        fpst = fpstatus_ptr(FPST_FPCR_F16);
        tcg_rmode = gen_set_rmode(opcode & 7, fpst);
        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
        gen_restore_rmode(tcg_rmode, fpst);
        break;
    }
    case 0xe: /* FRINTX */
        fpst = fpstatus_ptr(FPST_FPCR_F16);
        gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
        break;
    case 0xf: /* FRINTI */
        /* Rounds using the current FP status rounding mode */
        fpst = fpstatus_ptr(FPST_FPCR_F16);
        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    write_fp_sreg(s, rd, tcg_res);
}
5999 
/* Floating-point data-processing (1 source) - single precision */
static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
{
    /*
     * Ops that need an FP status pointer set gen_fpst (and, if the
     * instruction implies a rounding mode, rmode) and share the common
     * tail; the simple moves/bit-ops finish inline and jump to done.
     */
    void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
    TCGv_i32 tcg_op, tcg_res;
    TCGv_ptr fpst;
    int rmode = -1;  /* >= 0: explicit rounding mode to install */

    tcg_op = read_fp_sreg(s, rn);
    tcg_res = tcg_temp_new_i32();

    switch (opcode) {
    case 0x0: /* FMOV */
        tcg_gen_mov_i32(tcg_res, tcg_op);
        goto done;
    case 0x1: /* FABS */
        gen_helper_vfp_abss(tcg_res, tcg_op);
        goto done;
    case 0x2: /* FNEG */
        gen_helper_vfp_negs(tcg_res, tcg_op);
        goto done;
    case 0x3: /* FSQRT */
        gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
        goto done;
    case 0x6: /* BFCVT */
        gen_fpst = gen_helper_bfcvt;
        break;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
    case 0xa: /* FRINTM */
    case 0xb: /* FRINTZ */
    case 0xc: /* FRINTA */
        /* Rounding mode is encoded in opcode[2:0] */
        rmode = opcode & 7;
        gen_fpst = gen_helper_rints;
        break;
    case 0xe: /* FRINTX */
        gen_fpst = gen_helper_rints_exact;
        break;
    case 0xf: /* FRINTI */
        /* Uses the current FP status rounding mode */
        gen_fpst = gen_helper_rints;
        break;
    case 0x10: /* FRINT32Z */
        rmode = FPROUNDING_ZERO;
        gen_fpst = gen_helper_frint32_s;
        break;
    case 0x11: /* FRINT32X */
        gen_fpst = gen_helper_frint32_s;
        break;
    case 0x12: /* FRINT64Z */
        rmode = FPROUNDING_ZERO;
        gen_fpst = gen_helper_frint64_s;
        break;
    case 0x13: /* FRINT64X */
        gen_fpst = gen_helper_frint64_s;
        break;
    default:
        g_assert_not_reached();
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    if (rmode >= 0) {
        /* Temporarily install the explicit rounding mode */
        TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
        gen_fpst(tcg_res, tcg_op, fpst);
        gen_restore_rmode(tcg_rmode, fpst);
    } else {
        gen_fpst(tcg_res, tcg_op, fpst);
    }

 done:
    write_fp_sreg(s, rd, tcg_res);
}
6071 
/* Floating-point data-processing (1 source) - double precision */
static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
{
    /* Same structure as handle_fp_1src_single, for 64-bit elements */
    void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
    TCGv_i64 tcg_op, tcg_res;
    TCGv_ptr fpst;
    int rmode = -1;  /* >= 0: explicit rounding mode to install */

    switch (opcode) {
    case 0x0: /* FMOV */
        /* Plain data move, done as a gvec move; no FP status needed */
        gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
        return;
    }

    tcg_op = read_fp_dreg(s, rn);
    tcg_res = tcg_temp_new_i64();

    switch (opcode) {
    case 0x1: /* FABS */
        gen_helper_vfp_absd(tcg_res, tcg_op);
        goto done;
    case 0x2: /* FNEG */
        gen_helper_vfp_negd(tcg_res, tcg_op);
        goto done;
    case 0x3: /* FSQRT */
        gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
        goto done;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
    case 0xa: /* FRINTM */
    case 0xb: /* FRINTZ */
    case 0xc: /* FRINTA */
        /* Rounding mode is encoded in opcode[2:0] */
        rmode = opcode & 7;
        gen_fpst = gen_helper_rintd;
        break;
    case 0xe: /* FRINTX */
        gen_fpst = gen_helper_rintd_exact;
        break;
    case 0xf: /* FRINTI */
        /* Uses the current FP status rounding mode */
        gen_fpst = gen_helper_rintd;
        break;
    case 0x10: /* FRINT32Z */
        rmode = FPROUNDING_ZERO;
        gen_fpst = gen_helper_frint32_d;
        break;
    case 0x11: /* FRINT32X */
        gen_fpst = gen_helper_frint32_d;
        break;
    case 0x12: /* FRINT64Z */
        rmode = FPROUNDING_ZERO;
        gen_fpst = gen_helper_frint64_d;
        break;
    case 0x13: /* FRINT64X */
        gen_fpst = gen_helper_frint64_d;
        break;
    default:
        g_assert_not_reached();
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    if (rmode >= 0) {
        /* Temporarily install the explicit rounding mode */
        TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
        gen_fpst(tcg_res, tcg_op, fpst);
        gen_restore_rmode(tcg_rmode, fpst);
    } else {
        gen_fpst(tcg_res, tcg_op, fpst);
    }

 done:
    write_fp_dreg(s, rd, tcg_res);
}
6143 
/*
 * FCVT between half, single and double precision.
 * ntype encodes the source type and dtype the destination type
 * (0 = single, 1 = double, 3 = half); the caller (disas_fp_1src) has
 * already rejected ntype == dtype and the invalid encoding 2.
 */
static void handle_fp_fcvt(DisasContext *s, int opcode,
                           int rd, int rn, int dtype, int ntype)
{
    switch (ntype) {
    case 0x0:
    {
        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
        if (dtype == 1) {
            /* Single to double */
            TCGv_i64 tcg_rd = tcg_temp_new_i64();
            gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
            write_fp_dreg(s, rd, tcg_rd);
        } else {
            /* Single to half */
            TCGv_i32 tcg_rd = tcg_temp_new_i32();
            /* AHP: alternative half-precision flag */
            TCGv_i32 ahp = get_ahp_flag();
            TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);

            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
            write_fp_sreg(s, rd, tcg_rd);
        }
        break;
    }
    case 0x1:
    {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i32 tcg_rd = tcg_temp_new_i32();
        if (dtype == 0) {
            /* Double to single */
            gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
        } else {
            TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
            TCGv_i32 ahp = get_ahp_flag();
            /* Double to half */
            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
        }
        write_fp_sreg(s, rd, tcg_rd);
        break;
    }
    case 0x3:
    {
        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
        TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
        TCGv_i32 tcg_ahp = get_ahp_flag();
        /* Only the low 16 bits of the source register are significant */
        tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
        if (dtype == 0) {
            /* Half to single */
            TCGv_i32 tcg_rd = tcg_temp_new_i32();
            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
            write_fp_sreg(s, rd, tcg_rd);
        } else {
            /* Half to double */
            TCGv_i64 tcg_rd = tcg_temp_new_i64();
            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
            write_fp_dreg(s, rd, tcg_rd);
        }
        break;
    }
    default:
        g_assert_not_reached();
    }
}
6208 
/* Floating point data-processing (1 source)
 *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
 * +---+---+---+-----------+------+---+--------+-----------+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+--------+-----------+------+------+
 */
static void disas_fp_1src(DisasContext *s, uint32_t insn)
{
    int mos = extract32(insn, 29, 3);
    int type = extract32(insn, 22, 2);
    int opcode = extract32(insn, 15, 6);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    /* M and S must be zero */
    if (mos) {
        goto do_unallocated;
    }

    switch (opcode) {
    case 0x4: case 0x5: case 0x7:
    {
        /* FCVT between half, single and double precision */
        int dtype = extract32(opcode, 0, 2);
        /* type 2 is invalid, and source type must differ from dest */
        if (type == 2 || dtype == type) {
            goto do_unallocated;
        }
        if (!fp_access_check(s)) {
            return;
        }

        handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
        break;
    }

    case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
        /* Single/double only, and only with the aa64_frint feature */
        if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
            goto do_unallocated;
        }
        /* fall through */
    case 0x0 ... 0x3:
    case 0x8 ... 0xc:
    case 0xe ... 0xf:
        /* 32-to-32 and 64-to-64 ops */
        switch (type) {
        case 0:
            if (!fp_access_check(s)) {
                return;
            }
            handle_fp_1src_single(s, opcode, rd, rn);
            break;
        case 1:
            if (!fp_access_check(s)) {
                return;
            }
            handle_fp_1src_double(s, opcode, rd, rn);
            break;
        case 3:
            /* Half precision requires FEAT_FP16 */
            if (!dc_isar_feature(aa64_fp16, s)) {
                goto do_unallocated;
            }

            if (!fp_access_check(s)) {
                return;
            }
            handle_fp_1src_half(s, opcode, rd, rn);
            break;
        default:
            goto do_unallocated;
        }
        break;

    case 0x6:
        switch (type) {
        case 1: /* BFCVT */
            if (!dc_isar_feature(aa64_bf16, s)) {
                goto do_unallocated;
            }
            if (!fp_access_check(s)) {
                return;
            }
            /* BFCVT takes a single-precision input */
            handle_fp_1src_single(s, opcode, rd, rn);
            break;
        default:
            goto do_unallocated;
        }
        break;

    default:
    do_unallocated:
        unallocated_encoding(s);
        break;
    }
}
6302 
6303 /* Floating-point data-processing (2 source) - single precision */
6304 static void handle_fp_2src_single(DisasContext *s, int opcode,
6305                                   int rd, int rn, int rm)
6306 {
6307     TCGv_i32 tcg_op1;
6308     TCGv_i32 tcg_op2;
6309     TCGv_i32 tcg_res;
6310     TCGv_ptr fpst;
6311 
6312     tcg_res = tcg_temp_new_i32();
6313     fpst = fpstatus_ptr(FPST_FPCR);
6314     tcg_op1 = read_fp_sreg(s, rn);
6315     tcg_op2 = read_fp_sreg(s, rm);
6316 
6317     switch (opcode) {
6318     case 0x0: /* FMUL */
6319         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6320         break;
6321     case 0x1: /* FDIV */
6322         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
6323         break;
6324     case 0x2: /* FADD */
6325         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6326         break;
6327     case 0x3: /* FSUB */
6328         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
6329         break;
6330     case 0x4: /* FMAX */
6331         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6332         break;
6333     case 0x5: /* FMIN */
6334         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6335         break;
6336     case 0x6: /* FMAXNM */
6337         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6338         break;
6339     case 0x7: /* FMINNM */
6340         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6341         break;
6342     case 0x8: /* FNMUL */
6343         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6344         gen_helper_vfp_negs(tcg_res, tcg_res);
6345         break;
6346     }
6347 
6348     write_fp_sreg(s, rd, tcg_res);
6349 }
6350 
6351 /* Floating-point data-processing (2 source) - double precision */
6352 static void handle_fp_2src_double(DisasContext *s, int opcode,
6353                                   int rd, int rn, int rm)
6354 {
6355     TCGv_i64 tcg_op1;
6356     TCGv_i64 tcg_op2;
6357     TCGv_i64 tcg_res;
6358     TCGv_ptr fpst;
6359 
6360     tcg_res = tcg_temp_new_i64();
6361     fpst = fpstatus_ptr(FPST_FPCR);
6362     tcg_op1 = read_fp_dreg(s, rn);
6363     tcg_op2 = read_fp_dreg(s, rm);
6364 
6365     switch (opcode) {
6366     case 0x0: /* FMUL */
6367         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6368         break;
6369     case 0x1: /* FDIV */
6370         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
6371         break;
6372     case 0x2: /* FADD */
6373         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6374         break;
6375     case 0x3: /* FSUB */
6376         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
6377         break;
6378     case 0x4: /* FMAX */
6379         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6380         break;
6381     case 0x5: /* FMIN */
6382         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6383         break;
6384     case 0x6: /* FMAXNM */
6385         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6386         break;
6387     case 0x7: /* FMINNM */
6388         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6389         break;
6390     case 0x8: /* FNMUL */
6391         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6392         gen_helper_vfp_negd(tcg_res, tcg_res);
6393         break;
6394     }
6395 
6396     write_fp_dreg(s, rd, tcg_res);
6397 }
6398 
6399 /* Floating-point data-processing (2 source) - half precision */
6400 static void handle_fp_2src_half(DisasContext *s, int opcode,
6401                                 int rd, int rn, int rm)
6402 {
6403     TCGv_i32 tcg_op1;
6404     TCGv_i32 tcg_op2;
6405     TCGv_i32 tcg_res;
6406     TCGv_ptr fpst;
6407 
6408     tcg_res = tcg_temp_new_i32();
6409     fpst = fpstatus_ptr(FPST_FPCR_F16);
6410     tcg_op1 = read_fp_hreg(s, rn);
6411     tcg_op2 = read_fp_hreg(s, rm);
6412 
6413     switch (opcode) {
6414     case 0x0: /* FMUL */
6415         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6416         break;
6417     case 0x1: /* FDIV */
6418         gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
6419         break;
6420     case 0x2: /* FADD */
6421         gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6422         break;
6423     case 0x3: /* FSUB */
6424         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6425         break;
6426     case 0x4: /* FMAX */
6427         gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6428         break;
6429     case 0x5: /* FMIN */
6430         gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6431         break;
6432     case 0x6: /* FMAXNM */
6433         gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6434         break;
6435     case 0x7: /* FMINNM */
6436         gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6437         break;
6438     case 0x8: /* FNMUL */
6439         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6440         tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6441         break;
6442     default:
6443         g_assert_not_reached();
6444     }
6445 
6446     write_fp_sreg(s, rd, tcg_res);
6447 }
6448 
6449 /* Floating point data-processing (2 source)
6450  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6451  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6452  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
6453  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6454  */
6455 static void disas_fp_2src(DisasContext *s, uint32_t insn)
6456 {
6457     int mos = extract32(insn, 29, 3);
6458     int type = extract32(insn, 22, 2);
6459     int rd = extract32(insn, 0, 5);
6460     int rn = extract32(insn, 5, 5);
6461     int rm = extract32(insn, 16, 5);
6462     int opcode = extract32(insn, 12, 4);
6463 
6464     if (opcode > 8 || mos) {
6465         unallocated_encoding(s);
6466         return;
6467     }
6468 
6469     switch (type) {
6470     case 0:
6471         if (!fp_access_check(s)) {
6472             return;
6473         }
6474         handle_fp_2src_single(s, opcode, rd, rn, rm);
6475         break;
6476     case 1:
6477         if (!fp_access_check(s)) {
6478             return;
6479         }
6480         handle_fp_2src_double(s, opcode, rd, rn, rm);
6481         break;
6482     case 3:
6483         if (!dc_isar_feature(aa64_fp16, s)) {
6484             unallocated_encoding(s);
6485             return;
6486         }
6487         if (!fp_access_check(s)) {
6488             return;
6489         }
6490         handle_fp_2src_half(s, opcode, rd, rn, rm);
6491         break;
6492     default:
6493         unallocated_encoding(s);
6494     }
6495 }
6496 
6497 /* Floating-point data-processing (3 source) - single precision */
6498 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6499                                   int rd, int rn, int rm, int ra)
6500 {
6501     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6502     TCGv_i32 tcg_res = tcg_temp_new_i32();
6503     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6504 
6505     tcg_op1 = read_fp_sreg(s, rn);
6506     tcg_op2 = read_fp_sreg(s, rm);
6507     tcg_op3 = read_fp_sreg(s, ra);
6508 
6509     /* These are fused multiply-add, and must be done as one
6510      * floating point operation with no rounding between the
6511      * multiplication and addition steps.
6512      * NB that doing the negations here as separate steps is
6513      * correct : an input NaN should come out with its sign bit
6514      * flipped if it is a negated-input.
6515      */
6516     if (o1 == true) {
6517         gen_helper_vfp_negs(tcg_op3, tcg_op3);
6518     }
6519 
6520     if (o0 != o1) {
6521         gen_helper_vfp_negs(tcg_op1, tcg_op1);
6522     }
6523 
6524     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6525 
6526     write_fp_sreg(s, rd, tcg_res);
6527 }
6528 
6529 /* Floating-point data-processing (3 source) - double precision */
6530 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6531                                   int rd, int rn, int rm, int ra)
6532 {
6533     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6534     TCGv_i64 tcg_res = tcg_temp_new_i64();
6535     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6536 
6537     tcg_op1 = read_fp_dreg(s, rn);
6538     tcg_op2 = read_fp_dreg(s, rm);
6539     tcg_op3 = read_fp_dreg(s, ra);
6540 
6541     /* These are fused multiply-add, and must be done as one
6542      * floating point operation with no rounding between the
6543      * multiplication and addition steps.
6544      * NB that doing the negations here as separate steps is
6545      * correct : an input NaN should come out with its sign bit
6546      * flipped if it is a negated-input.
6547      */
6548     if (o1 == true) {
6549         gen_helper_vfp_negd(tcg_op3, tcg_op3);
6550     }
6551 
6552     if (o0 != o1) {
6553         gen_helper_vfp_negd(tcg_op1, tcg_op1);
6554     }
6555 
6556     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6557 
6558     write_fp_dreg(s, rd, tcg_res);
6559 }
6560 
6561 /* Floating-point data-processing (3 source) - half precision */
6562 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6563                                 int rd, int rn, int rm, int ra)
6564 {
6565     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6566     TCGv_i32 tcg_res = tcg_temp_new_i32();
6567     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
6568 
6569     tcg_op1 = read_fp_hreg(s, rn);
6570     tcg_op2 = read_fp_hreg(s, rm);
6571     tcg_op3 = read_fp_hreg(s, ra);
6572 
6573     /* These are fused multiply-add, and must be done as one
6574      * floating point operation with no rounding between the
6575      * multiplication and addition steps.
6576      * NB that doing the negations here as separate steps is
6577      * correct : an input NaN should come out with its sign bit
6578      * flipped if it is a negated-input.
6579      */
6580     if (o1 == true) {
6581         tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6582     }
6583 
6584     if (o0 != o1) {
6585         tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6586     }
6587 
6588     gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6589 
6590     write_fp_sreg(s, rd, tcg_res);
6591 }
6592 
6593 /* Floating point data-processing (3 source)
6594  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
6595  * +---+---+---+-----------+------+----+------+----+------+------+------+
6596  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6597  * +---+---+---+-----------+------+----+------+----+------+------+------+
6598  */
6599 static void disas_fp_3src(DisasContext *s, uint32_t insn)
6600 {
6601     int mos = extract32(insn, 29, 3);
6602     int type = extract32(insn, 22, 2);
6603     int rd = extract32(insn, 0, 5);
6604     int rn = extract32(insn, 5, 5);
6605     int ra = extract32(insn, 10, 5);
6606     int rm = extract32(insn, 16, 5);
6607     bool o0 = extract32(insn, 15, 1);
6608     bool o1 = extract32(insn, 21, 1);
6609 
6610     if (mos) {
6611         unallocated_encoding(s);
6612         return;
6613     }
6614 
6615     switch (type) {
6616     case 0:
6617         if (!fp_access_check(s)) {
6618             return;
6619         }
6620         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6621         break;
6622     case 1:
6623         if (!fp_access_check(s)) {
6624             return;
6625         }
6626         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6627         break;
6628     case 3:
6629         if (!dc_isar_feature(aa64_fp16, s)) {
6630             unallocated_encoding(s);
6631             return;
6632         }
6633         if (!fp_access_check(s)) {
6634             return;
6635         }
6636         handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6637         break;
6638     default:
6639         unallocated_encoding(s);
6640     }
6641 }
6642 
6643 /* Floating point immediate
6644  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
6645  * +---+---+---+-----------+------+---+------------+-------+------+------+
6646  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
6647  * +---+---+---+-----------+------+---+------------+-------+------+------+
6648  */
6649 static void disas_fp_imm(DisasContext *s, uint32_t insn)
6650 {
6651     int rd = extract32(insn, 0, 5);
6652     int imm5 = extract32(insn, 5, 5);
6653     int imm8 = extract32(insn, 13, 8);
6654     int type = extract32(insn, 22, 2);
6655     int mos = extract32(insn, 29, 3);
6656     uint64_t imm;
6657     MemOp sz;
6658 
6659     if (mos || imm5) {
6660         unallocated_encoding(s);
6661         return;
6662     }
6663 
6664     switch (type) {
6665     case 0:
6666         sz = MO_32;
6667         break;
6668     case 1:
6669         sz = MO_64;
6670         break;
6671     case 3:
6672         sz = MO_16;
6673         if (dc_isar_feature(aa64_fp16, s)) {
6674             break;
6675         }
6676         /* fallthru */
6677     default:
6678         unallocated_encoding(s);
6679         return;
6680     }
6681 
6682     if (!fp_access_check(s)) {
6683         return;
6684     }
6685 
6686     imm = vfp_expand_imm(sz, imm8);
6687     write_fp_dreg(s, rd, tcg_constant_i64(imm));
6688 }
6689 
/* Handle floating point <=> fixed point conversions. Note that we can
 * also deal with fp <=> integer conversions as a special case (scale == 64)
 * OPTME: consider handling that special case specially or at least skipping
 * the call to scalbn in the helpers for zero shifts.
 */
/*
 * rd, rn: destination and source register numbers
 * opcode: bit 0 clear selects the signed variant (see is_signed below);
 *     bit 2 set selects FCVTA[US] (ties-away rounding), fp->int only
 * itof: true for int->fp (SCVTF/UCVTF), false for fp->int
 * rmode: rounding mode used in the fp->int direction
 * scale: (64 - scale) is passed to the helpers as the scalbn shift,
 *     so scale == 64 yields a plain fp <=> integer conversion
 * sf: true for a 64-bit general register, false for 32-bit
 * type: 0 = float32, 1 = float64, 3 = float16
 */
static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
                           bool itof, int rmode, int scale, int sf, int type)
{
    bool is_signed = !(opcode & 1);
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_shift, tcg_single;
    TCGv_i64 tcg_double;

    /* Half precision uses its own FP status (FPST_FPCR_F16) */
    tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);

    tcg_shift = tcg_constant_i32(64 - scale);

    if (itof) {
        /* Integer -> FP direction */
        TCGv_i64 tcg_int = cpu_reg(s, rn);
        if (!sf) {
            /* 32-bit source: widen to 64 bits before the conversion */
            TCGv_i64 tcg_extend = tcg_temp_new_i64();

            if (is_signed) {
                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
            } else {
                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
            }

            tcg_int = tcg_extend;
        }

        switch (type) {
        case 1: /* float64 */
            tcg_double = tcg_temp_new_i64();
            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_dreg(s, rd, tcg_double);
            break;

        case 0: /* float32 */
            tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_sreg(s, rd, tcg_single);
            break;

        case 3: /* float16 */
            tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtoh(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtoh(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_sreg(s, rd, tcg_single);
            break;

        default:
            g_assert_not_reached();
        }
    } else {
        /* FP -> integer direction */
        TCGv_i64 tcg_int = cpu_reg(s, rd);
        TCGv_i32 tcg_rmode;

        if (extract32(opcode, 2, 1)) {
            /* There are too many rounding modes to all fit into rmode,
             * so FCVTA[US] is a special case.
             */
            rmode = FPROUNDING_TIEAWAY;
        }

        /* Temporarily switch the FP status to the requested rounding mode */
        tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);

        switch (type) {
        case 1: /* float64 */
            tcg_double = read_fp_dreg(s, rn);
            if (is_signed) {
                if (!sf) {
                    gen_helper_vfp_tosld(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                if (!sf) {
                    gen_helper_vfp_tould(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            }
            if (!sf) {
                /* 32-bit result: clear the top half of the X register */
                tcg_gen_ext32u_i64(tcg_int, tcg_int);
            }
            break;

        case 0: /* float32 */
            tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                /* 32-bit helpers return i32; zero-extend into the Xreg */
                TCGv_i32 tcg_dest = tcg_temp_new_i32();
                if (is_signed) {
                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
            }
            break;

        case 3: /* float16 */
            tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqh(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqh(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                /* 32-bit helpers return i32; zero-extend into the Xreg */
                TCGv_i32 tcg_dest = tcg_temp_new_i32();
                if (is_signed) {
                    gen_helper_vfp_toslh(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_toulh(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
            }
            break;

        default:
            g_assert_not_reached();
        }

        /* Restore the rounding mode that was in effect on entry */
        gen_restore_rmode(tcg_rmode, tcg_fpstatus);
    }
}
6852 
6853 /* Floating point <-> fixed point conversions
6854  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
6855  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6856  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
6857  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6858  */
6859 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
6860 {
6861     int rd = extract32(insn, 0, 5);
6862     int rn = extract32(insn, 5, 5);
6863     int scale = extract32(insn, 10, 6);
6864     int opcode = extract32(insn, 16, 3);
6865     int rmode = extract32(insn, 19, 2);
6866     int type = extract32(insn, 22, 2);
6867     bool sbit = extract32(insn, 29, 1);
6868     bool sf = extract32(insn, 31, 1);
6869     bool itof;
6870 
6871     if (sbit || (!sf && scale < 32)) {
6872         unallocated_encoding(s);
6873         return;
6874     }
6875 
6876     switch (type) {
6877     case 0: /* float32 */
6878     case 1: /* float64 */
6879         break;
6880     case 3: /* float16 */
6881         if (dc_isar_feature(aa64_fp16, s)) {
6882             break;
6883         }
6884         /* fallthru */
6885     default:
6886         unallocated_encoding(s);
6887         return;
6888     }
6889 
6890     switch ((rmode << 3) | opcode) {
6891     case 0x2: /* SCVTF */
6892     case 0x3: /* UCVTF */
6893         itof = true;
6894         break;
6895     case 0x18: /* FCVTZS */
6896     case 0x19: /* FCVTZU */
6897         itof = false;
6898         break;
6899     default:
6900         unallocated_encoding(s);
6901         return;
6902     }
6903 
6904     if (!fp_access_check(s)) {
6905         return;
6906     }
6907 
6908     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
6909 }
6910 
/*
 * rd, rn: destination and source register numbers
 * type: 0 = 32-bit, 1 = 64-bit, 2 = top 64 bits of the 128-bit Q reg,
 *       3 = 16-bit (half); FP16 availability is checked by the caller
 * itof: true moves a general register into an FP register,
 *       false moves an FP register into a general register
 */
static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
{
    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
     * without conversion.
     */

    if (itof) {
        /* General register -> FP register */
        TCGv_i64 tcg_rn = cpu_reg(s, rn);
        TCGv_i64 tmp;

        switch (type) {
        case 0:
            /* 32 bit: zero-extend the W register value */
            tmp = tcg_temp_new_i64();
            tcg_gen_ext32u_i64(tmp, tcg_rn);
            write_fp_dreg(s, rd, tmp);
            break;
        case 1:
            /* 64 bit */
            write_fp_dreg(s, rd, tcg_rn);
            break;
        case 2:
            /* 64 bit to top half. */
            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
            clear_vec_high(s, true, rd);
            break;
        case 3:
            /* 16 bit: only the low 16 bits of the source are used */
            tmp = tcg_temp_new_i64();
            tcg_gen_ext16u_i64(tmp, tcg_rn);
            write_fp_dreg(s, rd, tmp);
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        /* FP register -> general register */
        TCGv_i64 tcg_rd = cpu_reg(s, rd);

        switch (type) {
        case 0:
            /* 32 bit */
            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
            break;
        case 1:
            /* 64 bit */
            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
            break;
        case 2:
            /* 64 bits from top half */
            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
            break;
        case 3:
            /* 16 bit: zero-extended into the X register */
            tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
            break;
        default:
            g_assert_not_reached();
        }
    }
}
6971 
6972 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
6973 {
6974     TCGv_i64 t = read_fp_dreg(s, rn);
6975     TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
6976 
6977     gen_helper_fjcvtzs(t, t, fpstatus);
6978 
6979     tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
6980     tcg_gen_extrh_i64_i32(cpu_ZF, t);
6981     tcg_gen_movi_i32(cpu_CF, 0);
6982     tcg_gen_movi_i32(cpu_NF, 0);
6983     tcg_gen_movi_i32(cpu_VF, 0);
6984 }
6985 
/* Floating point <-> integer conversions
 *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
 */
static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 16, 3);
    int rmode = extract32(insn, 19, 2);
    int type = extract32(insn, 22, 2);
    bool sbit = extract32(insn, 29, 1);
    bool sf = extract32(insn, 31, 1);
    bool itof = false;

    if (sbit) {
        goto do_unallocated;
    }

    switch (opcode) {
    case 2: /* SCVTF */
    case 3: /* UCVTF */
        itof = true;
        /* fallthru */
    case 4: /* FCVTAS */
    case 5: /* FCVTAU */
        /* SCVTF/UCVTF/FCVTA[US] require rmode == 0 */
        if (rmode != 0) {
            goto do_unallocated;
        }
        /* fallthru */
    case 0: /* FCVT[NPMZ]S */
    case 1: /* FCVT[NPMZ]U */
        switch (type) {
        case 0: /* float32 */
        case 1: /* float64 */
            break;
        case 3: /* float16 */
            if (!dc_isar_feature(aa64_fp16, s)) {
                goto do_unallocated;
            }
            break;
        default:
            goto do_unallocated;
        }
        if (!fp_access_check(s)) {
            return;
        }
        /* scale == 64 requests the plain integer conversion */
        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
        break;

    default:
        /* FMOV and FJCVTZS: dispatch on the packed key
         * sf:type:rmode:opcode ({1,2,2,3} bits respectively).
         */
        switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
        case 0b01100110: /* FMOV half <-> 32-bit int */
        case 0b01100111:
        case 0b11100110: /* FMOV half <-> 64-bit int */
        case 0b11100111:
            if (!dc_isar_feature(aa64_fp16, s)) {
                goto do_unallocated;
            }
            /* fallthru */
        case 0b00000110: /* FMOV 32-bit */
        case 0b00000111:
        case 0b10100110: /* FMOV 64-bit */
        case 0b10100111:
        case 0b11001110: /* FMOV top half of 128-bit */
        case 0b11001111:
            if (!fp_access_check(s)) {
                return;
            }
            /* opcode bit 0 set means the int -> fp direction */
            itof = opcode & 1;
            handle_fmov(s, rd, rn, type, itof);
            break;

        case 0b00111110: /* FJCVTZS */
            if (!dc_isar_feature(aa64_jscvt, s)) {
                goto do_unallocated;
            } else if (fp_access_check(s)) {
                handle_fjcvtzs(s, rd, rn);
            }
            break;

        default:
        do_unallocated:
            unallocated_encoding(s);
            return;
        }
        break;
    }
}
7077 
/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
 *   31  30  29 28     25 24                          0
 * +---+---+---+---------+-----------------------------+
 * |   | 0 |   | 1 1 1 1 |                             |
 * +---+---+---+---------+-----------------------------+
 *
 * Dispatch: insn[24] set => 3-source group; otherwise insn[21] clear
 * => fixed-point conversions; otherwise select on insn[11:10] and,
 * for the 00 case, on the pattern of insn[15:12].
 */
static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
{
    if (extract32(insn, 24, 1)) {
        /* Floating point data-processing (3 source) */
        disas_fp_3src(s, insn);
    } else if (extract32(insn, 21, 1) == 0) {
        /* Floating point to fixed point conversions */
        disas_fp_fixed_conv(s, insn);
    } else {
        switch (extract32(insn, 10, 2)) {
        case 1:
            /* Floating point conditional compare */
            disas_fp_ccomp(s, insn);
            break;
        case 2:
            /* Floating point data-processing (2 source) */
            disas_fp_2src(s, insn);
            break;
        case 3:
            /* Floating point conditional select */
            disas_fp_csel(s, insn);
            break;
        case 0:
            /* Classify by the position of the lowest set bit of
             * insn[15:12]; ctz32(0) == 32, so the all-zeroes pattern
             * lands on the default label below.
             */
            switch (ctz32(extract32(insn, 12, 4))) {
            case 0: /* [15:12] == xxx1 */
                /* Floating point immediate */
                disas_fp_imm(s, insn);
                break;
            case 1: /* [15:12] == xx10 */
                /* Floating point compare */
                disas_fp_compare(s, insn);
                break;
            case 2: /* [15:12] == x100 */
                /* Floating point data-processing (1 source) */
                disas_fp_1src(s, insn);
                break;
            case 3: /* [15:12] == 1000 */
                unallocated_encoding(s);
                break;
            default: /* [15:12] == 0000 */
                /* Floating point <-> integer conversions */
                disas_fp_int_conv(s, insn);
                break;
            }
            break;
        }
    }
}
7132 
7133 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
7134                      int pos)
7135 {
7136     /* Extract 64 bits from the middle of two concatenated 64 bit
7137      * vector register slices left:right. The extracted bits start
7138      * at 'pos' bits into the right (least significant) side.
7139      * We return the result in tcg_right, and guarantee not to
7140      * trash tcg_left.
7141      */
7142     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7143     assert(pos > 0 && pos < 64);
7144 
7145     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
7146     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
7147     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
7148 }
7149 
/* EXT
 *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
 * +---+---+-------------+-----+---+------+---+------+---+------+------+
 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
 * +---+---+-------------+-----+---+------+---+------+---+------+------+
 */
static void disas_simd_ext(DisasContext *s, uint32_t insn)
{
    int is_q = extract32(insn, 30, 1);
    int op2 = extract32(insn, 22, 2);
    int imm4 = extract32(insn, 11, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int pos = imm4 << 3; /* byte offset converted to a bit offset */
    TCGv_i64 tcg_resl, tcg_resh;

    /* For the 64-bit variant imm4<3> must be zero (offset < 8 bytes) */
    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_resh = tcg_temp_new_i64();
    tcg_resl = tcg_temp_new_i64();

    /* Vd gets bits starting at pos bits into Vm:Vn. This is
     * either extracting 128 bits from a 128:128 concatenation, or
     * extracting 64 bits from a 64:64 concatenation.
     */
    if (!is_q) {
        read_vec_element(s, tcg_resl, rn, 0, MO_64);
        if (pos != 0) {
            read_vec_element(s, tcg_resh, rm, 0, MO_64);
            do_ext64(s, tcg_resh, tcg_resl, pos);
        }
    } else {
        TCGv_i64 tcg_hh;
        /* The four 64-bit elements of Vm:Vn, lowest first */
        typedef struct {
            int reg; /* vector register number */
            int elt; /* 64-bit element index within that register */
        } EltPosns;
        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
        EltPosns *elt = eltposns;

        /* Step past a whole 64-bit element if the offset covers one */
        if (pos >= 64) {
            elt++;
            pos -= 64;
        }

        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
        elt++;
        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
        elt++;
        if (pos != 0) {
            /* Shift in bits from the next-higher 64-bit element */
            do_ext64(s, tcg_resh, tcg_resl, pos);
            tcg_hh = tcg_temp_new_i64();
            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
            do_ext64(s, tcg_hh, tcg_resh, pos);
        }
    }

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    if (is_q) {
        write_vec_element(s, tcg_resh, rd, 1, MO_64);
    }
    clear_vec_high(s, is_q, rd);
}
7221 
7222 /* TBL/TBX
7223  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
7224  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7225  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
7226  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7227  */
7228 static void disas_simd_tb(DisasContext *s, uint32_t insn)
7229 {
7230     int op2 = extract32(insn, 22, 2);
7231     int is_q = extract32(insn, 30, 1);
7232     int rm = extract32(insn, 16, 5);
7233     int rn = extract32(insn, 5, 5);
7234     int rd = extract32(insn, 0, 5);
7235     int is_tbx = extract32(insn, 12, 1);
7236     int len = (extract32(insn, 13, 2) + 1) * 16;
7237 
7238     if (op2 != 0) {
7239         unallocated_encoding(s);
7240         return;
7241     }
7242 
7243     if (!fp_access_check(s)) {
7244         return;
7245     }
7246 
7247     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
7248                        vec_full_reg_offset(s, rm), cpu_env,
7249                        is_q ? 16 : 8, vec_full_reg_size(s),
7250                        (len << 6) | (is_tbx << 5) | rn,
7251                        gen_helper_simd_tblx);
7252 }
7253 
7254 /* ZIP/UZP/TRN
7255  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
7256  * +---+---+-------------+------+---+------+---+------------------+------+
7257  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
7258  * +---+---+-------------+------+---+------+---+------------------+------+
7259  */
/* Decode and emit ZIP1/2, UZP1/2, TRN1/2: element permutations of the
 * Rn:Rm register pair written into Rd.
 */
static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
     * bit 2 indicates 1 vs 2 variant of the insn.
     */
    int opcode = extract32(insn, 12, 2);
    bool part = extract32(insn, 14, 1);
    bool is_q = extract32(insn, 30, 1);
    int esize = 8 << size;
    int i;
    int datasize = is_q ? 128 : 64;
    int elements = datasize / esize;
    /* tcg_res[0]/[1] accumulate the low/high 64-bit halves of the result;
     * the high half is only used for the 128-bit (Q=1) form.
     */
    TCGv_i64 tcg_res[2], tcg_ele;

    /* opcode 0 is unallocated; 64-bit elements require the Q form */
    if (opcode == 0 || (size == 3 && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_res[0] = tcg_temp_new_i64();
    tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
    tcg_ele = tcg_temp_new_i64();

    /* For each destination element, fetch the appropriate source element */
    for (i = 0; i < elements; i++) {
        int o, w;

        switch (opcode) {
        case 1: /* UZP1/2 */
        {
            /* Take the even (UZP1, part=0) or odd (UZP2, part=1)
             * numbered elements of the concatenated Rn:Rm pair.
             */
            int midpoint = elements / 2;
            if (i < midpoint) {
                read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
            } else {
                read_vec_element(s, tcg_ele, rm,
                                 2 * (i - midpoint) + part, size);
            }
            break;
        }
        case 2: /* TRN1/2 */
            /* Interleave even (TRN1) or odd (TRN2) element pairs
             * from Rn and Rm.
             */
            if (i & 1) {
                read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
            } else {
                read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
            }
            break;
        case 3: /* ZIP1/2 */
        {
            /* Interleave elements starting from the lower (ZIP1) or
             * upper (ZIP2) half of each source register.
             */
            int base = part * elements / 2;
            if (i & 1) {
                read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
            } else {
                read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
            }
            break;
        }
        default:
            g_assert_not_reached();
        }

        /* Deposit the element at bit offset o within result word w */
        w = (i * esize) / 64;
        o = (i * esize) % 64;
        if (o == 0) {
            tcg_gen_mov_i64(tcg_res[w], tcg_ele);
        } else {
            tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
            tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
        }
    }

    for (i = 0; i <= is_q; ++i) {
        write_vec_element(s, tcg_res[i], rd, i, MO_64);
    }
    clear_vec_high(s, is_q, rd);
}
7342 
7343 /*
7344  * do_reduction_op helper
7345  *
7346  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7347  * important for correct NaN propagation that we do these
7348  * operations in exactly the order specified by the pseudocode.
7349  *
7350  * This is a recursive function, TCG temps should be freed by the
7351  * calling function once it is done with the values.
7352  */
static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
                                int esize, int size, int vmap, TCGv_ptr fpst)
{
    if (esize == size) {
        /* Base case: exactly one element remains; just load it. */
        int element;
        MemOp msize = esize == 16 ? MO_16 : MO_32;
        TCGv_i32 tcg_elem;

        /* We should have one register left here */
        assert(ctpop8(vmap) == 1);
        element = ctz32(vmap);
        assert(element < 8);

        tcg_elem = tcg_temp_new_i32();
        read_vec_element_i32(s, tcg_elem, rn, element, msize);
        return tcg_elem;
    } else {
        /* Split the element bitmap into a low and a high half, reduce
         * each half recursively, then combine the partial results with
         * the requested min/max operation.
         */
        int bits = size / 2;
        int shift = ctpop8(vmap) / 2;
        int vmap_lo = (vmap >> shift) & vmap;
        int vmap_hi = (vmap & ~vmap_lo);
        TCGv_i32 tcg_hi, tcg_lo, tcg_res;

        tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
        tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
        tcg_res = tcg_temp_new_i32();

        /* fpopcode is opcode | is_min << 4 | is_u << 5 (see caller) */
        switch (fpopcode) {
        case 0x0c: /* fmaxnmv half-precision */
            gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x0f: /* fmaxv half-precision */
            gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x1c: /* fminnmv half-precision */
            gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x1f: /* fminv half-precision */
            gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x2c: /* fmaxnmv */
            gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x2f: /* fmaxv */
            gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x3c: /* fminnmv */
            gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x3f: /* fminv */
            gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        default:
            g_assert_not_reached();
        }
        return tcg_res;
    }
}
7411 
7412 /* AdvSIMD across lanes
7413  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7414  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7415  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7416  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7417  */
static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    bool is_q = extract32(insn, 30, 1);
    bool is_u = extract32(insn, 29, 1);
    bool is_fp = false;
    bool is_min = false;
    int esize;
    int elements;
    int i;
    TCGv_i64 tcg_res, tcg_elt;

    /* Validate the encoding and work out the effective element size */
    switch (opcode) {
    case 0x1b: /* ADDV */
        if (is_u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x3: /* SADDLV, UADDLV */
    case 0xa: /* SMAXV, UMAXV */
    case 0x1a: /* SMINV, UMINV */
        if (size == 3 || (size == 2 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0xc: /* FMAXNMV, FMINNMV */
    case 0xf: /* FMAXV, FMINV */
        /* Bit 1 of size field encodes min vs max and the actual size
         * depends on the encoding of the U bit. If not set (and FP16
         * enabled) then we do half-precision float instead of single
         * precision.
         */
        is_min = extract32(size, 1, 1);
        is_fp = true;
        if (!is_u && dc_isar_feature(aa64_fp16, s)) {
            size = 1;
        } else if (!is_u || !is_q || extract32(size, 0, 1)) {
            unallocated_encoding(s);
            return;
        } else {
            size = 2;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    esize = 8 << size;
    elements = (is_q ? 128 : 64) / esize;

    tcg_res = tcg_temp_new_i64();
    tcg_elt = tcg_temp_new_i64();

    /* These instructions operate across all lanes of a vector
     * to produce a single result. We can guarantee that a 64
     * bit intermediate is sufficient:
     *  + for [US]ADDLV the maximum element size is 32 bits, and
     *    the result type is 64 bits
     *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
     *    same as the element size, which is 32 bits at most
     * For the integer operations we can choose to work at 64
     * or 32 bits and truncate at the end; for simplicity
     * we use 64 bits always. The floating point
     * ops do require 32 bit intermediates, though.
     */
    if (!is_fp) {
        /* Integer reduction: fold elements 1..n-1 into element 0 */
        read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));

        for (i = 1; i < elements; i++) {
            read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));

            switch (opcode) {
            case 0x03: /* SADDLV / UADDLV */
            case 0x1b: /* ADDV */
                tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
                break;
            case 0x0a: /* SMAXV / UMAXV */
                if (is_u) {
                    tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
                } else {
                    tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
                }
                break;
            case 0x1a: /* SMINV / UMINV */
                if (is_u) {
                    tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
                } else {
                    tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
                }
                break;
            default:
                g_assert_not_reached();
            }

        }
    } else {
        /* Floating point vector reduction ops which work across 32
         * bit (single) or 16 bit (half-precision) intermediates.
         * Note that correct NaN propagation requires that we do these
         * operations in exactly the order specified by the pseudocode.
         */
        TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        int fpopcode = opcode | is_min << 4 | is_u << 5;
        /* vmap: bitmap of elements still to be reduced (one bit each) */
        int vmap = (1 << elements) - 1;
        TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
                                             (is_q ? 128 : 64), vmap, fpst);
        tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
    }

    /* Now truncate the result to the width required for the final output */
    if (opcode == 0x03) {
        /* SADDLV, UADDLV: result is 2*esize */
        size++;
    }

    switch (size) {
    case 0:
        tcg_gen_ext8u_i64(tcg_res, tcg_res);
        break;
    case 1:
        tcg_gen_ext16u_i64(tcg_res, tcg_res);
        break;
    case 2:
        tcg_gen_ext32u_i64(tcg_res, tcg_res);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    write_fp_dreg(s, rd, tcg_res);
}
7561 
7562 /* DUP (Element, Vector)
7563  *
7564  *  31  30   29              21 20    16 15        10  9    5 4    0
7565  * +---+---+-------------------+--------+-------------+------+------+
7566  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7567  * +---+---+-------------------+--------+-------------+------+------+
7568  *
7569  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7570  */
7571 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7572                              int imm5)
7573 {
7574     int size = ctz32(imm5);
7575     int index;
7576 
7577     if (size > 3 || (size == 3 && !is_q)) {
7578         unallocated_encoding(s);
7579         return;
7580     }
7581 
7582     if (!fp_access_check(s)) {
7583         return;
7584     }
7585 
7586     index = imm5 >> (size + 1);
7587     tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7588                          vec_reg_offset(s, rn, index, size),
7589                          is_q ? 16 : 8, vec_full_reg_size(s));
7590 }
7591 
7592 /* DUP (element, scalar)
7593  *  31                   21 20    16 15        10  9    5 4    0
7594  * +-----------------------+--------+-------------+------+------+
7595  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7596  * +-----------------------+--------+-------------+------+------+
7597  */
7598 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7599                               int imm5)
7600 {
7601     int size = ctz32(imm5);
7602     int index;
7603     TCGv_i64 tmp;
7604 
7605     if (size > 3) {
7606         unallocated_encoding(s);
7607         return;
7608     }
7609 
7610     if (!fp_access_check(s)) {
7611         return;
7612     }
7613 
7614     index = imm5 >> (size + 1);
7615 
7616     /* This instruction just extracts the specified element and
7617      * zero-extends it into the bottom of the destination register.
7618      */
7619     tmp = tcg_temp_new_i64();
7620     read_vec_element(s, tmp, rn, index, size);
7621     write_fp_dreg(s, rd, tmp);
7622 }
7623 
7624 /* DUP (General)
7625  *
7626  *  31  30   29              21 20    16 15        10  9    5 4    0
7627  * +---+---+-------------------+--------+-------------+------+------+
7628  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
7629  * +---+---+-------------------+--------+-------------+------+------+
7630  *
7631  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7632  */
7633 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7634                              int imm5)
7635 {
7636     int size = ctz32(imm5);
7637     uint32_t dofs, oprsz, maxsz;
7638 
7639     if (size > 3 || ((size == 3) && !is_q)) {
7640         unallocated_encoding(s);
7641         return;
7642     }
7643 
7644     if (!fp_access_check(s)) {
7645         return;
7646     }
7647 
7648     dofs = vec_full_reg_offset(s, rd);
7649     oprsz = is_q ? 16 : 8;
7650     maxsz = vec_full_reg_size(s);
7651 
7652     tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7653 }
7654 
7655 /* INS (Element)
7656  *
7657  *  31                   21 20    16 15  14    11  10 9    5 4    0
7658  * +-----------------------+--------+------------+---+------+------+
7659  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
7660  * +-----------------------+--------+------------+---+------+------+
7661  *
7662  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7663  * index: encoded in imm5<4:size+1>
7664  */
7665 static void handle_simd_inse(DisasContext *s, int rd, int rn,
7666                              int imm4, int imm5)
7667 {
7668     int size = ctz32(imm5);
7669     int src_index, dst_index;
7670     TCGv_i64 tmp;
7671 
7672     if (size > 3) {
7673         unallocated_encoding(s);
7674         return;
7675     }
7676 
7677     if (!fp_access_check(s)) {
7678         return;
7679     }
7680 
7681     dst_index = extract32(imm5, 1+size, 5);
7682     src_index = extract32(imm4, size, 4);
7683 
7684     tmp = tcg_temp_new_i64();
7685 
7686     read_vec_element(s, tmp, rn, src_index, size);
7687     write_vec_element(s, tmp, rd, dst_index, size);
7688 
7689     /* INS is considered a 128-bit write for SVE. */
7690     clear_vec_high(s, true, rd);
7691 }
7692 
7693 
7694 /* INS (General)
7695  *
7696  *  31                   21 20    16 15        10  9    5 4    0
7697  * +-----------------------+--------+-------------+------+------+
7698  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
7699  * +-----------------------+--------+-------------+------+------+
7700  *
7701  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7702  * index: encoded in imm5<4:size+1>
7703  */
7704 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
7705 {
7706     int size = ctz32(imm5);
7707     int idx;
7708 
7709     if (size > 3) {
7710         unallocated_encoding(s);
7711         return;
7712     }
7713 
7714     if (!fp_access_check(s)) {
7715         return;
7716     }
7717 
7718     idx = extract32(imm5, 1 + size, 4 - size);
7719     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
7720 
7721     /* INS is considered a 128-bit write for SVE. */
7722     clear_vec_high(s, true, rd);
7723 }
7724 
7725 /*
7726  * UMOV (General)
7727  * SMOV (General)
7728  *
7729  *  31  30   29              21 20    16 15    12   10 9    5 4    0
7730  * +---+---+-------------------+--------+-------------+------+------+
7731  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
7732  * +---+---+-------------------+--------+-------------+------+------+
7733  *
7734  * U: unsigned when set
7735  * size: encoded in imm5 (see ARM ARM LowestSetBit())
7736  */
7737 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
7738                                   int rn, int rd, int imm5)
7739 {
7740     int size = ctz32(imm5);
7741     int element;
7742     TCGv_i64 tcg_rd;
7743 
7744     /* Check for UnallocatedEncodings */
7745     if (is_signed) {
7746         if (size > 2 || (size == 2 && !is_q)) {
7747             unallocated_encoding(s);
7748             return;
7749         }
7750     } else {
7751         if (size > 3
7752             || (size < 3 && is_q)
7753             || (size == 3 && !is_q)) {
7754             unallocated_encoding(s);
7755             return;
7756         }
7757     }
7758 
7759     if (!fp_access_check(s)) {
7760         return;
7761     }
7762 
7763     element = extract32(imm5, 1+size, 4);
7764 
7765     tcg_rd = cpu_reg(s, rd);
7766     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
7767     if (is_signed && !is_q) {
7768         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7769     }
7770 }
7771 
7772 /* AdvSIMD copy
7773  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
7774  * +---+---+----+-----------------+------+---+------+---+------+------+
7775  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7776  * +---+---+----+-----------------+------+---+------+---+------+------+
7777  */
7778 static void disas_simd_copy(DisasContext *s, uint32_t insn)
7779 {
7780     int rd = extract32(insn, 0, 5);
7781     int rn = extract32(insn, 5, 5);
7782     int imm4 = extract32(insn, 11, 4);
7783     int op = extract32(insn, 29, 1);
7784     int is_q = extract32(insn, 30, 1);
7785     int imm5 = extract32(insn, 16, 5);
7786 
7787     if (op) {
7788         if (is_q) {
7789             /* INS (element) */
7790             handle_simd_inse(s, rd, rn, imm4, imm5);
7791         } else {
7792             unallocated_encoding(s);
7793         }
7794     } else {
7795         switch (imm4) {
7796         case 0:
7797             /* DUP (element - vector) */
7798             handle_simd_dupe(s, is_q, rd, rn, imm5);
7799             break;
7800         case 1:
7801             /* DUP (general) */
7802             handle_simd_dupg(s, is_q, rd, rn, imm5);
7803             break;
7804         case 3:
7805             if (is_q) {
7806                 /* INS (general) */
7807                 handle_simd_insg(s, rd, rn, imm5);
7808             } else {
7809                 unallocated_encoding(s);
7810             }
7811             break;
7812         case 5:
7813         case 7:
7814             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7815             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
7816             break;
7817         default:
7818             unallocated_encoding(s);
7819             break;
7820         }
7821     }
7822 }
7823 
7824 /* AdvSIMD modified immediate
7825  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
7826  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7827  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
7828  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7829  *
7830  * There are a number of operations that can be carried out here:
7831  *   MOVI - move (shifted) imm into register
7832  *   MVNI - move inverted (shifted) imm into register
7833  *   ORR  - bitwise OR of (shifted) imm with register
7834  *   BIC  - bitwise clear of (shifted) imm with register
7835  * With ARMv8.2 we also have:
7836  *   FMOV half-precision
7837  */
static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int cmode = extract32(insn, 12, 4);
    int o2 = extract32(insn, 11, 1);
    /* The 8 immediate bits abcdefgh are split between defgh and abc */
    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
    bool is_neg = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    uint64_t imm = 0;

    /* Encodings with o2 set, or cmode 0xf with op set and Q clear, are
     * only valid as the FP16 FMOV form (which requires the fp16
     * extension); everything else in that space is unallocated.
     */
    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
        /* Check for FMOV (vector, immediate) - half-precision */
        if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
            unallocated_encoding(s);
            return;
        }
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (cmode == 15 && o2 && !is_neg) {
        /* FMOV (vector, immediate) - half-precision */
        imm = vfp_expand_imm(MO_16, abcdefgh);
        /* now duplicate across the lanes */
        imm = dup_const(MO_16, imm);
    } else {
        /* Expand the 8-bit immediate per the cmode encoding; this also
         * applies the MVNI/BIC inversion when is_neg is set.
         */
        imm = asimd_imm_const(abcdefgh, cmode, is_neg);
    }

    /* cmode patterns x0x1 and 10x1 are the ORR/BIC forms; all others
     * write the immediate directly.
     */
    if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
        /* MOVI or MVNI, with MVNI negation handled above.  */
        tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
                             vec_full_reg_size(s), imm);
    } else {
        /* ORR or BIC, with BIC negation to AND handled above.  */
        if (is_neg) {
            gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
        } else {
            gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
        }
    }
}
7882 
7883 /* AdvSIMD scalar copy
7884  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
7885  * +-----+----+-----------------+------+---+------+---+------+------+
7886  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7887  * +-----+----+-----------------+------+---+------+---+------+------+
7888  */
7889 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
7890 {
7891     int rd = extract32(insn, 0, 5);
7892     int rn = extract32(insn, 5, 5);
7893     int imm4 = extract32(insn, 11, 4);
7894     int imm5 = extract32(insn, 16, 5);
7895     int op = extract32(insn, 29, 1);
7896 
7897     if (op != 0 || imm4 != 0) {
7898         unallocated_encoding(s);
7899         return;
7900     }
7901 
7902     /* DUP (element, scalar) */
7903     handle_simd_dupes(s, rd, rn, imm5);
7904 }
7905 
7906 /* AdvSIMD scalar pairwise
7907  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7908  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7909  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7910  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7911  */
static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
{
    int u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    TCGv_ptr fpst;

    /* For some ops (the FP ones), size[1] is part of the encoding.
     * For ADDP strictly it is not but size[1] is always 1 for valid
     * encodings.
     */
    opcode |= (extract32(size, 1, 1) << 5);

    switch (opcode) {
    case 0x3b: /* ADDP */
        if (u || size != 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }

        /* ADDP is an integer op: no FP status pointer needed */
        fpst = NULL;
        break;
    case 0xc: /* FMAXNMP */
    case 0xd: /* FADDP */
    case 0xf: /* FMAXP */
    case 0x2c: /* FMINNMP */
    case 0x2f: /* FMINP */
        /* FP op, size[0] is 32 or 64 bit*/
        if (!u) {
            /* u clear selects the half-precision form (FEAT_FP16 only) */
            if (!dc_isar_feature(aa64_fp16, s)) {
                unallocated_encoding(s);
                return;
            } else {
                size = MO_16;
            }
        } else {
            size = extract32(size, 0, 1) ? MO_64 : MO_32;
        }

        if (!fp_access_check(s)) {
            return;
        }

        fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == MO_64) {
        /* 64-bit elements: operate on the two halves of Vn as i64 */
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element(s, tcg_op1, rn, 0, MO_64);
        read_vec_element(s, tcg_op2, rn, 1, MO_64);

        switch (opcode) {
        case 0x3b: /* ADDP */
            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
            break;
        case 0xc: /* FMAXNMP */
            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xd: /* FADDP */
            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xf: /* FMAXP */
            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2c: /* FMINNMP */
            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2f: /* FMINP */
            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_dreg(s, rd, tcg_res);
    } else {
        /* 16/32-bit elements: operate on the first two elements as i32,
         * picking the half- or single-precision helper as appropriate.
         */
        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i32 tcg_res = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_op1, rn, 0, size);
        read_vec_element_i32(s, tcg_op2, rn, 1, size);

        if (size == MO_16) {
            switch (opcode) {
            case 0xc: /* FMAXNMP */
                gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xd: /* FADDP */
                gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xf: /* FMAXP */
                gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2c: /* FMINNMP */
                gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2f: /* FMINP */
                gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        } else {
            switch (opcode) {
            case 0xc: /* FMAXNMP */
                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xd: /* FADDP */
                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xf: /* FMAXP */
                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2c: /* FMINNMP */
                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2f: /* FMINP */
                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        }

        write_fp_sreg(s, rd, tcg_res);
    }
}
8052 
8053 /*
8054  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
8055  *
8056  * This code is handles the common shifting code and is used by both
8057  * the vector and scalar code.
8058  */
static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
                                    TCGv_i64 tcg_rnd, bool accumulate,
                                    bool is_u, int size, int shift)
{
    bool extended_result = false;
    bool round = tcg_rnd != NULL;
    int ext_lshift = 0;
    TCGv_i64 tcg_src_hi;

    if (round && size == 3) {
        /* Adding the rounding constant to a 64-bit value can carry out
         * of bit 63, so compute with a 128-bit intermediate, with
         * tcg_src_hi holding the high half.
         */
        extended_result = true;
        ext_lshift = 64 - shift;
        tcg_src_hi = tcg_temp_new_i64();
    } else if (shift == 64) {
        if (!accumulate && is_u) {
            /* result is zero */
            tcg_gen_movi_i64(tcg_res, 0);
            return;
        }
    }

    /* Deal with the rounding step */
    if (round) {
        if (extended_result) {
            TCGv_i64 tcg_zero = tcg_constant_i64(0);
            if (!is_u) {
                /* take care of sign extending tcg_res */
                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
                                 tcg_src, tcg_src_hi,
                                 tcg_rnd, tcg_zero);
            } else {
                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
                                 tcg_src, tcg_zero,
                                 tcg_rnd, tcg_zero);
            }
        } else {
            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
        }
    }

    /* Now do the shift right */
    if (round && extended_result) {
        /* extended case, >64 bit precision required */
        if (ext_lshift == 0) {
            /* special case, only high bits matter */
            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
        } else {
            /* Combine the shifted low half with bits fed in from the
             * high half of the 128-bit intermediate.
             */
            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
        }
    } else {
        if (is_u) {
            if (shift == 64) {
                /* essentially shifting in 64 zeros */
                tcg_gen_movi_i64(tcg_src, 0);
            } else {
                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
            }
        } else {
            if (shift == 64) {
                /* effectively extending the sign-bit */
                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
            } else {
                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
            }
        }
    }

    /* Either accumulate into the destination or overwrite it */
    if (accumulate) {
        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
    } else {
        tcg_gen_mov_i64(tcg_res, tcg_src);
    }
}
8135 
/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
static void handle_scalar_simd_shri(DisasContext *s,
                                    bool is_u, int immh, int immb,
                                    int opcode, int rn, int rd)
{
    /* The scalar form always operates on a full 64-bit element. */
    const int size = 3;
    int immhb = immh << 3 | immb;
    /* Shift amount is encoded as (2 * esize) - immhb, giving 1..64. */
    int shift = 2 * (8 << size) - immhb;
    bool accumulate = false;
    bool round = false;
    bool insert = false;
    TCGv_i64 tcg_rn;
    TCGv_i64 tcg_rd;
    TCGv_i64 tcg_round;

    /* immh<3> must be set for the 64-bit scalar encodings. */
    if (!extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* Opcode 0x00 (SSHR/USHR) needs none of these flags, hence no case. */
    switch (opcode) {
    case 0x02: /* SSRA / USRA (accumulate) */
        accumulate = true;
        break;
    case 0x04: /* SRSHR / URSHR (rounding) */
        round = true;
        break;
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        accumulate = round = true;
        break;
    case 0x08: /* SRI */
        insert = true;
        break;
    }

    if (round) {
        /* Rounding constant: a 1 in the highest bit position shifted out. */
        tcg_round = tcg_constant_i64(1ULL << (shift - 1));
    } else {
        tcg_round = NULL;
    }

    tcg_rn = read_fp_dreg(s, rn);
    /* SRI and the accumulating forms read the old destination value. */
    tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();

    if (insert) {
        /* shift count same as element size is valid but does nothing;
         * special case to avoid potential shift by 64.
         */
        int esize = 8 << size;
        if (shift != esize) {
            tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
            tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
        }
    } else {
        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                accumulate, is_u, size, shift);
    }

    write_fp_dreg(s, rd, tcg_rd);
}
8200 
8201 /* SHL/SLI - Scalar shift left */
8202 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
8203                                     int immh, int immb, int opcode,
8204                                     int rn, int rd)
8205 {
8206     int size = 32 - clz32(immh) - 1;
8207     int immhb = immh << 3 | immb;
8208     int shift = immhb - (8 << size);
8209     TCGv_i64 tcg_rn;
8210     TCGv_i64 tcg_rd;
8211 
8212     if (!extract32(immh, 3, 1)) {
8213         unallocated_encoding(s);
8214         return;
8215     }
8216 
8217     if (!fp_access_check(s)) {
8218         return;
8219     }
8220 
8221     tcg_rn = read_fp_dreg(s, rn);
8222     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8223 
8224     if (insert) {
8225         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
8226     } else {
8227         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
8228     }
8229 
8230     write_fp_dreg(s, rd, tcg_rd);
8231 }
8232 
/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
 * (signed/unsigned) narrowing */
static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
                                   bool is_u_shift, bool is_u_narrow,
                                   int immh, int immb, int opcode,
                                   int rn, int rd)
{
    int immhb = immh << 3 | immb;
    /* 'size' is the *destination* element size log2; the source
     * elements read below are twice as wide (ldop uses size + 1).
     */
    int size = 32 - clz32(immh) - 1;
    int esize = 8 << size;
    int shift = (2 * esize) - immhb;
    int elements = is_scalar ? 1 : (64 / esize);
    /* Odd opcodes are the rounding (SQRSHR*) forms. */
    bool round = extract32(opcode, 0, 1);
    MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
    TCGv_i64 tcg_rn, tcg_rd, tcg_round;
    TCGv_i32 tcg_rd_narrowed;
    TCGv_i64 tcg_final;

    /* Narrowing helpers indexed by destination element size; the second
     * index selects the signed-input/unsigned-result (SQSHRUN) variant.
     */
    static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
        { gen_helper_neon_narrow_sat_s8,
          gen_helper_neon_unarrow_sat8 },
        { gen_helper_neon_narrow_sat_s16,
          gen_helper_neon_unarrow_sat16 },
        { gen_helper_neon_narrow_sat_s32,
          gen_helper_neon_unarrow_sat32 },
        { NULL, NULL },
    };
    static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
        gen_helper_neon_narrow_sat_u8,
        gen_helper_neon_narrow_sat_u16,
        gen_helper_neon_narrow_sat_u32,
        NULL
    };
    NeonGenNarrowEnvFn *narrowfn;

    int i;

    assert(size < 4);

    /* immh<3> set would imply a 128-bit source element, which is
     * not a valid narrowing encoding.
     */
    if (extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (is_u_shift) {
        narrowfn = unsigned_narrow_fns[size];
    } else {
        narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
    }

    tcg_rn = tcg_temp_new_i64();
    tcg_rd = tcg_temp_new_i64();
    tcg_rd_narrowed = tcg_temp_new_i32();
    tcg_final = tcg_temp_new_i64();

    if (round) {
        /* Rounding constant added before the right shift. */
        tcg_round = tcg_constant_i64(1ULL << (shift - 1));
    } else {
        tcg_round = NULL;
    }

    /* Shift and saturating-narrow each source element, packing the
     * narrowed results into the 64-bit tcg_final value.
     */
    for (i = 0; i < elements; i++) {
        read_vec_element(s, tcg_rn, rn, i, ldop);
        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                false, is_u_shift, size+1, shift);
        narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
        if (i == 0) {
            tcg_gen_mov_i64(tcg_final, tcg_rd);
        } else {
            tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
        }
    }

    /* Q selects writing the low (SQSHRN) or high (SQSHRN2) half of Vd. */
    if (!is_q) {
        write_vec_element(s, tcg_final, rd, 0, MO_64);
    } else {
        write_vec_element(s, tcg_final, rd, 1, MO_64);
    }
    clear_vec_high(s, is_q, rd);
}
8318 
/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
                             bool src_unsigned, bool dst_unsigned,
                             int immh, int immb, int rn, int rd)
{
    int immhb = immh << 3 | immb;
    /* Element size log2 from the position of immh's top set bit. */
    int size = 32 - clz32(immh) - 1;
    int shift = immhb - (8 << size);
    int pass;

    /* immh == 0 and scalar-with-Q must be filtered out by the caller. */
    assert(immh != 0);
    assert(!(scalar && is_q));

    if (!scalar) {
        /* 64-bit elements are only valid in the full-vector (Q=1) form. */
        if (!is_q && extract32(immh, 3, 1)) {
            unallocated_encoding(s);
            return;
        }

        /* Since we use the variable-shift helpers we must
         * replicate the shift count into each element of
         * the tcg_shift value.
         */
        switch (size) {
        case 0:
            shift |= shift << 8;
            /* fall through */
        case 1:
            shift |= shift << 16;
            break;
        case 2:
        case 3:
            break;
        default:
            g_assert_not_reached();
        }
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (size == 3) {
        TCGv_i64 tcg_shift = tcg_constant_i64(shift);
        /* Helpers indexed [src_unsigned][dst_unsigned]: (s,s) = SQSHL,
         * (s,u) = SQSHLU, (u,u) = UQSHL; unsigned source with signed
         * result has no instruction, hence the NULL slot.
         */
        static NeonGenTwo64OpEnvFn * const fns[2][2] = {
            { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
            { NULL, gen_helper_neon_qshl_u64 },
        };
        NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
        int maxpass = is_q ? 2 : 1;

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);
            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
            write_vec_element(s, tcg_op, rd, pass, MO_64);
        }
        clear_vec_high(s, is_q, rd);
    } else {
        TCGv_i32 tcg_shift = tcg_constant_i32(shift);
        /* As above, but additionally indexed by element size. */
        static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
            {
                { gen_helper_neon_qshl_s8,
                  gen_helper_neon_qshl_s16,
                  gen_helper_neon_qshl_s32 },
                { gen_helper_neon_qshlu_s8,
                  gen_helper_neon_qshlu_s16,
                  gen_helper_neon_qshlu_s32 }
            }, {
                { NULL, NULL, NULL },
                { gen_helper_neon_qshl_u8,
                  gen_helper_neon_qshl_u16,
                  gen_helper_neon_qshl_u32 }
            }
        };
        NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
        MemOp memop = scalar ? size : MO_32;
        int maxpass = scalar ? 1 : is_q ? 4 : 2;

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op, rn, pass, memop);
            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
            if (scalar) {
                /* Zero out the unused high bits of the scalar result. */
                switch (size) {
                case 0:
                    tcg_gen_ext8u_i32(tcg_op, tcg_op);
                    break;
                case 1:
                    tcg_gen_ext16u_i32(tcg_op, tcg_op);
                    break;
                case 2:
                    break;
                default:
                    g_assert_not_reached();
                }
                write_fp_sreg(s, rd, tcg_op);
            } else {
                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
            }
        }

        if (!scalar) {
            clear_vec_high(s, is_q, rd);
        }
    }
}
8428 
/* Common vector code for handling integer to FP conversion */
static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
                                   int elements, int is_signed,
                                   int fracbits, int size)
{
    /* Half-precision results need the FP16 flavour of the FP status. */
    TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    TCGv_i32 tcg_shift = NULL;

    MemOp mop = size | (is_signed ? MO_SIGN : 0);
    int pass;

    /* The 64-bit helpers always take a shift argument; the 32/16-bit
     * paths only need one for the fixed-point (fracbits != 0) forms.
     */
    if (fracbits || size == MO_64) {
        tcg_shift = tcg_constant_i32(fracbits);
    }

    if (size == MO_64) {
        TCGv_i64 tcg_int64 = tcg_temp_new_i64();
        TCGv_i64 tcg_double = tcg_temp_new_i64();

        for (pass = 0; pass < elements; pass++) {
            read_vec_element(s, tcg_int64, rn, pass, mop);

            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_double, tcg_int64,
                                     tcg_shift, tcg_fpst);
            } else {
                gen_helper_vfp_uqtod(tcg_double, tcg_int64,
                                     tcg_shift, tcg_fpst);
            }
            /* The scalar (single-element) write clears the rest of Vd. */
            if (elements == 1) {
                write_fp_dreg(s, rd, tcg_double);
            } else {
                write_vec_element(s, tcg_double, rd, pass, MO_64);
            }
        }
    } else {
        TCGv_i32 tcg_int32 = tcg_temp_new_i32();
        TCGv_i32 tcg_float = tcg_temp_new_i32();

        for (pass = 0; pass < elements; pass++) {
            read_vec_element_i32(s, tcg_int32, rn, pass, mop);

            /* Select the helper by element size, signedness, and whether
             * this is a fixed-point (fracbits != 0) conversion.
             */
            switch (size) {
            case MO_32:
                if (fracbits) {
                    if (is_signed) {
                        gen_helper_vfp_sltos(tcg_float, tcg_int32,
                                             tcg_shift, tcg_fpst);
                    } else {
                        gen_helper_vfp_ultos(tcg_float, tcg_int32,
                                             tcg_shift, tcg_fpst);
                    }
                } else {
                    if (is_signed) {
                        gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
                    } else {
                        gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
                    }
                }
                break;
            case MO_16:
                if (fracbits) {
                    if (is_signed) {
                        gen_helper_vfp_sltoh(tcg_float, tcg_int32,
                                             tcg_shift, tcg_fpst);
                    } else {
                        gen_helper_vfp_ultoh(tcg_float, tcg_int32,
                                             tcg_shift, tcg_fpst);
                    }
                } else {
                    if (is_signed) {
                        gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
                    } else {
                        gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
                    }
                }
                break;
            default:
                g_assert_not_reached();
            }

            if (elements == 1) {
                write_fp_sreg(s, rd, tcg_float);
            } else {
                write_vec_element_i32(s, tcg_float, rd, pass, size);
            }
        }
    }

    /* NB: '<<' binds tighter than '==', so this tests whether the data
     * written occupied the full 16 bytes (i.e. this was a Q operation).
     */
    clear_vec_high(s, elements << size == 16, rd);
}
8520 
8521 /* UCVTF/SCVTF - Integer to FP conversion */
8522 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8523                                          bool is_q, bool is_u,
8524                                          int immh, int immb, int opcode,
8525                                          int rn, int rd)
8526 {
8527     int size, elements, fracbits;
8528     int immhb = immh << 3 | immb;
8529 
8530     if (immh & 8) {
8531         size = MO_64;
8532         if (!is_scalar && !is_q) {
8533             unallocated_encoding(s);
8534             return;
8535         }
8536     } else if (immh & 4) {
8537         size = MO_32;
8538     } else if (immh & 2) {
8539         size = MO_16;
8540         if (!dc_isar_feature(aa64_fp16, s)) {
8541             unallocated_encoding(s);
8542             return;
8543         }
8544     } else {
8545         /* immh == 0 would be a failure of the decode logic */
8546         g_assert(immh == 1);
8547         unallocated_encoding(s);
8548         return;
8549     }
8550 
8551     if (is_scalar) {
8552         elements = 1;
8553     } else {
8554         elements = (8 << is_q) >> size;
8555     }
8556     fracbits = (16 << size) - immhb;
8557 
8558     if (!fp_access_check(s)) {
8559         return;
8560     }
8561 
8562     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8563 }
8564 
/* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
                                         bool is_q, bool is_u,
                                         int immh, int immb, int rn, int rd)
{
    int immhb = immh << 3 | immb;
    int pass, size, fracbits;
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_rmode, tcg_shift;

    /* The top set bit of immh encodes the element size. */
    if (immh & 0x8) {
        size = MO_64;
        /* 64-bit elements require the scalar form or a full vector. */
        if (!is_scalar && !is_q) {
            unallocated_encoding(s);
            return;
        }
    } else if (immh & 0x4) {
        size = MO_32;
    } else if (immh & 0x2) {
        size = MO_16;
        if (!dc_isar_feature(aa64_fp16, s)) {
            unallocated_encoding(s);
            return;
        }
    } else {
        /* Should have split out AdvSIMD modified immediate earlier.  */
        assert(immh == 1);
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    assert(!(is_scalar && is_q));

    tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    /* FCVTZ* rounds toward zero regardless of the current FPCR mode. */
    tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
    fracbits = (16 << size) - immhb;
    tcg_shift = tcg_constant_i32(fracbits);

    if (size == MO_64) {
        int maxpass = is_scalar ? 1 : 2;

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);
            if (is_u) {
                gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            }
            write_vec_element(s, tcg_op, rd, pass, MO_64);
        }
        clear_vec_high(s, is_q, rd);
    } else {
        void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
        int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);

        /* Select the conversion helper for element size and signedness. */
        switch (size) {
        case MO_16:
            if (is_u) {
                fn = gen_helper_vfp_touhh;
            } else {
                fn = gen_helper_vfp_toshh;
            }
            break;
        case MO_32:
            if (is_u) {
                fn = gen_helper_vfp_touls;
            } else {
                fn = gen_helper_vfp_tosls;
            }
            break;
        default:
            g_assert_not_reached();
        }

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op, rn, pass, size);
            fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            if (is_scalar) {
                write_fp_sreg(s, rd, tcg_op);
            } else {
                write_vec_element_i32(s, tcg_op, rd, pass, size);
            }
        }
        if (!is_scalar) {
            clear_vec_high(s, is_q, rd);
        }
    }

    /* Restore the rounding mode that was in effect on entry. */
    gen_restore_rmode(tcg_rmode, tcg_fpstatus);
}
8663 
8664 /* AdvSIMD scalar shift by immediate
8665  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
8666  * +-----+---+-------------+------+------+--------+---+------+------+
8667  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8668  * +-----+---+-------------+------+------+--------+---+------+------+
8669  *
8670  * This is the scalar version so it works on a fixed sized registers
8671  */
8672 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8673 {
8674     int rd = extract32(insn, 0, 5);
8675     int rn = extract32(insn, 5, 5);
8676     int opcode = extract32(insn, 11, 5);
8677     int immb = extract32(insn, 16, 3);
8678     int immh = extract32(insn, 19, 4);
8679     bool is_u = extract32(insn, 29, 1);
8680 
8681     if (immh == 0) {
8682         unallocated_encoding(s);
8683         return;
8684     }
8685 
8686     switch (opcode) {
8687     case 0x08: /* SRI */
8688         if (!is_u) {
8689             unallocated_encoding(s);
8690             return;
8691         }
8692         /* fall through */
8693     case 0x00: /* SSHR / USHR */
8694     case 0x02: /* SSRA / USRA */
8695     case 0x04: /* SRSHR / URSHR */
8696     case 0x06: /* SRSRA / URSRA */
8697         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8698         break;
8699     case 0x0a: /* SHL / SLI */
8700         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8701         break;
8702     case 0x1c: /* SCVTF, UCVTF */
8703         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8704                                      opcode, rn, rd);
8705         break;
8706     case 0x10: /* SQSHRUN, SQSHRUN2 */
8707     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8708         if (!is_u) {
8709             unallocated_encoding(s);
8710             return;
8711         }
8712         handle_vec_simd_sqshrn(s, true, false, false, true,
8713                                immh, immb, opcode, rn, rd);
8714         break;
8715     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
8716     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8717         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8718                                immh, immb, opcode, rn, rd);
8719         break;
8720     case 0xc: /* SQSHLU */
8721         if (!is_u) {
8722             unallocated_encoding(s);
8723             return;
8724         }
8725         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8726         break;
8727     case 0xe: /* SQSHL, UQSHL */
8728         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8729         break;
8730     case 0x1f: /* FCVTZS, FCVTZU */
8731         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8732         break;
8733     default:
8734         unallocated_encoding(s);
8735         break;
8736     }
8737 }
8738 
/* AdvSIMD scalar three different
 *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+------+--------+-----+------+------+
 */
static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
{
    bool is_u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    /* No U == 1 encodings exist in this group. */
    if (is_u) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0x9: /* SQDMLAL, SQDMLAL2 */
    case 0xb: /* SQDMLSL, SQDMLSL2 */
    case 0xd: /* SQDMULL, SQDMULL2 */
        /* Only 16-bit (size 1) and 32-bit (size 2) sources are valid. */
        if (size == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (size == 2) {
        /* 32x32 -> 64 widening multiply */
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);

        /* The doubling is done as a saturating add of the product to
         * itself so that saturation is detected and flagged.
         */
        tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
        gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);

        switch (opcode) {
        case 0xd: /* SQDMULL, SQDMULL2 */
            break;
        case 0xb: /* SQDMLSL, SQDMLSL2 */
            tcg_gen_neg_i64(tcg_res, tcg_res);
            /* fall through */
        case 0x9: /* SQDMLAL, SQDMLAL2 */
            /* Saturating accumulate onto the old Rd value. */
            read_vec_element(s, tcg_op1, rd, 0, MO_64);
            gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
                                              tcg_res, tcg_op1);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_dreg(s, rd, tcg_res);
    } else {
        /* 16x16 -> 32 widening multiply, held in a 64-bit temp */
        TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
        TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);

        switch (opcode) {
        case 0xd: /* SQDMULL, SQDMULL2 */
            break;
        case 0xb: /* SQDMLSL, SQDMLSL2 */
            gen_helper_neon_negl_u32(tcg_res, tcg_res);
            /* fall through */
        case 0x9: /* SQDMLAL, SQDMLAL2 */
        {
            TCGv_i64 tcg_op3 = tcg_temp_new_i64();
            read_vec_element(s, tcg_op3, rd, 0, MO_32);
            gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
                                              tcg_res, tcg_op3);
            break;
        }
        default:
            g_assert_not_reached();
        }

        /* Only the low 32 bits of the result are valid here. */
        tcg_gen_ext32u_i64(tcg_res, tcg_res);
        write_fp_dreg(s, rd, tcg_res);
    }
}
8834 
8835 static void handle_3same_64(DisasContext *s, int opcode, bool u,
8836                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
8837 {
8838     /* Handle 64x64->64 opcodes which are shared between the scalar
8839      * and vector 3-same groups. We cover every opcode where size == 3
8840      * is valid in either the three-reg-same (integer, not pairwise)
8841      * or scalar-three-reg-same groups.
8842      */
8843     TCGCond cond;
8844 
8845     switch (opcode) {
8846     case 0x1: /* SQADD */
8847         if (u) {
8848             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8849         } else {
8850             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8851         }
8852         break;
8853     case 0x5: /* SQSUB */
8854         if (u) {
8855             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8856         } else {
8857             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8858         }
8859         break;
8860     case 0x6: /* CMGT, CMHI */
8861         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
8862          * We implement this using setcond (test) and then negating.
8863          */
8864         cond = u ? TCG_COND_GTU : TCG_COND_GT;
8865     do_cmop:
8866         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8867         tcg_gen_neg_i64(tcg_rd, tcg_rd);
8868         break;
8869     case 0x7: /* CMGE, CMHS */
8870         cond = u ? TCG_COND_GEU : TCG_COND_GE;
8871         goto do_cmop;
8872     case 0x11: /* CMTST, CMEQ */
8873         if (u) {
8874             cond = TCG_COND_EQ;
8875             goto do_cmop;
8876         }
8877         gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8878         break;
8879     case 0x8: /* SSHL, USHL */
8880         if (u) {
8881             gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
8882         } else {
8883             gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
8884         }
8885         break;
8886     case 0x9: /* SQSHL, UQSHL */
8887         if (u) {
8888             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8889         } else {
8890             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8891         }
8892         break;
8893     case 0xa: /* SRSHL, URSHL */
8894         if (u) {
8895             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8896         } else {
8897             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8898         }
8899         break;
8900     case 0xb: /* SQRSHL, UQRSHL */
8901         if (u) {
8902             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8903         } else {
8904             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8905         }
8906         break;
8907     case 0x10: /* ADD, SUB */
8908         if (u) {
8909             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8910         } else {
8911             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8912         }
8913         break;
8914     default:
8915         g_assert_not_reached();
8916     }
8917 }
8918 
8919 /* Handle the 3-same-operands float operations; shared by the scalar
8920  * and vector encodings. The caller must filter out any encodings
8921  * not allocated for the encoding it is dealing with.
8922  */
8923 static void handle_3same_float(DisasContext *s, int size, int elements,
8924                                int fpopcode, int rd, int rn, int rm)
8925 {
8926     int pass;
8927     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
8928 
8929     for (pass = 0; pass < elements; pass++) {
8930         if (size) {
8931             /* Double */
8932             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8933             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8934             TCGv_i64 tcg_res = tcg_temp_new_i64();
8935 
8936             read_vec_element(s, tcg_op1, rn, pass, MO_64);
8937             read_vec_element(s, tcg_op2, rm, pass, MO_64);
8938 
8939             switch (fpopcode) {
8940             case 0x39: /* FMLS */
8941                 /* As usual for ARM, separate negation for fused multiply-add */
8942                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
8943                 /* fall through */
8944             case 0x19: /* FMLA */
8945                 read_vec_element(s, tcg_res, rd, pass, MO_64);
8946                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
8947                                        tcg_res, fpst);
8948                 break;
8949             case 0x18: /* FMAXNM */
8950                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8951                 break;
8952             case 0x1a: /* FADD */
8953                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8954                 break;
8955             case 0x1b: /* FMULX */
8956                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
8957                 break;
8958             case 0x1c: /* FCMEQ */
8959                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8960                 break;
8961             case 0x1e: /* FMAX */
8962                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8963                 break;
8964             case 0x1f: /* FRECPS */
8965                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8966                 break;
8967             case 0x38: /* FMINNM */
8968                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8969                 break;
8970             case 0x3a: /* FSUB */
8971                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8972                 break;
8973             case 0x3e: /* FMIN */
8974                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8975                 break;
8976             case 0x3f: /* FRSQRTS */
8977                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8978                 break;
8979             case 0x5b: /* FMUL */
8980                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
8981                 break;
8982             case 0x5c: /* FCMGE */
8983                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8984                 break;
8985             case 0x5d: /* FACGE */
8986                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8987                 break;
8988             case 0x5f: /* FDIV */
8989                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
8990                 break;
8991             case 0x7a: /* FABD */
8992                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8993                 gen_helper_vfp_absd(tcg_res, tcg_res);
8994                 break;
8995             case 0x7c: /* FCMGT */
8996                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8997                 break;
8998             case 0x7d: /* FACGT */
8999                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
9000                 break;
9001             default:
9002                 g_assert_not_reached();
9003             }
9004 
9005             write_vec_element(s, tcg_res, rd, pass, MO_64);
9006         } else {
9007             /* Single */
9008             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9009             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9010             TCGv_i32 tcg_res = tcg_temp_new_i32();
9011 
9012             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9013             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9014 
9015             switch (fpopcode) {
9016             case 0x39: /* FMLS */
9017                 /* As usual for ARM, separate negation for fused multiply-add */
9018                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
9019                 /* fall through */
9020             case 0x19: /* FMLA */
9021                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9022                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
9023                                        tcg_res, fpst);
9024                 break;
9025             case 0x1a: /* FADD */
9026                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
9027                 break;
9028             case 0x1b: /* FMULX */
9029                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
9030                 break;
9031             case 0x1c: /* FCMEQ */
9032                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9033                 break;
9034             case 0x1e: /* FMAX */
9035                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
9036                 break;
9037             case 0x1f: /* FRECPS */
9038                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9039                 break;
9040             case 0x18: /* FMAXNM */
9041                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
9042                 break;
9043             case 0x38: /* FMINNM */
9044                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
9045                 break;
9046             case 0x3a: /* FSUB */
9047                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9048                 break;
9049             case 0x3e: /* FMIN */
9050                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
9051                 break;
9052             case 0x3f: /* FRSQRTS */
9053                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9054                 break;
9055             case 0x5b: /* FMUL */
9056                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
9057                 break;
9058             case 0x5c: /* FCMGE */
9059                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9060                 break;
9061             case 0x5d: /* FACGE */
9062                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9063                 break;
9064             case 0x5f: /* FDIV */
9065                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
9066                 break;
9067             case 0x7a: /* FABD */
9068                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
9069                 gen_helper_vfp_abss(tcg_res, tcg_res);
9070                 break;
9071             case 0x7c: /* FCMGT */
9072                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9073                 break;
9074             case 0x7d: /* FACGT */
9075                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
9076                 break;
9077             default:
9078                 g_assert_not_reached();
9079             }
9080 
9081             if (elements == 1) {
9082                 /* scalar single so clear high part */
9083                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9084 
9085                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
9086                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
9087             } else {
9088                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9089             }
9090         }
9091     }
9092 
9093     clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
9094 }
9095 
9096 /* AdvSIMD scalar three same
9097  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
9098  * +-----+---+-----------+------+---+------+--------+---+------+------+
9099  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9100  * +-----+---+-----------+------+---+------+--------+---+------+------+
9101  */
static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    TCGv_i64 tcg_rd;

    if (opcode >= 0x18) {
        /* Floating point: U, size[1] and opcode indicate operation */
        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
        /* Only the fp ops listed below exist in the scalar group */
        switch (fpopcode) {
        case 0x1b: /* FMULX */
        case 0x1f: /* FRECPS */
        case 0x3f: /* FRSQRTS */
        case 0x5d: /* FACGE */
        case 0x7d: /* FACGT */
        case 0x1c: /* FCMEQ */
        case 0x5c: /* FCMGE */
        case 0x7c: /* FCMGT */
        case 0x7a: /* FABD */
            break;
        default:
            unallocated_encoding(s);
            return;
        }

        if (!fp_access_check(s)) {
            return;
        }

        /* size[0] picks single vs double precision; "1" => scalar form */
        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
        return;
    }

    /* Integer ops: validate the opcode/size combinations first */
    switch (opcode) {
    case 0x1: /* SQADD, UQADD */
    case 0x5: /* SQSUB, UQSUB */
    case 0x9: /* SQSHL, UQSHL */
    case 0xb: /* SQRSHL, UQRSHL */
        break;
    case 0x8: /* SSHL, USHL */
    case 0xa: /* SRSHL, URSHL */
    case 0x6: /* CMGT, CMHI */
    case 0x7: /* CMGE, CMHS */
    case 0x11: /* CMTST, CMEQ */
    case 0x10: /* ADD, SUB (vector) */
        /* These exist only as 64-bit scalar operations */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x16: /* SQDMULH, SQRDMULH (vector) */
        /* Only 16-bit and 32-bit element sizes are allocated */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_rd = tcg_temp_new_i64();

    if (size == 3) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);

        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
    } else {
        /* Do a single operation on the lowest element in the vector.
         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
         * no side effects for all these operations.
         * OPTME: special-purpose helpers would avoid doing some
         * unnecessary work in the helper for the 8 and 16 bit cases.
         */
        NeonGenTwoOpEnvFn *genenvfn;
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rm = tcg_temp_new_i32();
        TCGv_i32 tcg_rd32 = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_rn, rn, 0, size);
        read_vec_element_i32(s, tcg_rm, rm, 0, size);

        /* Select the saturating helper by opcode, element size and
         * signedness; tables are indexed [size][u].
         */
        switch (opcode) {
        case 0x1: /* SQADD, UQADD */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x5: /* SQSUB, UQSUB */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x9: /* SQSHL, UQSHL */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0xb: /* SQRSHL, UQRSHL */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x16: /* SQDMULH, SQRDMULH */
        {
            /* Only sizes 1 and 2 reach here (checked in decode above),
             * so the table is indexed [size - 1][u].
             */
            static NeonGenTwoOpEnvFn * const fns[2][2] = {
                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
            };
            assert(size == 1 || size == 2);
            genenvfn = fns[size - 1][u];
            break;
        }
        default:
            g_assert_not_reached();
        }

        /* Helper produces a 32-bit result; zero-extend for write_fp_dreg */
        genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
    }

    write_fp_dreg(s, rd, tcg_rd);
}
9254 
9255 /* AdvSIMD scalar three same FP16
9256  *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
9257  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9258  * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
9259  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9260  * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9261  * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9262  */
9263 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9264                                                   uint32_t insn)
9265 {
9266     int rd = extract32(insn, 0, 5);
9267     int rn = extract32(insn, 5, 5);
9268     int opcode = extract32(insn, 11, 3);
9269     int rm = extract32(insn, 16, 5);
9270     bool u = extract32(insn, 29, 1);
9271     bool a = extract32(insn, 23, 1);
9272     int fpopcode = opcode | (a << 3) |  (u << 4);
9273     TCGv_ptr fpst;
9274     TCGv_i32 tcg_op1;
9275     TCGv_i32 tcg_op2;
9276     TCGv_i32 tcg_res;
9277 
9278     switch (fpopcode) {
9279     case 0x03: /* FMULX */
9280     case 0x04: /* FCMEQ (reg) */
9281     case 0x07: /* FRECPS */
9282     case 0x0f: /* FRSQRTS */
9283     case 0x14: /* FCMGE (reg) */
9284     case 0x15: /* FACGE */
9285     case 0x1a: /* FABD */
9286     case 0x1c: /* FCMGT (reg) */
9287     case 0x1d: /* FACGT */
9288         break;
9289     default:
9290         unallocated_encoding(s);
9291         return;
9292     }
9293 
9294     if (!dc_isar_feature(aa64_fp16, s)) {
9295         unallocated_encoding(s);
9296     }
9297 
9298     if (!fp_access_check(s)) {
9299         return;
9300     }
9301 
9302     fpst = fpstatus_ptr(FPST_FPCR_F16);
9303 
9304     tcg_op1 = read_fp_hreg(s, rn);
9305     tcg_op2 = read_fp_hreg(s, rm);
9306     tcg_res = tcg_temp_new_i32();
9307 
9308     switch (fpopcode) {
9309     case 0x03: /* FMULX */
9310         gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9311         break;
9312     case 0x04: /* FCMEQ (reg) */
9313         gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9314         break;
9315     case 0x07: /* FRECPS */
9316         gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9317         break;
9318     case 0x0f: /* FRSQRTS */
9319         gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9320         break;
9321     case 0x14: /* FCMGE (reg) */
9322         gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9323         break;
9324     case 0x15: /* FACGE */
9325         gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9326         break;
9327     case 0x1a: /* FABD */
9328         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9329         tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9330         break;
9331     case 0x1c: /* FCMGT (reg) */
9332         gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9333         break;
9334     case 0x1d: /* FACGT */
9335         gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9336         break;
9337     default:
9338         g_assert_not_reached();
9339     }
9340 
9341     write_fp_sreg(s, rd, tcg_res);
9342 }
9343 
9344 /* AdvSIMD scalar three same extra
9345  *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
9346  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9347  * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
9348  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9349  */
static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
                                                   uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 4);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    TCGv_i32 ele1, ele2, ele3;
    TCGv_i64 res;
    bool feature;

    /* Fold U into the decode key; only U=1 encodings are allocated here */
    switch (u * 16 + opcode) {
    case 0x10: /* SQRDMLAH (vector) */
    case 0x11: /* SQRDMLSH (vector) */
        /* Only 16-bit and 32-bit element sizes exist */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_rdm, s);
        break;
    default:
        unallocated_encoding(s);
        return;
    }
    if (!feature) {
        unallocated_encoding(s);
        return;
    }
    if (!fp_access_check(s)) {
        return;
    }

    /* Do a single operation on the lowest element in the vector.
     * We use the standard Neon helpers and rely on 0 OP 0 == 0
     * with no side effects for all these operations.
     * OPTME: special-purpose helpers would avoid doing some
     * unnecessary work in the helper for the 16 bit cases.
     */
    ele1 = tcg_temp_new_i32();
    ele2 = tcg_temp_new_i32();
    ele3 = tcg_temp_new_i32();

    /* ele3 is both accumulator input and destination */
    read_vec_element_i32(s, ele1, rn, 0, size);
    read_vec_element_i32(s, ele2, rm, 0, size);
    read_vec_element_i32(s, ele3, rd, 0, size);

    switch (opcode) {
    case 0x0: /* SQRDMLAH */
        if (size == 1) {
            gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
        } else {
            gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
        }
        break;
    case 0x1: /* SQRDMLSH */
        if (size == 1) {
            gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
        } else {
            gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Zero-extend into a 64-bit value so the high bits of Vd are cleared */
    res = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(res, ele3);
    write_fp_dreg(s, rd, res);
}
9421 
static void handle_2misc_64(DisasContext *s, int opcode, bool u,
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
                            TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
{
    /* Handle 64->64 opcodes which are shared between the scalar and
     * vector 2-reg-misc groups. We cover every integer opcode where size == 3
     * is valid in either group and also the double-precision fp ops.
     * The caller only need provide tcg_rmode and tcg_fpstatus if the op
     * requires them.
     */
    TCGCond cond;

    switch (opcode) {
    case 0x4: /* CLS, CLZ */
        if (u) {
            tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
        } else {
            tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
        }
        break;
    case 0x5: /* NOT */
        /* This opcode is shared with CNT and RBIT but we have earlier
         * enforced that size == 3 if and only if this is the NOT insn.
         */
        tcg_gen_not_i64(tcg_rd, tcg_rn);
        break;
    case 0x7: /* SQABS, SQNEG */
        if (u) {
            gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
        } else {
            gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
        }
        break;
    case 0xa: /* CMLT */
        /* 64 bit integer comparison against zero, result is
         * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
         * subtracting 1.
         */
        cond = TCG_COND_LT;
    do_cmop:
        /* setcond yields 0/1; negate to get the all-zeros/all-ones mask */
        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x8: /* CMGT, CMGE */
        cond = u ? TCG_COND_GE : TCG_COND_GT;
        goto do_cmop;
    case 0x9: /* CMEQ, CMLE */
        cond = u ? TCG_COND_LE : TCG_COND_EQ;
        goto do_cmop;
    case 0xb: /* ABS, NEG */
        if (u) {
            tcg_gen_neg_i64(tcg_rd, tcg_rn);
        } else {
            tcg_gen_abs_i64(tcg_rd, tcg_rn);
        }
        break;
    case 0x2f: /* FABS */
        gen_helper_vfp_absd(tcg_rd, tcg_rn);
        break;
    case 0x6f: /* FNEG */
        gen_helper_vfp_negd(tcg_rd, tcg_rn);
        break;
    case 0x7f: /* FSQRT */
        gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
        break;
    case 0x1a: /* FCVTNS */
    case 0x1b: /* FCVTMS */
    case 0x1c: /* FCVTAS */
    case 0x3a: /* FCVTPS */
    case 0x3b: /* FCVTZS */
        /* Shift amount 0: plain fp->int conversion. The individual ops
         * differ only in rounding mode, presumably already selected by
         * the caller via tcg_rmode -- not applied here.
         */
        gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
        break;
    case 0x5a: /* FCVTNU */
    case 0x5b: /* FCVTMU */
    case 0x5c: /* FCVTAU */
    case 0x7a: /* FCVTPU */
    case 0x7b: /* FCVTZU */
        gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
        break;
    case 0x18: /* FRINTN */
    case 0x19: /* FRINTM */
    case 0x38: /* FRINTP */
    case 0x39: /* FRINTZ */
    case 0x58: /* FRINTA */
    case 0x79: /* FRINTI */
        gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
        break;
    case 0x59: /* FRINTX */
        /* Like FRINTI but also raises Inexact when the value changes */
        gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
        break;
    case 0x1e: /* FRINT32Z */
    case 0x5e: /* FRINT32X */
        gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
        break;
    case 0x1f: /* FRINT64Z */
    case 0x5f: /* FRINT64X */
        gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
        break;
    default:
        g_assert_not_reached();
    }
}
9524 
static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
                                   bool is_scalar, bool is_u, bool is_q,
                                   int size, int rn, int rd)
{
    /* Floating-point compare against zero (FCMGT/FCMGE/FCMEQ/FCMLE/FCMLT).
     * LT and LE are implemented by swapping the operands of the GT/GE
     * helpers (x < 0 iff 0 > x), signalled by 'swap' below.
     */
    bool is_double = (size == MO_64);
    TCGv_ptr fpst;

    if (!fp_access_check(s)) {
        return;
    }

    fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    if (is_double) {
        TCGv_i64 tcg_op = tcg_temp_new_i64();
        TCGv_i64 tcg_zero = tcg_constant_i64(0);
        TCGv_i64 tcg_res = tcg_temp_new_i64();
        NeonGenTwoDoubleOpFn *genfn;
        bool swap = false;
        int pass;

        switch (opcode) {
        case 0x2e: /* FCMLT (zero) */
            swap = true;
            /* fallthrough */
        case 0x2c: /* FCMGT (zero) */
            genfn = gen_helper_neon_cgt_f64;
            break;
        case 0x2d: /* FCMEQ (zero) */
            genfn = gen_helper_neon_ceq_f64;
            break;
        case 0x6d: /* FCMLE (zero) */
            swap = true;
            /* fall through */
        case 0x6c: /* FCMGE (zero) */
            genfn = gen_helper_neon_cge_f64;
            break;
        default:
            g_assert_not_reached();
        }

        /* Scalar: one 64-bit element; vector: both halves of the Q reg */
        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
            read_vec_element(s, tcg_op, rn, pass, MO_64);
            if (swap) {
                genfn(tcg_res, tcg_zero, tcg_op, fpst);
            } else {
                genfn(tcg_res, tcg_op, tcg_zero, fpst);
            }
            write_vec_element(s, tcg_res, rd, pass, MO_64);
        }

        clear_vec_high(s, !is_scalar, rd);
    } else {
        TCGv_i32 tcg_op = tcg_temp_new_i32();
        TCGv_i32 tcg_zero = tcg_constant_i32(0);
        TCGv_i32 tcg_res = tcg_temp_new_i32();
        NeonGenTwoSingleOpFn *genfn;
        bool swap = false;
        int pass, maxpasses;

        /* Same helper-selection logic as above, for f16 and f32 */
        if (size == MO_16) {
            switch (opcode) {
            case 0x2e: /* FCMLT (zero) */
                swap = true;
                /* fall through */
            case 0x2c: /* FCMGT (zero) */
                genfn = gen_helper_advsimd_cgt_f16;
                break;
            case 0x2d: /* FCMEQ (zero) */
                genfn = gen_helper_advsimd_ceq_f16;
                break;
            case 0x6d: /* FCMLE (zero) */
                swap = true;
                /* fall through */
            case 0x6c: /* FCMGE (zero) */
                genfn = gen_helper_advsimd_cge_f16;
                break;
            default:
                g_assert_not_reached();
            }
        } else {
            switch (opcode) {
            case 0x2e: /* FCMLT (zero) */
                swap = true;
                /* fall through */
            case 0x2c: /* FCMGT (zero) */
                genfn = gen_helper_neon_cgt_f32;
                break;
            case 0x2d: /* FCMEQ (zero) */
                genfn = gen_helper_neon_ceq_f32;
                break;
            case 0x6d: /* FCMLE (zero) */
                swap = true;
                /* fall through */
            case 0x6c: /* FCMGE (zero) */
                genfn = gen_helper_neon_cge_f32;
                break;
            default:
                g_assert_not_reached();
            }
        }

        if (is_scalar) {
            maxpasses = 1;
        } else {
            /* Number of elements: (64 or 128 bits) / element size */
            int vector_size = 8 << is_q;
            maxpasses = vector_size >> size;
        }

        for (pass = 0; pass < maxpasses; pass++) {
            read_vec_element_i32(s, tcg_op, rn, pass, size);
            if (swap) {
                genfn(tcg_res, tcg_zero, tcg_op, fpst);
            } else {
                genfn(tcg_res, tcg_op, tcg_zero, fpst);
            }
            if (is_scalar) {
                write_fp_sreg(s, rd, tcg_res);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, size);
            }
        }

        if (!is_scalar) {
            clear_vec_high(s, is_q, rd);
        }
    }
}
9653 
9654 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
9655                                     bool is_scalar, bool is_u, bool is_q,
9656                                     int size, int rn, int rd)
9657 {
9658     bool is_double = (size == 3);
9659     TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9660 
9661     if (is_double) {
9662         TCGv_i64 tcg_op = tcg_temp_new_i64();
9663         TCGv_i64 tcg_res = tcg_temp_new_i64();
9664         int pass;
9665 
9666         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9667             read_vec_element(s, tcg_op, rn, pass, MO_64);
9668             switch (opcode) {
9669             case 0x3d: /* FRECPE */
9670                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
9671                 break;
9672             case 0x3f: /* FRECPX */
9673                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
9674                 break;
9675             case 0x7d: /* FRSQRTE */
9676                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
9677                 break;
9678             default:
9679                 g_assert_not_reached();
9680             }
9681             write_vec_element(s, tcg_res, rd, pass, MO_64);
9682         }
9683         clear_vec_high(s, !is_scalar, rd);
9684     } else {
9685         TCGv_i32 tcg_op = tcg_temp_new_i32();
9686         TCGv_i32 tcg_res = tcg_temp_new_i32();
9687         int pass, maxpasses;
9688 
9689         if (is_scalar) {
9690             maxpasses = 1;
9691         } else {
9692             maxpasses = is_q ? 4 : 2;
9693         }
9694 
9695         for (pass = 0; pass < maxpasses; pass++) {
9696             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
9697 
9698             switch (opcode) {
9699             case 0x3c: /* URECPE */
9700                 gen_helper_recpe_u32(tcg_res, tcg_op);
9701                 break;
9702             case 0x3d: /* FRECPE */
9703                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
9704                 break;
9705             case 0x3f: /* FRECPX */
9706                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
9707                 break;
9708             case 0x7d: /* FRSQRTE */
9709                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
9710                 break;
9711             default:
9712                 g_assert_not_reached();
9713             }
9714 
9715             if (is_scalar) {
9716                 write_fp_sreg(s, rd, tcg_res);
9717             } else {
9718                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9719             }
9720         }
9721         if (!is_scalar) {
9722             clear_vec_high(s, is_q, rd);
9723         }
9724     }
9725 }
9726 
static void handle_2misc_narrow(DisasContext *s, bool scalar,
                                int opcode, bool u, bool is_q,
                                int size, int rn, int rd)
{
    /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
     * in the source becomes a size element in the destination).
     */
    int pass;
    TCGv_i32 tcg_res[2];
    /* The "2" (is_q) variants write the upper half of Vd; the others
     * write the lower half and zero the rest.
     */
    int destelt = is_q ? 2 : 0;
    int passes = scalar ? 1 : 2;

    if (scalar) {
        /* Only pass 0 computes a result; pass 1 writes zeroes */
        tcg_res[1] = tcg_constant_i32(0);
    }

    for (pass = 0; pass < passes; pass++) {
        TCGv_i64 tcg_op = tcg_temp_new_i64();
        /* Exactly one of genfn (plain) or genenvfn (needs cpu_env) is
         * set for the table-driven cases; the fp conversions below emit
         * their helper calls directly and leave both NULL.
         */
        NeonGenNarrowFn *genfn = NULL;
        NeonGenNarrowEnvFn *genenvfn = NULL;

        if (scalar) {
            read_vec_element(s, tcg_op, rn, pass, size + 1);
        } else {
            read_vec_element(s, tcg_op, rn, pass, MO_64);
        }
        tcg_res[pass] = tcg_temp_new_i32();

        switch (opcode) {
        case 0x12: /* XTN, SQXTUN */
        {
            static NeonGenNarrowFn * const xtnfns[3] = {
                gen_helper_neon_narrow_u8,
                gen_helper_neon_narrow_u16,
                tcg_gen_extrl_i64_i32,
            };
            static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
                gen_helper_neon_unarrow_sat8,
                gen_helper_neon_unarrow_sat16,
                gen_helper_neon_unarrow_sat32,
            };
            if (u) {
                genenvfn = sqxtunfns[size];
            } else {
                genfn = xtnfns[size];
            }
            break;
        }
        case 0x14: /* SQXTN, UQXTN */
        {
            static NeonGenNarrowEnvFn * const fns[3][2] = {
                { gen_helper_neon_narrow_sat_s8,
                  gen_helper_neon_narrow_sat_u8 },
                { gen_helper_neon_narrow_sat_s16,
                  gen_helper_neon_narrow_sat_u16 },
                { gen_helper_neon_narrow_sat_s32,
                  gen_helper_neon_narrow_sat_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x16: /* FCVTN, FCVTN2 */
            /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
            if (size == 2) {
                gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
            } else {
                /* Two f32 -> two f16, packed into one 32-bit result */
                TCGv_i32 tcg_lo = tcg_temp_new_i32();
                TCGv_i32 tcg_hi = tcg_temp_new_i32();
                TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
                TCGv_i32 ahp = get_ahp_flag();

                tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
                gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
                gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
                tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
            }
            break;
        case 0x36: /* BFCVTN, BFCVTN2 */
            {
                TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
                gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
            }
            break;
        case 0x56:  /* FCVTXN, FCVTXN2 */
            /* 64 bit to 32 bit float conversion
             * with von Neumann rounding (round to odd)
             */
            assert(size == 2);
            gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
            break;
        default:
            g_assert_not_reached();
        }

        if (genfn) {
            genfn(tcg_res[pass], tcg_op);
        } else if (genenvfn) {
            genenvfn(tcg_res[pass], cpu_env, tcg_op);
        }
    }

    /* Write both 32-bit results (results computed first so that when
     * rd == rn the second source read is not corrupted).
     */
    for (pass = 0; pass < 2; pass++) {
        write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
    }
    clear_vec_high(s, is_q, rd);
}
9833 
9834 /* Remaining saturating accumulating ops */
9835 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9836                                 bool is_q, int size, int rn, int rd)
9837 {
9838     bool is_double = (size == 3);
9839 
9840     if (is_double) {
9841         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9842         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9843         int pass;
9844 
9845         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9846             read_vec_element(s, tcg_rn, rn, pass, MO_64);
9847             read_vec_element(s, tcg_rd, rd, pass, MO_64);
9848 
9849             if (is_u) { /* USQADD */
9850                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9851             } else { /* SUQADD */
9852                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9853             }
9854             write_vec_element(s, tcg_rd, rd, pass, MO_64);
9855         }
9856         clear_vec_high(s, !is_scalar, rd);
9857     } else {
9858         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9859         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9860         int pass, maxpasses;
9861 
9862         if (is_scalar) {
9863             maxpasses = 1;
9864         } else {
9865             maxpasses = is_q ? 4 : 2;
9866         }
9867 
9868         for (pass = 0; pass < maxpasses; pass++) {
9869             if (is_scalar) {
9870                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
9871                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
9872             } else {
9873                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9874                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9875             }
9876 
9877             if (is_u) { /* USQADD */
9878                 switch (size) {
9879                 case 0:
9880                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9881                     break;
9882                 case 1:
9883                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9884                     break;
9885                 case 2:
9886                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9887                     break;
9888                 default:
9889                     g_assert_not_reached();
9890                 }
9891             } else { /* SUQADD */
9892                 switch (size) {
9893                 case 0:
9894                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9895                     break;
9896                 case 1:
9897                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9898                     break;
9899                 case 2:
9900                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9901                     break;
9902                 default:
9903                     g_assert_not_reached();
9904                 }
9905             }
9906 
9907             if (is_scalar) {
9908                 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64);
9909             }
9910             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9911         }
9912         clear_vec_high(s, is_q, rd);
9913     }
9914 }
9915 
/* AdvSIMD scalar two reg misc
 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 *
 * Decode the scalar two-register-miscellaneous group: validate the
 * encoding, then either dispatch to a shared handler or generate the
 * operation inline below.
 */
static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 12, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    bool is_fcvt = false;
    int rmode;
    TCGv_i32 tcg_rmode;
    TCGv_ptr tcg_fpstatus;

    switch (opcode) {
    case 0x3: /* USQADD / SUQADD */
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_satacc(s, true, u, false, size, rn, rd);
        return;
    case 0x7: /* SQABS / SQNEG */
        break;
    case 0xa: /* CMLT */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x8: /* CMGT, CMGE */
    case 0x9: /* CMEQ, CMLE */
    case 0xb: /* ABS, NEG */
        /* The scalar compare/abs/neg forms only exist for 64-bit elements */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x12: /* SQXTUN */
        if (!u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x14: /* SQXTN, UQXTN */
        /* Narrowing: the source is one size up, so size==3 has no output */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
        return;
    case 0xc ... 0xf:
    case 0x16 ... 0x1d:
    case 0x1f:
        /* Floating point: U, size[1] and opcode indicate operation;
         * size[0] indicates single or double precision.
         */
        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
        size = extract32(size, 0, 1) ? 3 : 2;
        switch (opcode) {
        case 0x2c: /* FCMGT (zero) */
        case 0x2d: /* FCMEQ (zero) */
        case 0x2e: /* FCMLT (zero) */
        case 0x6c: /* FCMGE (zero) */
        case 0x6d: /* FCMLE (zero) */
            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1d: /* SCVTF */
        case 0x5d: /* UCVTF */
        {
            bool is_signed = (opcode == 0x1d);
            if (!fp_access_check(s)) {
                return;
            }
            handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
            return;
        }
        case 0x3d: /* FRECPE */
        case 0x3f: /* FRECPX */
        case 0x7d: /* FRSQRTE */
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
            is_fcvt = true;
            /* Rounding mode is encoded in opcode bits 5 and 0 */
            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
            break;
        case 0x1c: /* FCVTAS */
        case 0x5c: /* FCVTAU */
            /* TIEAWAY doesn't fit in the usual rounding mode encoding */
            is_fcvt = true;
            rmode = FPROUNDING_TIEAWAY;
            break;
        case 0x56: /* FCVTXN, FCVTXN2 */
            /* FCVTXN is double-to-single only, i.e. requires sz == 1
             * (remapped size == 3 here); size == 2 is unallocated.
             */
            if (size == 2) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
            return;
        default:
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (is_fcvt) {
        /* Install the per-insn rounding mode; restored at the end below. */
        tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
        tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
    } else {
        tcg_fpstatus = NULL;
        tcg_rmode = NULL;
    }

    if (size == 3) {
        /* 64-bit operand: shared with the vector two-reg-misc path */
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rd = tcg_temp_new_i64();

        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
        write_fp_dreg(s, rd, tcg_rd);
    } else {
        /* 32-bit (or smaller, for SQABS/SQNEG) operand, handled inline */
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rd = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_rn, rn, 0, size);

        switch (opcode) {
        case 0x7: /* SQABS, SQNEG */
        {
            NeonGenOneOpEnvFn *genfn;
            static NeonGenOneOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
            };
            genfn = fns[size][u];
            genfn(tcg_rd, cpu_env, tcg_rn);
            break;
        }
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x1c: /* FCVTAS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
                                 tcg_fpstatus);
            break;
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x5c: /* FCVTAU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
                                 tcg_fpstatus);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_sreg(s, rd, tcg_rd);
    }

    if (is_fcvt) {
        /* Restore the FPCR rounding mode we overrode above */
        gen_restore_rmode(tcg_rmode, tcg_fpstatus);
    }
}
10108 
10109 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
10110 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
10111                                  int immh, int immb, int opcode, int rn, int rd)
10112 {
10113     int size = 32 - clz32(immh) - 1;
10114     int immhb = immh << 3 | immb;
10115     int shift = 2 * (8 << size) - immhb;
10116     GVecGen2iFn *gvec_fn;
10117 
10118     if (extract32(immh, 3, 1) && !is_q) {
10119         unallocated_encoding(s);
10120         return;
10121     }
10122     tcg_debug_assert(size <= 3);
10123 
10124     if (!fp_access_check(s)) {
10125         return;
10126     }
10127 
10128     switch (opcode) {
10129     case 0x02: /* SSRA / USRA (accumulate) */
10130         gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
10131         break;
10132 
10133     case 0x08: /* SRI */
10134         gvec_fn = gen_gvec_sri;
10135         break;
10136 
10137     case 0x00: /* SSHR / USHR */
10138         if (is_u) {
10139             if (shift == 8 << size) {
10140                 /* Shift count the same size as element size produces zero.  */
10141                 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
10142                                      is_q ? 16 : 8, vec_full_reg_size(s), 0);
10143                 return;
10144             }
10145             gvec_fn = tcg_gen_gvec_shri;
10146         } else {
10147             /* Shift count the same size as element size produces all sign.  */
10148             if (shift == 8 << size) {
10149                 shift -= 1;
10150             }
10151             gvec_fn = tcg_gen_gvec_sari;
10152         }
10153         break;
10154 
10155     case 0x04: /* SRSHR / URSHR (rounding) */
10156         gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
10157         break;
10158 
10159     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10160         gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
10161         break;
10162 
10163     default:
10164         g_assert_not_reached();
10165     }
10166 
10167     gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
10168 }
10169 
10170 /* SHL/SLI - Vector shift left */
10171 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10172                                  int immh, int immb, int opcode, int rn, int rd)
10173 {
10174     int size = 32 - clz32(immh) - 1;
10175     int immhb = immh << 3 | immb;
10176     int shift = immhb - (8 << size);
10177 
10178     /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10179     assert(size >= 0 && size <= 3);
10180 
10181     if (extract32(immh, 3, 1) && !is_q) {
10182         unallocated_encoding(s);
10183         return;
10184     }
10185 
10186     if (!fp_access_check(s)) {
10187         return;
10188     }
10189 
10190     if (insert) {
10191         gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
10192     } else {
10193         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10194     }
10195 }
10196 
/* SSHLL/USHLL - Vector shift left with widening.
 * Reads one 64-bit half of Rn, widens each element to twice its size,
 * shifts left, and writes the full 128-bit result to Rd.
 */
static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
                                 int immh, int immb, int opcode, int rn, int rd)
{
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    int shift = immhb - (8 << size);
    int dsize = 64;
    int esize = 8 << size;
    int elements = dsize/esize;
    TCGv_i64 tcg_rn = tcg_temp_new_i64();
    TCGv_i64 tcg_rd = tcg_temp_new_i64();
    int i;

    /* 64-bit elements cannot widen further: unallocated */
    if (size >= 3) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* For the LL variants the store is larger than the load,
     * so if rd == rn we would overwrite parts of our input.
     * So load everything right now and use shifts in the main loop.
     */
    read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);

    for (i = 0; i < elements; i++) {
        /* Move element i down to bits [esize-1:0] ... */
        tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
        /* ... extend it (extend-type is size for UXT*, size | 4 for SXT*) */
        ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
        /* ... apply the left shift, and store at double the element size */
        tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
        write_vec_element(s, tcg_rd, rd, i, size + 1);
    }
}
10233 
10234 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10235 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10236                                  int immh, int immb, int opcode, int rn, int rd)
10237 {
10238     int immhb = immh << 3 | immb;
10239     int size = 32 - clz32(immh) - 1;
10240     int dsize = 64;
10241     int esize = 8 << size;
10242     int elements = dsize/esize;
10243     int shift = (2 * esize) - immhb;
10244     bool round = extract32(opcode, 0, 1);
10245     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10246     TCGv_i64 tcg_round;
10247     int i;
10248 
10249     if (extract32(immh, 3, 1)) {
10250         unallocated_encoding(s);
10251         return;
10252     }
10253 
10254     if (!fp_access_check(s)) {
10255         return;
10256     }
10257 
10258     tcg_rn = tcg_temp_new_i64();
10259     tcg_rd = tcg_temp_new_i64();
10260     tcg_final = tcg_temp_new_i64();
10261     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10262 
10263     if (round) {
10264         tcg_round = tcg_constant_i64(1ULL << (shift - 1));
10265     } else {
10266         tcg_round = NULL;
10267     }
10268 
10269     for (i = 0; i < elements; i++) {
10270         read_vec_element(s, tcg_rn, rn, i, size+1);
10271         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10272                                 false, true, size+1, shift);
10273 
10274         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10275     }
10276 
10277     if (!is_q) {
10278         write_vec_element(s, tcg_final, rd, 0, MO_64);
10279     } else {
10280         write_vec_element(s, tcg_final, rd, 1, MO_64);
10281     }
10282 
10283     clear_vec_high(s, is_q, rd);
10284 }
10285 
10286 
10287 /* AdvSIMD shift by immediate
10288  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10289  * +---+---+---+-------------+------+------+--------+---+------+------+
10290  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10291  * +---+---+---+-------------+------+------+--------+---+------+------+
10292  */
10293 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10294 {
10295     int rd = extract32(insn, 0, 5);
10296     int rn = extract32(insn, 5, 5);
10297     int opcode = extract32(insn, 11, 5);
10298     int immb = extract32(insn, 16, 3);
10299     int immh = extract32(insn, 19, 4);
10300     bool is_u = extract32(insn, 29, 1);
10301     bool is_q = extract32(insn, 30, 1);
10302 
10303     /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
10304     assert(immh != 0);
10305 
10306     switch (opcode) {
10307     case 0x08: /* SRI */
10308         if (!is_u) {
10309             unallocated_encoding(s);
10310             return;
10311         }
10312         /* fall through */
10313     case 0x00: /* SSHR / USHR */
10314     case 0x02: /* SSRA / USRA (accumulate) */
10315     case 0x04: /* SRSHR / URSHR (rounding) */
10316     case 0x06: /* SRSRA / URSRA (accum + rounding) */
10317         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10318         break;
10319     case 0x0a: /* SHL / SLI */
10320         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10321         break;
10322     case 0x10: /* SHRN */
10323     case 0x11: /* RSHRN / SQRSHRUN */
10324         if (is_u) {
10325             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10326                                    opcode, rn, rd);
10327         } else {
10328             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10329         }
10330         break;
10331     case 0x12: /* SQSHRN / UQSHRN */
10332     case 0x13: /* SQRSHRN / UQRSHRN */
10333         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10334                                opcode, rn, rd);
10335         break;
10336     case 0x14: /* SSHLL / USHLL */
10337         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10338         break;
10339     case 0x1c: /* SCVTF / UCVTF */
10340         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10341                                      opcode, rn, rd);
10342         break;
10343     case 0xc: /* SQSHLU */
10344         if (!is_u) {
10345             unallocated_encoding(s);
10346             return;
10347         }
10348         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10349         break;
10350     case 0xe: /* SQSHL, UQSHL */
10351         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10352         break;
10353     case 0x1f: /* FCVTZS/ FCVTZU */
10354         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10355         return;
10356     default:
10357         unallocated_encoding(s);
10358         return;
10359     }
10360 }
10361 
10362 /* Generate code to do a "long" addition or subtraction, ie one done in
10363  * TCGv_i64 on vector lanes twice the width specified by size.
10364  */
10365 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10366                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10367 {
10368     static NeonGenTwo64OpFn * const fns[3][2] = {
10369         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10370         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10371         { tcg_gen_add_i64, tcg_gen_sub_i64 },
10372     };
10373     NeonGenTwo64OpFn *genfn;
10374     assert(size < 3);
10375 
10376     genfn = fns[size][is_sub];
10377     genfn(tcg_res, tcg_op1, tcg_op2);
10378 }
10379 
/* 3-reg-different widening insns: 64 x 64 -> 128.
 * The "2" variants (is_q set) take their inputs from the high halves
 * of Rn/Rm; the result always fills the whole 128-bit Rd.
 */
static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
                                int opcode, int rd, int rn, int rm)
{
    /* 3-reg-different widening insns: 64 x 64 -> 128 */
    TCGv_i64 tcg_res[2];
    int pass, accop;

    tcg_res[0] = tcg_temp_new_i64();
    tcg_res[1] = tcg_temp_new_i64();

    /* Does this op do an adding accumulate, a subtracting accumulate,
     * or no accumulate at all?
     */
    switch (opcode) {
    case 5:
    case 8:
    case 9:
        accop = 1;
        break;
    case 10:
    case 11:
        accop = -1;
        break;
    default:
        accop = 0;
        break;
    }

    if (accop != 0) {
        /* Accumulating forms need the current destination as an input */
        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
    }

    /* size == 2 means two 32x32->64 operations; this is worth special
     * casing because we can generally handle it inline.
     */
    if (size == 2) {
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_passres;
            MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);

            /* "2" forms read from the high half of the inputs */
            int elt = pass + is_q * 2;

            read_vec_element(s, tcg_op1, rn, elt, memop);
            read_vec_element(s, tcg_op2, rm, elt, memop);

            /* Non-accumulating ops can write straight into the result;
             * accumulating ops need a separate temp for the pass result.
             */
            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
            {
                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();

                /* Absolute difference: compute both op1-op2 and op2-op1,
                 * then select the non-negative one by comparing the
                 * operands (unsigned or signed per is_u).
                 */
                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
                                    tcg_passres,
                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
                break;
            }
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                /* Doubling: saturating-add the product to itself */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            default:
                g_assert_not_reached();
            }

            if (opcode == 9 || opcode == 11) {
                /* saturating accumulate ops */
                if (accop < 0) {
                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
                }
                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
                                                  tcg_res[pass], tcg_passres);
            } else if (accop > 0) {
                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            } else if (accop < 0) {
                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            }
        }
    } else {
        /* size 0 or 1, generally helper functions */
        for (pass = 0; pass < 2; pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i64 tcg_passres;
            int elt = pass + is_q * 2;

            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);

            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
            {
                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
                static NeonGenWidenFn * const widenfns[2][2] = {
                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
                };
                NeonGenWidenFn *widenfn = widenfns[size][is_u];

                /* Widen both operands then do a long add/sub */
                widenfn(tcg_op2_64, tcg_op2);
                widenfn(tcg_passres, tcg_op1);
                gen_neon_addl(size, (opcode == 2), tcg_passres,
                              tcg_passres, tcg_op2_64);
                break;
            }
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                /* SQDMULL et al only exist for 16x16->32 at these sizes */
                assert(size == 1);
                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            default:
                g_assert_not_reached();
            }

            if (accop != 0) {
                if (opcode == 9 || opcode == 11) {
                    /* saturating accumulate ops */
                    if (accop < 0) {
                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
                    }
                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                } else {
                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
                                  tcg_res[pass], tcg_passres);
                }
            }
        }
    }

    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
}
10582 
10583 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10584                             int opcode, int rd, int rn, int rm)
10585 {
10586     TCGv_i64 tcg_res[2];
10587     int part = is_q ? 2 : 0;
10588     int pass;
10589 
10590     for (pass = 0; pass < 2; pass++) {
10591         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10592         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10593         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10594         static NeonGenWidenFn * const widenfns[3][2] = {
10595             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10596             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10597             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10598         };
10599         NeonGenWidenFn *widenfn = widenfns[size][is_u];
10600 
10601         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10602         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10603         widenfn(tcg_op2_wide, tcg_op2);
10604         tcg_res[pass] = tcg_temp_new_i64();
10605         gen_neon_addl(size, (opcode == 3),
10606                       tcg_res[pass], tcg_op1, tcg_op2_wide);
10607     }
10608 
10609     for (pass = 0; pass < 2; pass++) {
10610         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10611     }
10612 }
10613 
/* Narrow a 64-bit value to its high 32 bits with rounding: add half
 * an output LSB (1 << 31) before taking the high half.
 * Note: modifies the input operand in place.
 */
static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
{
    tcg_gen_addi_i64(in, in, 1U << 31);
    tcg_gen_extrh_i64_i32(res, in);
}
10619 
10620 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10621                                  int opcode, int rd, int rn, int rm)
10622 {
10623     TCGv_i32 tcg_res[2];
10624     int part = is_q ? 2 : 0;
10625     int pass;
10626 
10627     for (pass = 0; pass < 2; pass++) {
10628         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10629         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10630         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10631         static NeonGenNarrowFn * const narrowfns[3][2] = {
10632             { gen_helper_neon_narrow_high_u8,
10633               gen_helper_neon_narrow_round_high_u8 },
10634             { gen_helper_neon_narrow_high_u16,
10635               gen_helper_neon_narrow_round_high_u16 },
10636             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10637         };
10638         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10639 
10640         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10641         read_vec_element(s, tcg_op2, rm, pass, MO_64);
10642 
10643         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10644 
10645         tcg_res[pass] = tcg_temp_new_i32();
10646         gennarrow(tcg_res[pass], tcg_wideres);
10647     }
10648 
10649     for (pass = 0; pass < 2; pass++) {
10650         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10651     }
10652     clear_vec_high(s, is_q, rd);
10653 }
10654 
10655 /* AdvSIMD three different
10656  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
10657  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10658  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
10659  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10660  */
static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
{
    /* Instructions in this group fall into three basic classes
     * (in each case with the operation working on each element in
     * the input vectors):
     * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
     *     128 bit input)
     * (2) wide 64 x 128 -> 128
     * (3) narrowing 128 x 128 -> 64
     * Here we do initial decode, catch unallocated cases and
     * dispatch to separate functions for each class.
     */
    int is_q = extract32(insn, 30, 1);
    int is_u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    switch (opcode) {
    case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
    case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
        /* 64 x 128 -> 128 */
        if (size == 3) {
            /* no 64-bit element variant */
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
        /* 128 x 128 -> 64 */
        if (size == 3) {
            /* no 64-bit element variant */
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    case 14: /* PMULL, PMULL2 */
        if (is_u) {
            unallocated_encoding(s);
            return;
        }
        switch (size) {
        case 0: /* PMULL.P8 */
            if (!fp_access_check(s)) {
                return;
            }
            /* The Q field specifies lo/hi half input for this insn.  */
            gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
                             gen_helper_neon_pmull_h);
            break;

        case 3: /* PMULL.P64 */
            /* 64x64->128 polynomial multiply is an optional extension */
            if (!dc_isar_feature(aa64_pmull, s)) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            /* The Q field specifies lo/hi half input for this insn.  */
            gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
                             gen_helper_gvec_pmull_q);
            break;

        default:
            /* sizes 1 and 2 are not allocated for PMULL */
            unallocated_encoding(s);
            break;
        }
        return;
    case 9: /* SQDMLAL, SQDMLAL2 */
    case 11: /* SQDMLSL, SQDMLSL2 */
    case 13: /* SQDMULL, SQDMULL2 */
        /* Saturating doubling ops are signed only and have no byte form */
        if (is_u || size == 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
    case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
    case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
    case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
    case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
    case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
    case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
        /* 64 x 64 -> 128 */
        if (size == 3) {
            /* no 64-bit element variant */
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }

        handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    default:
        /* opcode 15 not allocated */
        unallocated_encoding(s);
        break;
    }
}
10771 
10772 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10773 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10774 {
10775     int rd = extract32(insn, 0, 5);
10776     int rn = extract32(insn, 5, 5);
10777     int rm = extract32(insn, 16, 5);
10778     int size = extract32(insn, 22, 2);
10779     bool is_u = extract32(insn, 29, 1);
10780     bool is_q = extract32(insn, 30, 1);
10781 
10782     if (!fp_access_check(s)) {
10783         return;
10784     }
10785 
10786     switch (size + 4 * is_u) {
10787     case 0: /* AND */
10788         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10789         return;
10790     case 1: /* BIC */
10791         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10792         return;
10793     case 2: /* ORR */
10794         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10795         return;
10796     case 3: /* ORN */
10797         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10798         return;
10799     case 4: /* EOR */
10800         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10801         return;
10802 
10803     case 5: /* BSL bitwise select */
10804         gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
10805         return;
10806     case 6: /* BIT, bitwise insert if true */
10807         gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
10808         return;
10809     case 7: /* BIF, bitwise insert if false */
10810         gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
10811         return;
10812 
10813     default:
10814         g_assert_not_reached();
10815     }
10816 }
10817 
10818 /* Pairwise op subgroup of C3.6.16.
10819  *
10820  * This is called directly or via the handle_3same_float for float pairwise
10821  * operations where the opcode and size are calculated differently.
10822  */
static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
                                   int size, int rn, int rm, int rd)
{
    TCGv_ptr fpst;
    int pass;

    /* Floating point operations need fpst; opcodes >= 0x58 are the FP ones */
    if (opcode >= 0x58) {
        fpst = fpstatus_ptr(FPST_FPCR);
    } else {
        fpst = NULL;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* These operations work on the concatenated rm:rn, with each pair of
     * adjacent elements being operated on to produce an element in the result.
     */
    if (size == 3) {
        /* 64-bit elements: one result element comes from each of Vn and Vm */
        TCGv_i64 tcg_res[2];

        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            /* pass 0 pairs the elements of Vn, pass 1 those of Vm */
            int passreg = (pass == 0) ? rn : rm;

            read_vec_element(s, tcg_op1, passreg, 0, MO_64);
            read_vec_element(s, tcg_op2, passreg, 1, MO_64);
            tcg_res[pass] = tcg_temp_new_i64();

            switch (opcode) {
            case 0x17: /* ADDP */
                tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
                break;
            case 0x58: /* FMAXNMP */
                gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5a: /* FADDP */
                gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5e: /* FMAXP */
                gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x78: /* FMINNMP */
                gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x7e: /* FMINP */
                gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        }

        /* Results are written only after all inputs are consumed,
         * since Vd may overlap Vn/Vm.
         */
        for (pass = 0; pass < 2; pass++) {
            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
        }
    } else {
        /* 8/16/32-bit elements */
        int maxpass = is_q ? 4 : 2;
        TCGv_i32 tcg_res[4];

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            NeonGenTwoOpFn *genfn = NULL;
            /* first half of the passes pairs Vn, second half Vm */
            int passreg = pass < (maxpass / 2) ? rn : rm;
            /* for Q, odd passes take elements 2/3 of the source */
            int passelt = (is_q && (pass & 1)) ? 2 : 0;

            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
            tcg_res[pass] = tcg_temp_new_i32();

            switch (opcode) {
            case 0x17: /* ADDP */
            {
                static NeonGenTwoOpFn * const fns[3] = {
                    gen_helper_neon_padd_u8,
                    gen_helper_neon_padd_u16,
                    tcg_gen_add_i32,
                };
                genfn = fns[size];
                break;
            }
            case 0x14: /* SMAXP, UMAXP */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
                    { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
                    { tcg_gen_smax_i32, tcg_gen_umax_i32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x15: /* SMINP, UMINP */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
                    { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
                    { tcg_gen_smin_i32, tcg_gen_umin_i32 },
                };
                genfn = fns[size][u];
                break;
            }
            /* The FP operations are all on single floats (32 bit) */
            case 0x58: /* FMAXNMP */
                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5a: /* FADDP */
                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5e: /* FMAXP */
                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x78: /* FMINNMP */
                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x7e: /* FMINP */
                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            /* FP ops called directly, otherwise call now */
            if (genfn) {
                genfn(tcg_res[pass], tcg_op1, tcg_op2);
            }
        }

        /* As above, write back only after all inputs have been read */
        for (pass = 0; pass < maxpass; pass++) {
            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
        }
        clear_vec_high(s, is_q, rd);
    }
}
10960 
10961 /* Floating point op subgroup of C3.6.16. */
static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
{
    /* For floating point ops, the U, size[1] and opcode bits
     * together indicate the operation. size[0] indicates single
     * or double.
     */
    int fpopcode = extract32(insn, 11, 5)
        | (extract32(insn, 23, 1) << 5)
        | (extract32(insn, 29, 1) << 6);
    int is_q = extract32(insn, 30, 1);
    int size = extract32(insn, 22, 1); /* size[0]: 0 = single, 1 = double */
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    int datasize = is_q ? 128 : 64;
    int esize = 32 << size;
    int elements = datasize / esize;

    /* Double-precision is only available in the 128-bit (Q) form */
    if (size == 1 && !is_q) {
        unallocated_encoding(s);
        return;
    }

    switch (fpopcode) {
    case 0x58: /* FMAXNMP */
    case 0x5a: /* FADDP */
    case 0x5e: /* FMAXP */
    case 0x78: /* FMINNMP */
    case 0x7e: /* FMINP */
        if (size && !is_q) {
            unallocated_encoding(s);
            return;
        }
        /* fp_access_check is done inside handle_simd_3same_pair */
        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
                               rn, rm, rd);
        return;
    case 0x1b: /* FMULX */
    case 0x1f: /* FRECPS */
    case 0x3f: /* FRSQRTS */
    case 0x5d: /* FACGE */
    case 0x7d: /* FACGT */
    case 0x19: /* FMLA */
    case 0x39: /* FMLS */
    case 0x18: /* FMAXNM */
    case 0x1a: /* FADD */
    case 0x1c: /* FCMEQ */
    case 0x1e: /* FMAX */
    case 0x38: /* FMINNM */
    case 0x3a: /* FSUB */
    case 0x3e: /* FMIN */
    case 0x5b: /* FMUL */
    case 0x5c: /* FCMGE */
    case 0x5f: /* FDIV */
    case 0x7a: /* FABD */
    case 0x7c: /* FCMGT */
        if (!fp_access_check(s)) {
            return;
        }
        handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
        return;

    case 0x1d: /* FMLAL  */
    case 0x3d: /* FMLSL  */
    case 0x59: /* FMLAL2 */
    case 0x79: /* FMLSL2 */
        /* These require FEAT_FHM and only exist for size[0] == 0 */
        if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
            unallocated_encoding(s);
            return;
        }
        if (fp_access_check(s)) {
            int is_s = extract32(insn, 23, 1);
            int is_2 = extract32(insn, 29, 1);
            /* is_s (sub vs add) and is_2 (hi/lo half) are packed into
             * the helper's data argument.
             */
            int data = (is_2 << 1) | is_s;
            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                               vec_full_reg_offset(s, rn),
                               vec_full_reg_offset(s, rm), cpu_env,
                               is_q ? 16 : 8, vec_full_reg_size(s),
                               data, gen_helper_gvec_fmlal_a64);
        }
        return;

    default:
        unallocated_encoding(s);
        return;
    }
}
11049 
11050 /* Integer op subgroup of C3.6.16. */
static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
{
    int is_q = extract32(insn, 30, 1);
    int u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int pass;
    TCGCond cond;

    /* First pass: catch the unallocated size/opcode combinations */
    switch (opcode) {
    case 0x13: /* MUL, PMUL */
        /* PMUL (u == 1) only exists for byte elements */
        if (u && size != 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x0: /* SHADD, UHADD */
    case 0x2: /* SRHADD, URHADD */
    case 0x4: /* SHSUB, UHSUB */
    case 0xc: /* SMAX, UMAX */
    case 0xd: /* SMIN, UMIN */
    case 0xe: /* SABD, UABD */
    case 0xf: /* SABA, UABA */
    case 0x12: /* MLA, MLS */
        /* no 64-bit element forms for these */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x16: /* SQDMULH, SQRDMULH */
        /* only 16-bit and 32-bit element forms exist */
        if (size == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        /* 64-bit element forms require the Q variant */
        if (size == 3 && !is_q) {
            unallocated_encoding(s);
            return;
        }
        break;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* Second pass: ops with a whole-vector (gvec) expansion return here */
    switch (opcode) {
    case 0x01: /* SQADD, UQADD */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
        }
        return;
    case 0x05: /* SQSUB, UQSUB */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
        }
        return;
    case 0x08: /* SSHL, USHL */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
        }
        return;
    case 0x0c: /* SMAX, UMAX */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
        }
        return;
    case 0x0d: /* SMIN, UMIN */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
        }
        return;
    case 0xe: /* SABD, UABD */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
        }
        return;
    case 0xf: /* SABA, UABA */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
        }
        return;
    case 0x10: /* ADD, SUB */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
        }
        return;
    case 0x13: /* MUL, PMUL */
        if (!u) { /* MUL */
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
        } else {  /* PMUL */
            gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
        }
        return;
    case 0x12: /* MLA, MLS */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
        }
        return;
    case 0x16: /* SQDMULH, SQRDMULH */
        {
            /* size is 1 or 2 here (0 and 3 rejected above) */
            static gen_helper_gvec_3_ptr * const fns[2][2] = {
                { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
                { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
            };
            gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
        }
        return;
    case 0x11:
        if (!u) { /* CMTST */
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
            return;
        }
        /* else CMEQ */
        cond = TCG_COND_EQ;
        goto do_gvec_cmp;
    case 0x06: /* CMGT, CMHI */
        cond = u ? TCG_COND_GTU : TCG_COND_GT;
        goto do_gvec_cmp;
    case 0x07: /* CMGE, CMHS */
        cond = u ? TCG_COND_GEU : TCG_COND_GE;
    do_gvec_cmp:
        tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
                         vec_full_reg_offset(s, rn),
                         vec_full_reg_offset(s, rm),
                         is_q ? 16 : 8, vec_full_reg_size(s));
        return;
    }

    /* Remaining ops are expanded element by element */
    if (size == 3) {
        /* size == 3 with !is_q was rejected in the decode switch above */
        assert(is_q);
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);

            write_vec_element(s, tcg_res, rd, pass, MO_64);
        }
    } else {
        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();
            NeonGenTwoOpFn *genfn = NULL;
            NeonGenTwoOpEnvFn *genenvfn = NULL;

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            switch (opcode) {
            case 0x0: /* SHADD, UHADD */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
                    { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
                    { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x2: /* SRHADD, URHADD */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
                    { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
                    { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x4: /* SHSUB, UHSUB */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
                    { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
                    { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x9: /* SQSHL, UQSHL */
            {
                /* saturating ops need cpu_env to set QC */
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0xa: /* SRSHL, URSHL */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0xb: /* SQRSHL, UQRSHL */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            default:
                g_assert_not_reached();
            }

            if (genenvfn) {
                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
            } else {
                genfn(tcg_res, tcg_op1, tcg_op2);
            }

            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
        }
    }
    clear_vec_high(s, is_q, rd);
}
11303 
11304 /* AdvSIMD three same
11305  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11306  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11307  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11308  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11309  */
11310 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11311 {
11312     int opcode = extract32(insn, 11, 5);
11313 
11314     switch (opcode) {
11315     case 0x3: /* logic ops */
11316         disas_simd_3same_logic(s, insn);
11317         break;
11318     case 0x17: /* ADDP */
11319     case 0x14: /* SMAXP, UMAXP */
11320     case 0x15: /* SMINP, UMINP */
11321     {
11322         /* Pairwise operations */
11323         int is_q = extract32(insn, 30, 1);
11324         int u = extract32(insn, 29, 1);
11325         int size = extract32(insn, 22, 2);
11326         int rm = extract32(insn, 16, 5);
11327         int rn = extract32(insn, 5, 5);
11328         int rd = extract32(insn, 0, 5);
11329         if (opcode == 0x17) {
11330             if (u || (size == 3 && !is_q)) {
11331                 unallocated_encoding(s);
11332                 return;
11333             }
11334         } else {
11335             if (size == 3) {
11336                 unallocated_encoding(s);
11337                 return;
11338             }
11339         }
11340         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11341         break;
11342     }
11343     case 0x18 ... 0x31:
11344         /* floating point ops, sz[1] and U are part of opcode */
11345         disas_simd_3same_float(s, insn);
11346         break;
11347     default:
11348         disas_simd_3same_int(s, insn);
11349         break;
11350     }
11351 }
11352 
11353 /*
11354  * Advanced SIMD three same (ARMv8.2 FP16 variants)
11355  *
11356  *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11357  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11358  * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11359  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11360  *
11361  * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11362  * (register), FACGE, FABD, FCMGT (register) and FACGT.
11363  *
11364  */
11365 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11366 {
11367     int opcode = extract32(insn, 11, 3);
11368     int u = extract32(insn, 29, 1);
11369     int a = extract32(insn, 23, 1);
11370     int is_q = extract32(insn, 30, 1);
11371     int rm = extract32(insn, 16, 5);
11372     int rn = extract32(insn, 5, 5);
11373     int rd = extract32(insn, 0, 5);
11374     /*
11375      * For these floating point ops, the U, a and opcode bits
11376      * together indicate the operation.
11377      */
11378     int fpopcode = opcode | (a << 3) | (u << 4);
11379     int datasize = is_q ? 128 : 64;
11380     int elements = datasize / 16;
11381     bool pairwise;
11382     TCGv_ptr fpst;
11383     int pass;
11384 
11385     switch (fpopcode) {
11386     case 0x0: /* FMAXNM */
11387     case 0x1: /* FMLA */
11388     case 0x2: /* FADD */
11389     case 0x3: /* FMULX */
11390     case 0x4: /* FCMEQ */
11391     case 0x6: /* FMAX */
11392     case 0x7: /* FRECPS */
11393     case 0x8: /* FMINNM */
11394     case 0x9: /* FMLS */
11395     case 0xa: /* FSUB */
11396     case 0xe: /* FMIN */
11397     case 0xf: /* FRSQRTS */
11398     case 0x13: /* FMUL */
11399     case 0x14: /* FCMGE */
11400     case 0x15: /* FACGE */
11401     case 0x17: /* FDIV */
11402     case 0x1a: /* FABD */
11403     case 0x1c: /* FCMGT */
11404     case 0x1d: /* FACGT */
11405         pairwise = false;
11406         break;
11407     case 0x10: /* FMAXNMP */
11408     case 0x12: /* FADDP */
11409     case 0x16: /* FMAXP */
11410     case 0x18: /* FMINNMP */
11411     case 0x1e: /* FMINP */
11412         pairwise = true;
11413         break;
11414     default:
11415         unallocated_encoding(s);
11416         return;
11417     }
11418 
11419     if (!dc_isar_feature(aa64_fp16, s)) {
11420         unallocated_encoding(s);
11421         return;
11422     }
11423 
11424     if (!fp_access_check(s)) {
11425         return;
11426     }
11427 
11428     fpst = fpstatus_ptr(FPST_FPCR_F16);
11429 
11430     if (pairwise) {
11431         int maxpass = is_q ? 8 : 4;
11432         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11433         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11434         TCGv_i32 tcg_res[8];
11435 
11436         for (pass = 0; pass < maxpass; pass++) {
11437             int passreg = pass < (maxpass / 2) ? rn : rm;
11438             int passelt = (pass << 1) & (maxpass - 1);
11439 
11440             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11441             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11442             tcg_res[pass] = tcg_temp_new_i32();
11443 
11444             switch (fpopcode) {
11445             case 0x10: /* FMAXNMP */
11446                 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11447                                            fpst);
11448                 break;
11449             case 0x12: /* FADDP */
11450                 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11451                 break;
11452             case 0x16: /* FMAXP */
11453                 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11454                 break;
11455             case 0x18: /* FMINNMP */
11456                 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11457                                            fpst);
11458                 break;
11459             case 0x1e: /* FMINP */
11460                 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11461                 break;
11462             default:
11463                 g_assert_not_reached();
11464             }
11465         }
11466 
11467         for (pass = 0; pass < maxpass; pass++) {
11468             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11469         }
11470     } else {
11471         for (pass = 0; pass < elements; pass++) {
11472             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11473             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11474             TCGv_i32 tcg_res = tcg_temp_new_i32();
11475 
11476             read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11477             read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11478 
11479             switch (fpopcode) {
11480             case 0x0: /* FMAXNM */
11481                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11482                 break;
11483             case 0x1: /* FMLA */
11484                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11485                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11486                                            fpst);
11487                 break;
11488             case 0x2: /* FADD */
11489                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11490                 break;
11491             case 0x3: /* FMULX */
11492                 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11493                 break;
11494             case 0x4: /* FCMEQ */
11495                 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11496                 break;
11497             case 0x6: /* FMAX */
11498                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11499                 break;
11500             case 0x7: /* FRECPS */
11501                 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11502                 break;
11503             case 0x8: /* FMINNM */
11504                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11505                 break;
11506             case 0x9: /* FMLS */
11507                 /* As usual for ARM, separate negation for fused multiply-add */
11508                 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11509                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11510                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11511                                            fpst);
11512                 break;
11513             case 0xa: /* FSUB */
11514                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11515                 break;
11516             case 0xe: /* FMIN */
11517                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11518                 break;
11519             case 0xf: /* FRSQRTS */
11520                 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11521                 break;
11522             case 0x13: /* FMUL */
11523                 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11524                 break;
11525             case 0x14: /* FCMGE */
11526                 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11527                 break;
11528             case 0x15: /* FACGE */
11529                 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11530                 break;
11531             case 0x17: /* FDIV */
11532                 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11533                 break;
11534             case 0x1a: /* FABD */
11535                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11536                 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
11537                 break;
11538             case 0x1c: /* FCMGT */
11539                 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11540                 break;
11541             case 0x1d: /* FACGT */
11542                 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11543                 break;
11544             default:
11545                 g_assert_not_reached();
11546             }
11547 
11548             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11549         }
11550     }
11551 
11552     clear_vec_high(s, is_q, rd);
11553 }
11554 
11555 /* AdvSIMD three same extra
11556  *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
11557  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11558  * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
11559  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11560  */
static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 4);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    bool feature;
    int rot;

    /* First pass: decode on the combined U:opcode key, reject invalid
     * size/Q combinations, and record which optional ISA feature gates
     * this encoding.  The actual feature and FP-access checks are done
     * once, after the switch.
     */
    switch (u * 16 + opcode) {
    case 0x10: /* SQRDMLAH (vector) */
    case 0x11: /* SQRDMLSH (vector) */
        /* Only 16-bit and 32-bit elements are allocated */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_rdm, s);
        break;
    case 0x02: /* SDOT (vector) */
    case 0x12: /* UDOT (vector) */
        if (size != MO_32) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_dp, s);
        break;
    case 0x03: /* USDOT */
        if (size != MO_32) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_i8mm, s);
        break;
    case 0x04: /* SMMLA */
    case 0x14: /* UMMLA */
    case 0x05: /* USMMLA */
        /* Matrix-multiply insns are Q-form only */
        if (!is_q || size != MO_32) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_i8mm, s);
        break;
    case 0x18: /* FCMLA, #0 */
    case 0x19: /* FCMLA, #90 */
    case 0x1a: /* FCMLA, #180 */
    case 0x1b: /* FCMLA, #270 */
    case 0x1c: /* FCADD, #90 */
    case 0x1e: /* FCADD, #270 */
        /* size 0 is unallocated; fp16 needs FEAT_FP16; double needs Q */
        if (size == 0
            || (size == 1 && !dc_isar_feature(aa64_fp16, s))
            || (size == 3 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_fcma, s);
        break;
    case 0x1d: /* BFMMLA */
        if (size != MO_16 || !is_q) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_bf16, s);
        break;
    case 0x1f:
        /* size field further distinguishes the bfloat16 encodings */
        switch (size) {
        case 1: /* BFDOT */
        case 3: /* BFMLAL{B,T} */
            feature = dc_isar_feature(aa64_bf16, s);
            break;
        default:
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }
    if (!feature) {
        unallocated_encoding(s);
        return;
    }
    if (!fp_access_check(s)) {
        return;
    }

    /* Second pass: all checks passed, emit the operation.  Note this
     * switch keys on opcode alone; U is consulted where it selects the
     * signed/unsigned variant of the same opcode slot.
     */
    switch (opcode) {
    case 0x0: /* SQRDMLAH (vector) */
        gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
        return;

    case 0x1: /* SQRDMLSH (vector) */
        gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
        return;

    case 0x2: /* SDOT / UDOT */
        /* rd is passed again as the accumulator input operand */
        gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
                         u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
        return;

    case 0x3: /* USDOT */
        gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
        return;

    case 0x04: /* SMMLA, UMMLA */
        /* is_q was validated above, so pass 1 (full vector) directly */
        gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
                         u ? gen_helper_gvec_ummla_b
                         : gen_helper_gvec_smmla_b);
        return;
    case 0x05: /* USMMLA */
        gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
        return;

    case 0x8: /* FCMLA, #0 */
    case 0x9: /* FCMLA, #90 */
    case 0xa: /* FCMLA, #180 */
    case 0xb: /* FCMLA, #270 */
        /* Low two opcode bits encode the rotation (0/90/180/270) */
        rot = extract32(opcode, 0, 2);
        switch (size) {
        case 1:
            /* fp16: the bool selects the half-precision FP status */
            gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
                              gen_helper_gvec_fcmlah);
            break;
        case 2:
            gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
                              gen_helper_gvec_fcmlas);
            break;
        case 3:
            gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
                              gen_helper_gvec_fcmlad);
            break;
        default:
            g_assert_not_reached();
        }
        return;

    case 0xc: /* FCADD, #90 */
    case 0xe: /* FCADD, #270 */
        /* opcode bit 1 selects #90 (0) vs #270 (1) */
        rot = extract32(opcode, 1, 1);
        switch (size) {
        case 1:
            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
                              gen_helper_gvec_fcaddh);
            break;
        case 2:
            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
                              gen_helper_gvec_fcadds);
            break;
        case 3:
            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
                              gen_helper_gvec_fcaddd);
            break;
        default:
            g_assert_not_reached();
        }
        return;

    case 0xd: /* BFMMLA */
        gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
        return;
    case 0xf:
        switch (size) {
        case 1: /* BFDOT */
            gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
            break;
        case 3: /* BFMLAL{B,T} */
            /* is_q here selects bottom vs top half (BFMLALB/BFMLALT) */
            gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
                              gen_helper_gvec_bfmlal);
            break;
        default:
            g_assert_not_reached();
        }
        return;

    default:
        g_assert_not_reached();
    }
}
11742 
11743 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11744                                   int size, int rn, int rd)
11745 {
11746     /* Handle 2-reg-misc ops which are widening (so each size element
11747      * in the source becomes a 2*size element in the destination.
11748      * The only instruction like this is FCVTL.
11749      */
11750     int pass;
11751 
11752     if (size == 3) {
11753         /* 32 -> 64 bit fp conversion */
11754         TCGv_i64 tcg_res[2];
11755         int srcelt = is_q ? 2 : 0;
11756 
11757         for (pass = 0; pass < 2; pass++) {
11758             TCGv_i32 tcg_op = tcg_temp_new_i32();
11759             tcg_res[pass] = tcg_temp_new_i64();
11760 
11761             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11762             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
11763         }
11764         for (pass = 0; pass < 2; pass++) {
11765             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11766         }
11767     } else {
11768         /* 16 -> 32 bit fp conversion */
11769         int srcelt = is_q ? 4 : 0;
11770         TCGv_i32 tcg_res[4];
11771         TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
11772         TCGv_i32 ahp = get_ahp_flag();
11773 
11774         for (pass = 0; pass < 4; pass++) {
11775             tcg_res[pass] = tcg_temp_new_i32();
11776 
11777             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11778             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11779                                            fpst, ahp);
11780         }
11781         for (pass = 0; pass < 4; pass++) {
11782             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11783         }
11784     }
11785 }
11786 
static void handle_rev(DisasContext *s, int opcode, bool u,
                       bool is_q, int size, int rn, int rd)
{
    /* Handle REV64, REV32, REV16 (2-reg-misc): reverse the order of
     * size-wide elements within each containing group.
     * op: 0 = REV64, 1 = REV32, 2 = REV16 (from opcode:u).
     * opsz >= 3 means the element would be at least as wide as the
     * containing group, which is unallocated.
     * grp_size is log2(group size in bytes).
     */
    int op = (opcode << 1) | u;
    int opsz = op + size;
    int grp_size = 3 - opsz;
    int dsize = is_q ? 128 : 64;
    int i;

    if (opsz >= 3) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (size == 0) {
        /* Special case bytes, use bswap op on each group of elements */
        int groups = dsize / (8 << grp_size);

        for (i = 0; i < groups; i++) {
            TCGv_i64 tcg_tmp = tcg_temp_new_i64();

            read_vec_element(s, tcg_tmp, rn, i, grp_size);
            switch (grp_size) {
            case MO_16:
                /* TCG_BSWAP_IZ: the value is known zero-extended, so the
                 * bswap need not clear the high bits itself.
                 */
                tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
                break;
            case MO_32:
                tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
                break;
            case MO_64:
                tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
                break;
            default:
                g_assert_not_reached();
            }
            write_vec_element(s, tcg_tmp, rd, i, grp_size);
        }
        clear_vec_high(s, is_q, rd);
    } else {
        /* Elements wider than a byte: build the result by depositing
         * each source element at its reversed position.  XOR-ing the
         * element index with revmask swaps elements within each group
         * without touching the group's position in the vector.
         */
        int revmask = (1 << grp_size) - 1;
        int esize = 8 << size;
        int elements = dsize / esize;
        TCGv_i64 tcg_rn = tcg_temp_new_i64();
        TCGv_i64 tcg_rd[2];

        /* Accumulate into zeroed temporaries so rd == rn is safe */
        for (i = 0; i < 2; i++) {
            tcg_rd[i] = tcg_temp_new_i64();
            tcg_gen_movi_i64(tcg_rd[i], 0);
        }

        for (i = 0; i < elements; i++) {
            int e_rev = (i & 0xf) ^ revmask;
            /* w: which 64-bit half holds the destination element;
             * o: bit offset of the element within that half.
             */
            int w = (e_rev * esize) / 64;
            int o = (e_rev * esize) % 64;

            read_vec_element(s, tcg_rn, rn, i, size);
            tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
        }

        for (i = 0; i < 2; i++) {
            write_vec_element(s, tcg_rd[i], rd, i, MO_64);
        }
        /* Both halves were written above, so always pass true here */
        clear_vec_high(s, true, rd);
    }
}
11856 
static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
                                  bool is_q, int size, int rn, int rd)
{
    /* Implement the pairwise operations from 2-misc:
     * SADDLP, UADDLP, SADALP, UADALP.
     * These all add pairs of elements in the input to produce a
     * double-width result element in the output (possibly accumulating).
     */
    bool accum = (opcode == 0x6); /* SADALP/UADALP accumulate into rd */
    int maxpass = is_q ? 2 : 1;
    int pass;
    TCGv_i64 tcg_res[2];

    if (size == 2) {
        /* 32 + 32 -> 64 op: wide enough that we add directly with
         * 64-bit TCG ops, sign- or zero-extending on the element read.
         */
        MemOp memop = size + (u ? 0 : MO_SIGN);

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();

            tcg_res[pass] = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass * 2, memop);
            read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
            tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
            if (accum) {
                read_vec_element(s, tcg_op1, rd, pass, MO_64);
                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
            }
        }
    } else {
        /* 8- and 16-bit elements: use the Neon addlp helpers, which do
         * all the pairwise adds within a 64-bit lane at once.
         */
        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();
            NeonGenOne64OpFn *genfn;
            /* Indexed by [size][u]: signed vs unsigned widening add */
            static NeonGenOne64OpFn * const fns[2][2] = {
                { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
                { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
            };

            genfn = fns[size][u];

            tcg_res[pass] = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);
            genfn(tcg_res[pass], tcg_op);

            if (accum) {
                read_vec_element(s, tcg_op, rd, pass, MO_64);
                /* Accumulate lane-wise at the doubled element width */
                if (size == 0) {
                    gen_helper_neon_addl_u16(tcg_res[pass],
                                             tcg_res[pass], tcg_op);
                } else {
                    gen_helper_neon_addl_u32(tcg_res[pass],
                                             tcg_res[pass], tcg_op);
                }
            }
        }
    }
    /* 64-bit forms zero the high half of the destination */
    if (!is_q) {
        tcg_res[1] = tcg_constant_i64(0);
    }
    for (pass = 0; pass < 2; pass++) {
        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
    }
}
11923 
11924 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11925 {
11926     /* Implement SHLL and SHLL2 */
11927     int pass;
11928     int part = is_q ? 2 : 0;
11929     TCGv_i64 tcg_res[2];
11930 
11931     for (pass = 0; pass < 2; pass++) {
11932         static NeonGenWidenFn * const widenfns[3] = {
11933             gen_helper_neon_widen_u8,
11934             gen_helper_neon_widen_u16,
11935             tcg_gen_extu_i32_i64,
11936         };
11937         NeonGenWidenFn *widenfn = widenfns[size];
11938         TCGv_i32 tcg_op = tcg_temp_new_i32();
11939 
11940         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11941         tcg_res[pass] = tcg_temp_new_i64();
11942         widenfn(tcg_res[pass], tcg_op);
11943         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11944     }
11945 
11946     for (pass = 0; pass < 2; pass++) {
11947         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11948     }
11949 }
11950 
11951 /* AdvSIMD two reg misc
11952  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
11953  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11954  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11955  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11956  */
11957 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11958 {
11959     int size = extract32(insn, 22, 2);
11960     int opcode = extract32(insn, 12, 5);
11961     bool u = extract32(insn, 29, 1);
11962     bool is_q = extract32(insn, 30, 1);
11963     int rn = extract32(insn, 5, 5);
11964     int rd = extract32(insn, 0, 5);
11965     bool need_fpstatus = false;
11966     int rmode = -1;
11967     TCGv_i32 tcg_rmode;
11968     TCGv_ptr tcg_fpstatus;
11969 
11970     switch (opcode) {
11971     case 0x0: /* REV64, REV32 */
11972     case 0x1: /* REV16 */
11973         handle_rev(s, opcode, u, is_q, size, rn, rd);
11974         return;
11975     case 0x5: /* CNT, NOT, RBIT */
11976         if (u && size == 0) {
11977             /* NOT */
11978             break;
11979         } else if (u && size == 1) {
11980             /* RBIT */
11981             break;
11982         } else if (!u && size == 0) {
11983             /* CNT */
11984             break;
11985         }
11986         unallocated_encoding(s);
11987         return;
11988     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11989     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11990         if (size == 3) {
11991             unallocated_encoding(s);
11992             return;
11993         }
11994         if (!fp_access_check(s)) {
11995             return;
11996         }
11997 
11998         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
11999         return;
12000     case 0x4: /* CLS, CLZ */
12001         if (size == 3) {
12002             unallocated_encoding(s);
12003             return;
12004         }
12005         break;
12006     case 0x2: /* SADDLP, UADDLP */
12007     case 0x6: /* SADALP, UADALP */
12008         if (size == 3) {
12009             unallocated_encoding(s);
12010             return;
12011         }
12012         if (!fp_access_check(s)) {
12013             return;
12014         }
12015         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
12016         return;
12017     case 0x13: /* SHLL, SHLL2 */
12018         if (u == 0 || size == 3) {
12019             unallocated_encoding(s);
12020             return;
12021         }
12022         if (!fp_access_check(s)) {
12023             return;
12024         }
12025         handle_shll(s, is_q, size, rn, rd);
12026         return;
12027     case 0xa: /* CMLT */
12028         if (u == 1) {
12029             unallocated_encoding(s);
12030             return;
12031         }
12032         /* fall through */
12033     case 0x8: /* CMGT, CMGE */
12034     case 0x9: /* CMEQ, CMLE */
12035     case 0xb: /* ABS, NEG */
12036         if (size == 3 && !is_q) {
12037             unallocated_encoding(s);
12038             return;
12039         }
12040         break;
12041     case 0x3: /* SUQADD, USQADD */
12042         if (size == 3 && !is_q) {
12043             unallocated_encoding(s);
12044             return;
12045         }
12046         if (!fp_access_check(s)) {
12047             return;
12048         }
12049         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
12050         return;
12051     case 0x7: /* SQABS, SQNEG */
12052         if (size == 3 && !is_q) {
12053             unallocated_encoding(s);
12054             return;
12055         }
12056         break;
12057     case 0xc ... 0xf:
12058     case 0x16 ... 0x1f:
12059     {
12060         /* Floating point: U, size[1] and opcode indicate operation;
12061          * size[0] indicates single or double precision.
12062          */
12063         int is_double = extract32(size, 0, 1);
12064         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
12065         size = is_double ? 3 : 2;
12066         switch (opcode) {
12067         case 0x2f: /* FABS */
12068         case 0x6f: /* FNEG */
12069             if (size == 3 && !is_q) {
12070                 unallocated_encoding(s);
12071                 return;
12072             }
12073             break;
12074         case 0x1d: /* SCVTF */
12075         case 0x5d: /* UCVTF */
12076         {
12077             bool is_signed = (opcode == 0x1d) ? true : false;
12078             int elements = is_double ? 2 : is_q ? 4 : 2;
12079             if (is_double && !is_q) {
12080                 unallocated_encoding(s);
12081                 return;
12082             }
12083             if (!fp_access_check(s)) {
12084                 return;
12085             }
12086             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12087             return;
12088         }
12089         case 0x2c: /* FCMGT (zero) */
12090         case 0x2d: /* FCMEQ (zero) */
12091         case 0x2e: /* FCMLT (zero) */
12092         case 0x6c: /* FCMGE (zero) */
12093         case 0x6d: /* FCMLE (zero) */
12094             if (size == 3 && !is_q) {
12095                 unallocated_encoding(s);
12096                 return;
12097             }
12098             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12099             return;
12100         case 0x7f: /* FSQRT */
12101             if (size == 3 && !is_q) {
12102                 unallocated_encoding(s);
12103                 return;
12104             }
12105             break;
12106         case 0x1a: /* FCVTNS */
12107         case 0x1b: /* FCVTMS */
12108         case 0x3a: /* FCVTPS */
12109         case 0x3b: /* FCVTZS */
12110         case 0x5a: /* FCVTNU */
12111         case 0x5b: /* FCVTMU */
12112         case 0x7a: /* FCVTPU */
12113         case 0x7b: /* FCVTZU */
12114             need_fpstatus = true;
12115             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12116             if (size == 3 && !is_q) {
12117                 unallocated_encoding(s);
12118                 return;
12119             }
12120             break;
12121         case 0x5c: /* FCVTAU */
12122         case 0x1c: /* FCVTAS */
12123             need_fpstatus = true;
12124             rmode = FPROUNDING_TIEAWAY;
12125             if (size == 3 && !is_q) {
12126                 unallocated_encoding(s);
12127                 return;
12128             }
12129             break;
12130         case 0x3c: /* URECPE */
12131             if (size == 3) {
12132                 unallocated_encoding(s);
12133                 return;
12134             }
12135             /* fall through */
12136         case 0x3d: /* FRECPE */
12137         case 0x7d: /* FRSQRTE */
12138             if (size == 3 && !is_q) {
12139                 unallocated_encoding(s);
12140                 return;
12141             }
12142             if (!fp_access_check(s)) {
12143                 return;
12144             }
12145             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12146             return;
12147         case 0x56: /* FCVTXN, FCVTXN2 */
12148             if (size == 2) {
12149                 unallocated_encoding(s);
12150                 return;
12151             }
12152             /* fall through */
12153         case 0x16: /* FCVTN, FCVTN2 */
12154             /* handle_2misc_narrow does a 2*size -> size operation, but these
12155              * instructions encode the source size rather than dest size.
12156              */
12157             if (!fp_access_check(s)) {
12158                 return;
12159             }
12160             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12161             return;
12162         case 0x36: /* BFCVTN, BFCVTN2 */
12163             if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
12164                 unallocated_encoding(s);
12165                 return;
12166             }
12167             if (!fp_access_check(s)) {
12168                 return;
12169             }
12170             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12171             return;
12172         case 0x17: /* FCVTL, FCVTL2 */
12173             if (!fp_access_check(s)) {
12174                 return;
12175             }
12176             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12177             return;
12178         case 0x18: /* FRINTN */
12179         case 0x19: /* FRINTM */
12180         case 0x38: /* FRINTP */
12181         case 0x39: /* FRINTZ */
12182             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12183             /* fall through */
12184         case 0x59: /* FRINTX */
12185         case 0x79: /* FRINTI */
12186             need_fpstatus = true;
12187             if (size == 3 && !is_q) {
12188                 unallocated_encoding(s);
12189                 return;
12190             }
12191             break;
12192         case 0x58: /* FRINTA */
12193             rmode = FPROUNDING_TIEAWAY;
12194             need_fpstatus = true;
12195             if (size == 3 && !is_q) {
12196                 unallocated_encoding(s);
12197                 return;
12198             }
12199             break;
12200         case 0x7c: /* URSQRTE */
12201             if (size == 3) {
12202                 unallocated_encoding(s);
12203                 return;
12204             }
12205             break;
12206         case 0x1e: /* FRINT32Z */
12207         case 0x1f: /* FRINT64Z */
12208             rmode = FPROUNDING_ZERO;
12209             /* fall through */
12210         case 0x5e: /* FRINT32X */
12211         case 0x5f: /* FRINT64X */
12212             need_fpstatus = true;
12213             if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12214                 unallocated_encoding(s);
12215                 return;
12216             }
12217             break;
12218         default:
12219             unallocated_encoding(s);
12220             return;
12221         }
12222         break;
12223     }
12224     default:
12225         unallocated_encoding(s);
12226         return;
12227     }
12228 
12229     if (!fp_access_check(s)) {
12230         return;
12231     }
12232 
12233     if (need_fpstatus || rmode >= 0) {
12234         tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
12235     } else {
12236         tcg_fpstatus = NULL;
12237     }
12238     if (rmode >= 0) {
12239         tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
12240     } else {
12241         tcg_rmode = NULL;
12242     }
12243 
12244     switch (opcode) {
12245     case 0x5:
12246         if (u && size == 0) { /* NOT */
12247             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12248             return;
12249         }
12250         break;
12251     case 0x8: /* CMGT, CMGE */
12252         if (u) {
12253             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
12254         } else {
12255             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
12256         }
12257         return;
12258     case 0x9: /* CMEQ, CMLE */
12259         if (u) {
12260             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
12261         } else {
12262             gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
12263         }
12264         return;
12265     case 0xa: /* CMLT */
12266         gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
12267         return;
12268     case 0xb:
12269         if (u) { /* ABS, NEG */
12270             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12271         } else {
12272             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12273         }
12274         return;
12275     }
12276 
12277     if (size == 3) {
12278         /* All 64-bit element operations can be shared with scalar 2misc */
12279         int pass;
12280 
12281         /* Coverity claims (size == 3 && !is_q) has been eliminated
12282          * from all paths leading to here.
12283          */
12284         tcg_debug_assert(is_q);
12285         for (pass = 0; pass < 2; pass++) {
12286             TCGv_i64 tcg_op = tcg_temp_new_i64();
12287             TCGv_i64 tcg_res = tcg_temp_new_i64();
12288 
12289             read_vec_element(s, tcg_op, rn, pass, MO_64);
12290 
12291             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12292                             tcg_rmode, tcg_fpstatus);
12293 
12294             write_vec_element(s, tcg_res, rd, pass, MO_64);
12295         }
12296     } else {
12297         int pass;
12298 
12299         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12300             TCGv_i32 tcg_op = tcg_temp_new_i32();
12301             TCGv_i32 tcg_res = tcg_temp_new_i32();
12302 
12303             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12304 
12305             if (size == 2) {
12306                 /* Special cases for 32 bit elements */
12307                 switch (opcode) {
12308                 case 0x4: /* CLS */
12309                     if (u) {
12310                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12311                     } else {
12312                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
12313                     }
12314                     break;
12315                 case 0x7: /* SQABS, SQNEG */
12316                     if (u) {
12317                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12318                     } else {
12319                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12320                     }
12321                     break;
12322                 case 0x2f: /* FABS */
12323                     gen_helper_vfp_abss(tcg_res, tcg_op);
12324                     break;
12325                 case 0x6f: /* FNEG */
12326                     gen_helper_vfp_negs(tcg_res, tcg_op);
12327                     break;
12328                 case 0x7f: /* FSQRT */
12329                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12330                     break;
12331                 case 0x1a: /* FCVTNS */
12332                 case 0x1b: /* FCVTMS */
12333                 case 0x1c: /* FCVTAS */
12334                 case 0x3a: /* FCVTPS */
12335                 case 0x3b: /* FCVTZS */
12336                     gen_helper_vfp_tosls(tcg_res, tcg_op,
12337                                          tcg_constant_i32(0), tcg_fpstatus);
12338                     break;
12339                 case 0x5a: /* FCVTNU */
12340                 case 0x5b: /* FCVTMU */
12341                 case 0x5c: /* FCVTAU */
12342                 case 0x7a: /* FCVTPU */
12343                 case 0x7b: /* FCVTZU */
12344                     gen_helper_vfp_touls(tcg_res, tcg_op,
12345                                          tcg_constant_i32(0), tcg_fpstatus);
12346                     break;
12347                 case 0x18: /* FRINTN */
12348                 case 0x19: /* FRINTM */
12349                 case 0x38: /* FRINTP */
12350                 case 0x39: /* FRINTZ */
12351                 case 0x58: /* FRINTA */
12352                 case 0x79: /* FRINTI */
12353                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12354                     break;
12355                 case 0x59: /* FRINTX */
12356                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12357                     break;
12358                 case 0x7c: /* URSQRTE */
12359                     gen_helper_rsqrte_u32(tcg_res, tcg_op);
12360                     break;
12361                 case 0x1e: /* FRINT32Z */
12362                 case 0x5e: /* FRINT32X */
12363                     gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12364                     break;
12365                 case 0x1f: /* FRINT64Z */
12366                 case 0x5f: /* FRINT64X */
12367                     gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12368                     break;
12369                 default:
12370                     g_assert_not_reached();
12371                 }
12372             } else {
12373                 /* Use helpers for 8 and 16 bit elements */
12374                 switch (opcode) {
12375                 case 0x5: /* CNT, RBIT */
12376                     /* For these two insns size is part of the opcode specifier
12377                      * (handled earlier); they always operate on byte elements.
12378                      */
12379                     if (u) {
12380                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12381                     } else {
12382                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12383                     }
12384                     break;
12385                 case 0x7: /* SQABS, SQNEG */
12386                 {
12387                     NeonGenOneOpEnvFn *genfn;
12388                     static NeonGenOneOpEnvFn * const fns[2][2] = {
12389                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12390                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12391                     };
12392                     genfn = fns[size][u];
12393                     genfn(tcg_res, cpu_env, tcg_op);
12394                     break;
12395                 }
12396                 case 0x4: /* CLS, CLZ */
12397                     if (u) {
12398                         if (size == 0) {
12399                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
12400                         } else {
12401                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
12402                         }
12403                     } else {
12404                         if (size == 0) {
12405                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
12406                         } else {
12407                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
12408                         }
12409                     }
12410                     break;
12411                 default:
12412                     g_assert_not_reached();
12413                 }
12414             }
12415 
12416             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12417         }
12418     }
12419     clear_vec_high(s, is_q, rd);
12420 
12421     if (tcg_rmode) {
12422         gen_restore_rmode(tcg_rmode, tcg_fpstatus);
12423     }
12424 }
12425 
12426 /* AdvSIMD [scalar] two register miscellaneous (FP16)
12427  *
12428  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
12429  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12430  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12431  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12432  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12433  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12434  *
12435  * This actually covers two groups where scalar access is governed by
12436  * bit 28. A bunch of the instructions (float to integral) only exist
 * in the vector form and are unallocated for the scalar decode. Also
12438  * in the scalar decode Q is always 1.
12439  */
static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
{
    int fpop, opcode, a, u;
    int rn, rd;
    bool is_q;
    bool is_scalar;
    /* Set for the FRINTx ops, which have no scalar encoding in this group */
    bool only_in_vector = false;

    int pass;
    TCGv_i32 tcg_rmode = NULL;
    TCGv_ptr tcg_fpstatus = NULL;
    /* Most ops need an fp-status pointer; FABS/FNEG clear this below */
    bool need_fpst = true;
    /* rmode < 0 means "use the current rounding mode" (no override) */
    int rmode = -1;

    if (!dc_isar_feature(aa64_fp16, s)) {
        unallocated_encoding(s);
        return;
    }

    rd = extract32(insn, 0, 5);
    rn = extract32(insn, 5, 5);

    a = extract32(insn, 23, 1);
    u = extract32(insn, 29, 1);
    is_scalar = extract32(insn, 28, 1);
    is_q = extract32(insn, 30, 1);

    /* Fold the 'a' (bit 23) and 'u' (bit 29) bits on top of the 5-bit
     * opcode field so each instruction becomes a single 7-bit case value
     * in the switches below.
     */
    opcode = extract32(insn, 12, 5);
    fpop = deposit32(opcode, 5, 1, a);
    fpop = deposit32(fpop, 6, 1, u);

    switch (fpop) {
    case 0x1d: /* SCVTF */
    case 0x5d: /* UCVTF */
    {
        int elements;

        if (is_scalar) {
            elements = 1;
        } else {
            /* fp16 elements: 8 per 128-bit vector, 4 per 64-bit */
            elements = (is_q ? 8 : 4);
        }

        if (!fp_access_check(s)) {
            return;
        }
        handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
        return;
    }
    break;
    case 0x2c: /* FCMGT (zero) */
    case 0x2d: /* FCMEQ (zero) */
    case 0x2e: /* FCMLT (zero) */
    case 0x6c: /* FCMGE (zero) */
    case 0x6d: /* FCMLE (zero) */
        handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
        return;
    case 0x3d: /* FRECPE */
    case 0x3f: /* FRECPX */
        break;
    case 0x18: /* FRINTN */
        only_in_vector = true;
        rmode = FPROUNDING_TIEEVEN;
        break;
    case 0x19: /* FRINTM */
        only_in_vector = true;
        rmode = FPROUNDING_NEGINF;
        break;
    case 0x38: /* FRINTP */
        only_in_vector = true;
        rmode = FPROUNDING_POSINF;
        break;
    case 0x39: /* FRINTZ */
        only_in_vector = true;
        rmode = FPROUNDING_ZERO;
        break;
    case 0x58: /* FRINTA */
        only_in_vector = true;
        rmode = FPROUNDING_TIEAWAY;
        break;
    case 0x59: /* FRINTX */
    case 0x79: /* FRINTI */
        only_in_vector = true;
        /* current rounding mode */
        break;
    case 0x1a: /* FCVTNS */
        rmode = FPROUNDING_TIEEVEN;
        break;
    case 0x1b: /* FCVTMS */
        rmode = FPROUNDING_NEGINF;
        break;
    case 0x1c: /* FCVTAS */
        rmode = FPROUNDING_TIEAWAY;
        break;
    case 0x3a: /* FCVTPS */
        rmode = FPROUNDING_POSINF;
        break;
    case 0x3b: /* FCVTZS */
        rmode = FPROUNDING_ZERO;
        break;
    case 0x5a: /* FCVTNU */
        rmode = FPROUNDING_TIEEVEN;
        break;
    case 0x5b: /* FCVTMU */
        rmode = FPROUNDING_NEGINF;
        break;
    case 0x5c: /* FCVTAU */
        rmode = FPROUNDING_TIEAWAY;
        break;
    case 0x7a: /* FCVTPU */
        rmode = FPROUNDING_POSINF;
        break;
    case 0x7b: /* FCVTZU */
        rmode = FPROUNDING_ZERO;
        break;
    case 0x2f: /* FABS */
    case 0x6f: /* FNEG */
        /* Pure sign-bit manipulation: no fp status needed */
        need_fpst = false;
        break;
    case 0x7d: /* FRSQRTE */
    case 0x7f: /* FSQRT (vector) */
        break;
    default:
        unallocated_encoding(s);
        return;
    }


    /* Check additional constraints for the scalar encoding */
    if (is_scalar) {
        /* In the scalar decode Q must be 1 (see the encoding comment above) */
        if (!is_q) {
            unallocated_encoding(s);
            return;
        }
        /* FRINTxx is only in the vector form */
        if (only_in_vector) {
            unallocated_encoding(s);
            return;
        }
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (rmode >= 0 || need_fpst) {
        tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
    }

    if (rmode >= 0) {
        /* Temporarily override the rounding mode; restored at the end */
        tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
    }

    if (is_scalar) {
        TCGv_i32 tcg_op = read_fp_hreg(s, rn);
        TCGv_i32 tcg_res = tcg_temp_new_i32();

        switch (fpop) {
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x1c: /* FCVTAS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
            /* Rounding behaviour is selected via tcg_rmode set above */
            gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
            break;
        case 0x3d: /* FRECPE */
            gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
            break;
        case 0x3f: /* FRECPX */
            gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
            break;
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x5c: /* FCVTAU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
            gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
            break;
        case 0x6f: /* FNEG */
            /* fp16 negation is just flipping the sign bit */
            tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
            break;
        case 0x7d: /* FRSQRTE */
            gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
            break;
        default:
            g_assert_not_reached();
        }

        /* limit any sign extension going on */
        tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
        write_fp_sreg(s, rd, tcg_res);
    } else {
        /* Vector form: iterate over the 16-bit elements one at a time */
        for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op, rn, pass, MO_16);

            switch (fpop) {
            case 0x1a: /* FCVTNS */
            case 0x1b: /* FCVTMS */
            case 0x1c: /* FCVTAS */
            case 0x3a: /* FCVTPS */
            case 0x3b: /* FCVTZS */
                gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
                break;
            case 0x3d: /* FRECPE */
                gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
                break;
            case 0x5a: /* FCVTNU */
            case 0x5b: /* FCVTMU */
            case 0x5c: /* FCVTAU */
            case 0x7a: /* FCVTPU */
            case 0x7b: /* FCVTZU */
                gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
                break;
            case 0x18: /* FRINTN */
            case 0x19: /* FRINTM */
            case 0x38: /* FRINTP */
            case 0x39: /* FRINTZ */
            case 0x58: /* FRINTA */
            case 0x79: /* FRINTI */
                gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus)
;
                break;
            case 0x59: /* FRINTX */
                gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
                break;
            case 0x2f: /* FABS */
                /* fp16 abs is just clearing the sign bit */
                tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
                break;
            case 0x6f: /* FNEG */
                /* fp16 negation is just flipping the sign bit */
                tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
                break;
            case 0x7d: /* FRSQRTE */
                gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
                break;
            case 0x7f: /* FSQRT */
                gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
        }

        /* Zero the high half of the register for the !is_q (64-bit) case */
        clear_vec_high(s, is_q, rd);
    }

    /* Restore the caller's rounding mode if we overrode it */
    if (tcg_rmode) {
        gen_restore_rmode(tcg_rmode, tcg_fpstatus);
    }
}
12693 
12694 /* AdvSIMD scalar x indexed element
12695  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12696  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12697  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12698  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12699  * AdvSIMD vector x indexed element
12700  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12701  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12702  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12703  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12704  */
12705 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12706 {
12707     /* This encoding has two kinds of instruction:
12708      *  normal, where we perform elt x idxelt => elt for each
12709      *     element in the vector
12710      *  long, where we perform elt x idxelt and generate a result of
12711      *     double the width of the input element
12712      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
12713      */
12714     bool is_scalar = extract32(insn, 28, 1);
12715     bool is_q = extract32(insn, 30, 1);
12716     bool u = extract32(insn, 29, 1);
12717     int size = extract32(insn, 22, 2);
12718     int l = extract32(insn, 21, 1);
12719     int m = extract32(insn, 20, 1);
12720     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12721     int rm = extract32(insn, 16, 4);
12722     int opcode = extract32(insn, 12, 4);
12723     int h = extract32(insn, 11, 1);
12724     int rn = extract32(insn, 5, 5);
12725     int rd = extract32(insn, 0, 5);
12726     bool is_long = false;
12727     int is_fp = 0;
12728     bool is_fp16 = false;
12729     int index;
12730     TCGv_ptr fpst;
12731 
12732     switch (16 * u + opcode) {
12733     case 0x08: /* MUL */
12734     case 0x10: /* MLA */
12735     case 0x14: /* MLS */
12736         if (is_scalar) {
12737             unallocated_encoding(s);
12738             return;
12739         }
12740         break;
12741     case 0x02: /* SMLAL, SMLAL2 */
12742     case 0x12: /* UMLAL, UMLAL2 */
12743     case 0x06: /* SMLSL, SMLSL2 */
12744     case 0x16: /* UMLSL, UMLSL2 */
12745     case 0x0a: /* SMULL, SMULL2 */
12746     case 0x1a: /* UMULL, UMULL2 */
12747         if (is_scalar) {
12748             unallocated_encoding(s);
12749             return;
12750         }
12751         is_long = true;
12752         break;
12753     case 0x03: /* SQDMLAL, SQDMLAL2 */
12754     case 0x07: /* SQDMLSL, SQDMLSL2 */
12755     case 0x0b: /* SQDMULL, SQDMULL2 */
12756         is_long = true;
12757         break;
12758     case 0x0c: /* SQDMULH */
12759     case 0x0d: /* SQRDMULH */
12760         break;
12761     case 0x01: /* FMLA */
12762     case 0x05: /* FMLS */
12763     case 0x09: /* FMUL */
12764     case 0x19: /* FMULX */
12765         is_fp = 1;
12766         break;
12767     case 0x1d: /* SQRDMLAH */
12768     case 0x1f: /* SQRDMLSH */
12769         if (!dc_isar_feature(aa64_rdm, s)) {
12770             unallocated_encoding(s);
12771             return;
12772         }
12773         break;
12774     case 0x0e: /* SDOT */
12775     case 0x1e: /* UDOT */
12776         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12777             unallocated_encoding(s);
12778             return;
12779         }
12780         break;
12781     case 0x0f:
12782         switch (size) {
12783         case 0: /* SUDOT */
12784         case 2: /* USDOT */
12785             if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
12786                 unallocated_encoding(s);
12787                 return;
12788             }
12789             size = MO_32;
12790             break;
12791         case 1: /* BFDOT */
12792             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12793                 unallocated_encoding(s);
12794                 return;
12795             }
12796             size = MO_32;
12797             break;
12798         case 3: /* BFMLAL{B,T} */
12799             if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
12800                 unallocated_encoding(s);
12801                 return;
12802             }
12803             /* can't set is_fp without other incorrect size checks */
12804             size = MO_16;
12805             break;
12806         default:
12807             unallocated_encoding(s);
12808             return;
12809         }
12810         break;
12811     case 0x11: /* FCMLA #0 */
12812     case 0x13: /* FCMLA #90 */
12813     case 0x15: /* FCMLA #180 */
12814     case 0x17: /* FCMLA #270 */
12815         if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
12816             unallocated_encoding(s);
12817             return;
12818         }
12819         is_fp = 2;
12820         break;
12821     case 0x00: /* FMLAL */
12822     case 0x04: /* FMLSL */
12823     case 0x18: /* FMLAL2 */
12824     case 0x1c: /* FMLSL2 */
12825         if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
12826             unallocated_encoding(s);
12827             return;
12828         }
12829         size = MO_16;
12830         /* is_fp, but we pass cpu_env not fp_status.  */
12831         break;
12832     default:
12833         unallocated_encoding(s);
12834         return;
12835     }
12836 
12837     switch (is_fp) {
12838     case 1: /* normal fp */
12839         /* convert insn encoded size to MemOp size */
12840         switch (size) {
12841         case 0: /* half-precision */
12842             size = MO_16;
12843             is_fp16 = true;
12844             break;
12845         case MO_32: /* single precision */
12846         case MO_64: /* double precision */
12847             break;
12848         default:
12849             unallocated_encoding(s);
12850             return;
12851         }
12852         break;
12853 
12854     case 2: /* complex fp */
12855         /* Each indexable element is a complex pair.  */
12856         size += 1;
12857         switch (size) {
12858         case MO_32:
12859             if (h && !is_q) {
12860                 unallocated_encoding(s);
12861                 return;
12862             }
12863             is_fp16 = true;
12864             break;
12865         case MO_64:
12866             break;
12867         default:
12868             unallocated_encoding(s);
12869             return;
12870         }
12871         break;
12872 
12873     default: /* integer */
12874         switch (size) {
12875         case MO_8:
12876         case MO_64:
12877             unallocated_encoding(s);
12878             return;
12879         }
12880         break;
12881     }
12882     if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
12883         unallocated_encoding(s);
12884         return;
12885     }
12886 
12887     /* Given MemOp size, adjust register and indexing.  */
12888     switch (size) {
12889     case MO_16:
12890         index = h << 2 | l << 1 | m;
12891         break;
12892     case MO_32:
12893         index = h << 1 | l;
12894         rm |= m << 4;
12895         break;
12896     case MO_64:
12897         if (l || !is_q) {
12898             unallocated_encoding(s);
12899             return;
12900         }
12901         index = h;
12902         rm |= m << 4;
12903         break;
12904     default:
12905         g_assert_not_reached();
12906     }
12907 
12908     if (!fp_access_check(s)) {
12909         return;
12910     }
12911 
12912     if (is_fp) {
12913         fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
12914     } else {
12915         fpst = NULL;
12916     }
12917 
12918     switch (16 * u + opcode) {
12919     case 0x0e: /* SDOT */
12920     case 0x1e: /* UDOT */
12921         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12922                          u ? gen_helper_gvec_udot_idx_b
12923                          : gen_helper_gvec_sdot_idx_b);
12924         return;
12925     case 0x0f:
12926         switch (extract32(insn, 22, 2)) {
12927         case 0: /* SUDOT */
12928             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12929                              gen_helper_gvec_sudot_idx_b);
12930             return;
12931         case 1: /* BFDOT */
12932             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12933                              gen_helper_gvec_bfdot_idx);
12934             return;
12935         case 2: /* USDOT */
12936             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
12937                              gen_helper_gvec_usdot_idx_b);
12938             return;
12939         case 3: /* BFMLAL{B,T} */
12940             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
12941                               gen_helper_gvec_bfmlal_idx);
12942             return;
12943         }
12944         g_assert_not_reached();
12945     case 0x11: /* FCMLA #0 */
12946     case 0x13: /* FCMLA #90 */
12947     case 0x15: /* FCMLA #180 */
12948     case 0x17: /* FCMLA #270 */
12949         {
12950             int rot = extract32(insn, 13, 2);
12951             int data = (index << 2) | rot;
12952             tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
12953                                vec_full_reg_offset(s, rn),
12954                                vec_full_reg_offset(s, rm),
12955                                vec_full_reg_offset(s, rd), fpst,
12956                                is_q ? 16 : 8, vec_full_reg_size(s), data,
12957                                size == MO_64
12958                                ? gen_helper_gvec_fcmlas_idx
12959                                : gen_helper_gvec_fcmlah_idx);
12960         }
12961         return;
12962 
12963     case 0x00: /* FMLAL */
12964     case 0x04: /* FMLSL */
12965     case 0x18: /* FMLAL2 */
12966     case 0x1c: /* FMLSL2 */
12967         {
12968             int is_s = extract32(opcode, 2, 1);
12969             int is_2 = u;
12970             int data = (index << 2) | (is_2 << 1) | is_s;
12971             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12972                                vec_full_reg_offset(s, rn),
12973                                vec_full_reg_offset(s, rm), cpu_env,
12974                                is_q ? 16 : 8, vec_full_reg_size(s),
12975                                data, gen_helper_gvec_fmlal_idx_a64);
12976         }
12977         return;
12978 
12979     case 0x08: /* MUL */
12980         if (!is_long && !is_scalar) {
12981             static gen_helper_gvec_3 * const fns[3] = {
12982                 gen_helper_gvec_mul_idx_h,
12983                 gen_helper_gvec_mul_idx_s,
12984                 gen_helper_gvec_mul_idx_d,
12985             };
12986             tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
12987                                vec_full_reg_offset(s, rn),
12988                                vec_full_reg_offset(s, rm),
12989                                is_q ? 16 : 8, vec_full_reg_size(s),
12990                                index, fns[size - 1]);
12991             return;
12992         }
12993         break;
12994 
12995     case 0x10: /* MLA */
12996         if (!is_long && !is_scalar) {
12997             static gen_helper_gvec_4 * const fns[3] = {
12998                 gen_helper_gvec_mla_idx_h,
12999                 gen_helper_gvec_mla_idx_s,
13000                 gen_helper_gvec_mla_idx_d,
13001             };
13002             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13003                                vec_full_reg_offset(s, rn),
13004                                vec_full_reg_offset(s, rm),
13005                                vec_full_reg_offset(s, rd),
13006                                is_q ? 16 : 8, vec_full_reg_size(s),
13007                                index, fns[size - 1]);
13008             return;
13009         }
13010         break;
13011 
13012     case 0x14: /* MLS */
13013         if (!is_long && !is_scalar) {
13014             static gen_helper_gvec_4 * const fns[3] = {
13015                 gen_helper_gvec_mls_idx_h,
13016                 gen_helper_gvec_mls_idx_s,
13017                 gen_helper_gvec_mls_idx_d,
13018             };
13019             tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13020                                vec_full_reg_offset(s, rn),
13021                                vec_full_reg_offset(s, rm),
13022                                vec_full_reg_offset(s, rd),
13023                                is_q ? 16 : 8, vec_full_reg_size(s),
13024                                index, fns[size - 1]);
13025             return;
13026         }
13027         break;
13028     }
13029 
13030     if (size == 3) {
13031         TCGv_i64 tcg_idx = tcg_temp_new_i64();
13032         int pass;
13033 
13034         assert(is_fp && is_q && !is_long);
13035 
13036         read_vec_element(s, tcg_idx, rm, index, MO_64);
13037 
13038         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13039             TCGv_i64 tcg_op = tcg_temp_new_i64();
13040             TCGv_i64 tcg_res = tcg_temp_new_i64();
13041 
13042             read_vec_element(s, tcg_op, rn, pass, MO_64);
13043 
13044             switch (16 * u + opcode) {
13045             case 0x05: /* FMLS */
13046                 /* As usual for ARM, separate negation for fused multiply-add */
13047                 gen_helper_vfp_negd(tcg_op, tcg_op);
13048                 /* fall through */
13049             case 0x01: /* FMLA */
13050                 read_vec_element(s, tcg_res, rd, pass, MO_64);
13051                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
13052                 break;
13053             case 0x09: /* FMUL */
13054                 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
13055                 break;
13056             case 0x19: /* FMULX */
13057                 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
13058                 break;
13059             default:
13060                 g_assert_not_reached();
13061             }
13062 
13063             write_vec_element(s, tcg_res, rd, pass, MO_64);
13064         }
13065 
13066         clear_vec_high(s, !is_scalar, rd);
13067     } else if (!is_long) {
13068         /* 32 bit floating point, or 16 or 32 bit integer.
13069          * For the 16 bit scalar case we use the usual Neon helpers and
13070          * rely on the fact that 0 op 0 == 0 with no side effects.
13071          */
13072         TCGv_i32 tcg_idx = tcg_temp_new_i32();
13073         int pass, maxpasses;
13074 
13075         if (is_scalar) {
13076             maxpasses = 1;
13077         } else {
13078             maxpasses = is_q ? 4 : 2;
13079         }
13080 
13081         read_vec_element_i32(s, tcg_idx, rm, index, size);
13082 
13083         if (size == 1 && !is_scalar) {
13084             /* The simplest way to handle the 16x16 indexed ops is to duplicate
13085              * the index into both halves of the 32 bit tcg_idx and then use
13086              * the usual Neon helpers.
13087              */
13088             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13089         }
13090 
13091         for (pass = 0; pass < maxpasses; pass++) {
13092             TCGv_i32 tcg_op = tcg_temp_new_i32();
13093             TCGv_i32 tcg_res = tcg_temp_new_i32();
13094 
13095             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13096 
13097             switch (16 * u + opcode) {
13098             case 0x08: /* MUL */
13099             case 0x10: /* MLA */
13100             case 0x14: /* MLS */
13101             {
13102                 static NeonGenTwoOpFn * const fns[2][2] = {
13103                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13104                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
13105                 };
13106                 NeonGenTwoOpFn *genfn;
13107                 bool is_sub = opcode == 0x4;
13108 
13109                 if (size == 1) {
13110                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13111                 } else {
13112                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13113                 }
13114                 if (opcode == 0x8) {
13115                     break;
13116                 }
13117                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13118                 genfn = fns[size - 1][is_sub];
13119                 genfn(tcg_res, tcg_op, tcg_res);
13120                 break;
13121             }
13122             case 0x05: /* FMLS */
13123             case 0x01: /* FMLA */
13124                 read_vec_element_i32(s, tcg_res, rd, pass,
13125                                      is_scalar ? size : MO_32);
13126                 switch (size) {
13127                 case 1:
13128                     if (opcode == 0x5) {
13129                         /* As usual for ARM, separate negation for fused
13130                          * multiply-add */
13131                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13132                     }
13133                     if (is_scalar) {
13134                         gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13135                                                    tcg_res, fpst);
13136                     } else {
13137                         gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13138                                                     tcg_res, fpst);
13139                     }
13140                     break;
13141                 case 2:
13142                     if (opcode == 0x5) {
13143                         /* As usual for ARM, separate negation for
13144                          * fused multiply-add */
13145                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13146                     }
13147                     gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13148                                            tcg_res, fpst);
13149                     break;
13150                 default:
13151                     g_assert_not_reached();
13152                 }
13153                 break;
13154             case 0x09: /* FMUL */
13155                 switch (size) {
13156                 case 1:
13157                     if (is_scalar) {
13158                         gen_helper_advsimd_mulh(tcg_res, tcg_op,
13159                                                 tcg_idx, fpst);
13160                     } else {
13161                         gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13162                                                  tcg_idx, fpst);
13163                     }
13164                     break;
13165                 case 2:
13166                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13167                     break;
13168                 default:
13169                     g_assert_not_reached();
13170                 }
13171                 break;
13172             case 0x19: /* FMULX */
13173                 switch (size) {
13174                 case 1:
13175                     if (is_scalar) {
13176                         gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13177                                                  tcg_idx, fpst);
13178                     } else {
13179                         gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13180                                                   tcg_idx, fpst);
13181                     }
13182                     break;
13183                 case 2:
13184                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13185                     break;
13186                 default:
13187                     g_assert_not_reached();
13188                 }
13189                 break;
13190             case 0x0c: /* SQDMULH */
13191                 if (size == 1) {
13192                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
13193                                                tcg_op, tcg_idx);
13194                 } else {
13195                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
13196                                                tcg_op, tcg_idx);
13197                 }
13198                 break;
13199             case 0x0d: /* SQRDMULH */
13200                 if (size == 1) {
13201                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
13202                                                 tcg_op, tcg_idx);
13203                 } else {
13204                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
13205                                                 tcg_op, tcg_idx);
13206                 }
13207                 break;
13208             case 0x1d: /* SQRDMLAH */
13209                 read_vec_element_i32(s, tcg_res, rd, pass,
13210                                      is_scalar ? size : MO_32);
13211                 if (size == 1) {
13212                     gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
13213                                                 tcg_op, tcg_idx, tcg_res);
13214                 } else {
13215                     gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
13216                                                 tcg_op, tcg_idx, tcg_res);
13217                 }
13218                 break;
13219             case 0x1f: /* SQRDMLSH */
13220                 read_vec_element_i32(s, tcg_res, rd, pass,
13221                                      is_scalar ? size : MO_32);
13222                 if (size == 1) {
13223                     gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
13224                                                 tcg_op, tcg_idx, tcg_res);
13225                 } else {
13226                     gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
13227                                                 tcg_op, tcg_idx, tcg_res);
13228                 }
13229                 break;
13230             default:
13231                 g_assert_not_reached();
13232             }
13233 
13234             if (is_scalar) {
13235                 write_fp_sreg(s, rd, tcg_res);
13236             } else {
13237                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13238             }
13239         }
13240 
13241         clear_vec_high(s, is_q, rd);
13242     } else {
13243         /* long ops: 16x16->32 or 32x32->64 */
13244         TCGv_i64 tcg_res[2];
13245         int pass;
13246         bool satop = extract32(opcode, 0, 1);
13247         MemOp memop = MO_32;
13248 
13249         if (satop || !u) {
13250             memop |= MO_SIGN;
13251         }
13252 
13253         if (size == 2) {
13254             TCGv_i64 tcg_idx = tcg_temp_new_i64();
13255 
13256             read_vec_element(s, tcg_idx, rm, index, memop);
13257 
13258             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13259                 TCGv_i64 tcg_op = tcg_temp_new_i64();
13260                 TCGv_i64 tcg_passres;
13261                 int passelt;
13262 
13263                 if (is_scalar) {
13264                     passelt = 0;
13265                 } else {
13266                     passelt = pass + (is_q * 2);
13267                 }
13268 
13269                 read_vec_element(s, tcg_op, rn, passelt, memop);
13270 
13271                 tcg_res[pass] = tcg_temp_new_i64();
13272 
13273                 if (opcode == 0xa || opcode == 0xb) {
13274                     /* Non-accumulating ops */
13275                     tcg_passres = tcg_res[pass];
13276                 } else {
13277                     tcg_passres = tcg_temp_new_i64();
13278                 }
13279 
13280                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13281 
13282                 if (satop) {
13283                     /* saturating, doubling */
13284                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
13285                                                       tcg_passres, tcg_passres);
13286                 }
13287 
13288                 if (opcode == 0xa || opcode == 0xb) {
13289                     continue;
13290                 }
13291 
13292                 /* Accumulating op: handle accumulate step */
13293                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13294 
13295                 switch (opcode) {
13296                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13297                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13298                     break;
13299                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13300                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13301                     break;
13302                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13303                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
13304                     /* fall through */
13305                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13306                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
13307                                                       tcg_res[pass],
13308                                                       tcg_passres);
13309                     break;
13310                 default:
13311                     g_assert_not_reached();
13312                 }
13313             }
13314 
13315             clear_vec_high(s, !is_scalar, rd);
13316         } else {
13317             TCGv_i32 tcg_idx = tcg_temp_new_i32();
13318 
13319             assert(size == 1);
13320             read_vec_element_i32(s, tcg_idx, rm, index, size);
13321 
13322             if (!is_scalar) {
13323                 /* The simplest way to handle the 16x16 indexed ops is to
13324                  * duplicate the index into both halves of the 32 bit tcg_idx
13325                  * and then use the usual Neon helpers.
13326                  */
13327                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13328             }
13329 
13330             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13331                 TCGv_i32 tcg_op = tcg_temp_new_i32();
13332                 TCGv_i64 tcg_passres;
13333 
13334                 if (is_scalar) {
13335                     read_vec_element_i32(s, tcg_op, rn, pass, size);
13336                 } else {
13337                     read_vec_element_i32(s, tcg_op, rn,
13338                                          pass + (is_q * 2), MO_32);
13339                 }
13340 
13341                 tcg_res[pass] = tcg_temp_new_i64();
13342 
13343                 if (opcode == 0xa || opcode == 0xb) {
13344                     /* Non-accumulating ops */
13345                     tcg_passres = tcg_res[pass];
13346                 } else {
13347                     tcg_passres = tcg_temp_new_i64();
13348                 }
13349 
13350                 if (memop & MO_SIGN) {
13351                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13352                 } else {
13353                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13354                 }
13355                 if (satop) {
13356                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13357                                                       tcg_passres, tcg_passres);
13358                 }
13359 
13360                 if (opcode == 0xa || opcode == 0xb) {
13361                     continue;
13362                 }
13363 
13364                 /* Accumulating op: handle accumulate step */
13365                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13366 
13367                 switch (opcode) {
13368                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13369                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13370                                              tcg_passres);
13371                     break;
13372                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13373                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13374                                              tcg_passres);
13375                     break;
13376                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13377                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13378                     /* fall through */
13379                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13380                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13381                                                       tcg_res[pass],
13382                                                       tcg_passres);
13383                     break;
13384                 default:
13385                     g_assert_not_reached();
13386                 }
13387             }
13388 
13389             if (is_scalar) {
13390                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13391             }
13392         }
13393 
13394         if (is_scalar) {
13395             tcg_res[1] = tcg_constant_i64(0);
13396         }
13397 
13398         for (pass = 0; pass < 2; pass++) {
13399             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13400         }
13401     }
13402 }
13403 
13404 /* Crypto AES
13405  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13406  * +-----------------+------+-----------+--------+-----+------+------+
13407  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13408  * +-----------------+------+-----------+--------+-----+------+------+
13409  */
13410 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13411 {
13412     int size = extract32(insn, 22, 2);
13413     int opcode = extract32(insn, 12, 5);
13414     int rn = extract32(insn, 5, 5);
13415     int rd = extract32(insn, 0, 5);
13416     int decrypt;
13417     gen_helper_gvec_2 *genfn2 = NULL;
13418     gen_helper_gvec_3 *genfn3 = NULL;
13419 
13420     if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13421         unallocated_encoding(s);
13422         return;
13423     }
13424 
13425     switch (opcode) {
13426     case 0x4: /* AESE */
13427         decrypt = 0;
13428         genfn3 = gen_helper_crypto_aese;
13429         break;
13430     case 0x6: /* AESMC */
13431         decrypt = 0;
13432         genfn2 = gen_helper_crypto_aesmc;
13433         break;
13434     case 0x5: /* AESD */
13435         decrypt = 1;
13436         genfn3 = gen_helper_crypto_aese;
13437         break;
13438     case 0x7: /* AESIMC */
13439         decrypt = 1;
13440         genfn2 = gen_helper_crypto_aesmc;
13441         break;
13442     default:
13443         unallocated_encoding(s);
13444         return;
13445     }
13446 
13447     if (!fp_access_check(s)) {
13448         return;
13449     }
13450     if (genfn2) {
13451         gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2);
13452     } else {
13453         gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3);
13454     }
13455 }
13456 
13457 /* Crypto three-reg SHA
13458  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
13459  * +-----------------+------+---+------+---+--------+-----+------+------+
13460  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
13461  * +-----------------+------+---+------+---+--------+-----+------+------+
13462  */
13463 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13464 {
13465     int size = extract32(insn, 22, 2);
13466     int opcode = extract32(insn, 12, 3);
13467     int rm = extract32(insn, 16, 5);
13468     int rn = extract32(insn, 5, 5);
13469     int rd = extract32(insn, 0, 5);
13470     gen_helper_gvec_3 *genfn;
13471     bool feature;
13472 
13473     if (size != 0) {
13474         unallocated_encoding(s);
13475         return;
13476     }
13477 
13478     switch (opcode) {
13479     case 0: /* SHA1C */
13480         genfn = gen_helper_crypto_sha1c;
13481         feature = dc_isar_feature(aa64_sha1, s);
13482         break;
13483     case 1: /* SHA1P */
13484         genfn = gen_helper_crypto_sha1p;
13485         feature = dc_isar_feature(aa64_sha1, s);
13486         break;
13487     case 2: /* SHA1M */
13488         genfn = gen_helper_crypto_sha1m;
13489         feature = dc_isar_feature(aa64_sha1, s);
13490         break;
13491     case 3: /* SHA1SU0 */
13492         genfn = gen_helper_crypto_sha1su0;
13493         feature = dc_isar_feature(aa64_sha1, s);
13494         break;
13495     case 4: /* SHA256H */
13496         genfn = gen_helper_crypto_sha256h;
13497         feature = dc_isar_feature(aa64_sha256, s);
13498         break;
13499     case 5: /* SHA256H2 */
13500         genfn = gen_helper_crypto_sha256h2;
13501         feature = dc_isar_feature(aa64_sha256, s);
13502         break;
13503     case 6: /* SHA256SU1 */
13504         genfn = gen_helper_crypto_sha256su1;
13505         feature = dc_isar_feature(aa64_sha256, s);
13506         break;
13507     default:
13508         unallocated_encoding(s);
13509         return;
13510     }
13511 
13512     if (!feature) {
13513         unallocated_encoding(s);
13514         return;
13515     }
13516 
13517     if (!fp_access_check(s)) {
13518         return;
13519     }
13520     gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
13521 }
13522 
13523 /* Crypto two-reg SHA
13524  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13525  * +-----------------+------+-----------+--------+-----+------+------+
13526  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13527  * +-----------------+------+-----------+--------+-----+------+------+
13528  */
13529 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13530 {
13531     int size = extract32(insn, 22, 2);
13532     int opcode = extract32(insn, 12, 5);
13533     int rn = extract32(insn, 5, 5);
13534     int rd = extract32(insn, 0, 5);
13535     gen_helper_gvec_2 *genfn;
13536     bool feature;
13537 
13538     if (size != 0) {
13539         unallocated_encoding(s);
13540         return;
13541     }
13542 
13543     switch (opcode) {
13544     case 0: /* SHA1H */
13545         feature = dc_isar_feature(aa64_sha1, s);
13546         genfn = gen_helper_crypto_sha1h;
13547         break;
13548     case 1: /* SHA1SU1 */
13549         feature = dc_isar_feature(aa64_sha1, s);
13550         genfn = gen_helper_crypto_sha1su1;
13551         break;
13552     case 2: /* SHA256SU0 */
13553         feature = dc_isar_feature(aa64_sha256, s);
13554         genfn = gen_helper_crypto_sha256su0;
13555         break;
13556     default:
13557         unallocated_encoding(s);
13558         return;
13559     }
13560 
13561     if (!feature) {
13562         unallocated_encoding(s);
13563         return;
13564     }
13565 
13566     if (!fp_access_check(s)) {
13567         return;
13568     }
13569     gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
13570 }
13571 
/* RAX1 on one 64-bit lane: d = n ^ rol64(m, 1). */
static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    tcg_gen_rotli_i64(d, m, 1);
    tcg_gen_xor_i64(d, d, n);
}
13577 
/* RAX1 using host vector ops: d = n ^ rol(m, 1) per element. */
static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
{
    tcg_gen_rotli_vec(vece, d, m, 1);
    tcg_gen_xor_vec(vece, d, d, n);
}
13583 
/*
 * Expand RAX1 (rotate-left-by-one and exclusive-or) over full vector
 * registers.  The three expansion strategies below are selected by the
 * generic-vector machinery depending on host capabilities.
 */
void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    /* Host vector expansion needs the rotli_vec op to be available. */
    static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3 op = {
        .fni8 = gen_rax1_i64,           /* 64-bit scalar expansion */
        .fniv = gen_rax1_vec,           /* host-vector expansion */
        .opt_opc = vecop_list,
        .fno = gen_helper_crypto_rax1,  /* out-of-line helper fallback */
        .vece = MO_64,
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
}
13597 
13598 /* Crypto three-reg SHA512
13599  *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
13600  * +-----------------------+------+---+---+-----+--------+------+------+
13601  * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
13602  * +-----------------------+------+---+---+-----+--------+------+------+
13603  */
13604 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13605 {
13606     int opcode = extract32(insn, 10, 2);
13607     int o =  extract32(insn, 14, 1);
13608     int rm = extract32(insn, 16, 5);
13609     int rn = extract32(insn, 5, 5);
13610     int rd = extract32(insn, 0, 5);
13611     bool feature;
13612     gen_helper_gvec_3 *oolfn = NULL;
13613     GVecGen3Fn *gvecfn = NULL;
13614 
13615     if (o == 0) {
13616         switch (opcode) {
13617         case 0: /* SHA512H */
13618             feature = dc_isar_feature(aa64_sha512, s);
13619             oolfn = gen_helper_crypto_sha512h;
13620             break;
13621         case 1: /* SHA512H2 */
13622             feature = dc_isar_feature(aa64_sha512, s);
13623             oolfn = gen_helper_crypto_sha512h2;
13624             break;
13625         case 2: /* SHA512SU1 */
13626             feature = dc_isar_feature(aa64_sha512, s);
13627             oolfn = gen_helper_crypto_sha512su1;
13628             break;
13629         case 3: /* RAX1 */
13630             feature = dc_isar_feature(aa64_sha3, s);
13631             gvecfn = gen_gvec_rax1;
13632             break;
13633         default:
13634             g_assert_not_reached();
13635         }
13636     } else {
13637         switch (opcode) {
13638         case 0: /* SM3PARTW1 */
13639             feature = dc_isar_feature(aa64_sm3, s);
13640             oolfn = gen_helper_crypto_sm3partw1;
13641             break;
13642         case 1: /* SM3PARTW2 */
13643             feature = dc_isar_feature(aa64_sm3, s);
13644             oolfn = gen_helper_crypto_sm3partw2;
13645             break;
13646         case 2: /* SM4EKEY */
13647             feature = dc_isar_feature(aa64_sm4, s);
13648             oolfn = gen_helper_crypto_sm4ekey;
13649             break;
13650         default:
13651             unallocated_encoding(s);
13652             return;
13653         }
13654     }
13655 
13656     if (!feature) {
13657         unallocated_encoding(s);
13658         return;
13659     }
13660 
13661     if (!fp_access_check(s)) {
13662         return;
13663     }
13664 
13665     if (oolfn) {
13666         gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
13667     } else {
13668         gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
13669     }
13670 }
13671 
13672 /* Crypto two-reg SHA512
13673  *  31                                     12  11  10  9    5 4    0
13674  * +-----------------------------------------+--------+------+------+
13675  * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
13676  * +-----------------------------------------+--------+------+------+
13677  */
13678 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13679 {
13680     int opcode = extract32(insn, 10, 2);
13681     int rn = extract32(insn, 5, 5);
13682     int rd = extract32(insn, 0, 5);
13683     bool feature;
13684 
13685     switch (opcode) {
13686     case 0: /* SHA512SU0 */
13687         feature = dc_isar_feature(aa64_sha512, s);
13688         break;
13689     case 1: /* SM4E */
13690         feature = dc_isar_feature(aa64_sm4, s);
13691         break;
13692     default:
13693         unallocated_encoding(s);
13694         return;
13695     }
13696 
13697     if (!feature) {
13698         unallocated_encoding(s);
13699         return;
13700     }
13701 
13702     if (!fp_access_check(s)) {
13703         return;
13704     }
13705 
13706     switch (opcode) {
13707     case 0: /* SHA512SU0 */
13708         gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
13709         break;
13710     case 1: /* SM4E */
13711         gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
13712         break;
13713     default:
13714         g_assert_not_reached();
13715     }
13716 }
13717 
13718 /* Crypto four-register
13719  *  31               23 22 21 20  16 15  14  10 9    5 4    0
13720  * +-------------------+-----+------+---+------+------+------+
13721  * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
13722  * +-------------------+-----+------+---+------+------+------+
13723  */
13724 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13725 {
13726     int op0 = extract32(insn, 21, 2);
13727     int rm = extract32(insn, 16, 5);
13728     int ra = extract32(insn, 10, 5);
13729     int rn = extract32(insn, 5, 5);
13730     int rd = extract32(insn, 0, 5);
13731     bool feature;
13732 
13733     switch (op0) {
13734     case 0: /* EOR3 */
13735     case 1: /* BCAX */
13736         feature = dc_isar_feature(aa64_sha3, s);
13737         break;
13738     case 2: /* SM3SS1 */
13739         feature = dc_isar_feature(aa64_sm3, s);
13740         break;
13741     default:
13742         unallocated_encoding(s);
13743         return;
13744     }
13745 
13746     if (!feature) {
13747         unallocated_encoding(s);
13748         return;
13749     }
13750 
13751     if (!fp_access_check(s)) {
13752         return;
13753     }
13754 
13755     if (op0 < 2) {
13756         TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13757         int pass;
13758 
13759         tcg_op1 = tcg_temp_new_i64();
13760         tcg_op2 = tcg_temp_new_i64();
13761         tcg_op3 = tcg_temp_new_i64();
13762         tcg_res[0] = tcg_temp_new_i64();
13763         tcg_res[1] = tcg_temp_new_i64();
13764 
13765         for (pass = 0; pass < 2; pass++) {
13766             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13767             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13768             read_vec_element(s, tcg_op3, ra, pass, MO_64);
13769 
13770             if (op0 == 0) {
13771                 /* EOR3 */
13772                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13773             } else {
13774                 /* BCAX */
13775                 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13776             }
13777             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13778         }
13779         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13780         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13781     } else {
13782         TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13783 
13784         tcg_op1 = tcg_temp_new_i32();
13785         tcg_op2 = tcg_temp_new_i32();
13786         tcg_op3 = tcg_temp_new_i32();
13787         tcg_res = tcg_temp_new_i32();
13788         tcg_zero = tcg_constant_i32(0);
13789 
13790         read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13791         read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13792         read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13793 
13794         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13795         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13796         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13797         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13798 
13799         write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13800         write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13801         write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13802         write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13803     }
13804 }
13805 
13806 /* Crypto XAR
13807  *  31                   21 20  16 15    10 9    5 4    0
13808  * +-----------------------+------+--------+------+------+
13809  * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13810  * +-----------------------+------+--------+------+------+
13811  */
13812 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13813 {
13814     int rm = extract32(insn, 16, 5);
13815     int imm6 = extract32(insn, 10, 6);
13816     int rn = extract32(insn, 5, 5);
13817     int rd = extract32(insn, 0, 5);
13818 
13819     if (!dc_isar_feature(aa64_sha3, s)) {
13820         unallocated_encoding(s);
13821         return;
13822     }
13823 
13824     if (!fp_access_check(s)) {
13825         return;
13826     }
13827 
13828     gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
13829                  vec_full_reg_offset(s, rn),
13830                  vec_full_reg_offset(s, rm), imm6, 16,
13831                  vec_full_reg_size(s));
13832 }
13833 
13834 /* Crypto three-reg imm2
13835  *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
13836  * +-----------------------+------+-----+------+--------+------+------+
13837  * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
13838  * +-----------------------+------+-----+------+--------+------+------+
13839  */
13840 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13841 {
13842     static gen_helper_gvec_3 * const fns[4] = {
13843         gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
13844         gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
13845     };
13846     int opcode = extract32(insn, 10, 2);
13847     int imm2 = extract32(insn, 12, 2);
13848     int rm = extract32(insn, 16, 5);
13849     int rn = extract32(insn, 5, 5);
13850     int rd = extract32(insn, 0, 5);
13851 
13852     if (!dc_isar_feature(aa64_sm3, s)) {
13853         unallocated_encoding(s);
13854         return;
13855     }
13856 
13857     if (!fp_access_check(s)) {
13858         return;
13859     }
13860 
13861     gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
13862 }
13863 
/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 *
 * NOTE(review): entries appear to be matched in order, so where two
 * entries can match the same insn the more specific one must come
 * first (see the simd_mod_imm note below) -- confirm against
 * lookup_disas_fn if reordering.
 */
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
    { 0xce000000, 0xff808000, disas_crypto_four_reg },
    { 0xce800000, 0xffe00000, disas_crypto_xar },
    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
    { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
    { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
    { 0x00000000, 0x00000000, NULL } /* end-of-table sentinel */
};
13905 
13906 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13907 {
13908     /* Note that this is called with all non-FP cases from
13909      * table C3-6 so it must UNDEF for entries not specifically
13910      * allocated to instructions in that table.
13911      */
13912     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13913     if (fn) {
13914         fn(s, insn);
13915     } else {
13916         unallocated_encoding(s);
13917     }
13918 }
13919 
13920 /* C3.6 Data processing - SIMD and floating point */
13921 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13922 {
13923     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13924         disas_data_proc_fp(s, insn);
13925     } else {
13926         /* SIMD, including crypto */
13927         disas_data_proc_simd(s, insn);
13928     }
13929 }
13930 
/* Decodetree stub: report a successful decode and generate nothing. */
static bool trans_OK(DisasContext *s, arg_OK *a)
{
    return true;
}
13935 
/*
 * Decodetree stub: mark the current insn as non-streaming (the flag is
 * acted upon elsewhere, not visible in this function) and report a
 * successful decode.
 */
static bool trans_FAIL(DisasContext *s, arg_OK *a)
{
    s->is_nonstreaming = true;
    return true;
}
13941 
13942 /**
13943  * is_guarded_page:
13944  * @env: The cpu environment
13945  * @s: The DisasContext
13946  *
13947  * Return true if the page is guarded.
13948  */
13949 static bool is_guarded_page(CPUARMState *env, DisasContext *s)
13950 {
13951     uint64_t addr = s->base.pc_first;
13952 #ifdef CONFIG_USER_ONLY
13953     return page_get_flags(addr) & PAGE_BTI;
13954 #else
13955     CPUTLBEntryFull *full;
13956     void *host;
13957     int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
13958     int flags;
13959 
13960     /*
13961      * We test this immediately after reading an insn, which means
13962      * that the TLB entry must be present and valid, and thus this
13963      * access will never raise an exception.
13964      */
13965     flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
13966                               false, &host, &full, 0);
13967     assert(!(flags & TLB_INVALID_MASK));
13968 
13969     return full->guarded;
13970 #endif
13971 }
13972 
13973 /**
13974  * btype_destination_ok:
13975  * @insn: The instruction at the branch destination
13976  * @bt: SCTLR_ELx.BT
13977  * @btype: PSTATE.BTYPE, and is non-zero
13978  *
13979  * On a guarded page, there are a limited number of insns
13980  * that may be present at the branch target:
13981  *   - branch target identifiers,
13982  *   - paciasp, pacibsp,
13983  *   - BRK insn
13984  *   - HLT insn
13985  * Anything else causes a Branch Target Exception.
13986  *
13987  * Return true if the branch is compatible, false to raise BTITRAP.
13988  */
13989 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
13990 {
13991     if ((insn & 0xfffff01fu) == 0xd503201fu) {
13992         /* HINT space */
13993         switch (extract32(insn, 5, 7)) {
13994         case 0b011001: /* PACIASP */
13995         case 0b011011: /* PACIBSP */
13996             /*
13997              * If SCTLR_ELx.BT, then PACI*SP are not compatible
13998              * with btype == 3.  Otherwise all btype are ok.
13999              */
14000             return !bt || btype != 3;
14001         case 0b100000: /* BTI */
14002             /* Not compatible with any btype.  */
14003             return false;
14004         case 0b100010: /* BTI c */
14005             /* Not compatible with btype == 3 */
14006             return btype != 3;
14007         case 0b100100: /* BTI j */
14008             /* Not compatible with btype == 2 */
14009             return btype != 2;
14010         case 0b100110: /* BTI jc */
14011             /* Compatible with any btype.  */
14012             return true;
14013         }
14014     } else {
14015         switch (insn & 0xffe0001fu) {
14016         case 0xd4200000u: /* BRK */
14017         case 0xd4400000u: /* HLT */
14018             /* Give priority to the breakpoint exception.  */
14019             return true;
14020         }
14021     }
14022     return false;
14023 }
14024 
14025 /* C3.1 A64 instruction index by encoding */
14026 static void disas_a64_legacy(DisasContext *s, uint32_t insn)
14027 {
14028     switch (extract32(insn, 25, 4)) {
14029     case 0xa: case 0xb: /* Branch, exception generation and system insns */
14030         disas_b_exc_sys(s, insn);
14031         break;
14032     case 0x4:
14033     case 0x6:
14034     case 0xc:
14035     case 0xe:      /* Loads and stores */
14036         disas_ldst(s, insn);
14037         break;
14038     case 0x5:
14039     case 0xd:      /* Data processing - register */
14040         disas_data_proc_reg(s, insn);
14041         break;
14042     case 0x7:
14043     case 0xf:      /* Data processing - SIMD and floating point */
14044         disas_data_proc_simd_fp(s, insn);
14045         break;
14046     default:
14047         unallocated_encoding(s);
14048         break;
14049     }
14050 }
14051 
/*
 * Initialize the DisasContext for a new TB: unpack the cached TB
 * flags into per-TB translation state, then bound the number of
 * instructions to translate to what remains on the guest page.
 */
static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    ARMCPU *arm_cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;
    dc->pc_save = dc->base.pc_first;
    /* A64 has no Thumb or conditional-execution state. */
    dc->aarch64 = true;
    dc->thumb = false;
    dc->sctlr_b = 0;
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
    dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
    dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
    dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
    dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET);
    dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
    dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
    /* VL/SVL flag fields hold (length/16 - 1); convert to bytes. */
    dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
    dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
    dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
    dc->bt = EX_TBFLAG_A64(tb_flags, BT);
    dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
    dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
    dc->ata = EX_TBFLAG_A64(tb_flags, ATA);
    dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
    dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
    dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
    dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
    dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;
    dc->dcz_blocksize = arm_cpu->dcz_blocksize;

#ifdef CONFIG_USER_ONLY
    /* In sve_probe_page, we assume TBI is enabled. */
    tcg_debug_assert(dc->tbid & 1);
#endif

    dc->lse2 = dc_isar_feature(aa64_lse2, dc);

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;

    /* Bound the number of insns to execute to those left on the page.  */
    /* -(pc | TARGET_PAGE_MASK) is the byte count to the end of the page. */
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;

    /* If architectural single step active, limit to 1.  */
    if (dc->ss_active) {
        bound = 1;
    }
    dc->base.max_insns = MIN(dc->base.max_insns, bound);
}
14140 
/* TranslatorOps tb_start hook: nothing to do for A64. */
static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}
14144 
14145 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
14146 {
14147     DisasContext *dc = container_of(dcbase, DisasContext, base);
14148     target_ulong pc_arg = dc->base.pc_next;
14149 
14150     if (tb_cflags(dcbase->tb) & CF_PCREL) {
14151         pc_arg &= ~TARGET_PAGE_MASK;
14152     }
14153     tcg_gen_insn_start(pc_arg, 0, 0);
14154     dc->insn_start = tcg_last_op();
14155 }
14156 
/*
 * Translate one A64 instruction.  Pre-decode checks are performed in
 * architectural priority order: pending single-step exception, PC
 * alignment fault, illegal execution state, then BTI branch-target
 * checks; only then is the insn decoded.
 */
static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *s = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    uint64_t pc = s->base.pc_next;
    uint32_t insn;

    /* Singlestep exceptions have the highest priority. */
    if (s->ss_active && !s->pstate_ss) {
        /* Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(s->base.num_insns == 1);
        gen_swstep_exception(s, 0, 0);
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = pc + 4;
        return;
    }

    if (pc & 3) {
        /*
         * PC alignment fault.  This has priority over the instruction abort
         * that we would receive from a translation fault via arm_ldl_code.
         * This should only be possible after an indirect branch, at the
         * start of the TB.
         */
        assert(s->base.num_insns == 1);
        gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
        return;
    }

    s->pc_curr = pc;
    insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next = pc + 4;

    /* Reset per-insn access-check state before decoding. */
    s->fp_access_checked = false;
    s->sve_access_checked = false;

    if (s->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /*
             * At the first insn of the TB, compute s->guarded_page.
             * We delayed computing this until successfully reading
             * the first insn of the TB, above.  This (mostly) ensures
             * that the softmmu tlb entry has been populated, and the
             * page table GP bit is available.
             *
             * Note that we need to compute this even if btype == 0,
             * because this value is used for BR instructions later
             * where ENV is not available.
             */
            s->guarded_page = is_guarded_page(env, s);

            /* First insn can have btype set to non-zero.  */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above most
             * everything else.  This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             */
            if (s->btype != 0
                && s->guarded_page
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
                return;
            }
        } else {
            /* Not the first insn: btype must be 0.  */
            tcg_debug_assert(s->btype == 0);
        }
    }

    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }

    /* Try the generated decoders first, then the legacy table decoder. */
    if (!disas_a64(s, insn) &&
        !disas_sme(s, insn) &&
        !disas_sve(s, insn)) {
        disas_a64_legacy(s, insn);
    }

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}
14269 
/*
 * TranslatorOps tb_stop hook: emit the end-of-TB code.  When
 * single-stepping, this is a software-step exception; otherwise the
 * exit strategy is selected by dc->base.is_jmp.
 */
static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->ss_active)) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            gen_step_complete_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, 4);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_update_pc(dc, 4);
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_a64_update_pc(dc, 4);
            gen_helper_yield(cpu_env);
            break;
        case DISAS_WFI:
            /*
             * This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            gen_a64_update_pc(dc, 4);
            gen_helper_wfi(cpu_env, tcg_constant_i32(4));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
    }
}
14337 
14338 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
14339                                  CPUState *cpu, FILE *logfile)
14340 {
14341     DisasContext *dc = container_of(dcbase, DisasContext, base);
14342 
14343     fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
14344     target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
14345 }
14346 
/* Callback table wiring together the AArch64 translator hooks above. */
const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
    .disas_log          = aarch64_tr_disas_log,
};
14355