1 /*
2 * AArch64 translation
3 *
4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20
21 #include "exec/exec-all.h"
22 #include "translate.h"
23 #include "translate-a64.h"
24 #include "qemu/log.h"
25 #include "arm_ldst.h"
26 #include "semihosting/semihost.h"
27 #include "cpregs.h"
28
29 static TCGv_i64 cpu_X[32];
30 static TCGv_i64 cpu_pc;
31
32 /* Load/store exclusive handling */
33 static TCGv_i64 cpu_exclusive_high;
34
35 static const char *regnames[] = {
36 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
37 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
38 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
39 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
40 };
41
42 enum a64_shift_type {
43 A64_SHIFT_TYPE_LSL = 0,
44 A64_SHIFT_TYPE_LSR = 1,
45 A64_SHIFT_TYPE_ASR = 2,
46 A64_SHIFT_TYPE_ROR = 3
47 };
48
49 /*
50 * Helpers for extracting complex instruction fields
51 */
52
53 /*
54 * For load/store with an unsigned 12 bit immediate scaled by the element
55 * size. The input has the immediate field in bits [14:3] and the element
56 * size in [2:0].
57 */
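/*
 * Illustrative example: for an 8-byte LDR the decodetree field packs
 * scale = 3 into bits [2:0] and the 12-bit immediate into bits [14:3],
 * so x = (imm12 << 3) | 3 and the returned byte offset is imm12 * 8.
 */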
static int uimm_scaled(DisasContext *s, int x)
59 {
60 unsigned imm = x >> 3;
61 unsigned scale = extract32(x, 0, 3);
62 return imm << scale;
63 }
64
65 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
static int scale_by_log2_tag_granule(DisasContext *s, int x)
67 {
68 return x << LOG2_TAG_GRANULE;
69 }
70
71 /*
72 * Include the generated decoders.
73 */
74
75 #include "decode-sme-fa64.c.inc"
76 #include "decode-a64.c.inc"
77
78 /* Table based decoder typedefs - used when the relevant bits for decode
79 * are too awkwardly scattered across the instruction (eg SIMD).
80 */
81 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
82
83 typedef struct AArch64DecodeTable {
84 uint32_t pattern;
85 uint32_t mask;
86 AArch64DecodeFn *disas_fn;
87 } AArch64DecodeTable;
88
89 /* initialize TCG globals. */
void a64_translate_init(void)
91 {
92 int i;
93
94 cpu_pc = tcg_global_mem_new_i64(tcg_env,
95 offsetof(CPUARMState, pc),
96 "pc");
97 for (i = 0; i < 32; i++) {
98 cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
99 offsetof(CPUARMState, xregs[i]),
100 regnames[i]);
101 }
102
103 cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
104 offsetof(CPUARMState, exclusive_high), "exclusive_high");
105 }
106
107 /*
108 * Return the core mmu_idx to use for A64 load/store insns which
 * have an "unprivileged load/store" variant. Those insns access
110 * EL0 if executed from an EL which has control over EL0 (usually
111 * EL1) but behave like normal loads and stores if executed from
112 * elsewhere (eg EL3).
113 *
114 * @unpriv : true for the unprivileged encoding; false for the
115 * normal encoding (in which case we will return the same
 *           thing as get_mem_index()).
117 */
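/*
 * For example, an LDTR executed at EL1 in the EL1&0 regime maps
 * ARMMMUIdx_E10_1 (or its PAN variant) down to ARMMMUIdx_E10_0, so the
 * access is performed with the EL0 translation regime and permissions.
 */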
static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
119 {
120 /*
121 * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
122 * which is the usual mmu_idx for this cpu state.
123 */
124 ARMMMUIdx useridx = s->mmu_idx;
125
126 if (unpriv && s->unpriv) {
127 /*
128 * We have pre-computed the condition for AccType_UNPRIV.
129 * Therefore we should never get here with a mmu_idx for
130 * which we do not know the corresponding user mmu_idx.
131 */
132 switch (useridx) {
133 case ARMMMUIdx_E10_1:
134 case ARMMMUIdx_E10_1_PAN:
135 useridx = ARMMMUIdx_E10_0;
136 break;
137 case ARMMMUIdx_E20_2:
138 case ARMMMUIdx_E20_2_PAN:
139 useridx = ARMMMUIdx_E20_0;
140 break;
141 default:
142 g_assert_not_reached();
143 }
144 }
145 return arm_to_core_mmu_idx(useridx);
146 }
147
static void set_btype_raw(int val)
149 {
150 tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
151 offsetof(CPUARMState, btype));
152 }
153
static void set_btype(DisasContext *s, int val)
155 {
156 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
157 tcg_debug_assert(val >= 1 && val <= 3);
158 set_btype_raw(val);
159 s->btype = -1;
160 }
161
static void reset_btype(DisasContext *s)
163 {
164 if (s->btype != 0) {
165 set_btype_raw(0);
166 s->btype = 0;
167 }
168 }
169
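/*
 * Compute dest = pc_curr + diff. With CF_PCREL the generated code must
 * stay position independent, so we emit an addition relative to the value
 * already in cpu_pc (saved at s->pc_save) rather than an absolute constant.
 */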
static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
171 {
172 assert(s->pc_save != -1);
173 if (tb_cflags(s->base.tb) & CF_PCREL) {
174 tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
175 } else {
176 tcg_gen_movi_i64(dest, s->pc_curr + diff);
177 }
178 }
179
void gen_a64_update_pc(DisasContext *s, target_long diff)
181 {
182 gen_pc_plus_diff(s, cpu_pc, diff);
183 s->pc_save = s->pc_curr + diff;
184 }
185
186 /*
187 * Handle Top Byte Ignore (TBI) bits.
188 *
189 * If address tagging is enabled via the TCR TBI bits:
190 * + for EL2 and EL3 there is only one TBI bit, and if it is set
191 * then the address is zero-extended, clearing bits [63:56]
192 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
193 * and TBI1 controls addresses with bit 55 == 1.
194 * If the appropriate TBI bit is set for the address then
195 * the address is sign-extended from bit 55 into bits [63:56]
196 *
 * Here we have concatenated TBI{1,0} into tbi.
198 */
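/*
 * The two-range case below uses a sign-extract from bit 55 followed by an
 * AND or OR with the original address: AND keeps the original top byte
 * only when bit 55 is set (tbi == 1, TBI0 only), while OR keeps it only
 * when bit 55 is clear (tbi == 2, TBI1 only).
 */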
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
200 TCGv_i64 src, int tbi)
201 {
202 if (tbi == 0) {
203 /* Load unmodified address */
204 tcg_gen_mov_i64(dst, src);
205 } else if (!regime_has_2_ranges(s->mmu_idx)) {
206 /* Force tag byte to all zero */
207 tcg_gen_extract_i64(dst, src, 0, 56);
208 } else {
209 /* Sign-extend from bit 55. */
210 tcg_gen_sextract_i64(dst, src, 0, 56);
211
212 switch (tbi) {
213 case 1:
214 /* tbi0 but !tbi1: only use the extension if positive */
215 tcg_gen_and_i64(dst, dst, src);
216 break;
217 case 2:
218 /* !tbi0 but tbi1: only use the extension if negative */
219 tcg_gen_or_i64(dst, dst, src);
220 break;
221 case 3:
222 /* tbi0 and tbi1: always use the extension */
223 break;
224 default:
225 g_assert_not_reached();
226 }
227 }
228 }
229
static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
231 {
232 /*
233 * If address tagging is enabled for instructions via the TCR TBI bits,
234 * then loading an address into the PC will clear out any tag.
235 */
236 gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
237 s->pc_save = -1;
238 }
239
240 /*
241 * Handle MTE and/or TBI.
242 *
243 * For TBI, ideally, we would do nothing. Proper behaviour on fault is
244 * for the tag to be present in the FAR_ELx register. But for user-only
245 * mode we do not have a TLB with which to implement this, so we must
246 * remove the top byte now.
247 *
248 * Always return a fresh temporary that we can increment independently
249 * of the write-back address.
250 */
251
TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
253 {
254 TCGv_i64 clean = tcg_temp_new_i64();
255 #ifdef CONFIG_USER_ONLY
256 gen_top_byte_ignore(s, clean, addr, s->tbid);
257 #else
258 tcg_gen_mov_i64(clean, addr);
259 #endif
260 return clean;
261 }
262
263 /* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
265 {
266 tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
267 }
268
static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
270 MMUAccessType acc, int log2_size)
271 {
272 gen_helper_probe_access(tcg_env, ptr,
273 tcg_constant_i32(acc),
274 tcg_constant_i32(get_mem_index(s)),
275 tcg_constant_i32(1 << log2_size));
276 }
277
278 /*
279 * For MTE, check a single logical or atomic access. This probes a single
280 * address, the exact one specified. The size and alignment of the access
281 * is not relevant to MTE, per se, but watchpoints do require the size,
282 * and we want to recognize those before making any other changes to state.
283 */
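/*
 * All check parameters (mmu index, TBI/TCMA state, read vs write,
 * required alignment and access length) are packed into one MTEDESC
 * word so the mte_check helper only needs a constant descriptor plus
 * the address.
 */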
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
285 bool is_write, bool tag_checked,
286 MemOp memop, bool is_unpriv,
287 int core_idx)
288 {
289 if (tag_checked && s->mte_active[is_unpriv]) {
290 TCGv_i64 ret;
291 int desc = 0;
292
293 desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
294 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
295 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
296 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
297 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
298 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);
299
300 ret = tcg_temp_new_i64();
301 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
302
303 return ret;
304 }
305 return clean_data_tbi(s, addr);
306 }
307
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
309 bool tag_checked, MemOp memop)
310 {
311 return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
312 false, get_mem_index(s));
313 }
314
315 /*
316 * For MTE, check multiple logical sequential accesses.
317 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
319 bool tag_checked, int total_size, MemOp single_mop)
320 {
321 if (tag_checked && s->mte_active[0]) {
322 TCGv_i64 ret;
323 int desc = 0;
324
325 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
326 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
327 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
328 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
329 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
330 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
331
332 ret = tcg_temp_new_i64();
333 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
334
335 return ret;
336 }
337 return clean_data_tbi(s, addr);
338 }
339
340 /*
341 * Generate the special alignment check that applies to AccType_ATOMIC
342 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
343 * naturally aligned, but it must not cross a 16-byte boundary.
344 * See AArch64.CheckAlignment().
345 */
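/*
 * Illustrative case: if Xn + imm sits at offset 12 within a 16-byte
 * granule and the access is 8 bytes wide, (12 & 15) + 8 = 20 > 16, so
 * the access crosses the boundary and the unaligned-access helper is
 * called to raise the fault.
 */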
static void check_lse2_align(DisasContext *s, int rn, int imm,
347 bool is_write, MemOp mop)
348 {
349 TCGv_i32 tmp;
350 TCGv_i64 addr;
351 TCGLabel *over_label;
352 MMUAccessType type;
353 int mmu_idx;
354
355 tmp = tcg_temp_new_i32();
356 tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
357 tcg_gen_addi_i32(tmp, tmp, imm & 15);
358 tcg_gen_andi_i32(tmp, tmp, 15);
359 tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
360
361 over_label = gen_new_label();
362 tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
363
364 addr = tcg_temp_new_i64();
365 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
366
    type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
368 mmu_idx = get_mem_index(s);
369 gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
370 tcg_constant_i32(mmu_idx));
371
372 gen_set_label(over_label);
373
374 }
375
376 /* Handle the alignment check for AccType_ATOMIC instructions. */
static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
378 {
379 MemOp size = mop & MO_SIZE;
380
381 if (size == MO_8) {
382 return mop;
383 }
384
385 /*
386 * If size == MO_128, this is a LDXP, and the operation is single-copy
387 * atomic for each doubleword, not the entire quadword; it still must
388 * be quadword aligned.
389 */
390 if (size == MO_128) {
391 return finalize_memop_atom(s, MO_128 | MO_ALIGN,
392 MO_ATOM_IFALIGN_PAIR);
393 }
394 if (dc_isar_feature(aa64_lse2, s)) {
395 check_lse2_align(s, rn, 0, true, mop);
396 } else {
397 mop |= MO_ALIGN;
398 }
399 return finalize_memop(s, mop);
400 }
401
402 /* Handle the alignment check for AccType_ORDERED instructions. */
static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
404 bool is_write, MemOp mop)
405 {
406 MemOp size = mop & MO_SIZE;
407
408 if (size == MO_8) {
409 return mop;
410 }
411 if (size == MO_128) {
412 return finalize_memop_atom(s, MO_128 | MO_ALIGN,
413 MO_ATOM_IFALIGN_PAIR);
414 }
415 if (!dc_isar_feature(aa64_lse2, s)) {
416 mop |= MO_ALIGN;
417 } else if (!s->naa) {
418 check_lse2_align(s, rn, imm, is_write, mop);
419 }
420 return finalize_memop(s, mop);
421 }
422
423 typedef struct DisasCompare64 {
424 TCGCond cond;
425 TCGv_i64 value;
426 } DisasCompare64;
427
static void a64_test_cc(DisasCompare64 *c64, int cc)
429 {
430 DisasCompare c32;
431
432 arm_test_cc(&c32, cc);
433
434 /*
435 * Sign-extend the 32-bit value so that the GE/LT comparisons work
436 * properly. The NE/EQ comparisons are also fine with this choice.
437 */
438 c64->cond = c32.cond;
439 c64->value = tcg_temp_new_i64();
440 tcg_gen_ext_i32_i64(c64->value, c32.value);
441 }
442
static void gen_rebuild_hflags(DisasContext *s)
444 {
445 gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
446 }
447
static void gen_exception_internal(int excp)
449 {
450 assert(excp_is_internal(excp));
451 gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
452 }
453
static void gen_exception_internal_insn(DisasContext *s, int excp)
455 {
456 gen_a64_update_pc(s, 0);
457 gen_exception_internal(excp);
458 s->base.is_jmp = DISAS_NORETURN;
459 }
460
static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
462 {
463 gen_a64_update_pc(s, 0);
464 gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
465 s->base.is_jmp = DISAS_NORETURN;
466 }
467
static void gen_step_complete_exception(DisasContext *s)
469 {
    /* We have just completed a step of an insn. Move from Active-not-pending
471 * to Active-pending, and then also take the swstep exception.
472 * This corresponds to making the (IMPDEF) choice to prioritize
473 * swstep exceptions over asynchronous exceptions taken to an exception
474 * level where debug is disabled. This choice has the advantage that
475 * we do not need to maintain internal state corresponding to the
476 * ISV/EX syndrome bits between completion of the step and generation
477 * of the exception, and our syndrome information is always correct.
478 */
479 gen_ss_advance(s);
480 gen_swstep_exception(s, 1, s->is_ldex);
481 s->base.is_jmp = DISAS_NORETURN;
482 }
483
static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
485 {
486 if (s->ss_active) {
487 return false;
488 }
489 return translator_use_goto_tb(&s->base, dest);
490 }
491
static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
493 {
494 if (use_goto_tb(s, s->pc_curr + diff)) {
495 /*
496 * For pcrel, the pc must always be up-to-date on entry to
497 * the linked TB, so that it can use simple additions for all
498 * further adjustments. For !pcrel, the linked TB is compiled
499 * to know its full virtual address, so we can delay the
500 * update to pc to the unlinked path. A long chain of links
501 * can thus avoid many updates to the PC.
502 */
503 if (tb_cflags(s->base.tb) & CF_PCREL) {
504 gen_a64_update_pc(s, diff);
505 tcg_gen_goto_tb(n);
506 } else {
507 tcg_gen_goto_tb(n);
508 gen_a64_update_pc(s, diff);
509 }
510 tcg_gen_exit_tb(s->base.tb, n);
511 s->base.is_jmp = DISAS_NORETURN;
512 } else {
513 gen_a64_update_pc(s, diff);
514 if (s->ss_active) {
515 gen_step_complete_exception(s);
516 } else {
517 tcg_gen_lookup_and_goto_ptr();
518 s->base.is_jmp = DISAS_NORETURN;
519 }
520 }
521 }
522
523 /*
524 * Register access functions
525 *
 * These functions are used for directly accessing a register where
527 * changes to the final register value are likely to be made. If you
528 * need to use a register for temporary calculation (e.g. index type
529 * operations) use the read_* form.
530 *
531 * B1.2.1 Register mappings
532 *
533 * In instruction register encoding 31 can refer to ZR (zero register) or
534 * the SP (stack pointer) depending on context. In QEMU's case we map SP
535 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
536 * This is the point of the _sp forms.
537 */
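/*
 * For example, "ADD X0, X1, XZR" resolves register 31 via cpu_reg()
 * (a discardable zero temporary), whereas "ADD X0, SP, #16" resolves it
 * via cpu_reg_sp() and so reads the real stack pointer in cpu_X[31].
 */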
TCGv_i64 cpu_reg(DisasContext *s, int reg)
539 {
540 if (reg == 31) {
541 TCGv_i64 t = tcg_temp_new_i64();
542 tcg_gen_movi_i64(t, 0);
543 return t;
544 } else {
545 return cpu_X[reg];
546 }
547 }
548
549 /* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
551 {
552 return cpu_X[reg];
553 }
554
555 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
556 * representing the register contents. This TCGv is an auto-freed
557 * temporary so it need not be explicitly freed, and may be modified.
558 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
560 {
561 TCGv_i64 v = tcg_temp_new_i64();
562 if (reg != 31) {
563 if (sf) {
564 tcg_gen_mov_i64(v, cpu_X[reg]);
565 } else {
566 tcg_gen_ext32u_i64(v, cpu_X[reg]);
567 }
568 } else {
569 tcg_gen_movi_i64(v, 0);
570 }
571 return v;
572 }
573
TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
575 {
576 TCGv_i64 v = tcg_temp_new_i64();
577 if (sf) {
578 tcg_gen_mov_i64(v, cpu_X[reg]);
579 } else {
580 tcg_gen_ext32u_i64(v, cpu_X[reg]);
581 }
582 return v;
583 }
584
585 /* Return the offset into CPUARMState of a slice (from
586 * the least significant end) of FP register Qn (ie
587 * Dn, Sn, Hn or Bn).
588 * (Note that this is not the same mapping as for A32; see cpu.h)
589 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
591 {
592 return vec_reg_offset(s, regno, 0, size);
593 }
594
595 /* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
597 {
598 return vec_reg_offset(s, regno, 1, MO_64);
599 }
600
601 /* Convenience accessors for reading and writing single and double
602 * FP registers. Writing clears the upper parts of the associated
603 * 128 bit vector register, as required by the architecture.
604 * Note that unlike the GP register accessors, the values returned
605 * by the read functions must be manually freed.
606 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
608 {
609 TCGv_i64 v = tcg_temp_new_i64();
610
611 tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
612 return v;
613 }
614
static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
616 {
617 TCGv_i32 v = tcg_temp_new_i32();
618
619 tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
620 return v;
621 }
622
static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
624 {
625 TCGv_i32 v = tcg_temp_new_i32();
626
627 tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
628 return v;
629 }
630
631 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
632 * If SVE is not enabled, then there are only 128 bits in the vector.
633 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
635 {
636 unsigned ofs = fp_reg_offset(s, rd, MO_64);
637 unsigned vsz = vec_full_reg_size(s);
638
639 /* Nop move, with side effect of clearing the tail. */
640 tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
641 }
642
void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
644 {
645 unsigned ofs = fp_reg_offset(s, reg, MO_64);
646
647 tcg_gen_st_i64(v, tcg_env, ofs);
648 clear_vec_high(s, false, reg);
649 }
650
static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
652 {
653 TCGv_i64 tmp = tcg_temp_new_i64();
654
655 tcg_gen_extu_i32_i64(tmp, v);
656 write_fp_dreg(s, reg, tmp);
657 }
658
659 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
661 GVecGen2Fn *gvec_fn, int vece)
662 {
663 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
664 is_q ? 16 : 8, vec_full_reg_size(s));
665 }
666
667 /* Expand a 2-operand + immediate AdvSIMD vector operation using
668 * an expander function.
669 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
671 int64_t imm, GVecGen2iFn *gvec_fn, int vece)
672 {
673 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
674 imm, is_q ? 16 : 8, vec_full_reg_size(s));
675 }
676
677 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
679 GVecGen3Fn *gvec_fn, int vece)
680 {
681 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
682 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
683 }
684
685 /* Expand a 4-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
687 int rx, GVecGen4Fn *gvec_fn, int vece)
688 {
689 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
690 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
691 is_q ? 16 : 8, vec_full_reg_size(s));
692 }
693
694 /* Expand a 2-operand operation using an out-of-line helper. */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
696 int rn, int data, gen_helper_gvec_2 *fn)
697 {
698 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
699 vec_full_reg_offset(s, rn),
700 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
701 }
702
703 /* Expand a 3-operand operation using an out-of-line helper. */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
705 int rn, int rm, int data, gen_helper_gvec_3 *fn)
706 {
707 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
708 vec_full_reg_offset(s, rn),
709 vec_full_reg_offset(s, rm),
710 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
711 }
712
713 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
714 * an out-of-line helper.
715 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
717 int rm, bool is_fp16, int data,
718 gen_helper_gvec_3_ptr *fn)
719 {
720 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
721 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
722 vec_full_reg_offset(s, rn),
723 vec_full_reg_offset(s, rm), fpst,
724 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
725 }
726
727 /* Expand a 4-operand operation using an out-of-line helper. */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
729 int rm, int ra, int data, gen_helper_gvec_4 *fn)
730 {
731 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
732 vec_full_reg_offset(s, rn),
733 vec_full_reg_offset(s, rm),
734 vec_full_reg_offset(s, ra),
735 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
736 }
737
738 /*
739 * Expand a 4-operand operation using an out-of-line helper that takes
740 * a pointer to the CPU env.
741 */
static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
743 int rm, int ra, int data,
744 gen_helper_gvec_4_ptr *fn)
745 {
746 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
747 vec_full_reg_offset(s, rn),
748 vec_full_reg_offset(s, rm),
749 vec_full_reg_offset(s, ra),
750 tcg_env,
751 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
752 }
753
754 /*
755 * Expand a 4-operand + fpstatus pointer + simd data value operation using
756 * an out-of-line helper.
757 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
759 int rm, int ra, bool is_fp16, int data,
760 gen_helper_gvec_4_ptr *fn)
761 {
762 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
763 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
764 vec_full_reg_offset(s, rn),
765 vec_full_reg_offset(s, rm),
766 vec_full_reg_offset(s, ra), fpst,
767 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
768 }
769
770 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
771 * than the 32 bit equivalent.
772 */
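/*
 * QEMU's flag representation: N is bit 31 of cpu_NF and Z is set iff
 * cpu_ZF == 0.  Splitting the 64-bit result puts its sign bits into
 * cpu_NF, and OR-ing both halves into cpu_ZF makes it zero exactly when
 * the whole 64-bit result is zero.
 */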
static inline void gen_set_NZ64(TCGv_i64 result)
774 {
775 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
776 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
777 }
778
779 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
781 {
782 if (sf) {
783 gen_set_NZ64(result);
784 } else {
785 tcg_gen_extrl_i64_i32(cpu_ZF, result);
786 tcg_gen_mov_i32(cpu_NF, cpu_ZF);
787 }
788 tcg_gen_movi_i32(cpu_CF, 0);
789 tcg_gen_movi_i32(cpu_VF, 0);
790 }
791
792 /* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
794 {
795 TCGv_i64 result, flag, tmp;
796 result = tcg_temp_new_i64();
797 flag = tcg_temp_new_i64();
798 tmp = tcg_temp_new_i64();
799
800 tcg_gen_movi_i64(tmp, 0);
801 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
802
803 tcg_gen_extrl_i64_i32(cpu_CF, flag);
804
805 gen_set_NZ64(result);
806
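    /*
     * Signed overflow: V = bit 63 of (result ^ t0) & ~(t0 ^ t1); it is set
     * only when both operands have the same sign and the result's sign
     * differs.  extrh then places that bit in bit 31 of cpu_VF.
     */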
807 tcg_gen_xor_i64(flag, result, t0);
808 tcg_gen_xor_i64(tmp, t0, t1);
809 tcg_gen_andc_i64(flag, flag, tmp);
810 tcg_gen_extrh_i64_i32(cpu_VF, flag);
811
812 tcg_gen_mov_i64(dest, result);
813 }
814
static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
816 {
817 TCGv_i32 t0_32 = tcg_temp_new_i32();
818 TCGv_i32 t1_32 = tcg_temp_new_i32();
819 TCGv_i32 tmp = tcg_temp_new_i32();
820
821 tcg_gen_movi_i32(tmp, 0);
822 tcg_gen_extrl_i64_i32(t0_32, t0);
823 tcg_gen_extrl_i64_i32(t1_32, t1);
824 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
825 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
826 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
827 tcg_gen_xor_i32(tmp, t0_32, t1_32);
828 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
829 tcg_gen_extu_i32_i64(dest, cpu_NF);
830 }
831
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
833 {
834 if (sf) {
835 gen_add64_CC(dest, t0, t1);
836 } else {
837 gen_add32_CC(dest, t0, t1);
838 }
839 }
840
841 /* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
843 {
844 /* 64 bit arithmetic */
845 TCGv_i64 result, flag, tmp;
846
847 result = tcg_temp_new_i64();
848 flag = tcg_temp_new_i64();
849 tcg_gen_sub_i64(result, t0, t1);
850
851 gen_set_NZ64(result);
852
853 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
854 tcg_gen_extrl_i64_i32(cpu_CF, flag);
855
856 tcg_gen_xor_i64(flag, result, t0);
857 tmp = tcg_temp_new_i64();
858 tcg_gen_xor_i64(tmp, t0, t1);
859 tcg_gen_and_i64(flag, flag, tmp);
860 tcg_gen_extrh_i64_i32(cpu_VF, flag);
861 tcg_gen_mov_i64(dest, result);
862 }
863
static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
865 {
866 /* 32 bit arithmetic */
867 TCGv_i32 t0_32 = tcg_temp_new_i32();
868 TCGv_i32 t1_32 = tcg_temp_new_i32();
869 TCGv_i32 tmp;
870
871 tcg_gen_extrl_i64_i32(t0_32, t0);
872 tcg_gen_extrl_i64_i32(t1_32, t1);
873 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
874 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
875 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
876 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
877 tmp = tcg_temp_new_i32();
878 tcg_gen_xor_i32(tmp, t0_32, t1_32);
879 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
880 tcg_gen_extu_i32_i64(dest, cpu_NF);
881 }
882
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
884 {
885 if (sf) {
886 gen_sub64_CC(dest, t0, t1);
887 } else {
888 gen_sub32_CC(dest, t0, t1);
889 }
890 }
891
892 /* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
894 {
895 TCGv_i64 flag = tcg_temp_new_i64();
896 tcg_gen_extu_i32_i64(flag, cpu_CF);
897 tcg_gen_add_i64(dest, t0, t1);
898 tcg_gen_add_i64(dest, dest, flag);
899
900 if (!sf) {
901 tcg_gen_ext32u_i64(dest, dest);
902 }
903 }
904
905 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
907 {
908 if (sf) {
909 TCGv_i64 result = tcg_temp_new_i64();
910 TCGv_i64 cf_64 = tcg_temp_new_i64();
911 TCGv_i64 vf_64 = tcg_temp_new_i64();
912 TCGv_i64 tmp = tcg_temp_new_i64();
913 TCGv_i64 zero = tcg_constant_i64(0);
914
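        /*
         * Two chained add2 steps compute t0 + CF and then add t1, so that
         * cf_64 accumulates the carry-out of the full three-way sum.
         */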
915 tcg_gen_extu_i32_i64(cf_64, cpu_CF);
916 tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
917 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
918 tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
919 gen_set_NZ64(result);
920
921 tcg_gen_xor_i64(vf_64, result, t0);
922 tcg_gen_xor_i64(tmp, t0, t1);
923 tcg_gen_andc_i64(vf_64, vf_64, tmp);
924 tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
925
926 tcg_gen_mov_i64(dest, result);
927 } else {
928 TCGv_i32 t0_32 = tcg_temp_new_i32();
929 TCGv_i32 t1_32 = tcg_temp_new_i32();
930 TCGv_i32 tmp = tcg_temp_new_i32();
931 TCGv_i32 zero = tcg_constant_i32(0);
932
933 tcg_gen_extrl_i64_i32(t0_32, t0);
934 tcg_gen_extrl_i64_i32(t1_32, t1);
935 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
936 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);
937
938 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
939 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
940 tcg_gen_xor_i32(tmp, t0_32, t1_32);
941 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
942 tcg_gen_extu_i32_i64(dest, cpu_NF);
943 }
944 }
945
946 /*
947 * Load/Store generators
948 */
949
950 /*
951 * Store from GPR register to memory.
952 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
954 TCGv_i64 tcg_addr, MemOp memop, int memidx,
955 bool iss_valid,
956 unsigned int iss_srt,
957 bool iss_sf, bool iss_ar)
958 {
959 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
960
961 if (iss_valid) {
962 uint32_t syn;
963
964 syn = syn_data_abort_with_iss(0,
965 (memop & MO_SIZE),
966 false,
967 iss_srt,
968 iss_sf,
969 iss_ar,
970 0, 0, 0, 0, 0, false);
971 disas_set_insn_syndrome(s, syn);
972 }
973 }
974
static void do_gpr_st(DisasContext *s, TCGv_i64 source,
976 TCGv_i64 tcg_addr, MemOp memop,
977 bool iss_valid,
978 unsigned int iss_srt,
979 bool iss_sf, bool iss_ar)
980 {
981 do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
982 iss_valid, iss_srt, iss_sf, iss_ar);
983 }
984
985 /*
986 * Load from memory to GPR register
987 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
989 MemOp memop, bool extend, int memidx,
990 bool iss_valid, unsigned int iss_srt,
991 bool iss_sf, bool iss_ar)
992 {
993 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
994
995 if (extend && (memop & MO_SIGN)) {
996 g_assert((memop & MO_SIZE) <= MO_32);
997 tcg_gen_ext32u_i64(dest, dest);
998 }
999
1000 if (iss_valid) {
1001 uint32_t syn;
1002
1003 syn = syn_data_abort_with_iss(0,
1004 (memop & MO_SIZE),
1005 (memop & MO_SIGN) != 0,
1006 iss_srt,
1007 iss_sf,
1008 iss_ar,
1009 0, 0, 0, 0, 0, false);
1010 disas_set_insn_syndrome(s, syn);
1011 }
1012 }
1013
static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1015 MemOp memop, bool extend,
1016 bool iss_valid, unsigned int iss_srt,
1017 bool iss_sf, bool iss_ar)
1018 {
1019 do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
1020 iss_valid, iss_srt, iss_sf, iss_ar);
1021 }
1022
1023 /*
1024 * Store from FP register to memory
1025 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
1027 {
1028 /* This writes the bottom N bits of a 128 bit wide vector to memory */
1029 TCGv_i64 tmplo = tcg_temp_new_i64();
1030
1031 tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));
1032
1033 if ((mop & MO_SIZE) < MO_128) {
1034 tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1035 } else {
1036 TCGv_i64 tmphi = tcg_temp_new_i64();
1037 TCGv_i128 t16 = tcg_temp_new_i128();
1038
1039 tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
1040 tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
1041
1042 tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
1043 }
1044 }
1045
1046 /*
1047 * Load from memory to FP register
1048 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
1050 {
1051 /* This always zero-extends and writes to a full 128 bit wide vector */
1052 TCGv_i64 tmplo = tcg_temp_new_i64();
1053 TCGv_i64 tmphi = NULL;
1054
1055 if ((mop & MO_SIZE) < MO_128) {
1056 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1057 } else {
1058 TCGv_i128 t16 = tcg_temp_new_i128();
1059
1060 tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
1061
1062 tmphi = tcg_temp_new_i64();
1063 tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
1064 }
1065
1066 tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));
1067
1068 if (tmphi) {
1069 tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
1070 }
1071 clear_vec_high(s, tmphi != NULL, destidx);
1072 }
1073
1074 /*
1075 * Vector load/store helpers.
1076 *
1077 * The principal difference between this and a FP load is that we don't
1078 * zero extend as we are filling a partial chunk of the vector register.
1079 * These functions don't support 128 bit loads/stores, which would be
1080 * normal load/store operations.
1081 *
1082 * The _i32 versions are useful when operating on 32 bit quantities
1083 * (eg for floating point single or using Neon helper functions).
1084 */
1085
1086 /* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1088 int element, MemOp memop)
1089 {
1090 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1091 switch ((unsigned)memop) {
1092 case MO_8:
1093 tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
1094 break;
1095 case MO_16:
1096 tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
1097 break;
1098 case MO_32:
1099 tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
1100 break;
1101 case MO_8|MO_SIGN:
1102 tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
1103 break;
1104 case MO_16|MO_SIGN:
1105 tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
1106 break;
1107 case MO_32|MO_SIGN:
1108 tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
1109 break;
1110 case MO_64:
1111 case MO_64|MO_SIGN:
1112 tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
1113 break;
1114 default:
1115 g_assert_not_reached();
1116 }
1117 }
1118
static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1120 int element, MemOp memop)
1121 {
1122 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1123 switch (memop) {
1124 case MO_8:
1125 tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
1126 break;
1127 case MO_16:
1128 tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
1129 break;
1130 case MO_8|MO_SIGN:
1131 tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
1132 break;
1133 case MO_16|MO_SIGN:
1134 tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
1135 break;
1136 case MO_32:
1137 case MO_32|MO_SIGN:
1138 tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
1139 break;
1140 default:
1141 g_assert_not_reached();
1142 }
1143 }
1144
1145 /* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1147 int element, MemOp memop)
1148 {
1149 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1150 switch (memop) {
1151 case MO_8:
1152 tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
1153 break;
1154 case MO_16:
1155 tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
1156 break;
1157 case MO_32:
1158 tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
1159 break;
1160 case MO_64:
1161 tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
1162 break;
1163 default:
1164 g_assert_not_reached();
1165 }
1166 }
1167
static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1169 int destidx, int element, MemOp memop)
1170 {
1171 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1172 switch (memop) {
1173 case MO_8:
1174 tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
1175 break;
1176 case MO_16:
1177 tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
1178 break;
1179 case MO_32:
1180 tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
1181 break;
1182 default:
1183 g_assert_not_reached();
1184 }
1185 }
1186
1187 /* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
1189 TCGv_i64 tcg_addr, MemOp mop)
1190 {
1191 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1192
1193 read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
1194 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1195 }
1196
1197 /* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
1199 TCGv_i64 tcg_addr, MemOp mop)
1200 {
1201 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1202
1203 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1204 write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
1205 }
1206
1207 /* Check that FP/Neon access is enabled. If it is, return
1208 * true. If not, emit code to generate an appropriate exception,
1209 * and return false; the caller should not emit any code for
1210 * the instruction. Note that this check must happen after all
1211 * unallocated-encoding checks (otherwise the syndrome information
1212 * for the resulting exception will be incorrect).
1213 */
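/*
 * s->fp_access_checked is 0 while no check has been made for the current
 * instruction, 1 after a successful check, and -1 once the access trap
 * has been raised.
 */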
static bool fp_access_check_only(DisasContext *s)
1215 {
1216 if (s->fp_excp_el) {
1217 assert(!s->fp_access_checked);
1218 s->fp_access_checked = -1;
1219
1220 gen_exception_insn_el(s, 0, EXCP_UDEF,
1221 syn_fp_access_trap(1, 0xe, false, 0),
1222 s->fp_excp_el);
1223 return false;
1224 }
1225 s->fp_access_checked = 1;
1226 return true;
1227 }
1228
static bool fp_access_check(DisasContext *s)
1230 {
1231 if (!fp_access_check_only(s)) {
1232 return false;
1233 }
1234 if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
1235 gen_exception_insn(s, 0, EXCP_UDEF,
1236 syn_smetrap(SME_ET_Streaming, false));
1237 return false;
1238 }
1239 return true;
1240 }
1241
1242 /*
1243 * Check that SVE access is enabled. If it is, return true.
1244 * If not, emit code to generate an appropriate exception and return false.
1245 * This function corresponds to CheckSVEEnabled().
1246 */
bool sve_access_check(DisasContext *s)
1248 {
1249 if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
1250 bool ret;
1251
1252 assert(dc_isar_feature(aa64_sme, s));
1253 ret = sme_sm_enabled_check(s);
1254 s->sve_access_checked = (ret ? 1 : -1);
1255 return ret;
1256 }
1257 if (s->sve_excp_el) {
1258 /* Assert that we only raise one exception per instruction. */
1259 assert(!s->sve_access_checked);
1260 gen_exception_insn_el(s, 0, EXCP_UDEF,
1261 syn_sve_access_trap(), s->sve_excp_el);
1262 s->sve_access_checked = -1;
1263 return false;
1264 }
1265 s->sve_access_checked = 1;
1266 return fp_access_check(s);
1267 }
1268
1269 /*
1270 * Check that SME access is enabled, raise an exception if not.
1271 * Note that this function corresponds to CheckSMEAccess and is
1272 * only used directly for cpregs.
1273 */
static bool sme_access_check(DisasContext *s)
1275 {
1276 if (s->sme_excp_el) {
1277 gen_exception_insn_el(s, 0, EXCP_UDEF,
1278 syn_smetrap(SME_ET_AccessTrap, false),
1279 s->sme_excp_el);
1280 return false;
1281 }
1282 return true;
1283 }
1284
1285 /* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
1287 {
1288 /*
1289 * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1290 * to be zero when fp_excp_el has priority. This is because we need
1291 * sme_excp_el by itself for cpregs access checks.
1292 */
1293 if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
1294 bool ret = sme_access_check(s);
1295 s->fp_access_checked = (ret ? 1 : -1);
1296 return ret;
1297 }
1298 return fp_access_check_only(s);
1299 }
1300
1301 /* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
1303 {
1304 if (!sme_enabled_check(s)) {
1305 return false;
1306 }
1307 if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
1308 gen_exception_insn(s, 0, EXCP_UDEF,
1309 syn_smetrap(SME_ET_NotStreaming, false));
1310 return false;
1311 }
1312 if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
1313 gen_exception_insn(s, 0, EXCP_UDEF,
1314 syn_smetrap(SME_ET_InactiveZA, false));
1315 return false;
1316 }
1317 return true;
1318 }
1319
1320 /*
1321 * Expanders for AdvSIMD translation functions.
1322 */
1323
static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
1325 gen_helper_gvec_2 *fn)
1326 {
1327 if (!a->q && a->esz == MO_64) {
1328 return false;
1329 }
1330 if (fp_access_check(s)) {
1331 gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
1332 }
1333 return true;
1334 }
1335
static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
1337 gen_helper_gvec_3 *fn)
1338 {
1339 if (!a->q && a->esz == MO_64) {
1340 return false;
1341 }
1342 if (fp_access_check(s)) {
1343 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
1344 }
1345 return true;
1346 }
1347
static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1349 {
1350 if (!a->q && a->esz == MO_64) {
1351 return false;
1352 }
1353 if (fp_access_check(s)) {
1354 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1355 }
1356 return true;
1357 }
1358
static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1360 {
1361 if (a->esz == MO_64) {
1362 return false;
1363 }
1364 if (fp_access_check(s)) {
1365 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1366 }
1367 return true;
1368 }
1369
static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1371 {
1372 if (a->esz == MO_8) {
1373 return false;
1374 }
1375 return do_gvec_fn3_no64(s, a, fn);
1376 }
1377
static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
1379 {
1380 if (!a->q && a->esz == MO_64) {
1381 return false;
1382 }
1383 if (fp_access_check(s)) {
1384 gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
1385 }
1386 return true;
1387 }
1388
1389 /*
1390 * This utility function is for doing register extension with an
1391 * optional shift. You will likely want to pass a temporary for the
1392 * destination register. See DecodeRegExtend() in the ARM ARM.
1393 */
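/*
 * Worked example: option = 0b010 (UXTW) with shift = 2 zero-extends the
 * low 32 bits of tcg_in and multiplies by 4, matching an operand such as
 * "[X1, W2, UXTW #2]".
 */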
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1395 int option, unsigned int shift)
1396 {
1397 int extsize = extract32(option, 0, 2);
1398 bool is_signed = extract32(option, 2, 1);
1399
1400 tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
1401 tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1402 }
1403
static inline void gen_check_sp_alignment(DisasContext *s)
1405 {
1406 /* The AArch64 architecture mandates that (if enabled via PSTATE
1407 * or SCTLR bits) there is a check that SP is 16-aligned on every
1408 * SP-relative load or store (with an exception generated if it is not).
1409 * In line with general QEMU practice regarding misaligned accesses,
1410 * we omit these checks for the sake of guest program performance.
1411 * This function is provided as a hook so we can more easily add these
1412 * checks in future (possibly as a "favour catching guest program bugs
1413 * over speed" user selectable option).
1414 */
1415 }
1416
1417 /*
 * This provides a simple table-based lookup decoder. It is
1419 * intended to be used when the relevant bits for decode are too
1420 * awkwardly placed and switch/if based logic would be confusing and
1421 * deeply nested. Since it's a linear search through the table, tables
1422 * should be kept small.
1423 *
1424 * It returns the first handler where insn & mask == pattern, or
1425 * NULL if there is no match.
1426 * The table is terminated by an empty mask (i.e. 0)
1427 */
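/*
 * Hypothetical usage sketch (the table name and handler below are
 * illustrative only, not taken from this file):
 *
 *   static const AArch64DecodeTable example_table[] = {
 *       { 0x0e200400, 0x9f200c00, disas_example_group },
 *       { 0x00000000, 0x00000000, NULL }
 *   };
 *   AArch64DecodeFn *fn = lookup_disas_fn(example_table, insn);
 *   if (fn) {
 *       fn(s, insn);
 *   } else {
 *       unallocated_encoding(s);
 *   }
 */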
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1429 uint32_t insn)
1430 {
1431 const AArch64DecodeTable *tptr = table;
1432
1433 while (tptr->mask) {
1434 if ((insn & tptr->mask) == tptr->pattern) {
1435 return tptr->disas_fn;
1436 }
1437 tptr++;
1438 }
1439 return NULL;
1440 }
1441
1442 /*
1443 * The instruction disassembly implemented here matches
1444 * the instruction encoding classifications in chapter C4
1445 * of the ARM Architecture Reference Manual (DDI0487B_a);
1446 * classification names and decode diagrams here should generally
1447 * match up with those in the manual.
1448 */
1449
static bool trans_B(DisasContext *s, arg_i *a)
1451 {
1452 reset_btype(s);
1453 gen_goto_tb(s, 0, a->imm);
1454 return true;
1455 }
1456
static bool trans_BL(DisasContext *s, arg_i *a)
1458 {
1459 gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
1460 reset_btype(s);
1461 gen_goto_tb(s, 0, a->imm);
1462 return true;
1463 }
1464
1465
static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1467 {
1468 DisasLabel match;
1469 TCGv_i64 tcg_cmp;
1470
1471 tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1472 reset_btype(s);
1473
1474 match = gen_disas_label(s);
1475 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1476 tcg_cmp, 0, match.label);
1477 gen_goto_tb(s, 0, 4);
1478 set_disas_label(s, match);
1479 gen_goto_tb(s, 1, a->imm);
1480 return true;
1481 }
1482
static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1484 {
1485 DisasLabel match;
1486 TCGv_i64 tcg_cmp;
1487
1488 tcg_cmp = tcg_temp_new_i64();
1489 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1490
1491 reset_btype(s);
1492
1493 match = gen_disas_label(s);
1494 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1495 tcg_cmp, 0, match.label);
1496 gen_goto_tb(s, 0, 4);
1497 set_disas_label(s, match);
1498 gen_goto_tb(s, 1, a->imm);
1499 return true;
1500 }
1501
static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1503 {
1504 /* BC.cond is only present with FEAT_HBC */
1505 if (a->c && !dc_isar_feature(aa64_hbc, s)) {
1506 return false;
1507 }
1508 reset_btype(s);
1509 if (a->cond < 0x0e) {
1510 /* genuinely conditional branches */
1511 DisasLabel match = gen_disas_label(s);
1512 arm_gen_test_cc(a->cond, match.label);
1513 gen_goto_tb(s, 0, 4);
1514 set_disas_label(s, match);
1515 gen_goto_tb(s, 1, a->imm);
1516 } else {
1517 /* 0xe and 0xf are both "always" conditions */
1518 gen_goto_tb(s, 0, a->imm);
1519 }
1520 return true;
1521 }
1522
static void set_btype_for_br(DisasContext *s, int rn)
1524 {
1525 if (dc_isar_feature(aa64_bti, s)) {
1526 /* BR to {x16,x17} or !guard -> 1, else 3. */
1527 if (rn == 16 || rn == 17) {
1528 set_btype(s, 1);
1529 } else {
1530 TCGv_i64 pc = tcg_temp_new_i64();
1531 gen_pc_plus_diff(s, pc, 0);
1532 gen_helper_guarded_page_br(tcg_env, pc);
1533 s->btype = -1;
1534 }
1535 }
1536 }
1537
static void set_btype_for_blr(DisasContext *s)
1539 {
1540 if (dc_isar_feature(aa64_bti, s)) {
1541 /* BLR sets BTYPE to 2, regardless of source guarded page. */
1542 set_btype(s, 2);
1543 }
1544 }
1545
static bool trans_BR(DisasContext *s, arg_r *a)
1547 {
1548 set_btype_for_br(s, a->rn);
1549 gen_a64_set_pc(s, cpu_reg(s, a->rn));
1550 s->base.is_jmp = DISAS_JUMP;
1551 return true;
1552 }
1553
static bool trans_BLR(DisasContext *s, arg_r *a)
1555 {
1556 TCGv_i64 dst = cpu_reg(s, a->rn);
1557 TCGv_i64 lr = cpu_reg(s, 30);
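    /*
     * If rn == 30 the branch target and the link register alias; copy the
     * target first so that writing the return address does not clobber it.
     */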
1558 if (dst == lr) {
1559 TCGv_i64 tmp = tcg_temp_new_i64();
1560 tcg_gen_mov_i64(tmp, dst);
1561 dst = tmp;
1562 }
1563 gen_pc_plus_diff(s, lr, curr_insn_len(s));
1564 gen_a64_set_pc(s, dst);
1565 set_btype_for_blr(s);
1566 s->base.is_jmp = DISAS_JUMP;
1567 return true;
1568 }
1569
static bool trans_RET(DisasContext *s, arg_r *a)
1571 {
1572 gen_a64_set_pc(s, cpu_reg(s, a->rn));
1573 s->base.is_jmp = DISAS_JUMP;
1574 return true;
1575 }
1576
static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1578 TCGv_i64 modifier, bool use_key_a)
1579 {
1580 TCGv_i64 truedst;
1581 /*
1582 * Return the branch target for a BRAA/RETA/etc, which is either
1583 * just the destination dst, or that value with the pauth check
1584 * done and the code removed from the high bits.
1585 */
1586 if (!s->pauth_active) {
1587 return dst;
1588 }
1589
1590 truedst = tcg_temp_new_i64();
1591 if (use_key_a) {
1592 gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
1593 } else {
1594 gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
1595 }
1596 return truedst;
1597 }
1598
static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1600 {
1601 TCGv_i64 dst;
1602
1603 if (!dc_isar_feature(aa64_pauth, s)) {
1604 return false;
1605 }
1606
1607 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1608 set_btype_for_br(s, a->rn);
1609 gen_a64_set_pc(s, dst);
1610 s->base.is_jmp = DISAS_JUMP;
1611 return true;
1612 }
1613
static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1615 {
1616 TCGv_i64 dst, lr;
1617
1618 if (!dc_isar_feature(aa64_pauth, s)) {
1619 return false;
1620 }
1621
1622 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1623 lr = cpu_reg(s, 30);
1624 if (dst == lr) {
1625 TCGv_i64 tmp = tcg_temp_new_i64();
1626 tcg_gen_mov_i64(tmp, dst);
1627 dst = tmp;
1628 }
1629 gen_pc_plus_diff(s, lr, curr_insn_len(s));
1630 gen_a64_set_pc(s, dst);
1631 set_btype_for_blr(s);
1632 s->base.is_jmp = DISAS_JUMP;
1633 return true;
1634 }
1635
static bool trans_RETA(DisasContext *s, arg_reta *a)
1637 {
1638 TCGv_i64 dst;
1639
1640 dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1641 gen_a64_set_pc(s, dst);
1642 s->base.is_jmp = DISAS_JUMP;
1643 return true;
1644 }
1645
static bool trans_BRA(DisasContext *s, arg_bra *a)
1647 {
1648 TCGv_i64 dst;
1649
1650 if (!dc_isar_feature(aa64_pauth, s)) {
1651 return false;
1652 }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1654 gen_a64_set_pc(s, dst);
1655 set_btype_for_br(s, a->rn);
1656 s->base.is_jmp = DISAS_JUMP;
1657 return true;
1658 }
1659
static bool trans_BLRA(DisasContext *s, arg_bra *a)
1661 {
1662 TCGv_i64 dst, lr;
1663
1664 if (!dc_isar_feature(aa64_pauth, s)) {
1665 return false;
1666 }
1667 dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1668 lr = cpu_reg(s, 30);
1669 if (dst == lr) {
1670 TCGv_i64 tmp = tcg_temp_new_i64();
1671 tcg_gen_mov_i64(tmp, dst);
1672 dst = tmp;
1673 }
1674 gen_pc_plus_diff(s, lr, curr_insn_len(s));
1675 gen_a64_set_pc(s, dst);
1676 set_btype_for_blr(s);
1677 s->base.is_jmp = DISAS_JUMP;
1678 return true;
1679 }
1680
static bool trans_ERET(DisasContext *s, arg_ERET *a)
1682 {
1683 TCGv_i64 dst;
1684
1685 if (s->current_el == 0) {
1686 return false;
1687 }
1688 if (s->trap_eret) {
1689 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1690 return true;
1691 }
1692 dst = tcg_temp_new_i64();
1693 tcg_gen_ld_i64(dst, tcg_env,
1694 offsetof(CPUARMState, elr_el[s->current_el]));
1695
1696 translator_io_start(&s->base);
1697
1698 gen_helper_exception_return(tcg_env, dst);
1699 /* Must exit loop to check un-masked IRQs */
1700 s->base.is_jmp = DISAS_EXIT;
1701 return true;
1702 }
1703
1704 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1705 {
1706 TCGv_i64 dst;
1707
1708 if (!dc_isar_feature(aa64_pauth, s)) {
1709 return false;
1710 }
1711 if (s->current_el == 0) {
1712 return false;
1713 }
1714 /* The FGT trap takes precedence over an auth trap. */
1715 if (s->trap_eret) {
1716 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1717 return true;
1718 }
1719 dst = tcg_temp_new_i64();
1720 tcg_gen_ld_i64(dst, tcg_env,
1721 offsetof(CPUARMState, elr_el[s->current_el]));
1722
1723 dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1724
1725 translator_io_start(&s->base);
1726
1727 gen_helper_exception_return(tcg_env, dst);
1728 /* Must exit loop to check un-masked IRQs */
1729 s->base.is_jmp = DISAS_EXIT;
1730 return true;
1731 }
1732
1733 static bool trans_NOP(DisasContext *s, arg_NOP *a)
1734 {
1735 return true;
1736 }
1737
1738 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1739 {
1740 /*
1741 * When running in MTTCG we don't generate jumps to the yield and
1742 * WFE helpers as it won't affect the scheduling of other vCPUs.
1743 * If we wanted to more completely model WFE/SEV so we don't busy
1744 * spin unnecessarily we would need to do something more involved.
1745 */
1746 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1747 s->base.is_jmp = DISAS_YIELD;
1748 }
1749 return true;
1750 }
1751
1752 static bool trans_WFI(DisasContext *s, arg_WFI *a)
1753 {
1754 s->base.is_jmp = DISAS_WFI;
1755 return true;
1756 }
1757
1758 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1759 {
1760 /*
1761 * When running in MTTCG we don't generate jumps to the yield and
1762 * WFE helpers as it won't affect the scheduling of other vCPUs.
1763 * If we wanted to more completely model WFE/SEV so we don't busy
1764 * spin unnecessarily we would need to do something more involved.
1765 */
1766 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1767 s->base.is_jmp = DISAS_WFE;
1768 }
1769 return true;
1770 }
1771
1772 static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
1773 {
1774 if (!dc_isar_feature(aa64_wfxt, s)) {
1775 return false;
1776 }
1777
1778 /*
1779 * Because we need to pass the register value to the helper,
1780 * it's easier to emit the code now, unlike trans_WFI which
1781 * defers it to aarch64_tr_tb_stop(). That means we need to
1782 * check ss_active so that single-stepping a WFIT doesn't halt.
1783 */
1784 if (s->ss_active) {
1785 /* Act like a NOP under architectural singlestep */
1786 return true;
1787 }
1788
1789 gen_a64_update_pc(s, 4);
1790 gen_helper_wfit(tcg_env, cpu_reg(s, a->rd));
1791 /* Go back to the main loop to check for interrupts */
1792 s->base.is_jmp = DISAS_EXIT;
1793 return true;
1794 }
1795
1796 static bool trans_WFET(DisasContext *s, arg_WFET *a)
1797 {
1798 if (!dc_isar_feature(aa64_wfxt, s)) {
1799 return false;
1800 }
1801
1802 /*
1803 * We rely here on our WFE implementation being a NOP, so we
1804 * don't need to do anything different to handle the WFET timeout
1805 * from what trans_WFE does.
1806 */
1807 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1808 s->base.is_jmp = DISAS_WFE;
1809 }
1810 return true;
1811 }
1812
1813 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1814 {
1815 if (s->pauth_active) {
1816 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
1817 }
1818 return true;
1819 }
1820
1821 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
1822 {
1823 if (s->pauth_active) {
1824 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1825 }
1826 return true;
1827 }
1828
1829 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
1830 {
1831 if (s->pauth_active) {
1832 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1833 }
1834 return true;
1835 }
1836
1837 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
1838 {
1839 if (s->pauth_active) {
1840 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1841 }
1842 return true;
1843 }
1844
1845 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
1846 {
1847 if (s->pauth_active) {
1848 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
1849 }
1850 return true;
1851 }
1852
1853 static bool trans_ESB(DisasContext *s, arg_ESB *a)
1854 {
1855 /* Without RAS, we must implement this as NOP. */
1856 if (dc_isar_feature(aa64_ras, s)) {
1857 /*
1858 * QEMU does not have a source of physical SErrors,
1859 * so we are only concerned with virtual SErrors.
1860 * The pseudocode in the ARM for this case is
1861 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
1862 * AArch64.vESBOperation();
1863 * Most of the condition can be evaluated at translation time.
1864 * Test for EL2 present, and defer test for SEL2 to runtime.
1865 */
1866 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
1867 gen_helper_vesb(tcg_env);
1868 }
1869 }
1870 return true;
1871 }
1872
1873 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
1874 {
1875 if (s->pauth_active) {
1876 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1877 }
1878 return true;
1879 }
1880
1881 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
1882 {
1883 if (s->pauth_active) {
1884 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1885 }
1886 return true;
1887 }
1888
1889 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
1890 {
1891 if (s->pauth_active) {
1892 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1893 }
1894 return true;
1895 }
1896
1897 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
1898 {
1899 if (s->pauth_active) {
1900 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1901 }
1902 return true;
1903 }
1904
1905 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
1906 {
1907 if (s->pauth_active) {
1908 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1909 }
1910 return true;
1911 }
1912
1913 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
1914 {
1915 if (s->pauth_active) {
1916 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1917 }
1918 return true;
1919 }
1920
1921 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
1922 {
1923 if (s->pauth_active) {
1924 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
1925 }
1926 return true;
1927 }
1928
1929 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
1930 {
1931 if (s->pauth_active) {
1932 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
1933 }
1934 return true;
1935 }
1936
1937 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
1938 {
1939 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1940 return true;
1941 }
1942
1943 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
1944 {
1945 /* We handle DSB and DMB the same way */
1946 TCGBar bar;
1947
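    /*
     * As an illustration of the cases below: DMB/DSB ..LD (reads) take the
     * MBReqTypes_Reads case, ..ST (writes) take MBReqTypes_Writes, and the
     * full forms such as DMB ISH or DMB SY fall through to the default
     * full barrier.
     */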
1948 switch (a->types) {
1949 case 1: /* MBReqTypes_Reads */
1950 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1951 break;
1952 case 2: /* MBReqTypes_Writes */
1953 bar = TCG_BAR_SC | TCG_MO_ST_ST;
1954 break;
1955 default: /* MBReqTypes_All */
1956 bar = TCG_BAR_SC | TCG_MO_ALL;
1957 break;
1958 }
1959 tcg_gen_mb(bar);
1960 return true;
1961 }
1962
1963 static bool trans_ISB(DisasContext *s, arg_ISB *a)
1964 {
1965 /*
1966 * We need to break the TB after this insn to execute
1967 * self-modifying code correctly and also to take
1968 * any pending interrupts immediately.
1969 */
1970 reset_btype(s);
1971 gen_goto_tb(s, 0, 4);
1972 return true;
1973 }
1974
1975 static bool trans_SB(DisasContext *s, arg_SB *a)
1976 {
1977 if (!dc_isar_feature(aa64_sb, s)) {
1978 return false;
1979 }
1980 /*
1981 * TODO: There is no speculation barrier opcode for TCG;
1982 * MB and end the TB instead.
1983 */
1984 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1985 gen_goto_tb(s, 0, 4);
1986 return true;
1987 }
1988
1989 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
1990 {
1991 if (!dc_isar_feature(aa64_condm_4, s)) {
1992 return false;
1993 }
1994 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1995 return true;
1996 }
1997
1998 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
1999 {
2000 TCGv_i32 z;
2001
2002 if (!dc_isar_feature(aa64_condm_5, s)) {
2003 return false;
2004 }
2005
2006 z = tcg_temp_new_i32();
2007
2008 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
2009
2010 /*
2011 * (!C & !Z) << 31
2012 * (!(C | Z)) << 31
2013 * ~((C | Z) << 31)
2014 * ~-(C | Z)
2015 * (C | Z) - 1
2016 */
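    /*
     * Worked example of the identity above: if C == 0 and Z == 0 then
     * (C | Z) - 1 == -1, so bit 31 of NF is set and N reads as 1;
     * if either flag is set, (C | Z) - 1 == 0 and N reads as 0.
     */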
2017 tcg_gen_or_i32(cpu_NF, cpu_CF, z);
2018 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
2019
2020 /* !(Z & C) */
2021 tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
2022 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
2023
2024 /* (!C & Z) << 31 -> -(Z & ~C) */
2025 tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
2026 tcg_gen_neg_i32(cpu_VF, cpu_VF);
2027
2028 /* C | Z */
2029 tcg_gen_or_i32(cpu_CF, cpu_CF, z);
2030
2031 return true;
2032 }
2033
2034 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
2035 {
2036 if (!dc_isar_feature(aa64_condm_5, s)) {
2037 return false;
2038 }
2039
2040 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */
2041 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */
2042
2043 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
2044 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
2045
2046 tcg_gen_movi_i32(cpu_NF, 0);
2047 tcg_gen_movi_i32(cpu_VF, 0);
2048
2049 return true;
2050 }
2051
2052 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
2053 {
2054 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
2055 return false;
2056 }
2057 if (a->imm & 1) {
2058 set_pstate_bits(PSTATE_UAO);
2059 } else {
2060 clear_pstate_bits(PSTATE_UAO);
2061 }
2062 gen_rebuild_hflags(s);
2063 s->base.is_jmp = DISAS_TOO_MANY;
2064 return true;
2065 }
2066
2067 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
2068 {
2069 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
2070 return false;
2071 }
2072 if (a->imm & 1) {
2073 set_pstate_bits(PSTATE_PAN);
2074 } else {
2075 clear_pstate_bits(PSTATE_PAN);
2076 }
2077 gen_rebuild_hflags(s);
2078 s->base.is_jmp = DISAS_TOO_MANY;
2079 return true;
2080 }
2081
2082 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
2083 {
2084 if (s->current_el == 0) {
2085 return false;
2086 }
2087 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
2088 s->base.is_jmp = DISAS_TOO_MANY;
2089 return true;
2090 }
2091
2092 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
2093 {
2094 if (!dc_isar_feature(aa64_ssbs, s)) {
2095 return false;
2096 }
2097 if (a->imm & 1) {
2098 set_pstate_bits(PSTATE_SSBS);
2099 } else {
2100 clear_pstate_bits(PSTATE_SSBS);
2101 }
2102 /* Don't need to rebuild hflags since SSBS is a nop */
2103 s->base.is_jmp = DISAS_TOO_MANY;
2104 return true;
2105 }
2106
2107 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
2108 {
2109 if (!dc_isar_feature(aa64_dit, s)) {
2110 return false;
2111 }
2112 if (a->imm & 1) {
2113 set_pstate_bits(PSTATE_DIT);
2114 } else {
2115 clear_pstate_bits(PSTATE_DIT);
2116 }
2117 /* There's no need to rebuild hflags because DIT is a nop */
2118 s->base.is_jmp = DISAS_TOO_MANY;
2119 return true;
2120 }
2121
2122 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2123 {
2124 if (dc_isar_feature(aa64_mte, s)) {
2125 /* Full MTE is enabled -- set the TCO bit as directed. */
2126 if (a->imm & 1) {
2127 set_pstate_bits(PSTATE_TCO);
2128 } else {
2129 clear_pstate_bits(PSTATE_TCO);
2130 }
2131 gen_rebuild_hflags(s);
2132 /* Many factors, including TCO, go into MTE_ACTIVE. */
2133 s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2134 return true;
2135 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2136 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */
2137 return true;
2138 } else {
2139 /* Insn not present */
2140 return false;
2141 }
2142 }
2143
2144 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2145 {
2146 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2147 s->base.is_jmp = DISAS_TOO_MANY;
2148 return true;
2149 }
2150
2151 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2152 {
2153 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2154 /* Exit the cpu loop to re-evaluate pending IRQs. */
2155 s->base.is_jmp = DISAS_UPDATE_EXIT;
2156 return true;
2157 }
2158
2159 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a)
2160 {
2161 if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) {
2162 return false;
2163 }
2164
2165 if (a->imm == 0) {
2166 clear_pstate_bits(PSTATE_ALLINT);
2167 } else if (s->current_el > 1) {
2168 set_pstate_bits(PSTATE_ALLINT);
2169 } else {
2170 gen_helper_msr_set_allint_el1(tcg_env);
2171 }
2172
2173 /* Exit the cpu loop to re-evaluate pending IRQs. */
2174 s->base.is_jmp = DISAS_UPDATE_EXIT;
2175 return true;
2176 }
2177
2178 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2179 {
2180 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2181 return false;
2182 }
2183 if (sme_access_check(s)) {
2184 int old = s->pstate_sm | (s->pstate_za << 1);
2185 int new = a->imm * 3;
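        /*
         * a->imm is 0 (SMSTOP) or 1 (SMSTART); multiplying by 3 replicates
         * it into both the SM (bit 0) and ZA (bit 1) positions, with
         * a->mask selecting which of the two bits may actually change.
         */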
2186
2187 if ((old ^ new) & a->mask) {
2188 /* At least one bit changes. */
2189 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2190 tcg_constant_i32(a->mask));
2191 s->base.is_jmp = DISAS_TOO_MANY;
2192 }
2193 }
2194 return true;
2195 }
2196
2197 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2198 {
2199 TCGv_i32 tmp = tcg_temp_new_i32();
2200 TCGv_i32 nzcv = tcg_temp_new_i32();
2201
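    /* Pack N:Z:C:V into bits [31:28] of the result; all other bits are 0. */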
2202 /* build bit 31, N */
2203 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2204 /* build bit 30, Z */
2205 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2206 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2207 /* build bit 29, C */
2208 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2209 /* build bit 28, V */
2210 tcg_gen_shri_i32(tmp, cpu_VF, 31);
2211 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2212 /* generate result */
2213 tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2214 }
2215
2216 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2217 {
2218 TCGv_i32 nzcv = tcg_temp_new_i32();
2219
2220 /* take NZCV from R[t] */
2221 tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2222
2223 /* bit 31, N */
2224 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2225 /* bit 30, Z */
2226 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2227 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2228 /* bit 29, C */
2229 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2230 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2231 /* bit 28, V */
2232 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2233 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2234 }
2235
2236 static void gen_sysreg_undef(DisasContext *s, bool isread,
2237 uint8_t op0, uint8_t op1, uint8_t op2,
2238 uint8_t crn, uint8_t crm, uint8_t rt)
2239 {
2240 /*
2241 * Generate code to emit an UNDEF with correct syndrome
2242 * information for a failed system register access.
2243 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2244 * but if FEAT_IDST is implemented then read accesses to registers
2245 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2246 * syndrome.
2247 */
2248 uint32_t syndrome;
2249
2250 if (isread && dc_isar_feature(aa64_ids, s) &&
2251 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2252 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2253 } else {
2254 syndrome = syn_uncategorized();
2255 }
2256 gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2257 }
2258
2259 /* MRS - move from system register
2260 * MSR (register) - move to system register
2261 * SYS
2262 * SYSL
2263 * These are all essentially the same insn in 'read' and 'write'
2264 * versions, with varying op0 fields.
2265 */
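/*
 * As a concrete example, MRS x0, CNTVCT_EL0 arrives here with isread true
 * and op0=3 op1=3 crn=14 crm=0 op2=2, rt=0.
 */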
2266 static void handle_sys(DisasContext *s, bool isread,
2267 unsigned int op0, unsigned int op1, unsigned int op2,
2268 unsigned int crn, unsigned int crm, unsigned int rt)
2269 {
2270 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2271 crn, crm, op0, op1, op2);
2272 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2273 bool need_exit_tb = false;
2274 bool nv_trap_to_el2 = false;
2275 bool nv_redirect_reg = false;
2276 bool skip_fp_access_checks = false;
2277 bool nv2_mem_redirect = false;
2278 TCGv_ptr tcg_ri = NULL;
2279 TCGv_i64 tcg_rt;
2280 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2281
2282 if (crn == 11 || crn == 15) {
2283 /*
2284 * Check for TIDCP trap, which must take precedence over
2285 * the UNDEF for "no such register" etc.
2286 */
2287 switch (s->current_el) {
2288 case 0:
2289 if (dc_isar_feature(aa64_tidcp1, s)) {
2290 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
2291 }
2292 break;
2293 case 1:
2294 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
2295 break;
2296 }
2297 }
2298
2299 if (!ri) {
2300 /* Unknown register; this might be a guest error or a QEMU
2301 * unimplemented feature.
2302 */
2303 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2304 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2305 isread ? "read" : "write", op0, op1, crn, crm, op2);
2306 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2307 return;
2308 }
2309
2310 if (s->nv2 && ri->nv2_redirect_offset) {
2311 /*
2312 * Some registers always redirect to memory; some only do so if
2313 * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
2314 * pairs which share an offset; see the table in R_CSRPQ).
2315 */
2316 if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
2317 nv2_mem_redirect = s->nv1;
2318 } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
2319 nv2_mem_redirect = !s->nv1;
2320 } else {
2321 nv2_mem_redirect = true;
2322 }
2323 }
2324
2325 /* Check access permissions */
2326 if (!cp_access_ok(s->current_el, ri, isread)) {
2327 /*
2328 * FEAT_NV/NV2 handling does not do the usual FP access checks
2329 * for registers only accessible at EL2 (though it *does* do them
2330 * for registers accessible at EL1).
2331 */
2332 skip_fp_access_checks = true;
2333 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
2334 /*
2335 * This is one of the few EL2 registers which should redirect
2336 * to the equivalent EL1 register. We do that after running
2337 * the EL2 register's accessfn.
2338 */
2339 nv_redirect_reg = true;
2340 assert(!nv2_mem_redirect);
2341 } else if (nv2_mem_redirect) {
2342 /*
2343 * NV2 redirect-to-memory takes precedence over trap to EL2 or
2344 * UNDEF to EL1.
2345 */
2346 } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
2347 /*
2348 * This register / instruction exists and is an EL2 register, so
2349 * we must trap to EL2 if accessed in nested virtualization EL1
2350 * instead of UNDEFing. We'll do that after the usual access checks.
2351 * (This makes a difference only for a couple of registers like
2352 * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
2353 * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
2354 * an accessfn which does nothing when called from EL1, because
2355 * the trap-to-EL3 controls which would apply to that register
2356 * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
2357 */
2358 nv_trap_to_el2 = true;
2359 } else {
2360 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2361 return;
2362 }
2363 }
2364
2365 if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2366 /* Emit code to perform further access permissions checks at
2367 * runtime; this may result in an exception.
2368 */
2369 gen_a64_update_pc(s, 0);
2370 tcg_ri = tcg_temp_new_ptr();
2371 gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
2372 tcg_constant_i32(key),
2373 tcg_constant_i32(syndrome),
2374 tcg_constant_i32(isread));
2375 } else if (ri->type & ARM_CP_RAISES_EXC) {
2376 /*
2377 * The readfn or writefn might raise an exception;
2378 * synchronize the CPU state in case it does.
2379 */
2380 gen_a64_update_pc(s, 0);
2381 }
2382
2383 if (!skip_fp_access_checks) {
2384 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2385 return;
2386 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2387 return;
2388 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2389 return;
2390 }
2391 }
2392
2393 if (nv_trap_to_el2) {
2394 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2395 return;
2396 }
2397
2398 if (nv_redirect_reg) {
2399 /*
2400 * FEAT_NV2 redirection of an EL2 register to an EL1 register.
2401 * Conveniently in all cases the encoding of the EL1 register is
2402 * identical to the EL2 register except that opc1 is 0.
2403 * Get the reginfo for the EL1 register to use for the actual access.
2404 * We don't use the EL1 register's access function, and
2405 * fine-grained-traps on EL1 also do not apply here.
2406 */
2407 key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2408 crn, crm, op0, 0, op2);
2409 ri = get_arm_cp_reginfo(s->cp_regs, key);
2410 assert(ri);
2411 assert(cp_access_ok(s->current_el, ri, isread));
2412 /*
2413 * We might not have done an update_pc earlier, so check we don't
2414 * need it. We could support this in future if necessary.
2415 */
2416 assert(!(ri->type & ARM_CP_RAISES_EXC));
2417 }
2418
2419 if (nv2_mem_redirect) {
2420 /*
2421 * This system register is being redirected into an EL2 memory access.
2422 * This means it is not an IO operation, doesn't change hflags,
2423 * and need not end the TB, because it has no side effects.
2424 *
2425 * The access is 64-bit single copy atomic, guaranteed aligned because
2426 * of the definition of VCNR_EL2. Its endianness depends on
2427 * SCTLR_EL2.EE, not on the data endianness of EL1.
2428 * It is done under either the EL2 translation regime or the EL2&0
2429 * translation regime, depending on HCR_EL2.E2H. It behaves as if
2430 * PSTATE.PAN is 0.
2431 */
2432 TCGv_i64 ptr = tcg_temp_new_i64();
2433 MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
2434 ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
2435 int memidx = arm_to_core_mmu_idx(armmemidx);
2436 uint32_t syn;
2437
2438 mop |= (s->nv2_mem_be ? MO_BE : MO_LE);
2439
2440 tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
2441 tcg_gen_addi_i64(ptr, ptr,
2442 (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
2443 tcg_rt = cpu_reg(s, rt);
2444
2445 syn = syn_data_abort_vncr(0, !isread, 0);
2446 disas_set_insn_syndrome(s, syn);
2447 if (isread) {
2448 tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
2449 } else {
2450 tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
2451 }
2452 return;
2453 }
2454
2455 /* Handle special cases first */
2456 switch (ri->type & ARM_CP_SPECIAL_MASK) {
2457 case 0:
2458 break;
2459 case ARM_CP_NOP:
2460 return;
2461 case ARM_CP_NZCV:
2462 tcg_rt = cpu_reg(s, rt);
2463 if (isread) {
2464 gen_get_nzcv(tcg_rt);
2465 } else {
2466 gen_set_nzcv(tcg_rt);
2467 }
2468 return;
2469 case ARM_CP_CURRENTEL:
2470 {
2471 /*
2472 * Reads as current EL value from pstate, which is
2473 * guaranteed to be constant by the tb flags.
2474 * For nested virt we should report EL2.
2475 */
2476 int el = s->nv ? 2 : s->current_el;
2477 tcg_rt = cpu_reg(s, rt);
2478 tcg_gen_movi_i64(tcg_rt, el << 2);
2479 return;
2480 }
2481 case ARM_CP_DC_ZVA:
2482 /* Writes clear the aligned block of memory which rt points into. */
2483 if (s->mte_active[0]) {
2484 int desc = 0;
2485
2486 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2487 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2488 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2489
2490 tcg_rt = tcg_temp_new_i64();
2491 gen_helper_mte_check_zva(tcg_rt, tcg_env,
2492 tcg_constant_i32(desc), cpu_reg(s, rt));
2493 } else {
2494 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2495 }
2496 gen_helper_dc_zva(tcg_env, tcg_rt);
2497 return;
2498 case ARM_CP_DC_GVA:
2499 {
2500 TCGv_i64 clean_addr, tag;
2501
2502 /*
2503 * DC_GVA, like DC_ZVA, requires that we supply the original
2504 * pointer for an invalid page. Probe that address first.
2505 */
2506 tcg_rt = cpu_reg(s, rt);
2507 clean_addr = clean_data_tbi(s, tcg_rt);
2508 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2509
2510 if (s->ata[0]) {
2511 /* Extract the tag from the register to match STZGM. */
2512 tag = tcg_temp_new_i64();
2513 tcg_gen_shri_i64(tag, tcg_rt, 56);
2514 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2515 }
2516 }
2517 return;
2518 case ARM_CP_DC_GZVA:
2519 {
2520 TCGv_i64 clean_addr, tag;
2521
2522 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2523 tcg_rt = cpu_reg(s, rt);
2524 clean_addr = clean_data_tbi(s, tcg_rt);
2525 gen_helper_dc_zva(tcg_env, clean_addr);
2526
2527 if (s->ata[0]) {
2528 /* Extract the tag from the register to match STZGM. */
2529 tag = tcg_temp_new_i64();
2530 tcg_gen_shri_i64(tag, tcg_rt, 56);
2531 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2532 }
2533 }
2534 return;
2535 default:
2536 g_assert_not_reached();
2537 }
2538
2539 if (ri->type & ARM_CP_IO) {
2540 /* I/O operations must end the TB here (whether read or write) */
2541 need_exit_tb = translator_io_start(&s->base);
2542 }
2543
2544 tcg_rt = cpu_reg(s, rt);
2545
2546 if (isread) {
2547 if (ri->type & ARM_CP_CONST) {
2548 tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2549 } else if (ri->readfn) {
2550 if (!tcg_ri) {
2551 tcg_ri = gen_lookup_cp_reg(key);
2552 }
2553 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
2554 } else {
2555 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
2556 }
2557 } else {
2558 if (ri->type & ARM_CP_CONST) {
2559 /* If not forbidden by access permissions, treat as WI */
2560 return;
2561 } else if (ri->writefn) {
2562 if (!tcg_ri) {
2563 tcg_ri = gen_lookup_cp_reg(key);
2564 }
2565 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
2566 } else {
2567 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
2568 }
2569 }
2570
2571 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2572 /*
2573 * A write to any coprocessor register that ends a TB
2574 * must rebuild the hflags for the next TB.
2575 */
2576 gen_rebuild_hflags(s);
2577 /*
2578 * We default to ending the TB on a coprocessor register write,
2579 * but allow this to be suppressed by the register definition
2580 * (usually only necessary to work around guest bugs).
2581 */
2582 need_exit_tb = true;
2583 }
2584 if (need_exit_tb) {
2585 s->base.is_jmp = DISAS_UPDATE_EXIT;
2586 }
2587 }
2588
2589 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2590 {
2591 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2592 return true;
2593 }
2594
2595 static bool trans_SVC(DisasContext *s, arg_i *a)
2596 {
2597 /*
2598 * For SVC, HVC and SMC we advance the single-step state
2599 * machine before taking the exception. This is architecturally
2600 * mandated, to ensure that single-stepping a system call
2601 * instruction works properly.
2602 */
2603 uint32_t syndrome = syn_aa64_svc(a->imm);
2604 if (s->fgt_svc) {
2605 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2606 return true;
2607 }
2608 gen_ss_advance(s);
2609 gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2610 return true;
2611 }
2612
2613 static bool trans_HVC(DisasContext *s, arg_i *a)
2614 {
2615 int target_el = s->current_el == 3 ? 3 : 2;
2616
2617 if (s->current_el == 0) {
2618 unallocated_encoding(s);
2619 return true;
2620 }
2621 /*
2622 * The pre HVC helper handles cases when HVC gets trapped
2623 * as an undefined insn by runtime configuration.
2624 */
2625 gen_a64_update_pc(s, 0);
2626 gen_helper_pre_hvc(tcg_env);
2627 /* Architecture requires ss advance before we do the actual work */
2628 gen_ss_advance(s);
2629 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2630 return true;
2631 }
2632
2633 static bool trans_SMC(DisasContext *s, arg_i *a)
2634 {
2635 if (s->current_el == 0) {
2636 unallocated_encoding(s);
2637 return true;
2638 }
2639 gen_a64_update_pc(s, 0);
2640 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2641 /* Architecture requires ss advance before we do the actual work */
2642 gen_ss_advance(s);
2643 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2644 return true;
2645 }
2646
2647 static bool trans_BRK(DisasContext *s, arg_i *a)
2648 {
2649 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2650 return true;
2651 }
2652
2653 static bool trans_HLT(DisasContext *s, arg_i *a)
2654 {
2655 /*
2656 * HLT. This has two purposes.
2657 * Architecturally, it is an external halting debug instruction.
2658 * Since QEMU doesn't implement external debug, we treat this as
2659  * required when halting debug is disabled: it will UNDEF.
2660 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2661 */
2662 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2663 gen_exception_internal_insn(s, EXCP_SEMIHOST);
2664 } else {
2665 unallocated_encoding(s);
2666 }
2667 return true;
2668 }
2669
2670 /*
2671 * Load/Store exclusive instructions are implemented by remembering
2672 * the value/address loaded, and seeing if these are the same
2673 * when the store is performed. This is not actually the architecturally
2674 * mandated semantics, but it works for typical guest code sequences
2675 * and avoids having to monitor regular stores.
2676 *
2677 * The store exclusive uses the atomic cmpxchg primitives to avoid
2678 * races in multi-threaded linux-user and when MTTCG softmmu is
2679 * enabled.
2680 */
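/*
 * For illustration, a typical guest sequence that this scheme handles is:
 *
 *    retry:  ldxr  w0, [x1]
 *            add   w0, w0, #1
 *            stxr  w2, w0, [x1]
 *            cbnz  w2, retry
 *
 * where the same address register is used by both the load and the store.
 */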
2681 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2682 int size, bool is_pair)
2683 {
2684 int idx = get_mem_index(s);
2685 TCGv_i64 dirty_addr, clean_addr;
2686 MemOp memop = check_atomic_align(s, rn, size + is_pair);
2687
2688 s->is_ldex = true;
2689 dirty_addr = cpu_reg_sp(s, rn);
2690 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2691
2692 g_assert(size <= 3);
2693 if (is_pair) {
2694 g_assert(size >= 2);
2695 if (size == 2) {
2696 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2697 if (s->be_data == MO_LE) {
2698 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2699 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2700 } else {
2701 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2702 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2703 }
2704 } else {
2705 TCGv_i128 t16 = tcg_temp_new_i128();
2706
2707 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2708
2709 if (s->be_data == MO_LE) {
2710 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2711 cpu_exclusive_high, t16);
2712 } else {
2713 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2714 cpu_exclusive_val, t16);
2715 }
2716 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2717 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2718 }
2719 } else {
2720 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2721 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2722 }
2723 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2724 }
2725
2726 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2727 int rn, int size, int is_pair)
2728 {
2729 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2730 * && (!is_pair || env->exclusive_high == [addr + datasize])) {
2731 * [addr] = {Rt};
2732 * if (is_pair) {
2733 * [addr + datasize] = {Rt2};
2734 * }
2735 * {Rd} = 0;
2736 * } else {
2737 * {Rd} = 1;
2738 * }
2739 * env->exclusive_addr = -1;
2740 */
2741 TCGLabel *fail_label = gen_new_label();
2742 TCGLabel *done_label = gen_new_label();
2743 TCGv_i64 tmp, clean_addr;
2744 MemOp memop;
2745
2746 /*
2747 * FIXME: We are out of spec here. We have recorded only the address
2748 * from load_exclusive, not the entire range, and we assume that the
2749 * size of the access on both sides match. The architecture allows the
2750 * store to be smaller than the load, so long as the stored bytes are
2751 * within the range recorded by the load.
2752 */
2753
2754 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
2755 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2756 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2757
2758 /*
2759 * The write, and any associated faults, only happen if the virtual
2760 * and physical addresses pass the exclusive monitor check. These
2761 * faults are exceedingly unlikely, because normally the guest uses
2762 * the exact same address register for the load_exclusive, and we
2763 * would have recognized these faults there.
2764 *
2765 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
2766 * unaligned 4-byte write within the range of an aligned 8-byte load.
2767 * With LSE2, the store would need to cross a 16-byte boundary when the
2768 * load did not, which would mean the store is outside the range
2769 * recorded for the monitor, which would have failed a corrected monitor
2770 * check above. For now, we assume no size change and retain the
2771 * MO_ALIGN to let tcg know what we checked in the load_exclusive.
2772 *
2773 * It is possible to trigger an MTE fault, by performing the load with
2774 * a virtual address with a valid tag and performing the store with the
2775 * same virtual address and a different invalid tag.
2776 */
2777 memop = size + is_pair;
2778 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
2779 memop |= MO_ALIGN;
2780 }
2781 memop = finalize_memop(s, memop);
2782 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2783
2784 tmp = tcg_temp_new_i64();
2785 if (is_pair) {
2786 if (size == 2) {
2787 if (s->be_data == MO_LE) {
2788 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2789 } else {
2790 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2791 }
2792 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2793 cpu_exclusive_val, tmp,
2794 get_mem_index(s), memop);
2795 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2796 } else {
2797 TCGv_i128 t16 = tcg_temp_new_i128();
2798 TCGv_i128 c16 = tcg_temp_new_i128();
2799 TCGv_i64 a, b;
2800
2801 if (s->be_data == MO_LE) {
2802 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2803 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2804 cpu_exclusive_high);
2805 } else {
2806 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
2807 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
2808 cpu_exclusive_val);
2809 }
2810
2811 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
2812 get_mem_index(s), memop);
2813
2814 a = tcg_temp_new_i64();
2815 b = tcg_temp_new_i64();
2816 if (s->be_data == MO_LE) {
2817 tcg_gen_extr_i128_i64(a, b, t16);
2818 } else {
2819 tcg_gen_extr_i128_i64(b, a, t16);
2820 }
2821
2822 tcg_gen_xor_i64(a, a, cpu_exclusive_val);
2823 tcg_gen_xor_i64(b, b, cpu_exclusive_high);
2824 tcg_gen_or_i64(tmp, a, b);
2825
2826 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
2827 }
2828 } else {
2829 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2830 cpu_reg(s, rt), get_mem_index(s), memop);
2831 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2832 }
2833 tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2834 tcg_gen_br(done_label);
2835
2836 gen_set_label(fail_label);
2837 tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2838 gen_set_label(done_label);
2839 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2840 }
2841
2842 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2843 int rn, int size)
2844 {
2845 TCGv_i64 tcg_rs = cpu_reg(s, rs);
2846 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2847 int memidx = get_mem_index(s);
2848 TCGv_i64 clean_addr;
2849 MemOp memop;
2850
2851 if (rn == 31) {
2852 gen_check_sp_alignment(s);
2853 }
2854 memop = check_atomic_align(s, rn, size);
2855 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2856 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
2857 memidx, memop);
2858 }
2859
2860 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2861 int rn, int size)
2862 {
2863 TCGv_i64 s1 = cpu_reg(s, rs);
2864 TCGv_i64 s2 = cpu_reg(s, rs + 1);
2865 TCGv_i64 t1 = cpu_reg(s, rt);
2866 TCGv_i64 t2 = cpu_reg(s, rt + 1);
2867 TCGv_i64 clean_addr;
2868 int memidx = get_mem_index(s);
2869 MemOp memop;
2870
2871 if (rn == 31) {
2872 gen_check_sp_alignment(s);
2873 }
2874
2875 /* This is a single atomic access, despite the "pair". */
2876 memop = check_atomic_align(s, rn, size + 1);
2877 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2878
2879 if (size == 2) {
2880 TCGv_i64 cmp = tcg_temp_new_i64();
2881 TCGv_i64 val = tcg_temp_new_i64();
2882
2883 if (s->be_data == MO_LE) {
2884 tcg_gen_concat32_i64(val, t1, t2);
2885 tcg_gen_concat32_i64(cmp, s1, s2);
2886 } else {
2887 tcg_gen_concat32_i64(val, t2, t1);
2888 tcg_gen_concat32_i64(cmp, s2, s1);
2889 }
2890
2891 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
2892
2893 if (s->be_data == MO_LE) {
2894 tcg_gen_extr32_i64(s1, s2, cmp);
2895 } else {
2896 tcg_gen_extr32_i64(s2, s1, cmp);
2897 }
2898 } else {
2899 TCGv_i128 cmp = tcg_temp_new_i128();
2900 TCGv_i128 val = tcg_temp_new_i128();
2901
2902 if (s->be_data == MO_LE) {
2903 tcg_gen_concat_i64_i128(val, t1, t2);
2904 tcg_gen_concat_i64_i128(cmp, s1, s2);
2905 } else {
2906 tcg_gen_concat_i64_i128(val, t2, t1);
2907 tcg_gen_concat_i64_i128(cmp, s2, s1);
2908 }
2909
2910 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
2911
2912 if (s->be_data == MO_LE) {
2913 tcg_gen_extr_i128_i64(s1, s2, cmp);
2914 } else {
2915 tcg_gen_extr_i128_i64(s2, s1, cmp);
2916 }
2917 }
2918 }
2919
2920 /*
2921 * Compute the ISS.SF bit for syndrome information if an exception
2922 * is taken on a load or store. This indicates whether the instruction
2923 * is accessing a 32-bit or 64-bit register. This logic is derived
2924 * from the ARMv8 specs for LDR (Shared decode for all encodings).
2925 */
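/*
 * For example, LDRSW Xt (size MO_32, sign set, no zero-extend) reports
 * SF=1 because it writes a 64-bit register, while LDRH Wt (size MO_16,
 * unsigned) reports SF=0.
 */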
2926 static bool ldst_iss_sf(int size, bool sign, bool ext)
2927 {
2928
2929 if (sign) {
2930 /*
2931 * Signed loads are 64 bit results if we are not going to
2932 * do a zero-extend from 32 to 64 after the load.
2933 * (For a store, sign and ext are always false.)
2934 */
2935 return !ext;
2936 } else {
2937 /* Unsigned loads/stores work at the specified size */
2938 return size == MO_64;
2939 }
2940 }
2941
2942 static bool trans_STXR(DisasContext *s, arg_stxr *a)
2943 {
2944 if (a->rn == 31) {
2945 gen_check_sp_alignment(s);
2946 }
2947 if (a->lasr) {
2948 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2949 }
2950 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
2951 return true;
2952 }
2953
2954 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
2955 {
2956 if (a->rn == 31) {
2957 gen_check_sp_alignment(s);
2958 }
2959 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
2960 if (a->lasr) {
2961 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2962 }
2963 return true;
2964 }
2965
2966 static bool trans_STLR(DisasContext *s, arg_stlr *a)
2967 {
2968 TCGv_i64 clean_addr;
2969 MemOp memop;
2970 bool iss_sf = ldst_iss_sf(a->sz, false, false);
2971
2972 /*
2973 * StoreLORelease is the same as Store-Release for QEMU, but
2974 * needs the feature-test.
2975 */
2976 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
2977 return false;
2978 }
2979 /* Generate ISS for non-exclusive accesses including LASR. */
2980 if (a->rn == 31) {
2981 gen_check_sp_alignment(s);
2982 }
2983 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2984 memop = check_ordered_align(s, a->rn, 0, true, a->sz);
2985 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
2986 true, a->rn != 31, memop);
2987 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
2988 iss_sf, a->lasr);
2989 return true;
2990 }
2991
2992 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
2993 {
2994 TCGv_i64 clean_addr;
2995 MemOp memop;
2996 bool iss_sf = ldst_iss_sf(a->sz, false, false);
2997
2998 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */
2999 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3000 return false;
3001 }
3002 /* Generate ISS for non-exclusive accesses including LASR. */
3003 if (a->rn == 31) {
3004 gen_check_sp_alignment(s);
3005 }
3006 memop = check_ordered_align(s, a->rn, 0, false, a->sz);
3007 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3008 false, a->rn != 31, memop);
3009 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
3010 a->rt, iss_sf, a->lasr);
3011 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3012 return true;
3013 }
3014
3015 static bool trans_STXP(DisasContext *s, arg_stxr *a)
3016 {
3017 if (a->rn == 31) {
3018 gen_check_sp_alignment(s);
3019 }
3020 if (a->lasr) {
3021 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3022 }
3023 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
3024 return true;
3025 }
3026
3027 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
3028 {
3029 if (a->rn == 31) {
3030 gen_check_sp_alignment(s);
3031 }
3032 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
3033 if (a->lasr) {
3034 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3035 }
3036 return true;
3037 }
3038
3039 static bool trans_CASP(DisasContext *s, arg_CASP *a)
3040 {
3041 if (!dc_isar_feature(aa64_atomics, s)) {
3042 return false;
3043 }
3044 if (((a->rt | a->rs) & 1) != 0) {
3045 return false;
3046 }
3047
3048 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
3049 return true;
3050 }
3051
3052 static bool trans_CAS(DisasContext *s, arg_CAS *a)
3053 {
3054 if (!dc_isar_feature(aa64_atomics, s)) {
3055 return false;
3056 }
3057 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
3058 return true;
3059 }
3060
3061 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
3062 {
3063 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
3064 TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
3065 TCGv_i64 clean_addr = tcg_temp_new_i64();
3066 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3067
3068 gen_pc_plus_diff(s, clean_addr, a->imm);
3069 do_gpr_ld(s, tcg_rt, clean_addr, memop,
3070 false, true, a->rt, iss_sf, false);
3071 return true;
3072 }
3073
3074 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
3075 {
3076 /* Load register (literal), vector version */
3077 TCGv_i64 clean_addr;
3078 MemOp memop;
3079
3080 if (!fp_access_check(s)) {
3081 return true;
3082 }
3083 memop = finalize_memop_asimd(s, a->sz);
3084 clean_addr = tcg_temp_new_i64();
3085 gen_pc_plus_diff(s, clean_addr, a->imm);
3086 do_fp_ld(s, a->rt, clean_addr, memop);
3087 return true;
3088 }
3089
3090 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
3091 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3092 uint64_t offset, bool is_store, MemOp mop)
3093 {
3094 if (a->rn == 31) {
3095 gen_check_sp_alignment(s);
3096 }
3097
3098 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3099 if (!a->p) {
3100 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3101 }
3102
3103 *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
3104 (a->w || a->rn != 31), 2 << a->sz, mop);
3105 }
3106
3107 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
3108 TCGv_i64 dirty_addr, uint64_t offset)
3109 {
3110 if (a->w) {
3111 if (a->p) {
3112 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3113 }
3114 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3115 }
3116 }
3117
3118 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
3119 {
3120 uint64_t offset = a->imm << a->sz;
3121 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3122 MemOp mop = finalize_memop(s, a->sz);
3123
3124 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3125 tcg_rt = cpu_reg(s, a->rt);
3126 tcg_rt2 = cpu_reg(s, a->rt2);
3127 /*
3128 * We built mop above for the single logical access -- rebuild it
3129 * now for the paired operation.
3130 *
3131 * With LSE2, non-sign-extending pairs are treated atomically if
3132 * aligned, and if unaligned one of the pair will be completely
3133 * within a 16-byte block and that element will be atomic.
3134 * Otherwise each element is separately atomic.
3135 * In all cases, issue one operation with the correct atomicity.
3136 */
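    /*
     * For example, an STP of two X registers (a->sz == MO_64) becomes a
     * single MO_128 store, 8-byte aligned when alignment checking is
     * enabled.
     */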
3137 mop = a->sz + 1;
3138 if (s->align_mem) {
3139 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3140 }
3141 mop = finalize_memop_pair(s, mop);
3142 if (a->sz == 2) {
3143 TCGv_i64 tmp = tcg_temp_new_i64();
3144
3145 if (s->be_data == MO_LE) {
3146 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3147 } else {
3148 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3149 }
3150 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3151 } else {
3152 TCGv_i128 tmp = tcg_temp_new_i128();
3153
3154 if (s->be_data == MO_LE) {
3155 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3156 } else {
3157 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3158 }
3159 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3160 }
3161 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3162 return true;
3163 }
3164
3165 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3166 {
3167 uint64_t offset = a->imm << a->sz;
3168 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3169 MemOp mop = finalize_memop(s, a->sz);
3170
3171 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3172 tcg_rt = cpu_reg(s, a->rt);
3173 tcg_rt2 = cpu_reg(s, a->rt2);
3174
3175 /*
3176 * We built mop above for the single logical access -- rebuild it
3177 * now for the paired operation.
3178 *
3179 * With LSE2, non-sign-extending pairs are treated atomically if
3180 * aligned, and if unaligned one of the pair will be completely
3181 * within a 16-byte block and that element will be atomic.
3182 * Otherwise each element is separately atomic.
3183 * In all cases, issue one operation with the correct atomicity.
3184 *
3185 * This treats sign-extending loads like zero-extending loads,
3186 * since that reuses the most code below.
3187 */
3188 mop = a->sz + 1;
3189 if (s->align_mem) {
3190 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3191 }
3192 mop = finalize_memop_pair(s, mop);
3193 if (a->sz == 2) {
3194 int o2 = s->be_data == MO_LE ? 32 : 0;
3195 int o1 = o2 ^ 32;
3196
3197 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3198 if (a->sign) {
3199 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3200 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3201 } else {
3202 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3203 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3204 }
3205 } else {
3206 TCGv_i128 tmp = tcg_temp_new_i128();
3207
3208 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3209 if (s->be_data == MO_LE) {
3210 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3211 } else {
3212 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3213 }
3214 }
3215 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3216 return true;
3217 }
3218
3219 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3220 {
3221 uint64_t offset = a->imm << a->sz;
3222 TCGv_i64 clean_addr, dirty_addr;
3223 MemOp mop;
3224
3225 if (!fp_access_check(s)) {
3226 return true;
3227 }
3228
3229 /* LSE2 does not merge FP pairs; leave these as separate operations. */
3230 mop = finalize_memop_asimd(s, a->sz);
3231 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3232 do_fp_st(s, a->rt, clean_addr, mop);
3233 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3234 do_fp_st(s, a->rt2, clean_addr, mop);
3235 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3236 return true;
3237 }
3238
3239 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3240 {
3241 uint64_t offset = a->imm << a->sz;
3242 TCGv_i64 clean_addr, dirty_addr;
3243 MemOp mop;
3244
3245 if (!fp_access_check(s)) {
3246 return true;
3247 }
3248
3249 /* LSE2 does not merge FP pairs; leave these as separate operations. */
3250 mop = finalize_memop_asimd(s, a->sz);
3251 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3252 do_fp_ld(s, a->rt, clean_addr, mop);
3253 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3254 do_fp_ld(s, a->rt2, clean_addr, mop);
3255 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3256 return true;
3257 }
3258
3259 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3260 {
3261 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3262 uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3263 MemOp mop;
3264 TCGv_i128 tmp;
3265
3266 /* STGP only comes in one size. */
3267 tcg_debug_assert(a->sz == MO_64);
3268
3269 if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3270 return false;
3271 }
3272
3273 if (a->rn == 31) {
3274 gen_check_sp_alignment(s);
3275 }
3276
3277 dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3278 if (!a->p) {
3279 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3280 }
3281
3282 clean_addr = clean_data_tbi(s, dirty_addr);
3283 tcg_rt = cpu_reg(s, a->rt);
3284 tcg_rt2 = cpu_reg(s, a->rt2);
3285
3286 /*
3287 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3288 * and one tag operation. We implement it as one single aligned 16-byte
3289 * memory operation for convenience. Note that the alignment ensures
3290 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3291 */
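    /*
     * (TAG_GRANULE is 16 bytes -- LOG2_TAG_GRANULE == 4 -- so both the
     * offset scaling above and this alignment are in 16-byte units.)
     */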
3292 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3293
3294 tmp = tcg_temp_new_i128();
3295 if (s->be_data == MO_LE) {
3296 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3297 } else {
3298 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3299 }
3300 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3301
3302 /* Perform the tag store, if tag access enabled. */
3303 if (s->ata[0]) {
3304 if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3305 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3306 } else {
3307 gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3308 }
3309 }
3310
3311 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3312 return true;
3313 }
3314
3315 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3316 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3317 uint64_t offset, bool is_store, MemOp mop)
3318 {
3319 int memidx;
3320
3321 if (a->rn == 31) {
3322 gen_check_sp_alignment(s);
3323 }
3324
3325 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3326 if (!a->p) {
3327 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3328 }
3329 memidx = get_a64_user_mem_index(s, a->unpriv);
3330 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3331 a->w || a->rn != 31,
3332 mop, a->unpriv, memidx);
3333 }
3334
3335 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3336 TCGv_i64 dirty_addr, uint64_t offset)
3337 {
3338 if (a->w) {
3339 if (a->p) {
3340 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3341 }
3342 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3343 }
3344 }
3345
3346 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3347 {
3348 bool iss_sf, iss_valid = !a->w;
3349 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3350 int memidx = get_a64_user_mem_index(s, a->unpriv);
3351 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3352
3353 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3354
3355 tcg_rt = cpu_reg(s, a->rt);
3356 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3357
3358 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3359 iss_valid, a->rt, iss_sf, false);
3360 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3361 return true;
3362 }
3363
3364 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3365 {
3366 bool iss_sf, iss_valid = !a->w;
3367 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3368 int memidx = get_a64_user_mem_index(s, a->unpriv);
3369 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3370
3371 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3372
3373 tcg_rt = cpu_reg(s, a->rt);
3374 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3375
3376 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3377 a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3378 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3379 return true;
3380 }
3381
3382 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3383 {
3384 TCGv_i64 clean_addr, dirty_addr;
3385 MemOp mop;
3386
3387 if (!fp_access_check(s)) {
3388 return true;
3389 }
3390 mop = finalize_memop_asimd(s, a->sz);
3391 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3392 do_fp_st(s, a->rt, clean_addr, mop);
3393 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3394 return true;
3395 }
3396
3397 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3398 {
3399 TCGv_i64 clean_addr, dirty_addr;
3400 MemOp mop;
3401
3402 if (!fp_access_check(s)) {
3403 return true;
3404 }
3405 mop = finalize_memop_asimd(s, a->sz);
3406 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3407 do_fp_ld(s, a->rt, clean_addr, mop);
3408 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3409 return true;
3410 }
3411
3412 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3413 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3414 bool is_store, MemOp memop)
3415 {
3416 TCGv_i64 tcg_rm;
3417
3418 if (a->rn == 31) {
3419 gen_check_sp_alignment(s);
3420 }
3421 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3422
3423 tcg_rm = read_cpu_reg(s, a->rm, 1);
3424 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3425
3426 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3427 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3428 }
3429
3430 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3431 {
3432 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3433 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3434 MemOp memop;
3435
3436 if (extract32(a->opt, 1, 1) == 0) {
3437 return false;
3438 }
3439
3440 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3441 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3442 tcg_rt = cpu_reg(s, a->rt);
3443 do_gpr_ld(s, tcg_rt, clean_addr, memop,
3444 a->ext, true, a->rt, iss_sf, false);
3445 return true;
3446 }
3447
3448 static bool trans_STR(DisasContext *s, arg_ldst *a)
3449 {
3450 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3451 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3452 MemOp memop;
3453
3454 if (extract32(a->opt, 1, 1) == 0) {
3455 return false;
3456 }
3457
3458 memop = finalize_memop(s, a->sz);
3459 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3460 tcg_rt = cpu_reg(s, a->rt);
3461 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3462 return true;
3463 }
3464
3465 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3466 {
3467 TCGv_i64 clean_addr, dirty_addr;
3468 MemOp memop;
3469
3470 if (extract32(a->opt, 1, 1) == 0) {
3471 return false;
3472 }
3473
3474 if (!fp_access_check(s)) {
3475 return true;
3476 }
3477
3478 memop = finalize_memop_asimd(s, a->sz);
3479 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3480 do_fp_ld(s, a->rt, clean_addr, memop);
3481 return true;
3482 }
3483
3484 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3485 {
3486 TCGv_i64 clean_addr, dirty_addr;
3487 MemOp memop;
3488
3489 if (extract32(a->opt, 1, 1) == 0) {
3490 return false;
3491 }
3492
3493 if (!fp_access_check(s)) {
3494 return true;
3495 }
3496
3497 memop = finalize_memop_asimd(s, a->sz);
3498 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3499 do_fp_st(s, a->rt, clean_addr, memop);
3500 return true;
3501 }
3502
3503
3504 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3505 int sign, bool invert)
3506 {
3507 MemOp mop = a->sz | sign;
3508 TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3509
3510 if (a->rn == 31) {
3511 gen_check_sp_alignment(s);
3512 }
3513 mop = check_atomic_align(s, a->rn, mop);
3514 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3515 a->rn != 31, mop);
3516 tcg_rs = read_cpu_reg(s, a->rs, true);
3517 tcg_rt = cpu_reg(s, a->rt);
3518 if (invert) {
3519 tcg_gen_not_i64(tcg_rs, tcg_rs);
3520 }
3521 /*
3522 * The tcg atomic primitives are all full barriers. Therefore we
3523 * can ignore the Acquire and Release bits of this instruction.
3524 */
3525 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3526
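    /*
     * For the signed min/max ops the memop carries MO_SIGN, so the old
     * value comes back sign-extended in the 64-bit temporary; the
     * architectural result in Rt is zero-extended to the access size,
     * hence the re-extension below.
     */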
3527 if (mop & MO_SIGN) {
3528 switch (a->sz) {
3529 case MO_8:
3530 tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3531 break;
3532 case MO_16:
3533 tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3534 break;
3535 case MO_32:
3536 tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3537 break;
3538 case MO_64:
3539 break;
3540 default:
3541 g_assert_not_reached();
3542 }
3543 }
3544 return true;
3545 }
3546
3547 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3548 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3549 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3550 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3551 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3552 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3553 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3554 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3555 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3556
3557 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3558 {
3559 bool iss_sf = ldst_iss_sf(a->sz, false, false);
3560 TCGv_i64 clean_addr;
3561 MemOp mop;
3562
3563 if (!dc_isar_feature(aa64_atomics, s) ||
3564 !dc_isar_feature(aa64_rcpc_8_3, s)) {
3565 return false;
3566 }
3567 if (a->rn == 31) {
3568 gen_check_sp_alignment(s);
3569 }
3570 mop = check_ordered_align(s, a->rn, 0, false, a->sz);
3571 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3572 a->rn != 31, mop);
3573 /*
3574 * LDAPR* are a special case because they are a simple load, not a
3575 * fetch-and-do-something op.
3576 * The architectural consistency requirements here are weaker than
3577 * full load-acquire (we only need "load-acquire processor consistent"),
3578 * but we choose to implement them as full LDAQ.
3579 */
3580 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3581 true, a->rt, iss_sf, true);
3582 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3583 return true;
3584 }
3585
3586 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3587 {
3588 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3589 MemOp memop;
3590
3591 /* Load with pointer authentication */
3592 if (!dc_isar_feature(aa64_pauth, s)) {
3593 return false;
3594 }
3595
3596 if (a->rn == 31) {
3597 gen_check_sp_alignment(s);
3598 }
3599 dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3600
3601 if (s->pauth_active) {
3602 if (!a->m) {
3603 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3604 tcg_constant_i64(0));
3605 } else {
3606 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3607 tcg_constant_i64(0));
3608 }
3609 }
3610
3611 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3612
3613 memop = finalize_memop(s, MO_64);
3614
3615 /* Note that "clean" and "dirty" here refer to TBI not PAC. */
3616 clean_addr = gen_mte_check1(s, dirty_addr, false,
3617 a->w || a->rn != 31, memop);
3618
3619 tcg_rt = cpu_reg(s, a->rt);
3620 do_gpr_ld(s, tcg_rt, clean_addr, memop,
3621 /* extend */ false, /* iss_valid */ !a->w,
3622 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3623
3624 if (a->w) {
3625 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3626 }
3627 return true;
3628 }
3629
3630 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3631 {
3632 TCGv_i64 clean_addr, dirty_addr;
3633 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3634 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3635
3636 if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3637 return false;
3638 }
3639
3640 if (a->rn == 31) {
3641 gen_check_sp_alignment(s);
3642 }
3643
3644 mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3645 dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3646 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3647 clean_addr = clean_data_tbi(s, dirty_addr);
3648
3649 /*
3650 * Load-AcquirePC semantics; we implement as the slightly more
3651 * restrictive Load-Acquire.
3652 */
3653 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3654 a->rt, iss_sf, true);
3655 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3656 return true;
3657 }
3658
3659 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3660 {
3661 TCGv_i64 clean_addr, dirty_addr;
3662 MemOp mop = a->sz;
3663 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3664
3665 if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3666 return false;
3667 }
3668
3669 /* TODO: ARMv8.4-LSE SCTLR.nAA */
3670
3671 if (a->rn == 31) {
3672 gen_check_sp_alignment(s);
3673 }
3674
3675 mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3676 dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3677 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3678 clean_addr = clean_data_tbi(s, dirty_addr);
3679
3680 /* Store-Release semantics */
3681 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3682 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3683 return true;
3684 }
3685
3686 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3687 {
3688 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3689 MemOp endian, align, mop;
3690
3691 int total; /* total bytes */
3692 int elements; /* elements per vector */
3693 int r;
3694 int size = a->sz;
3695
3696 if (!a->p && a->rm != 0) {
3697 /* For non-postindexed accesses the Rm field must be 0 */
3698 return false;
3699 }
3700 if (size == 3 && !a->q && a->selem != 1) {
3701 return false;
3702 }
3703 if (!fp_access_check(s)) {
3704 return true;
3705 }
3706
3707 if (a->rn == 31) {
3708 gen_check_sp_alignment(s);
3709 }
3710
3711 /* For our purposes, bytes are always little-endian. */
3712 endian = s->be_data;
3713 if (size == 0) {
3714 endian = MO_LE;
3715 }
3716
3717 total = a->rpt * a->selem * (a->q ? 16 : 8);
3718 tcg_rn = cpu_reg_sp(s, a->rn);
3719
3720 /*
3721 * Issue the MTE check vs the logical repeat count, before we
3722 * promote consecutive little-endian elements below.
3723 */
3724 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3725 finalize_memop_asimd(s, size));
3726
3727 /*
3728 * Consecutive little-endian elements from a single register
3729 * can be promoted to a larger little-endian operation.
3730 */
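    /*
     * For example (illustrative): LD1 {Vt.16B}, [Xn] has selem == 1 and
     * size == 0, so it is promoted to size == 3 below and performed as
     * two 64-bit little-endian loads rather than sixteen byte loads.
     */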
3731 align = MO_ALIGN;
3732 if (a->selem == 1 && endian == MO_LE) {
3733 align = pow2_align(size);
3734 size = 3;
3735 }
3736 if (!s->align_mem) {
3737 align = 0;
3738 }
3739 mop = endian | size | align;
3740
3741 elements = (a->q ? 16 : 8) >> size;
3742 tcg_ebytes = tcg_constant_i64(1 << size);
3743 for (r = 0; r < a->rpt; r++) {
3744 int e;
3745 for (e = 0; e < elements; e++) {
3746 int xs;
3747 for (xs = 0; xs < a->selem; xs++) {
3748 int tt = (a->rt + r + xs) % 32;
3749 do_vec_ld(s, tt, e, clean_addr, mop);
3750 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3751 }
3752 }
3753 }
3754
3755 /*
3756 * For non-quad operations, setting a slice of the low 64 bits of
3757 * the register clears the high 64 bits (in the ARM ARM pseudocode
3758 * this is implicit in the fact that 'rval' is a 64 bit wide
3759 * variable). For quad operations, we might still need to zero
3760      * the high bits of the SVE register.
3761 */
3762 for (r = 0; r < a->rpt * a->selem; r++) {
3763 int tt = (a->rt + r) % 32;
3764 clear_vec_high(s, a->q, tt);
3765 }
3766
3767 if (a->p) {
3768 if (a->rm == 31) {
3769 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3770 } else {
3771 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3772 }
3773 }
3774 return true;
3775 }
3776
3777 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3778 {
3779 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3780 MemOp endian, align, mop;
3781
3782 int total; /* total bytes */
3783 int elements; /* elements per vector */
3784 int r;
3785 int size = a->sz;
3786
3787 if (!a->p && a->rm != 0) {
3788 /* For non-postindexed accesses the Rm field must be 0 */
3789 return false;
3790 }
3791 if (size == 3 && !a->q && a->selem != 1) {
3792 return false;
3793 }
3794 if (!fp_access_check(s)) {
3795 return true;
3796 }
3797
3798 if (a->rn == 31) {
3799 gen_check_sp_alignment(s);
3800 }
3801
3802 /* For our purposes, bytes are always little-endian. */
3803 endian = s->be_data;
3804 if (size == 0) {
3805 endian = MO_LE;
3806 }
3807
3808 total = a->rpt * a->selem * (a->q ? 16 : 8);
3809 tcg_rn = cpu_reg_sp(s, a->rn);
3810
3811 /*
3812 * Issue the MTE check vs the logical repeat count, before we
3813 * promote consecutive little-endian elements below.
3814 */
3815 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
3816 finalize_memop_asimd(s, size));
3817
3818 /*
3819 * Consecutive little-endian elements from a single register
3820 * can be promoted to a larger little-endian operation.
3821 */
3822 align = MO_ALIGN;
3823 if (a->selem == 1 && endian == MO_LE) {
3824 align = pow2_align(size);
3825 size = 3;
3826 }
3827 if (!s->align_mem) {
3828 align = 0;
3829 }
3830 mop = endian | size | align;
3831
3832 elements = (a->q ? 16 : 8) >> size;
3833 tcg_ebytes = tcg_constant_i64(1 << size);
3834 for (r = 0; r < a->rpt; r++) {
3835 int e;
3836 for (e = 0; e < elements; e++) {
3837 int xs;
3838 for (xs = 0; xs < a->selem; xs++) {
3839 int tt = (a->rt + r + xs) % 32;
3840 do_vec_st(s, tt, e, clean_addr, mop);
3841 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3842 }
3843 }
3844 }
3845
3846 if (a->p) {
3847 if (a->rm == 31) {
3848 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3849 } else {
3850 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3851 }
3852 }
3853 return true;
3854 }
3855
3856 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
3857 {
3858 int xs, total, rt;
3859 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3860 MemOp mop;
3861
3862 if (!a->p && a->rm != 0) {
3863 return false;
3864 }
3865 if (!fp_access_check(s)) {
3866 return true;
3867 }
3868
3869 if (a->rn == 31) {
3870 gen_check_sp_alignment(s);
3871 }
3872
3873 total = a->selem << a->scale;
3874 tcg_rn = cpu_reg_sp(s, a->rn);
3875
3876 mop = finalize_memop_asimd(s, a->scale);
3877 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
3878 total, mop);
3879
3880 tcg_ebytes = tcg_constant_i64(1 << a->scale);
3881 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3882 do_vec_st(s, rt, a->index, clean_addr, mop);
3883 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3884 }
3885
3886 if (a->p) {
3887 if (a->rm == 31) {
3888 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3889 } else {
3890 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3891 }
3892 }
3893 return true;
3894 }
3895
3896 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
3897 {
3898 int xs, total, rt;
3899 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3900 MemOp mop;
3901
3902 if (!a->p && a->rm != 0) {
3903 return false;
3904 }
3905 if (!fp_access_check(s)) {
3906 return true;
3907 }
3908
3909 if (a->rn == 31) {
3910 gen_check_sp_alignment(s);
3911 }
3912
3913 total = a->selem << a->scale;
3914 tcg_rn = cpu_reg_sp(s, a->rn);
3915
3916 mop = finalize_memop_asimd(s, a->scale);
3917 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3918 total, mop);
3919
3920 tcg_ebytes = tcg_constant_i64(1 << a->scale);
3921 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3922 do_vec_ld(s, rt, a->index, clean_addr, mop);
3923 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3924 }
3925
3926 if (a->p) {
3927 if (a->rm == 31) {
3928 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3929 } else {
3930 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3931 }
3932 }
3933 return true;
3934 }
3935
3936 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
3937 {
3938 int xs, total, rt;
3939 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3940 MemOp mop;
3941
3942 if (!a->p && a->rm != 0) {
3943 return false;
3944 }
3945 if (!fp_access_check(s)) {
3946 return true;
3947 }
3948
3949 if (a->rn == 31) {
3950 gen_check_sp_alignment(s);
3951 }
3952
3953 total = a->selem << a->scale;
3954 tcg_rn = cpu_reg_sp(s, a->rn);
3955
3956 mop = finalize_memop_asimd(s, a->scale);
3957 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
3958 total, mop);
3959
3960 tcg_ebytes = tcg_constant_i64(1 << a->scale);
3961 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
3962 /* Load and replicate to all elements */
3963 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3964
3965 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
3966 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
3967 (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
3968 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3969 }
3970
3971 if (a->p) {
3972 if (a->rm == 31) {
3973 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3974 } else {
3975 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3976 }
3977 }
3978 return true;
3979 }
3980
3981 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
3982 {
3983 TCGv_i64 addr, clean_addr, tcg_rt;
3984 int size = 4 << s->dcz_blocksize;
3985
3986 if (!dc_isar_feature(aa64_mte, s)) {
3987 return false;
3988 }
3989 if (s->current_el == 0) {
3990 return false;
3991 }
3992
3993 if (a->rn == 31) {
3994 gen_check_sp_alignment(s);
3995 }
3996
3997 addr = read_cpu_reg_sp(s, a->rn, true);
3998 tcg_gen_addi_i64(addr, addr, a->imm);
3999 tcg_rt = cpu_reg(s, a->rt);
4000
4001 if (s->ata[0]) {
4002 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
4003 }
4004 /*
4005 * The non-tags portion of STZGM is mostly like DC_ZVA,
4006 * except the alignment happens before the access.
4007 */
4008 clean_addr = clean_data_tbi(s, addr);
4009 tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4010 gen_helper_dc_zva(tcg_env, clean_addr);
4011 return true;
4012 }
4013
4014 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
4015 {
4016 TCGv_i64 addr, clean_addr, tcg_rt;
4017
4018 if (!dc_isar_feature(aa64_mte, s)) {
4019 return false;
4020 }
4021 if (s->current_el == 0) {
4022 return false;
4023 }
4024
4025 if (a->rn == 31) {
4026 gen_check_sp_alignment(s);
4027 }
4028
4029 addr = read_cpu_reg_sp(s, a->rn, true);
4030 tcg_gen_addi_i64(addr, addr, a->imm);
4031 tcg_rt = cpu_reg(s, a->rt);
4032
4033 if (s->ata[0]) {
4034 gen_helper_stgm(tcg_env, addr, tcg_rt);
4035 } else {
4036 MMUAccessType acc = MMU_DATA_STORE;
4037 int size = 4 << s->gm_blocksize;
4038
4039 clean_addr = clean_data_tbi(s, addr);
4040 tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4041 gen_probe_access(s, clean_addr, acc, size);
4042 }
4043 return true;
4044 }
4045
4046 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
4047 {
4048 TCGv_i64 addr, clean_addr, tcg_rt;
4049
4050 if (!dc_isar_feature(aa64_mte, s)) {
4051 return false;
4052 }
4053 if (s->current_el == 0) {
4054 return false;
4055 }
4056
4057 if (a->rn == 31) {
4058 gen_check_sp_alignment(s);
4059 }
4060
4061 addr = read_cpu_reg_sp(s, a->rn, true);
4062 tcg_gen_addi_i64(addr, addr, a->imm);
4063 tcg_rt = cpu_reg(s, a->rt);
4064
4065 if (s->ata[0]) {
4066 gen_helper_ldgm(tcg_rt, tcg_env, addr);
4067 } else {
4068 MMUAccessType acc = MMU_DATA_LOAD;
4069 int size = 4 << s->gm_blocksize;
4070
4071 clean_addr = clean_data_tbi(s, addr);
4072 tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4073 gen_probe_access(s, clean_addr, acc, size);
4074 /* The result tags are zeros. */
4075 tcg_gen_movi_i64(tcg_rt, 0);
4076 }
4077 return true;
4078 }
4079
4080 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
4081 {
4082 TCGv_i64 addr, clean_addr, tcg_rt;
4083
4084 if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
4085 return false;
4086 }
4087
4088 if (a->rn == 31) {
4089 gen_check_sp_alignment(s);
4090 }
4091
4092 addr = read_cpu_reg_sp(s, a->rn, true);
4093 if (!a->p) {
4094 /* pre-index or signed offset */
4095 tcg_gen_addi_i64(addr, addr, a->imm);
4096 }
4097
4098 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4099 tcg_rt = cpu_reg(s, a->rt);
4100 if (s->ata[0]) {
4101 gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
4102 } else {
4103 /*
4104 * Tag access disabled: we must check for aborts on the load
4105      * from [rn+offset], and then insert a 0 tag into rt.
4106 */
4107 clean_addr = clean_data_tbi(s, addr);
4108 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4109 gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
4110 }
4111
4112 if (a->w) {
4113 /* pre-index or post-index */
4114 if (a->p) {
4115 /* post-index */
4116 tcg_gen_addi_i64(addr, addr, a->imm);
4117 }
4118 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4119 }
4120 return true;
4121 }
4122
4123 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
4124 {
4125 TCGv_i64 addr, tcg_rt;
4126
4127 if (a->rn == 31) {
4128 gen_check_sp_alignment(s);
4129 }
4130
4131 addr = read_cpu_reg_sp(s, a->rn, true);
4132 if (!a->p) {
4133 /* pre-index or signed offset */
4134 tcg_gen_addi_i64(addr, addr, a->imm);
4135 }
4136 tcg_rt = cpu_reg_sp(s, a->rt);
4137 if (!s->ata[0]) {
4138 /*
4139 * For STG and ST2G, we need to check alignment and probe memory.
4140 * TODO: For STZG and STZ2G, we could rely on the stores below,
4141 * at least for system mode; user-only won't enforce alignment.
4142 */
4143 if (is_pair) {
4144 gen_helper_st2g_stub(tcg_env, addr);
4145 } else {
4146 gen_helper_stg_stub(tcg_env, addr);
4147 }
4148 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4149 if (is_pair) {
4150 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4151 } else {
4152 gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4153 }
4154 } else {
4155 if (is_pair) {
4156 gen_helper_st2g(tcg_env, addr, tcg_rt);
4157 } else {
4158 gen_helper_stg(tcg_env, addr, tcg_rt);
4159 }
4160 }
4161
4162 if (is_zero) {
4163 TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4164 TCGv_i64 zero64 = tcg_constant_i64(0);
4165 TCGv_i128 zero128 = tcg_temp_new_i128();
4166 int mem_index = get_mem_index(s);
4167 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4168
4169 tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4170
4171 /* This is 1 or 2 atomic 16-byte operations. */
4172 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4173 if (is_pair) {
4174 tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4175 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4176 }
4177 }
4178
4179 if (a->w) {
4180 /* pre-index or post-index */
4181 if (a->p) {
4182 /* post-index */
4183 tcg_gen_addi_i64(addr, addr, a->imm);
4184 }
4185 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4186 }
4187 return true;
4188 }
4189
4190 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
4191 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
4192 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
4193 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
4194
4195 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4196
4197 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4198 bool is_setg, SetFn fn)
4199 {
4200 int memidx;
4201 uint32_t syndrome, desc = 0;
4202
4203 if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4204 return false;
4205 }
4206
4207 /*
4208 * UNPREDICTABLE cases: we choose to UNDEF, which allows
4209 * us to pull this check before the CheckMOPSEnabled() test
4210 * (which we do in the helper function)
4211 */
4212 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4213 a->rd == 31 || a->rn == 31) {
4214 return false;
4215 }
4216
4217 memidx = get_a64_user_mem_index(s, a->unpriv);
4218
4219 /*
4220 * We pass option_a == true, matching our implementation;
4221 * we pass wrong_option == false: helper function may set that bit.
4222 */
4223 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4224 is_epilogue, false, true, a->rd, a->rs, a->rn);
4225
4226 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4227 /* We may need to do MTE tag checking, so assemble the descriptor */
4228 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4229 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4230 desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4231 /* SIZEM1 and ALIGN we leave 0 (byte write) */
4232 }
4233 /* The helper function always needs the memidx even with MTE disabled */
4234 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4235
4236 /*
4237 * The helper needs the register numbers, but since they're in
4238 * the syndrome anyway, we let it extract them from there rather
4239 * than passing in an extra three integer arguments.
4240 */
4241 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4242 return true;
4243 }
4244
4245 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
4246 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
4247 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
4248 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
4249 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
4250 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
4251
4252 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4253
4254 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4255 {
4256 int rmemidx, wmemidx;
4257 uint32_t syndrome, rdesc = 0, wdesc = 0;
4258 bool wunpriv = extract32(a->options, 0, 1);
4259 bool runpriv = extract32(a->options, 1, 1);
4260
4261 /*
4262 * UNPREDICTABLE cases: we choose to UNDEF, which allows
4263 * us to pull this check before the CheckMOPSEnabled() test
4264 * (which we do in the helper function)
4265 */
4266 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4267 a->rd == 31 || a->rs == 31 || a->rn == 31) {
4268 return false;
4269 }
4270
4271 rmemidx = get_a64_user_mem_index(s, runpriv);
4272 wmemidx = get_a64_user_mem_index(s, wunpriv);
4273
4274 /*
4275 * We pass option_a == true, matching our implementation;
4276 * we pass wrong_option == false: helper function may set that bit.
4277 */
4278 syndrome = syn_mop(false, false, a->options, is_epilogue,
4279 false, true, a->rd, a->rs, a->rn);
4280
4281 /* If we need to do MTE tag checking, assemble the descriptors */
4282 if (s->mte_active[runpriv]) {
4283 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4284 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4285 }
4286 if (s->mte_active[wunpriv]) {
4287 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4288 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4289 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4290 }
4291 /* The helper function needs these parts of the descriptor regardless */
4292 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4293 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4294
4295 /*
4296 * The helper needs the register numbers, but since they're in
4297 * the syndrome anyway, we let it extract them from there rather
4298 * than passing in an extra three integer arguments.
4299 */
4300 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4301 tcg_constant_i32(rdesc));
4302 return true;
4303 }
4304
4305 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4306 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4307 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4308 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4309 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4310 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
4311
4312 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4313
4314 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4315 bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4316 {
4317 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4318 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4319 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4320
4321 fn(tcg_rd, tcg_rn, tcg_imm);
4322 if (!a->sf) {
4323 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4324 }
4325 return true;
4326 }
4327
4328 /*
4329 * PC-rel. addressing
4330 */
4331
4332 static bool trans_ADR(DisasContext *s, arg_ri *a)
4333 {
4334 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4335 return true;
4336 }
4337
4338 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4339 {
4340 int64_t offset = (int64_t)a->imm << 12;
4341
4342 /* The page offset is ok for CF_PCREL. */
4343 offset -= s->pc_curr & 0xfff;
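    /*
     * Illustrative: ADRP computes (PC & ~0xfff) + (imm << 12), so
     * subtracting PC's low 12 bits here lets gen_pc_plus_diff() add the
     * offset to the full PC and still land on the page-aligned result.
     */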
4344 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4345 return true;
4346 }
4347
4348 /*
4349 * Add/subtract (immediate)
4350 */
4351 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4352 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4353 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4354 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4355
4356 /*
4357 * Add/subtract (immediate, with tags)
4358 */
4359
4360 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4361 bool sub_op)
4362 {
4363 TCGv_i64 tcg_rn, tcg_rd;
4364 int imm;
4365
4366 imm = a->uimm6 << LOG2_TAG_GRANULE;
4367 if (sub_op) {
4368 imm = -imm;
4369 }
4370
4371 tcg_rn = cpu_reg_sp(s, a->rn);
4372 tcg_rd = cpu_reg_sp(s, a->rd);
4373
4374 if (s->ata[0]) {
4375 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4376 tcg_constant_i32(imm),
4377 tcg_constant_i32(a->uimm4));
4378 } else {
4379 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4380 gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4381 }
4382 return true;
4383 }
4384
4385 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4386 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4387
4388 /* The input should be a value in the bottom e bits (with higher
4389 * bits zero); returns that value replicated into every element
4390 * of size e in a 64 bit integer.
4391 */
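/*
 * For example (illustrative): bitfield_replicate(0x3, 4) returns
 * 0x3333333333333333, and bitfield_replicate(0x1, 2) returns
 * 0x5555555555555555.
 */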
4392 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4393 {
4394 assert(e != 0);
4395 while (e < 64) {
4396 mask |= mask << e;
4397 e *= 2;
4398 }
4399 return mask;
4400 }
4401
4402 /*
4403 * Logical (immediate)
4404 */
4405
4406 /*
4407 * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4408 * only require the wmask. Returns false if the imms/immr/immn are a reserved
4409 * value (ie should cause a guest UNDEF exception), and true if they are
4410 * valid, in which case the decoded bit pattern is written to result.
4411 */
4412 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4413 unsigned int imms, unsigned int immr)
4414 {
4415 uint64_t mask;
4416 unsigned e, levels, s, r;
4417 int len;
4418
4419 assert(immn < 2 && imms < 64 && immr < 64);
4420
4421 /* The bit patterns we create here are 64 bit patterns which
4422 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4423 * 64 bits each. Each element contains the same value: a run
4424 * of between 1 and e-1 non-zero bits, rotated within the
4425 * element by between 0 and e-1 bits.
4426 *
4427 * The element size and run length are encoded into immn (1 bit)
4428 * and imms (6 bits) as follows:
4429 * 64 bit elements: immn = 1, imms = <length of run - 1>
4430 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4431 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4432 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4433 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4434 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4435 * Notice that immn = 0, imms = 11111x is the only combination
4436 * not covered by one of the above options; this is reserved.
4437 * Further, <length of run - 1> all-ones is a reserved pattern.
4438 *
4439 * In all cases the rotation is by immr % e (and immr is 6 bits).
4440 */
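    /*
     * Worked example (illustrative): immn = 0, imms = 0b100101,
     * immr = 0b000011 selects 16-bit elements (e = 16) with a run of
     * six set bits (s = 5) rotated right by r = 3, giving the element
     * 0xe007 and a final wmask of 0xe007e007e007e007.
     */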
4441
4442 /* First determine the element size */
4443 len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4444 if (len < 1) {
4445 /* This is the immn == 0, imms == 0x11111x case */
4446 return false;
4447 }
4448 e = 1 << len;
4449
4450 levels = e - 1;
4451 s = imms & levels;
4452 r = immr & levels;
4453
4454 if (s == levels) {
4455 /* <length of run - 1> mustn't be all-ones. */
4456 return false;
4457 }
4458
4459 /* Create the value of one element: s+1 set bits rotated
4460 * by r within the element (which is e bits wide)...
4461 */
4462 mask = MAKE_64BIT_MASK(0, s + 1);
4463 if (r) {
4464 mask = (mask >> r) | (mask << (e - r));
4465 mask &= MAKE_64BIT_MASK(0, e);
4466 }
4467 /* ...then replicate the element over the whole 64 bit value */
4468 mask = bitfield_replicate(mask, e);
4469 *result = mask;
4470 return true;
4471 }
4472
4473 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4474 void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4475 {
4476 TCGv_i64 tcg_rd, tcg_rn;
4477 uint64_t imm;
4478
4479 /* Some immediate field values are reserved. */
4480 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4481 extract32(a->dbm, 0, 6),
4482 extract32(a->dbm, 6, 6))) {
4483 return false;
4484 }
4485 if (!a->sf) {
4486 imm &= 0xffffffffull;
4487 }
4488
4489 tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4490 tcg_rn = cpu_reg(s, a->rn);
4491
4492 fn(tcg_rd, tcg_rn, imm);
4493 if (set_cc) {
4494 gen_logic_CC(a->sf, tcg_rd);
4495 }
4496 if (!a->sf) {
4497 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4498 }
4499 return true;
4500 }
4501
4502 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4503 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4504 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4505 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4506
4507 /*
4508 * Move wide (immediate)
4509 */
4510
4511 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4512 {
4513 int pos = a->hw << 4;
4514 tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4515 return true;
4516 }
4517
4518 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4519 {
4520 int pos = a->hw << 4;
4521 uint64_t imm = a->imm;
4522
4523 imm = ~(imm << pos);
4524 if (!a->sf) {
4525 imm = (uint32_t)imm;
4526 }
4527 tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4528 return true;
4529 }
4530
4531 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4532 {
4533 int pos = a->hw << 4;
4534 TCGv_i64 tcg_rd, tcg_im;
4535
4536 tcg_rd = cpu_reg(s, a->rd);
4537 tcg_im = tcg_constant_i64(a->imm);
4538 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4539 if (!a->sf) {
4540 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4541 }
4542 return true;
4543 }
4544
4545 /*
4546 * Bitfield
4547 */
4548
4549 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4550 {
4551 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4552 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4553 unsigned int bitsize = a->sf ? 64 : 32;
4554 unsigned int ri = a->immr;
4555 unsigned int si = a->imms;
4556 unsigned int pos, len;
4557
4558 if (si >= ri) {
4559 /* Wd<s-r:0> = Wn<s:r> */
4560 len = (si - ri) + 1;
4561 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4562 if (!a->sf) {
4563 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4564 }
4565 } else {
4566 /* Wd<32+s-r,32-r> = Wn<s:0> */
4567 len = si + 1;
4568 pos = (bitsize - ri) & (bitsize - 1);
4569
4570 if (len < ri) {
4571 /*
4572 * Sign extend the destination field from len to fill the
4573 * balance of the word. Let the deposit below insert all
4574 * of those sign bits.
4575 */
4576 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4577 len = ri;
4578 }
4579
4580 /*
4581 * We start with zero, and we haven't modified any bits outside
4582      * bitsize, therefore no final zero-extension is needed for !sf.
4583 */
4584 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4585 }
4586 return true;
4587 }
4588
4589 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4590 {
4591 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4592 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4593 unsigned int bitsize = a->sf ? 64 : 32;
4594 unsigned int ri = a->immr;
4595 unsigned int si = a->imms;
4596 unsigned int pos, len;
4597
4601 if (si >= ri) {
4602 /* Wd<s-r:0> = Wn<s:r> */
4603 len = (si - ri) + 1;
4604 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4605 } else {
4606 /* Wd<32+s-r,32-r> = Wn<s:0> */
4607 len = si + 1;
4608 pos = (bitsize - ri) & (bitsize - 1);
4609 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4610 }
4611 return true;
4612 }
4613
4614 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4615 {
4616 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4617 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4618 unsigned int bitsize = a->sf ? 64 : 32;
4619 unsigned int ri = a->immr;
4620 unsigned int si = a->imms;
4621 unsigned int pos, len;
4622
4626 if (si >= ri) {
4627 /* Wd<s-r:0> = Wn<s:r> */
4628 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4629 len = (si - ri) + 1;
4630 pos = 0;
4631 } else {
4632 /* Wd<32+s-r,32-r> = Wn<s:0> */
4633 len = si + 1;
4634 pos = (bitsize - ri) & (bitsize - 1);
4635 }
4636
4637 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4638 if (!a->sf) {
4639 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4640 }
4641 return true;
4642 }
4643
4644 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4645 {
4646 TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4647
4648 tcg_rd = cpu_reg(s, a->rd);
4649
4650 if (unlikely(a->imm == 0)) {
4651 /*
4652 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4653 * so an extract from bit 0 is a special case.
4654 */
4655 if (a->sf) {
4656 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4657 } else {
4658 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4659 }
4660 } else {
4661 tcg_rm = cpu_reg(s, a->rm);
4662 tcg_rn = cpu_reg(s, a->rn);
4663
4664 if (a->sf) {
4665 /* Specialization to ROR happens in EXTRACT2. */
4666 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4667 } else {
4668 TCGv_i32 t0 = tcg_temp_new_i32();
4669
4670 tcg_gen_extrl_i64_i32(t0, tcg_rm);
4671 if (a->rm == a->rn) {
4672 tcg_gen_rotri_i32(t0, t0, a->imm);
4673 } else {
4674 TCGv_i32 t1 = tcg_temp_new_i32();
4675 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4676 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4677 }
4678 tcg_gen_extu_i32_i64(tcg_rd, t0);
4679 }
4680 }
4681 return true;
4682 }
4683
4684 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a)
4685 {
4686 if (fp_access_check(s)) {
4687 int len = (a->len + 1) * 16;
4688
4689 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4690 vec_full_reg_offset(s, a->rm), tcg_env,
4691 a->q ? 16 : 8, vec_full_reg_size(s),
4692 (len << 6) | (a->tbx << 5) | a->rn,
4693 gen_helper_simd_tblx);
4694 }
4695 return true;
4696 }
4697
4698 typedef int simd_permute_idx_fn(int i, int part, int elements);
4699
4700 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
4701 simd_permute_idx_fn *fn, int part)
4702 {
4703 MemOp esz = a->esz;
4704 int datasize = a->q ? 16 : 8;
4705 int elements = datasize >> esz;
4706 TCGv_i64 tcg_res[2], tcg_ele;
4707
4708 if (esz == MO_64 && !a->q) {
4709 return false;
4710 }
4711 if (!fp_access_check(s)) {
4712 return true;
4713 }
4714
4715 tcg_res[0] = tcg_temp_new_i64();
4716 tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
4717 tcg_ele = tcg_temp_new_i64();
4718
4719 for (int i = 0; i < elements; i++) {
4720 int o, w, idx;
4721
4722 idx = fn(i, part, elements);
4723 read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn),
4724 idx & (elements - 1), esz);
4725
4726 w = (i << (esz + 3)) / 64;
4727 o = (i << (esz + 3)) % 64;
4728 if (o == 0) {
4729 tcg_gen_mov_i64(tcg_res[w], tcg_ele);
4730 } else {
4731 tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz);
4732 }
4733 }
4734
4735 for (int i = a->q; i >= 0; --i) {
4736 write_vec_element(s, tcg_res[i], a->rd, i, MO_64);
4737 }
4738 clear_vec_high(s, a->q, a->rd);
4739 return true;
4740 }
4741
4742 static int permute_load_uzp(int i, int part, int elements)
4743 {
4744 return 2 * i + part;
4745 }
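/*
 * Illustrative: with elements == 4 and part == 0 this yields indices
 * 0, 2, 4, 6, i.e. the even-numbered elements of Rn followed by the
 * even-numbered elements of Rm (UZP1); part == 1 selects the odd ones.
 */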
4746
4747 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0)
4748 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1)
4749
4750 static int permute_load_trn(int i, int part, int elements)
4751 {
4752 return (i & 1) * elements + (i & ~1) + part;
4753 }
4754
4755 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0)
4756 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1)
4757
4758 static int permute_load_zip(int i, int part, int elements)
4759 {
4760 return (i & 1) * elements + ((part * elements + i) >> 1);
4761 }
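/*
 * Illustrative: with elements == 4 and part == 0 this yields indices
 * 0, 4, 1, 5, interleaving the low halves of Rn and Rm (ZIP1);
 * part == 1 interleaves the high halves.
 */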
4762
4763 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0)
4764 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1)
4765
4766 /*
4767 * Cryptographic AES, SHA, SHA512
4768 */
4769
4770 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
4771 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
4772 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
4773 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
4774
4775 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
4776 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
4777 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
4778 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)
4779
4780 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
4781 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
4782 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
4783
4784 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
4785 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
4786 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
4787
4788 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
4789 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
4790 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
4791 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
4792 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
4793 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
4794 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
4795
4796 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
4797 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
4798
4799 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
4800 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
4801
4802 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
4803 {
4804 if (!dc_isar_feature(aa64_sm3, s)) {
4805 return false;
4806 }
4807 if (fp_access_check(s)) {
4808 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
4809 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
4810 TCGv_i32 tcg_op3 = tcg_temp_new_i32();
4811 TCGv_i32 tcg_res = tcg_temp_new_i32();
4812 unsigned vsz, dofs;
4813
4814 read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
4815 read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
4816 read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
4817
4818 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
4819 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
4820 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
4821 tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
4822
4823 /* Clear the whole register first, then store bits [127:96]. */
4824 vsz = vec_full_reg_size(s);
4825 dofs = vec_full_reg_offset(s, a->rd);
4826 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
4827 write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
4828 }
4829 return true;
4830 }
4831
4832 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
4833 {
4834 if (fp_access_check(s)) {
4835 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
4836 }
4837 return true;
4838 }
4839 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
4840 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
4841 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
4842 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
4843
4844 static bool trans_XAR(DisasContext *s, arg_XAR *a)
4845 {
4846 if (!dc_isar_feature(aa64_sha3, s)) {
4847 return false;
4848 }
4849 if (fp_access_check(s)) {
4850 gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
4851 vec_full_reg_offset(s, a->rn),
4852 vec_full_reg_offset(s, a->rm), a->imm, 16,
4853 vec_full_reg_size(s));
4854 }
4855 return true;
4856 }
4857
4858 /*
4859 * Advanced SIMD copy
4860 */
4861
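/*
 * Decode the combined element-size/index immediate: the position of
 * the lowest set bit gives the element size, and the bits above it
 * give the element index. Illustrative: imm == 0b10010 decodes to
 * esz == MO_16, idx == 4.
 */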
4862 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
4863 {
4864 unsigned esz = ctz32(imm);
4865 if (esz <= MO_64) {
4866 *pesz = esz;
4867 *pidx = imm >> (esz + 1);
4868 return true;
4869 }
4870 return false;
4871 }
4872
4873 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
4874 {
4875 MemOp esz;
4876 unsigned idx;
4877
4878 if (!decode_esz_idx(a->imm, &esz, &idx)) {
4879 return false;
4880 }
4881 if (fp_access_check(s)) {
4882 /*
4883 * This instruction just extracts the specified element and
4884 * zero-extends it into the bottom of the destination register.
4885 */
4886 TCGv_i64 tmp = tcg_temp_new_i64();
4887 read_vec_element(s, tmp, a->rn, idx, esz);
4888 write_fp_dreg(s, a->rd, tmp);
4889 }
4890 return true;
4891 }
4892
4893 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
4894 {
4895 MemOp esz;
4896 unsigned idx;
4897
4898 if (!decode_esz_idx(a->imm, &esz, &idx)) {
4899 return false;
4900 }
4901 if (esz == MO_64 && !a->q) {
4902 return false;
4903 }
4904 if (fp_access_check(s)) {
4905 tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
4906 vec_reg_offset(s, a->rn, idx, esz),
4907 a->q ? 16 : 8, vec_full_reg_size(s));
4908 }
4909 return true;
4910 }
4911
4912 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
4913 {
4914 MemOp esz;
4915 unsigned idx;
4916
4917 if (!decode_esz_idx(a->imm, &esz, &idx)) {
4918 return false;
4919 }
4920 if (esz == MO_64 && !a->q) {
4921 return false;
4922 }
4923 if (fp_access_check(s)) {
4924 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4925 a->q ? 16 : 8, vec_full_reg_size(s),
4926 cpu_reg(s, a->rn));
4927 }
4928 return true;
4929 }
4930
4931 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
4932 {
4933 MemOp esz;
4934 unsigned idx;
4935
4936 if (!decode_esz_idx(a->imm, &esz, &idx)) {
4937 return false;
4938 }
4939 if (is_signed) {
4940 if (esz == MO_64 || (esz == MO_32 && !a->q)) {
4941 return false;
4942 }
4943 } else {
4944 if (esz == MO_64 ? !a->q : a->q) {
4945 return false;
4946 }
4947 }
4948 if (fp_access_check(s)) {
4949 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4950 read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
4951 if (is_signed && !a->q) {
4952 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4953 }
4954 }
4955 return true;
4956 }
4957
4958 TRANS(SMOV, do_smov_umov, a, MO_SIGN)
4959 TRANS(UMOV, do_smov_umov, a, 0)
4960
4961 static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
4962 {
4963 MemOp esz;
4964 unsigned idx;
4965
4966 if (!decode_esz_idx(a->imm, &esz, &idx)) {
4967 return false;
4968 }
4969 if (fp_access_check(s)) {
4970 write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
4971 clear_vec_high(s, true, a->rd);
4972 }
4973 return true;
4974 }
4975
4976 static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
4977 {
4978 MemOp esz;
4979 unsigned didx, sidx;
4980
4981 if (!decode_esz_idx(a->di, &esz, &didx)) {
4982 return false;
4983 }
4984 sidx = a->si >> esz;
4985 if (fp_access_check(s)) {
4986 TCGv_i64 tmp = tcg_temp_new_i64();
4987
4988 read_vec_element(s, tmp, a->rn, sidx, esz);
4989 write_vec_element(s, tmp, a->rd, didx, esz);
4990
4991 /* INS is considered a 128-bit write for SVE. */
4992 clear_vec_high(s, true, a->rd);
4993 }
4994 return true;
4995 }
4996
4997 /*
4998 * Advanced SIMD three same
4999 */
5000
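/*
 * Per-size expanders for a scalar FP three-operand op: one generator
 * each for half, single and double precision, each taking a
 * float_status pointer.
 */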
5001 typedef struct FPScalar {
5002 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5003 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5004 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
5005 } FPScalar;
5006
5007 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f)
5008 {
5009 switch (a->esz) {
5010 case MO_64:
5011 if (fp_access_check(s)) {
5012 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5013 TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5014 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
5015 write_fp_dreg(s, a->rd, t0);
5016 }
5017 break;
5018 case MO_32:
5019 if (fp_access_check(s)) {
5020 TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5021 TCGv_i32 t1 = read_fp_sreg(s, a->rm);
5022 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
5023 write_fp_sreg(s, a->rd, t0);
5024 }
5025 break;
5026 case MO_16:
5027 if (!dc_isar_feature(aa64_fp16, s)) {
5028 return false;
5029 }
5030 if (fp_access_check(s)) {
5031 TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5032 TCGv_i32 t1 = read_fp_hreg(s, a->rm);
5033 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
5034 write_fp_sreg(s, a->rd, t0);
5035 }
5036 break;
5037 default:
5038 return false;
5039 }
5040 return true;
5041 }
5042
5043 static const FPScalar f_scalar_fadd = {
5044 gen_helper_vfp_addh,
5045 gen_helper_vfp_adds,
5046 gen_helper_vfp_addd,
5047 };
5048 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd)
5049
5050 static const FPScalar f_scalar_fsub = {
5051 gen_helper_vfp_subh,
5052 gen_helper_vfp_subs,
5053 gen_helper_vfp_subd,
5054 };
5055 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub)
5056
5057 static const FPScalar f_scalar_fdiv = {
5058 gen_helper_vfp_divh,
5059 gen_helper_vfp_divs,
5060 gen_helper_vfp_divd,
5061 };
5062 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv)
5063
5064 static const FPScalar f_scalar_fmul = {
5065 gen_helper_vfp_mulh,
5066 gen_helper_vfp_muls,
5067 gen_helper_vfp_muld,
5068 };
5069 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul)
5070
5071 static const FPScalar f_scalar_fmax = {
5072 gen_helper_advsimd_maxh,
5073 gen_helper_vfp_maxs,
5074 gen_helper_vfp_maxd,
5075 };
5076 TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax)
5077
5078 static const FPScalar f_scalar_fmin = {
5079 gen_helper_advsimd_minh,
5080 gen_helper_vfp_mins,
5081 gen_helper_vfp_mind,
5082 };
5083 TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin)
5084
5085 static const FPScalar f_scalar_fmaxnm = {
5086 gen_helper_advsimd_maxnumh,
5087 gen_helper_vfp_maxnums,
5088 gen_helper_vfp_maxnumd,
5089 };
5090 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm)
5091
5092 static const FPScalar f_scalar_fminnm = {
5093 gen_helper_advsimd_minnumh,
5094 gen_helper_vfp_minnums,
5095 gen_helper_vfp_minnumd,
5096 };
5097 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm)
5098
5099 static const FPScalar f_scalar_fmulx = {
5100 gen_helper_advsimd_mulxh,
5101 gen_helper_vfp_mulxs,
5102 gen_helper_vfp_mulxd,
5103 };
5104 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx)
5105
5106 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5107 {
5108 gen_helper_vfp_mulh(d, n, m, s);
5109 gen_vfp_negh(d, d);
5110 }
5111
5112 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5113 {
5114 gen_helper_vfp_muls(d, n, m, s);
5115 gen_vfp_negs(d, d);
5116 }
5117
5118 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5119 {
5120 gen_helper_vfp_muld(d, n, m, s);
5121 gen_vfp_negd(d, d);
5122 }
5123
5124 static const FPScalar f_scalar_fnmul = {
5125 gen_fnmul_h,
5126 gen_fnmul_s,
5127 gen_fnmul_d,
5128 };
5129 TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul)
5130
5131 static const FPScalar f_scalar_fcmeq = {
5132 gen_helper_advsimd_ceq_f16,
5133 gen_helper_neon_ceq_f32,
5134 gen_helper_neon_ceq_f64,
5135 };
5136 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq)
5137
5138 static const FPScalar f_scalar_fcmge = {
5139 gen_helper_advsimd_cge_f16,
5140 gen_helper_neon_cge_f32,
5141 gen_helper_neon_cge_f64,
5142 };
5143 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge)
5144
5145 static const FPScalar f_scalar_fcmgt = {
5146 gen_helper_advsimd_cgt_f16,
5147 gen_helper_neon_cgt_f32,
5148 gen_helper_neon_cgt_f64,
5149 };
5150 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt)
5151
5152 static const FPScalar f_scalar_facge = {
5153 gen_helper_advsimd_acge_f16,
5154 gen_helper_neon_acge_f32,
5155 gen_helper_neon_acge_f64,
5156 };
5157 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge)
5158
5159 static const FPScalar f_scalar_facgt = {
5160 gen_helper_advsimd_acgt_f16,
5161 gen_helper_neon_acgt_f32,
5162 gen_helper_neon_acgt_f64,
5163 };
5164 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt)
5165
5166 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5167 {
5168 gen_helper_vfp_subh(d, n, m, s);
5169 gen_vfp_absh(d, d);
5170 }
5171
5172 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5173 {
5174 gen_helper_vfp_subs(d, n, m, s);
5175 gen_vfp_abss(d, d);
5176 }
5177
5178 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5179 {
5180 gen_helper_vfp_subd(d, n, m, s);
5181 gen_vfp_absd(d, d);
5182 }
5183
5184 static const FPScalar f_scalar_fabd = {
5185 gen_fabd_h,
5186 gen_fabd_s,
5187 gen_fabd_d,
5188 };
5189 TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd)
5190
5191 static const FPScalar f_scalar_frecps = {
5192 gen_helper_recpsf_f16,
5193 gen_helper_recpsf_f32,
5194 gen_helper_recpsf_f64,
5195 };
5196 TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps)
5197
5198 static const FPScalar f_scalar_frsqrts = {
5199 gen_helper_rsqrtsf_f16,
5200 gen_helper_rsqrtsf_f32,
5201 gen_helper_rsqrtsf_f64,
5202 };
5203 TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts)
5204
5205 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a,
5206 MemOp sgn_n, MemOp sgn_m,
5207 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp),
5208 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
5209 {
5210 TCGv_i64 t0, t1, t2, qc;
5211 MemOp esz = a->esz;
5212
5213 if (!fp_access_check(s)) {
5214 return true;
5215 }
5216
5217 t0 = tcg_temp_new_i64();
5218 t1 = tcg_temp_new_i64();
5219 t2 = tcg_temp_new_i64();
5220 qc = tcg_temp_new_i64();
5221 read_vec_element(s, t1, a->rn, 0, esz | sgn_n);
5222 read_vec_element(s, t2, a->rm, 0, esz | sgn_m);
5223 tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5224
5225 if (esz == MO_64) {
5226 gen_d(t0, qc, t1, t2);
5227 } else {
5228 gen_bhs(t0, qc, t1, t2, esz);
5229 tcg_gen_ext_i64(t0, t0, esz);
5230 }
5231
5232 write_fp_dreg(s, a->rd, t0);
5233 tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5234 return true;
5235 }
5236
5237 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d)
5238 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d)
5239 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d)
5240 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d)
5241 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d)
5242 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d)
5243
5244 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a,
5245 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
5246 {
5247 if (fp_access_check(s)) {
5248 TCGv_i64 t0 = tcg_temp_new_i64();
5249 TCGv_i64 t1 = tcg_temp_new_i64();
5250
5251 read_vec_element(s, t0, a->rn, 0, MO_64);
5252 read_vec_element(s, t1, a->rm, 0, MO_64);
5253 fn(t0, t0, t1);
5254 write_fp_dreg(s, a->rd, t0);
5255 }
5256 return true;
5257 }
5258
5259 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64)
5260 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64)
5261 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64)
5262 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64)
5263 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64)
5264 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64)
5265
5266 typedef struct ENVScalar2 {
5267 NeonGenTwoOpEnvFn *gen_bhs[3];
5268 NeonGenTwo64OpEnvFn *gen_d;
5269 } ENVScalar2;
5270
5271 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f)
5272 {
5273 if (!fp_access_check(s)) {
5274 return true;
5275 }
5276 if (a->esz == MO_64) {
5277 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5278 TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5279 f->gen_d(t0, tcg_env, t0, t1);
5280 write_fp_dreg(s, a->rd, t0);
5281 } else {
5282 TCGv_i32 t0 = tcg_temp_new_i32();
5283 TCGv_i32 t1 = tcg_temp_new_i32();
5284
5285 read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5286 read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5287 f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
5288 write_fp_sreg(s, a->rd, t0);
5289 }
5290 return true;
5291 }
5292
5293 static const ENVScalar2 f_scalar_sqshl = {
5294 { gen_helper_neon_qshl_s8,
5295 gen_helper_neon_qshl_s16,
5296 gen_helper_neon_qshl_s32 },
5297 gen_helper_neon_qshl_s64,
5298 };
5299 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl)
5300
5301 static const ENVScalar2 f_scalar_uqshl = {
5302 { gen_helper_neon_qshl_u8,
5303 gen_helper_neon_qshl_u16,
5304 gen_helper_neon_qshl_u32 },
5305 gen_helper_neon_qshl_u64,
5306 };
5307 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl)
5308
5309 static const ENVScalar2 f_scalar_sqrshl = {
5310 { gen_helper_neon_qrshl_s8,
5311 gen_helper_neon_qrshl_s16,
5312 gen_helper_neon_qrshl_s32 },
5313 gen_helper_neon_qrshl_s64,
5314 };
5315 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl)
5316
5317 static const ENVScalar2 f_scalar_uqrshl = {
5318 { gen_helper_neon_qrshl_u8,
5319 gen_helper_neon_qrshl_u16,
5320 gen_helper_neon_qrshl_u32 },
5321 gen_helper_neon_qrshl_u64,
5322 };
5323 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl)
5324
5325 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a,
5326 const ENVScalar2 *f)
5327 {
5328 if (a->esz == MO_16 || a->esz == MO_32) {
5329 return do_env_scalar2(s, a, f);
5330 }
5331 return false;
5332 }
5333
5334 static const ENVScalar2 f_scalar_sqdmulh = {
5335 { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 }
5336 };
5337 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh)
5338
5339 static const ENVScalar2 f_scalar_sqrdmulh = {
5340 { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 }
5341 };
5342 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
5343
5344 typedef struct ENVScalar3 {
5345 NeonGenThreeOpEnvFn *gen_hs[2];
5346 } ENVScalar3;
5347
5348 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
5349 const ENVScalar3 *f)
5350 {
5351 TCGv_i32 t0, t1, t2;
5352
5353 if (a->esz != MO_16 && a->esz != MO_32) {
5354 return false;
5355 }
5356 if (!fp_access_check(s)) {
5357 return true;
5358 }
5359
5360 t0 = tcg_temp_new_i32();
5361 t1 = tcg_temp_new_i32();
5362 t2 = tcg_temp_new_i32();
5363 read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5364 read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5365 read_vec_element_i32(s, t2, a->rd, 0, a->esz);
5366 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
5367 write_fp_sreg(s, a->rd, t0);
5368 return true;
5369 }
5370
5371 static const ENVScalar3 f_scalar_sqrdmlah = {
5372 { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
5373 };
5374 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
5375
5376 static const ENVScalar3 f_scalar_sqrdmlsh = {
5377 { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
5378 };
5379 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
5380
5381 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
5382 {
5383 if (fp_access_check(s)) {
5384 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5385 TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5386 tcg_gen_negsetcond_i64(cond, t0, t0, t1);
5387 write_fp_dreg(s, a->rd, t0);
5388 }
5389 return true;
5390 }
5391
5392 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT)
5393 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU)
5394 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE)
5395 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
5396 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
5397 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
5398
5399 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
5400 gen_helper_gvec_3_ptr * const fns[3])
5401 {
5402 MemOp esz = a->esz;
5403
5404 switch (esz) {
5405 case MO_64:
5406 if (!a->q) {
5407 return false;
5408 }
5409 break;
5410 case MO_32:
5411 break;
5412 case MO_16:
5413 if (!dc_isar_feature(aa64_fp16, s)) {
5414 return false;
5415 }
5416 break;
5417 default:
5418 return false;
5419 }
5420 if (fp_access_check(s)) {
5421 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
5422 esz == MO_16, data, fns[esz - 1]);
5423 }
5424 return true;
5425 }
5426
5427 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
5428 gen_helper_gvec_fadd_h,
5429 gen_helper_gvec_fadd_s,
5430 gen_helper_gvec_fadd_d,
5431 };
5432 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)
5433
5434 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
5435 gen_helper_gvec_fsub_h,
5436 gen_helper_gvec_fsub_s,
5437 gen_helper_gvec_fsub_d,
5438 };
5439 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)
5440
5441 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
5442 gen_helper_gvec_fdiv_h,
5443 gen_helper_gvec_fdiv_s,
5444 gen_helper_gvec_fdiv_d,
5445 };
5446 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)
5447
5448 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
5449 gen_helper_gvec_fmul_h,
5450 gen_helper_gvec_fmul_s,
5451 gen_helper_gvec_fmul_d,
5452 };
5453 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)
5454
5455 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
5456 gen_helper_gvec_fmax_h,
5457 gen_helper_gvec_fmax_s,
5458 gen_helper_gvec_fmax_d,
5459 };
5460 TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax)
5461
5462 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
5463 gen_helper_gvec_fmin_h,
5464 gen_helper_gvec_fmin_s,
5465 gen_helper_gvec_fmin_d,
5466 };
5467 TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin)
5468
5469 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
5470 gen_helper_gvec_fmaxnum_h,
5471 gen_helper_gvec_fmaxnum_s,
5472 gen_helper_gvec_fmaxnum_d,
5473 };
5474 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)
5475
5476 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
5477 gen_helper_gvec_fminnum_h,
5478 gen_helper_gvec_fminnum_s,
5479 gen_helper_gvec_fminnum_d,
5480 };
5481 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)
5482
5483 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
5484 gen_helper_gvec_fmulx_h,
5485 gen_helper_gvec_fmulx_s,
5486 gen_helper_gvec_fmulx_d,
5487 };
5488 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)
5489
5490 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
5491 gen_helper_gvec_vfma_h,
5492 gen_helper_gvec_vfma_s,
5493 gen_helper_gvec_vfma_d,
5494 };
5495 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)
5496
5497 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
5498 gen_helper_gvec_vfms_h,
5499 gen_helper_gvec_vfms_s,
5500 gen_helper_gvec_vfms_d,
5501 };
5502 TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls)
5503
5504 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
5505 gen_helper_gvec_fceq_h,
5506 gen_helper_gvec_fceq_s,
5507 gen_helper_gvec_fceq_d,
5508 };
5509 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)
5510
5511 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
5512 gen_helper_gvec_fcge_h,
5513 gen_helper_gvec_fcge_s,
5514 gen_helper_gvec_fcge_d,
5515 };
5516 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)
5517
5518 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
5519 gen_helper_gvec_fcgt_h,
5520 gen_helper_gvec_fcgt_s,
5521 gen_helper_gvec_fcgt_d,
5522 };
5523 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)
5524
5525 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
5526 gen_helper_gvec_facge_h,
5527 gen_helper_gvec_facge_s,
5528 gen_helper_gvec_facge_d,
5529 };
5530 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)
5531
5532 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
5533 gen_helper_gvec_facgt_h,
5534 gen_helper_gvec_facgt_s,
5535 gen_helper_gvec_facgt_d,
5536 };
5537 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt)
5538
5539 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
5540 gen_helper_gvec_fabd_h,
5541 gen_helper_gvec_fabd_s,
5542 gen_helper_gvec_fabd_d,
5543 };
5544 TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd)
5545
5546 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
5547 gen_helper_gvec_recps_h,
5548 gen_helper_gvec_recps_s,
5549 gen_helper_gvec_recps_d,
5550 };
5551 TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps)
5552
5553 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
5554 gen_helper_gvec_rsqrts_h,
5555 gen_helper_gvec_rsqrts_s,
5556 gen_helper_gvec_rsqrts_d,
5557 };
5558 TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts)
5559
5560 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
5561 gen_helper_gvec_faddp_h,
5562 gen_helper_gvec_faddp_s,
5563 gen_helper_gvec_faddp_d,
5564 };
5565 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp)
5566
5567 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
5568 gen_helper_gvec_fmaxp_h,
5569 gen_helper_gvec_fmaxp_s,
5570 gen_helper_gvec_fmaxp_d,
5571 };
5572 TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp)
5573
5574 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
5575 gen_helper_gvec_fminp_h,
5576 gen_helper_gvec_fminp_s,
5577 gen_helper_gvec_fminp_d,
5578 };
5579 TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp)
5580
5581 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
5582 gen_helper_gvec_fmaxnump_h,
5583 gen_helper_gvec_fmaxnump_s,
5584 gen_helper_gvec_fmaxnump_d,
5585 };
5586 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp)
5587
5588 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
5589 gen_helper_gvec_fminnump_h,
5590 gen_helper_gvec_fminnump_s,
5591 gen_helper_gvec_fminnump_d,
5592 };
5593 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
5594
5595 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
5596 {
5597 if (fp_access_check(s)) {
5598 int data = (is_2 << 1) | is_s;
5599 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
5600 vec_full_reg_offset(s, a->rn),
5601 vec_full_reg_offset(s, a->rm), tcg_env,
5602 a->q ? 16 : 8, vec_full_reg_size(s),
5603 data, gen_helper_gvec_fmlal_a64);
5604 }
5605 return true;
5606 }
5607
5608 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
5609 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
5610 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
5611 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
5612
5613 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
5614 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
5615 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
5616 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
5617 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)
5618
5619 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
5620 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
5621 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
5622 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
5623 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
5624
5625 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
5626 {
5627 if (fp_access_check(s)) {
5628 gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
5629 }
5630 return true;
5631 }
5632
5633 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
5634 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
5635 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
5636
5637 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc)
5638 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc)
5639 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc)
5640 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc)
5641 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc)
5642 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc)
5643
5644 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl)
5645 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl)
5646 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl)
5647 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl)
5648 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl)
5649 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl)
5650 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl)
5651 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl)
5652
5653 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add)
5654 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub)
5655 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd)
5656 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd)
5657 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub)
5658 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub)
5659 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd)
5660 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd)
5661 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax)
5662 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax)
5663 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin)
5664 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin)
5665 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba)
5666 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba)
5667 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd)
5668 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd)
5669 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul)
5670 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b)
5671 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla)
5672 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls)
5673
5674 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond)
5675 {
5676 if (a->esz == MO_64 && !a->q) {
5677 return false;
5678 }
5679 if (fp_access_check(s)) {
5680 tcg_gen_gvec_cmp(cond, a->esz,
5681 vec_full_reg_offset(s, a->rd),
5682 vec_full_reg_offset(s, a->rn),
5683 vec_full_reg_offset(s, a->rm),
5684 a->q ? 16 : 8, vec_full_reg_size(s));
5685 }
5686 return true;
5687 }
5688
5689 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT)
5690 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU)
5691 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE)
5692 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
5693 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
5694 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
5695
5696 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
5697 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
5698 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
5699 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
5700
5701 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
5702 gen_helper_gvec_4 *fn)
5703 {
5704 if (fp_access_check(s)) {
5705 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
5706 }
5707 return true;
5708 }
5709
5710 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
5711 gen_helper_gvec_4_ptr *fn)
5712 {
5713 if (fp_access_check(s)) {
5714 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
5715 }
5716 return true;
5717 }
5718
5719 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
5720 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
5721 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
5722 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
5723 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
5724 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
5725 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
5726 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
5727
5728 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
5729 {
5730 if (!dc_isar_feature(aa64_bf16, s)) {
5731 return false;
5732 }
5733 if (fp_access_check(s)) {
5734 /* Q bit selects BFMLALB vs BFMLALT. */
5735 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q,
5736 gen_helper_gvec_bfmlal);
5737 }
5738 return true;
5739 }
5740
5741 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
5742 gen_helper_gvec_fcaddh,
5743 gen_helper_gvec_fcadds,
5744 gen_helper_gvec_fcaddd,
5745 };
5746 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd)
5747 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd)
5748
5749 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
5750 {
5751 gen_helper_gvec_4_ptr *fn;
5752
5753 if (!dc_isar_feature(aa64_fcma, s)) {
5754 return false;
5755 }
5756 switch (a->esz) {
5757 case MO_64:
5758 if (!a->q) {
5759 return false;
5760 }
5761 fn = gen_helper_gvec_fcmlad;
5762 break;
5763 case MO_32:
5764 fn = gen_helper_gvec_fcmlas;
5765 break;
5766 case MO_16:
5767 if (!dc_isar_feature(aa64_fp16, s)) {
5768 return false;
5769 }
5770 fn = gen_helper_gvec_fcmlah;
5771 break;
5772 default:
5773 return false;
5774 }
5775 if (fp_access_check(s)) {
5776 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
5777 a->esz == MO_16, a->rot, fn);
5778 }
5779 return true;
5780 }
5781
5782 /*
5783 * Widening vector x vector/indexed.
5784 *
5785 * These read from the top or bottom half of a 128-bit vector.
5786 * After widening, optionally accumulate with a 128-bit vector.
5787 * Implement these inline, as the number of elements is limited
5788 * and the related SVE and SME operations on larger vectors use
5789 * even/odd elements instead of top/bottom half.
5790 *
5791 * If idx >= 0, operand 2 is indexed, otherwise vector.
5792 * If acc, operand 0 is loaded with rd.
5793 */
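/*
 * For example, SMLAL Vd.4S, Vn.4H, Vm.4H (the bottom-half form, q=0)
 * computes Vd.S[i] += sext(Vn.H[i]) * sext(Vm.H[i]) for i = 0..3,
 * while SMLAL2 (q=1) reads Vn.H[i+4] and Vm.H[i+4] instead; the
 * indexed forms replace Vm.H[...] with the single element Vm.H[idx].
 */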
5794
5795 /* For low half, iterating up. */
5796 static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
5797 int rd, int rn, int rm, int idx,
5798 NeonGenTwo64OpFn *fn, bool acc)
5799 {
5800 TCGv_i64 tcg_op0 = tcg_temp_new_i64();
5801 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
5802 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
5803 MemOp esz = memop & MO_SIZE;
5804 int half = 8 >> esz;
5805 int top_swap, top_half;
5806
5807 /* There are no 64x64->128 bit operations. */
5808 if (esz >= MO_64) {
5809 return false;
5810 }
5811 if (!fp_access_check(s)) {
5812 return true;
5813 }
5814
5815 if (idx >= 0) {
5816 read_vec_element(s, tcg_op2, rm, idx, memop);
5817 }
5818
5819 /*
5820 * For top half inputs, iterate forward; backward for bottom half.
5821 * This means the store to the destination will not occur until
5822 * overlapping inputs are consumed.
5823 * Use top_swap to conditionally invert the forward iteration index.
5824 */
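/*
 * half is always a power of two, so (half - 1) is an all-ones mask
 * and the XOR below reverses the iteration order.  For example, with
 * esz == MO_16 (half == 4) and top == 0, elt_fwd 0,1,2,3 visits elt
 * 3,2,1,0, so each widened store only overwrites input elements that
 * have already been consumed or are never read at all.
 */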
5825 top_swap = top ? 0 : half - 1;
5826 top_half = top ? half : 0;
5827
5828 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
5829 int elt = elt_fwd ^ top_swap;
5830
5831 read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
5832 if (idx < 0) {
5833 read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
5834 }
5835 if (acc) {
5836 read_vec_element(s, tcg_op0, rd, elt, memop + 1);
5837 }
5838 fn(tcg_op0, tcg_op1, tcg_op2);
5839 write_vec_element(s, tcg_op0, rd, elt, esz + 1);
5840 }
5841 clear_vec_high(s, 1, rd);
5842 return true;
5843 }
5844
5845 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5846 {
5847 TCGv_i64 t = tcg_temp_new_i64();
5848 tcg_gen_mul_i64(t, n, m);
5849 tcg_gen_add_i64(d, d, t);
5850 }
5851
5852 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5853 {
5854 TCGv_i64 t = tcg_temp_new_i64();
5855 tcg_gen_mul_i64(t, n, m);
5856 tcg_gen_sub_i64(d, d, t);
5857 }
5858
5859 TRANS(SMULL_v, do_3op_widening,
5860 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5861 tcg_gen_mul_i64, false)
5862 TRANS(UMULL_v, do_3op_widening,
5863 a->esz, a->q, a->rd, a->rn, a->rm, -1,
5864 tcg_gen_mul_i64, false)
5865 TRANS(SMLAL_v, do_3op_widening,
5866 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5867 gen_muladd_i64, true)
5868 TRANS(UMLAL_v, do_3op_widening,
5869 a->esz, a->q, a->rd, a->rn, a->rm, -1,
5870 gen_muladd_i64, true)
5871 TRANS(SMLSL_v, do_3op_widening,
5872 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5873 gen_mulsub_i64, true)
5874 TRANS(UMLSL_v, do_3op_widening,
5875 a->esz, a->q, a->rd, a->rn, a->rm, -1,
5876 gen_mulsub_i64, true)
5877
5878 TRANS(SMULL_vi, do_3op_widening,
5879 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5880 tcg_gen_mul_i64, false)
5881 TRANS(UMULL_vi, do_3op_widening,
5882 a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5883 tcg_gen_mul_i64, false)
5884 TRANS(SMLAL_vi, do_3op_widening,
5885 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5886 gen_muladd_i64, true)
5887 TRANS(UMLAL_vi, do_3op_widening,
5888 a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5889 gen_muladd_i64, true)
5890 TRANS(SMLSL_vi, do_3op_widening,
5891 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
5892 gen_mulsub_i64, true)
5893 TRANS(UMLSL_vi, do_3op_widening,
5894 a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
5895 gen_mulsub_i64, true)
5896
5897 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5898 {
5899 TCGv_i64 t1 = tcg_temp_new_i64();
5900 TCGv_i64 t2 = tcg_temp_new_i64();
5901
5902 tcg_gen_sub_i64(t1, n, m);
5903 tcg_gen_sub_i64(t2, m, n);
5904 tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
5905 }
5906
5907 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5908 {
5909 TCGv_i64 t1 = tcg_temp_new_i64();
5910 TCGv_i64 t2 = tcg_temp_new_i64();
5911
5912 tcg_gen_sub_i64(t1, n, m);
5913 tcg_gen_sub_i64(t2, m, n);
5914 tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
5915 }
5916
5917 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5918 {
5919 TCGv_i64 t = tcg_temp_new_i64();
5920 gen_sabd_i64(t, n, m);
5921 tcg_gen_add_i64(d, d, t);
5922 }
5923
5924 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5925 {
5926 TCGv_i64 t = tcg_temp_new_i64();
5927 gen_uabd_i64(t, n, m);
5928 tcg_gen_add_i64(d, d, t);
5929 }
5930
5931 TRANS(SADDL_v, do_3op_widening,
5932 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5933 tcg_gen_add_i64, false)
5934 TRANS(UADDL_v, do_3op_widening,
5935 a->esz, a->q, a->rd, a->rn, a->rm, -1,
5936 tcg_gen_add_i64, false)
5937 TRANS(SSUBL_v, do_3op_widening,
5938 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5939 tcg_gen_sub_i64, false)
5940 TRANS(USUBL_v, do_3op_widening,
5941 a->esz, a->q, a->rd, a->rn, a->rm, -1,
5942 tcg_gen_sub_i64, false)
5943 TRANS(SABDL_v, do_3op_widening,
5944 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5945 gen_sabd_i64, false)
5946 TRANS(UABDL_v, do_3op_widening,
5947 a->esz, a->q, a->rd, a->rn, a->rm, -1,
5948 gen_uabd_i64, false)
5949 TRANS(SABAL_v, do_3op_widening,
5950 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
5951 gen_saba_i64, true)
5952 TRANS(UABAL_v, do_3op_widening,
5953 a->esz, a->q, a->rd, a->rn, a->rm, -1,
5954 gen_uaba_i64, true)
5955
5956 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5957 {
5958 tcg_gen_mul_i64(d, n, m);
5959 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
5960 }
5961
5962 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5963 {
5964 tcg_gen_mul_i64(d, n, m);
5965 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
5966 }
5967
5968 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5969 {
5970 TCGv_i64 t = tcg_temp_new_i64();
5971
5972 tcg_gen_mul_i64(t, n, m);
5973 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
5974 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
5975 }
5976
5977 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5978 {
5979 TCGv_i64 t = tcg_temp_new_i64();
5980
5981 tcg_gen_mul_i64(t, n, m);
5982 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
5983 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
5984 }
5985
5986 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5987 {
5988 TCGv_i64 t = tcg_temp_new_i64();
5989
5990 tcg_gen_mul_i64(t, n, m);
5991 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
5992 tcg_gen_neg_i64(t, t);
5993 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
5994 }
5995
5996 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
5997 {
5998 TCGv_i64 t = tcg_temp_new_i64();
5999
6000 tcg_gen_mul_i64(t, n, m);
6001 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6002 tcg_gen_neg_i64(t, t);
6003 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6004 }
6005
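/*
 * In the helpers above the architectural doubling is obtained by a
 * saturating addition of the product to itself (2 * n * m), and the
 * accumulate/subtract step of SQDMLAL/SQDMLSL is a second saturating
 * addition into the destination.
 */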
6006 TRANS(SQDMULL_v, do_3op_widening,
6007 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6008 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6009 TRANS(SQDMLAL_v, do_3op_widening,
6010 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6011 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6012 TRANS(SQDMLSL_v, do_3op_widening,
6013 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6014 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6015
6016 TRANS(SQDMULL_vi, do_3op_widening,
6017 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6018 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6019 TRANS(SQDMLAL_vi, do_3op_widening,
6020 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6021 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6022 TRANS(SQDMLSL_vi, do_3op_widening,
6023 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6024 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6025
6026 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
6027 MemOp sign, bool sub)
6028 {
6029 TCGv_i64 tcg_op0, tcg_op1;
6030 MemOp esz = a->esz;
6031 int half = 8 >> esz;
6032 bool top = a->q;
6033 int top_swap = top ? 0 : half - 1;
6034 int top_half = top ? half : 0;
6035
6036 /* There are no 64x64->128 bit operations. */
6037 if (esz >= MO_64) {
6038 return false;
6039 }
6040 if (!fp_access_check(s)) {
6041 return true;
6042 }
6043 tcg_op0 = tcg_temp_new_i64();
6044 tcg_op1 = tcg_temp_new_i64();
6045
6046 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6047 int elt = elt_fwd ^ top_swap;
6048
6049 read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
6050 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6051 if (sub) {
6052 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6053 } else {
6054 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6055 }
6056 write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
6057 }
6058 clear_vec_high(s, 1, a->rd);
6059 return true;
6060 }
6061
6062 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
6063 TRANS(UADDW, do_addsub_wide, a, 0, false)
6064 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
6065 TRANS(USUBW, do_addsub_wide, a, 0, true)
6066
6067 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
6068 bool sub, bool round)
6069 {
6070 TCGv_i64 tcg_op0, tcg_op1;
6071 MemOp esz = a->esz;
6072 int half = 8 >> esz;
6073 bool top = a->q;
6074 int ebits = 8 << esz;
6075 uint64_t rbit = 1ull << (ebits - 1);
6076 int top_swap, top_half;
6077
6078 /* There are no 128x128->64 bit operations. */
6079 if (esz >= MO_64) {
6080 return false;
6081 }
6082 if (!fp_access_check(s)) {
6083 return true;
6084 }
6085 tcg_op0 = tcg_temp_new_i64();
6086 tcg_op1 = tcg_temp_new_i64();
6087
6088 /*
6089 * For top half inputs, iterate backward; forward for bottom half.
6090 * This means the store to the destination will not occur until
6091 * overlapping inputs are consumed.
6092 */
6093 top_swap = top ? half - 1 : 0;
6094 top_half = top ? half : 0;
6095
6096 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6097 int elt = elt_fwd ^ top_swap;
6098
6099 read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
6100 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6101 if (sub) {
6102 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6103 } else {
6104 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6105 }
6106 if (round) {
6107 tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
6108 }
6109 tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
6110 write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
6111 }
6112 clear_vec_high(s, top, a->rd);
6113 return true;
6114 }
6115
6116 TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
6117 TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
6118 TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
6119 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
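/*
 * For example, RADDHN Vd.8B, Vn.8H, Vm.8H computes, per lane,
 *     Vd.B[i] = (Vn.H[i] + Vm.H[i] + 0x80) >> 8
 * i.e. the rounded high half of the 16-bit sum; the "2" variants
 * (q=1) write the upper 64 bits of Vd and leave the lower half intact.
 */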
6120
6121 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
6122 {
6123 if (fp_access_check(s)) {
6124 /* The Q field specifies lo/hi half input for these insns. */
6125 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
6126 }
6127 return true;
6128 }
6129
6130 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
6131 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
6132
6133 /*
6134 * Advanced SIMD scalar/vector x indexed element
6135 */
6136
6137 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
6138 {
6139 switch (a->esz) {
6140 case MO_64:
6141 if (fp_access_check(s)) {
6142 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
6143 TCGv_i64 t1 = tcg_temp_new_i64();
6144
6145 read_vec_element(s, t1, a->rm, a->idx, MO_64);
6146 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6147 write_fp_dreg(s, a->rd, t0);
6148 }
6149 break;
6150 case MO_32:
6151 if (fp_access_check(s)) {
6152 TCGv_i32 t0 = read_fp_sreg(s, a->rn);
6153 TCGv_i32 t1 = tcg_temp_new_i32();
6154
6155 read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
6156 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6157 write_fp_sreg(s, a->rd, t0);
6158 }
6159 break;
6160 case MO_16:
6161 if (!dc_isar_feature(aa64_fp16, s)) {
6162 return false;
6163 }
6164 if (fp_access_check(s)) {
6165 TCGv_i32 t0 = read_fp_hreg(s, a->rn);
6166 TCGv_i32 t1 = tcg_temp_new_i32();
6167
6168 read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
6169 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
6170 write_fp_sreg(s, a->rd, t0);
6171 }
6172 break;
6173 default:
6174 g_assert_not_reached();
6175 }
6176 return true;
6177 }
6178
6179 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
6180 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
6181
6182 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
6183 {
6184 switch (a->esz) {
6185 case MO_64:
6186 if (fp_access_check(s)) {
6187 TCGv_i64 t0 = read_fp_dreg(s, a->rd);
6188 TCGv_i64 t1 = read_fp_dreg(s, a->rn);
6189 TCGv_i64 t2 = tcg_temp_new_i64();
6190
6191 read_vec_element(s, t2, a->rm, a->idx, MO_64);
6192 if (neg) {
6193 gen_vfp_negd(t1, t1);
6194 }
6195 gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
6196 write_fp_dreg(s, a->rd, t0);
6197 }
6198 break;
6199 case MO_32:
6200 if (fp_access_check(s)) {
6201 TCGv_i32 t0 = read_fp_sreg(s, a->rd);
6202 TCGv_i32 t1 = read_fp_sreg(s, a->rn);
6203 TCGv_i32 t2 = tcg_temp_new_i32();
6204
6205 read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
6206 if (neg) {
6207 gen_vfp_negs(t1, t1);
6208 }
6209 gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR));
6210 write_fp_sreg(s, a->rd, t0);
6211 }
6212 break;
6213 case MO_16:
6214 if (!dc_isar_feature(aa64_fp16, s)) {
6215 return false;
6216 }
6217 if (fp_access_check(s)) {
6218 TCGv_i32 t0 = read_fp_hreg(s, a->rd);
6219 TCGv_i32 t1 = read_fp_hreg(s, a->rn);
6220 TCGv_i32 t2 = tcg_temp_new_i32();
6221
6222 read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
6223 if (neg) {
6224 gen_vfp_negh(t1, t1);
6225 }
6226 gen_helper_advsimd_muladdh(t0, t1, t2, t0,
6227 fpstatus_ptr(FPST_FPCR_F16));
6228 write_fp_sreg(s, a->rd, t0);
6229 }
6230 break;
6231 default:
6232 g_assert_not_reached();
6233 }
6234 return true;
6235 }
6236
6237 TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
6238 TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
6239
6240 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
6241 const ENVScalar2 *f)
6242 {
6243 if (a->esz < MO_16 || a->esz > MO_32) {
6244 return false;
6245 }
6246 if (fp_access_check(s)) {
6247 TCGv_i32 t0 = tcg_temp_new_i32();
6248 TCGv_i32 t1 = tcg_temp_new_i32();
6249
6250 read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6251 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6252 f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
6253 write_fp_sreg(s, a->rd, t0);
6254 }
6255 return true;
6256 }
6257
6258 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
6259 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
6260
6261 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
6262 const ENVScalar3 *f)
6263 {
6264 if (a->esz < MO_16 || a->esz > MO_32) {
6265 return false;
6266 }
6267 if (fp_access_check(s)) {
6268 TCGv_i32 t0 = tcg_temp_new_i32();
6269 TCGv_i32 t1 = tcg_temp_new_i32();
6270 TCGv_i32 t2 = tcg_temp_new_i32();
6271
6272 read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6273 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6274 read_vec_element_i32(s, t2, a->rd, 0, a->esz);
6275 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
6276 write_fp_sreg(s, a->rd, t0);
6277 }
6278 return true;
6279 }
6280
6281 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
6282 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
6283
6284 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
6285 NeonGenTwo64OpFn *fn, bool acc)
6286 {
6287 if (fp_access_check(s)) {
6288 TCGv_i64 t0 = tcg_temp_new_i64();
6289 TCGv_i64 t1 = tcg_temp_new_i64();
6290 TCGv_i64 t2 = tcg_temp_new_i64();
6291 unsigned vsz, dofs;
6292
6293 if (acc) {
6294 read_vec_element(s, t0, a->rd, 0, a->esz + 1);
6295 }
6296 read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
6297 read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
6298 fn(t0, t1, t2);
6299
6300 /* Clear the whole register first, then store scalar. */
6301 vsz = vec_full_reg_size(s);
6302 dofs = vec_full_reg_offset(s, a->rd);
6303 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
6304 write_vec_element(s, t0, a->rd, 0, a->esz + 1);
6305 }
6306 return true;
6307 }
6308
6309 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
6310 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6311 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
6312 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6313 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
6314 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6315
6316 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6317 gen_helper_gvec_3_ptr * const fns[3])
6318 {
6319 MemOp esz = a->esz;
6320
6321 switch (esz) {
6322 case MO_64:
6323 if (!a->q) {
6324 return false;
6325 }
6326 break;
6327 case MO_32:
6328 break;
6329 case MO_16:
6330 if (!dc_isar_feature(aa64_fp16, s)) {
6331 return false;
6332 }
6333 break;
6334 default:
6335 g_assert_not_reached();
6336 }
6337 if (fp_access_check(s)) {
6338 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
6339 esz == MO_16, a->idx, fns[esz - 1]);
6340 }
6341 return true;
6342 }
6343
6344 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
6345 gen_helper_gvec_fmul_idx_h,
6346 gen_helper_gvec_fmul_idx_s,
6347 gen_helper_gvec_fmul_idx_d,
6348 };
6349 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
6350
6351 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
6352 gen_helper_gvec_fmulx_idx_h,
6353 gen_helper_gvec_fmulx_idx_s,
6354 gen_helper_gvec_fmulx_idx_d,
6355 };
6356 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
6357
6358 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
6359 {
6360 static gen_helper_gvec_4_ptr * const fns[3] = {
6361 gen_helper_gvec_fmla_idx_h,
6362 gen_helper_gvec_fmla_idx_s,
6363 gen_helper_gvec_fmla_idx_d,
6364 };
6365 MemOp esz = a->esz;
6366
6367 switch (esz) {
6368 case MO_64:
6369 if (!a->q) {
6370 return false;
6371 }
6372 break;
6373 case MO_32:
6374 break;
6375 case MO_16:
6376 if (!dc_isar_feature(aa64_fp16, s)) {
6377 return false;
6378 }
6379 break;
6380 default:
6381 g_assert_not_reached();
6382 }
6383 if (fp_access_check(s)) {
6384 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6385 esz == MO_16, (a->idx << 1) | neg,
6386 fns[esz - 1]);
6387 }
6388 return true;
6389 }
6390
6391 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
6392 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
6393
6394 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
6395 {
6396 if (fp_access_check(s)) {
6397 int data = (a->idx << 2) | (is_2 << 1) | is_s;
6398 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
6399 vec_full_reg_offset(s, a->rn),
6400 vec_full_reg_offset(s, a->rm), tcg_env,
6401 a->q ? 16 : 8, vec_full_reg_size(s),
6402 data, gen_helper_gvec_fmlal_idx_a64);
6403 }
6404 return true;
6405 }
6406
6407 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
6408 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
6409 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
6410 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
6411
6412 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6413 gen_helper_gvec_3 * const fns[2])
6414 {
6415 assert(a->esz == MO_16 || a->esz == MO_32);
6416 if (fp_access_check(s)) {
6417 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]);
6418 }
6419 return true;
6420 }
6421
6422 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = {
6423 gen_helper_gvec_mul_idx_h,
6424 gen_helper_gvec_mul_idx_s,
6425 };
6426 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul)
6427
6428 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub)
6429 {
6430 static gen_helper_gvec_4 * const fns[2][2] = {
6431 { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h },
6432 { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s },
6433 };
6434
6435 assert(a->esz == MO_16 || a->esz == MO_32);
6436 if (fp_access_check(s)) {
6437 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd,
6438 a->idx, fns[a->esz - 1][sub]);
6439 }
6440 return true;
6441 }
6442
6443 TRANS(MLA_vi, do_mla_vector_idx, a, false)
6444 TRANS(MLS_vi, do_mla_vector_idx, a, true)
6445
6446 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a,
6447 gen_helper_gvec_4 * const fns[2])
6448 {
6449 assert(a->esz == MO_16 || a->esz == MO_32);
6450 if (fp_access_check(s)) {
6451 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
6452 vec_full_reg_offset(s, a->rn),
6453 vec_full_reg_offset(s, a->rm),
6454 offsetof(CPUARMState, vfp.qc),
6455 a->q ? 16 : 8, vec_full_reg_size(s),
6456 a->idx, fns[a->esz - 1]);
6457 }
6458 return true;
6459 }
6460
6461 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = {
6462 gen_helper_neon_sqdmulh_idx_h,
6463 gen_helper_neon_sqdmulh_idx_s,
6464 };
6465 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh)
6466
6467 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
6468 gen_helper_neon_sqrdmulh_idx_h,
6469 gen_helper_neon_sqrdmulh_idx_s,
6470 };
6471 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
6472
6473 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = {
6474 gen_helper_neon_sqrdmlah_idx_h,
6475 gen_helper_neon_sqrdmlah_idx_s,
6476 };
6477 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6478 f_vector_idx_sqrdmlah)
6479
6480 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = {
6481 gen_helper_neon_sqrdmlsh_idx_h,
6482 gen_helper_neon_sqrdmlsh_idx_s,
6483 };
6484 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6485 f_vector_idx_sqrdmlsh)
6486
6487 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
6488 gen_helper_gvec_4 *fn)
6489 {
6490 if (fp_access_check(s)) {
6491 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6492 }
6493 return true;
6494 }
6495
6496 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
6497 gen_helper_gvec_4_ptr *fn)
6498 {
6499 if (fp_access_check(s)) {
6500 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6501 }
6502 return true;
6503 }
6504
6505 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
6506 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
6507 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6508 gen_helper_gvec_sudot_idx_b)
6509 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6510 gen_helper_gvec_usdot_idx_b)
6511 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
6512 gen_helper_gvec_bfdot_idx)
6513
6514 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
6515 {
6516 if (!dc_isar_feature(aa64_bf16, s)) {
6517 return false;
6518 }
6519 if (fp_access_check(s)) {
6520 /* Q bit selects BFMLALB vs BFMLALT. */
6521 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0,
6522 (a->idx << 1) | a->q,
6523 gen_helper_gvec_bfmlal_idx);
6524 }
6525 return true;
6526 }
6527
6528 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
6529 {
6530 gen_helper_gvec_4_ptr *fn;
6531
6532 if (!dc_isar_feature(aa64_fcma, s)) {
6533 return false;
6534 }
6535 switch (a->esz) {
6536 case MO_16:
6537 if (!dc_isar_feature(aa64_fp16, s)) {
6538 return false;
6539 }
6540 fn = gen_helper_gvec_fcmlah_idx;
6541 break;
6542 case MO_32:
6543 fn = gen_helper_gvec_fcmlas_idx;
6544 break;
6545 default:
6546 g_assert_not_reached();
6547 }
6548 if (fp_access_check(s)) {
6549 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6550 a->esz == MO_16, (a->idx << 2) | a->rot, fn);
6551 }
6552 return true;
6553 }
6554
6555 /*
6556 * Advanced SIMD scalar pairwise
6557 */
6558
6559 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
6560 {
6561 switch (a->esz) {
6562 case MO_64:
6563 if (fp_access_check(s)) {
6564 TCGv_i64 t0 = tcg_temp_new_i64();
6565 TCGv_i64 t1 = tcg_temp_new_i64();
6566
6567 read_vec_element(s, t0, a->rn, 0, MO_64);
6568 read_vec_element(s, t1, a->rn, 1, MO_64);
6569 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6570 write_fp_dreg(s, a->rd, t0);
6571 }
6572 break;
6573 case MO_32:
6574 if (fp_access_check(s)) {
6575 TCGv_i32 t0 = tcg_temp_new_i32();
6576 TCGv_i32 t1 = tcg_temp_new_i32();
6577
6578 read_vec_element_i32(s, t0, a->rn, 0, MO_32);
6579 read_vec_element_i32(s, t1, a->rn, 1, MO_32);
6580 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR));
6581 write_fp_sreg(s, a->rd, t0);
6582 }
6583 break;
6584 case MO_16:
6585 if (!dc_isar_feature(aa64_fp16, s)) {
6586 return false;
6587 }
6588 if (fp_access_check(s)) {
6589 TCGv_i32 t0 = tcg_temp_new_i32();
6590 TCGv_i32 t1 = tcg_temp_new_i32();
6591
6592 read_vec_element_i32(s, t0, a->rn, 0, MO_16);
6593 read_vec_element_i32(s, t1, a->rn, 1, MO_16);
6594 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16));
6595 write_fp_sreg(s, a->rd, t0);
6596 }
6597 break;
6598 default:
6599 g_assert_not_reached();
6600 }
6601 return true;
6602 }
6603
6604 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
6605 TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax)
6606 TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin)
6607 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
6608 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
6609
6610 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
6611 {
6612 if (fp_access_check(s)) {
6613 TCGv_i64 t0 = tcg_temp_new_i64();
6614 TCGv_i64 t1 = tcg_temp_new_i64();
6615
6616 read_vec_element(s, t0, a->rn, 0, MO_64);
6617 read_vec_element(s, t1, a->rn, 1, MO_64);
6618 tcg_gen_add_i64(t0, t0, t1);
6619 write_fp_dreg(s, a->rd, t0);
6620 }
6621 return true;
6622 }
6623
6624 /*
6625 * Floating-point conditional select
6626 */
6627
6628 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
6629 {
6630 TCGv_i64 t_true, t_false;
6631 DisasCompare64 c;
6632
6633 switch (a->esz) {
6634 case MO_32:
6635 case MO_64:
6636 break;
6637 case MO_16:
6638 if (!dc_isar_feature(aa64_fp16, s)) {
6639 return false;
6640 }
6641 break;
6642 default:
6643 return false;
6644 }
6645
6646 if (!fp_access_check(s)) {
6647 return true;
6648 }
6649
6650 /* Zero extend sreg & hreg inputs to 64 bits now. */
6651 t_true = tcg_temp_new_i64();
6652 t_false = tcg_temp_new_i64();
6653 read_vec_element(s, t_true, a->rn, 0, a->esz);
6654 read_vec_element(s, t_false, a->rm, 0, a->esz);
6655
6656 a64_test_cc(&c, a->cond);
6657 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
6658 t_true, t_false);
6659
6660 /*
6661 * Note that sregs & hregs write back zeros to the high bits,
6662 * and we've already done the zero-extension.
6663 */
6664 write_fp_dreg(s, a->rd, t_true);
6665 return true;
6666 }
6667
6668 /*
6669 * Advanced SIMD Extract
6670 */
6671
6672 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a)
6673 {
6674 if (fp_access_check(s)) {
6675 TCGv_i64 lo = read_fp_dreg(s, a->rn);
6676 if (a->imm != 0) {
6677 TCGv_i64 hi = read_fp_dreg(s, a->rm);
6678 tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8);
6679 }
6680 write_fp_dreg(s, a->rd, lo);
6681 }
6682 return true;
6683 }
6684
6685 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a)
6686 {
6687 TCGv_i64 lo, hi;
6688 int pos = (a->imm & 7) * 8;
6689 int elt = a->imm >> 3;
6690
6691 if (!fp_access_check(s)) {
6692 return true;
6693 }
6694
6695 lo = tcg_temp_new_i64();
6696 hi = tcg_temp_new_i64();
6697
6698 read_vec_element(s, lo, a->rn, elt, MO_64);
6699 elt++;
6700 read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64);
6701 elt++;
6702
6703 if (pos != 0) {
6704 TCGv_i64 hh = tcg_temp_new_i64();
6705 tcg_gen_extract2_i64(lo, lo, hi, pos);
6706 read_vec_element(s, hh, a->rm, elt & 1, MO_64);
6707 tcg_gen_extract2_i64(hi, hi, hh, pos);
6708 }
6709
6710 write_vec_element(s, lo, a->rd, 0, MO_64);
6711 write_vec_element(s, hi, a->rd, 1, MO_64);
6712 clear_vec_high(s, true, a->rd);
6713 return true;
6714 }
6715
6716 /*
6717 * Floating-point data-processing (3 source)
6718 */
6719
6720 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
6721 {
6722 TCGv_ptr fpst;
6723
6724 /*
6725 * These are fused multiply-add. Note that doing the negations here
6726 * as separate steps is correct: an input NaN should come out with
6727 * its sign bit flipped if it is a negated input.
6728 */
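/*
 * The four encodings thus compute:
 *   FMADD:  rd =  ra + rn * rm   (neg_a = false, neg_n = false)
 *   FMSUB:  rd =  ra - rn * rm   (neg_a = false, neg_n = true)
 *   FNMADD: rd = -ra - rn * rm   (neg_a = true,  neg_n = true)
 *   FNMSUB: rd = -ra + rn * rm   (neg_a = true,  neg_n = false)
 * each as a single fused multiply-add.
 */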
6729 switch (a->esz) {
6730 case MO_64:
6731 if (fp_access_check(s)) {
6732 TCGv_i64 tn = read_fp_dreg(s, a->rn);
6733 TCGv_i64 tm = read_fp_dreg(s, a->rm);
6734 TCGv_i64 ta = read_fp_dreg(s, a->ra);
6735
6736 if (neg_a) {
6737 gen_vfp_negd(ta, ta);
6738 }
6739 if (neg_n) {
6740 gen_vfp_negd(tn, tn);
6741 }
6742 fpst = fpstatus_ptr(FPST_FPCR);
6743 gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
6744 write_fp_dreg(s, a->rd, ta);
6745 }
6746 break;
6747
6748 case MO_32:
6749 if (fp_access_check(s)) {
6750 TCGv_i32 tn = read_fp_sreg(s, a->rn);
6751 TCGv_i32 tm = read_fp_sreg(s, a->rm);
6752 TCGv_i32 ta = read_fp_sreg(s, a->ra);
6753
6754 if (neg_a) {
6755 gen_vfp_negs(ta, ta);
6756 }
6757 if (neg_n) {
6758 gen_vfp_negs(tn, tn);
6759 }
6760 fpst = fpstatus_ptr(FPST_FPCR);
6761 gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
6762 write_fp_sreg(s, a->rd, ta);
6763 }
6764 break;
6765
6766 case MO_16:
6767 if (!dc_isar_feature(aa64_fp16, s)) {
6768 return false;
6769 }
6770 if (fp_access_check(s)) {
6771 TCGv_i32 tn = read_fp_hreg(s, a->rn);
6772 TCGv_i32 tm = read_fp_hreg(s, a->rm);
6773 TCGv_i32 ta = read_fp_hreg(s, a->ra);
6774
6775 if (neg_a) {
6776 gen_vfp_negh(ta, ta);
6777 }
6778 if (neg_n) {
6779 gen_vfp_negh(tn, tn);
6780 }
6781 fpst = fpstatus_ptr(FPST_FPCR_F16);
6782 gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
6783 write_fp_sreg(s, a->rd, ta);
6784 }
6785 break;
6786
6787 default:
6788 return false;
6789 }
6790 return true;
6791 }
6792
6793 TRANS(FMADD, do_fmadd, a, false, false)
6794 TRANS(FNMADD, do_fmadd, a, true, true)
6795 TRANS(FMSUB, do_fmadd, a, false, true)
6796 TRANS(FNMSUB, do_fmadd, a, true, false)
6797
6798 /*
6799 * Advanced SIMD Across Lanes
6800 */
6801
6802 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
6803 MemOp src_sign, NeonGenTwo64OpFn *fn)
6804 {
6805 TCGv_i64 tcg_res, tcg_elt;
6806 MemOp src_mop = a->esz | src_sign;
6807 int elements = (a->q ? 16 : 8) >> a->esz;
6808
6809 /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
6810 if (elements < 4) {
6811 return false;
6812 }
6813 if (!fp_access_check(s)) {
6814 return true;
6815 }
6816
6817 tcg_res = tcg_temp_new_i64();
6818 tcg_elt = tcg_temp_new_i64();
6819
6820 read_vec_element(s, tcg_res, a->rn, 0, src_mop);
6821 for (int i = 1; i < elements; i++) {
6822 read_vec_element(s, tcg_elt, a->rn, i, src_mop);
6823 fn(tcg_res, tcg_res, tcg_elt);
6824 }
6825
6826 tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
6827 write_fp_dreg(s, a->rd, tcg_res);
6828 return true;
6829 }
6830
6831 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
6832 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
6833 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
6834 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
6835 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
6836 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
6837 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
6838
6839 /*
6840 * do_fp_reduction helper
6841 *
6842 * This mirrors the Reduce() pseudocode in the ARM ARM. It is
6843 * important for correct NaN propagation that we do these
6844 * operations in exactly the order specified by the pseudocode.
6845 *
6846 * This is a recursive function.
6847 */
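/*
 * For example, a 4-element reduction evaluates
 *     fn(fn(elem[0], elem[1]), fn(elem[2], elem[3]))
 * rather than a linear left-to-right fold.
 */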
6848 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
6849 int ebase, int ecount, TCGv_ptr fpst,
6850 NeonGenTwoSingleOpFn *fn)
6851 {
6852 if (ecount == 1) {
6853 TCGv_i32 tcg_elem = tcg_temp_new_i32();
6854 read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
6855 return tcg_elem;
6856 } else {
6857 int half = ecount >> 1;
6858 TCGv_i32 tcg_hi, tcg_lo, tcg_res;
6859
6860 tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
6861 tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
6862 tcg_res = tcg_temp_new_i32();
6863
6864 fn(tcg_res, tcg_lo, tcg_hi, fpst);
6865 return tcg_res;
6866 }
6867 }
6868
6869 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
6870 NeonGenTwoSingleOpFn *fn)
6871 {
6872 if (fp_access_check(s)) {
6873 MemOp esz = a->esz;
6874 int elts = (a->q ? 16 : 8) >> esz;
6875 TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
6876 TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn);
6877 write_fp_sreg(s, a->rd, res);
6878 }
6879 return true;
6880 }
6881
6882 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxnumh)
6883 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minnumh)
6884 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxh)
6885 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minh)
6886
6887 TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums)
6888 TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums)
6889 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs)
6890 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins)
6891
6892 /*
6893 * Floating-point Immediate
6894 */
6895
6896 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a)
6897 {
6898 switch (a->esz) {
6899 case MO_32:
6900 case MO_64:
6901 break;
6902 case MO_16:
6903 if (!dc_isar_feature(aa64_fp16, s)) {
6904 return false;
6905 }
6906 break;
6907 default:
6908 return false;
6909 }
6910 if (fp_access_check(s)) {
6911 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
6912 write_fp_dreg(s, a->rd, tcg_constant_i64(imm));
6913 }
6914 return true;
6915 }
6916
6917 /*
6918 * Advanced SIMD Modified Immediate
6919 */
6920
6921 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a)
6922 {
6923 if (!dc_isar_feature(aa64_fp16, s)) {
6924 return false;
6925 }
6926 if (fp_access_check(s)) {
6927 tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd),
6928 a->q ? 16 : 8, vec_full_reg_size(s),
6929 vfp_expand_imm(MO_16, a->abcdefgh));
6930 }
6931 return true;
6932 }
6933
6934 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs,
6935 int64_t c, uint32_t oprsz, uint32_t maxsz)
6936 {
6937 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
6938 }
6939
6940 static bool trans_Vimm(DisasContext *s, arg_Vimm *a)
6941 {
6942 GVecGen2iFn *fn;
6943
6944 /* Handle decode of cmode/op here between ORR/BIC/MOVI */
6945 if ((a->cmode & 1) && a->cmode < 12) {
6946 /* For op=1, the imm will be inverted, so BIC becomes AND. */
6947 fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
6948 } else {
6949 /* There is one unallocated cmode/op combination in this space */
6950 if (a->cmode == 15 && a->op == 1 && a->q == 0) {
6951 return false;
6952 }
6953 fn = gen_movi;
6954 }
6955
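/*
 * asimd_imm_const() expands abcdefgh according to cmode/op: for
 * example cmode=0b1110, op=0 replicates the 8-bit value into every
 * byte of the 64-bit immediate (MOVI Vd.16B, #imm8), while the odd
 * cmode values selected above place the byte within each 32-bit
 * (cmode 1,3,5,7) or 16-bit (cmode 9,11) lane, used here as the
 * ORR (op=0) or BIC (op=1) mask.
 */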
6956 if (fp_access_check(s)) {
6957 uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
6958 gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
6959 }
6960 return true;
6961 }
6962
6963 /*
6964 * Advanced SIMD Shift by Immediate
6965 */
6966
6967 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
6968 {
6969 if (fp_access_check(s)) {
6970 gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
6971 }
6972 return true;
6973 }
6974
6975 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
6976 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
6977 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
6978 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
6979 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
6980 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
6981 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
6982 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
6983 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
6984 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
6985 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli)
6986 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
6987 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
6988 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)
6989
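/*
 * SSHLL/USHLL (and the "2" forms selected by Q): widen each element of
 * the low or high 64 bits of Vn to twice its size, shift it left by
 * the immediate and write the result to Vd.
 */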
6990 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
6991 {
6992 TCGv_i64 tcg_rn, tcg_rd;
6993 int esz = a->esz;
6994 int esize;
6995
6996 if (!fp_access_check(s)) {
6997 return true;
6998 }
6999
7000 /*
7001 * For the LL variants the store is larger than the load,
7002 * so if rd == rn we would overwrite parts of our input.
7003 * So load everything right now and use shifts in the main loop.
7004 */
7005 tcg_rd = tcg_temp_new_i64();
7006 tcg_rn = tcg_temp_new_i64();
7007 read_vec_element(s, tcg_rn, a->rn, a->q, MO_64);
7008
7009 esize = 8 << esz;
7010 for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7011 if (is_u) {
7012 tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize);
7013 } else {
7014 tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize);
7015 }
7016 tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm);
7017 write_vec_element(s, tcg_rd, a->rd, i, esz + 1);
7018 }
7019 clear_vec_high(s, true, a->rd);
7020 return true;
7021 }
7022
7023 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false)
7024 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true)
7025
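/*
 * 64-bit element shifts where the immediate may equal the element
 * size: an arithmetic shift right by 64 is equivalent to a shift by
 * 63, since the result is a pure replication of the sign bit, whereas
 * a logical shift right by 64 must produce zero explicitly.
 */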
7026 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7027 {
7028 assert(shift >= 0 && shift <= 64);
7029 tcg_gen_sari_i64(dst, src, MIN(shift, 63));
7030 }
7031
7032 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7033 {
7034 assert(shift >= 0 && shift <= 64);
7035 if (shift == 64) {
7036 tcg_gen_movi_i64(dst, 0);
7037 } else {
7038 tcg_gen_shri_i64(dst, src, shift);
7039 }
7040 }
7041
7042 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7043 {
7044 gen_sshr_d(src, src, shift);
7045 tcg_gen_add_i64(dst, dst, src);
7046 }
7047
7048 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7049 {
7050 gen_ushr_d(src, src, shift);
7051 tcg_gen_add_i64(dst, dst, src);
7052 }
7053
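/*
 * Rounding shifts for byte/half/word elements operate on values
 * already sign- or zero-extended into 64 bits, so adding the rounding
 * constant before the shift cannot overflow.  The 64-bit variants
 * below instead extract the rounding bit and add it after the shift.
 */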
7054 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7055 {
7056 assert(shift >= 0 && shift <= 32);
7057 if (shift) {
7058 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7059 tcg_gen_add_i64(dst, src, rnd);
7060 tcg_gen_sari_i64(dst, dst, shift);
7061 } else {
7062 tcg_gen_mov_i64(dst, src);
7063 }
7064 }
7065
7066 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7067 {
7068 assert(shift >= 0 && shift <= 32);
7069 if (shift) {
7070 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7071 tcg_gen_add_i64(dst, src, rnd);
7072 tcg_gen_shri_i64(dst, dst, shift);
7073 } else {
7074 tcg_gen_mov_i64(dst, src);
7075 }
7076 }
7077
7078 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7079 {
7080 assert(shift >= 0 && shift <= 64);
7081 if (shift == 0) {
7082 tcg_gen_mov_i64(dst, src);
7083 } else if (shift == 64) {
7084 /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */
7085 tcg_gen_movi_i64(dst, 0);
7086 } else {
7087 TCGv_i64 rnd = tcg_temp_new_i64();
7088 tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7089 tcg_gen_sari_i64(dst, src, shift);
7090 tcg_gen_add_i64(dst, dst, rnd);
7091 }
7092 }
7093
7094 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7095 {
7096 assert(shift >= 0 && shift <= 64);
7097 if (shift == 0) {
7098 tcg_gen_mov_i64(dst, src);
7099 } else if (shift == 64) {
7100 /* Rounding will propagate bit 63 into bit 64. */
7101 tcg_gen_shri_i64(dst, src, 63);
7102 } else {
7103 TCGv_i64 rnd = tcg_temp_new_i64();
7104 tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7105 tcg_gen_shri_i64(dst, src, shift);
7106 tcg_gen_add_i64(dst, dst, rnd);
7107 }
7108 }
7109
7110 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7111 {
7112 gen_srshr_d(src, src, shift);
7113 tcg_gen_add_i64(dst, dst, src);
7114 }
7115
7116 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7117 {
7118 gen_urshr_d(src, src, shift);
7119 tcg_gen_add_i64(dst, dst, src);
7120 }
7121
7122 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7123 {
7124 /* If shift is 64, dst is unchanged. */
7125 if (shift != 64) {
7126 tcg_gen_shri_i64(src, src, shift);
7127 tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift);
7128 }
7129 }
7130
7131 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7132 {
7133 tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift);
7134 }
7135
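/*
 * Narrowing shifts (SHRN, SQSHRN, etc and their "2" forms): each wide
 * source element is shifted (and possibly saturated) by FN into the
 * low half of its width, and the results are packed into the 64-bit
 * half of Vd selected by Q: the low half for the base form (with the
 * high half zeroed) or the high half for the "2" form.
 */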
7136 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a,
7137 WideShiftImmFn * const fns[3], MemOp sign)
7138 {
7139 TCGv_i64 tcg_rn, tcg_rd;
7140 int esz = a->esz;
7141 int esize;
7142 WideShiftImmFn *fn;
7143
7144 tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7145
7146 if (!fp_access_check(s)) {
7147 return true;
7148 }
7149
7150 tcg_rn = tcg_temp_new_i64();
7151 tcg_rd = tcg_temp_new_i64();
7152 tcg_gen_movi_i64(tcg_rd, 0);
7153
7154 fn = fns[esz];
7155 esize = 8 << esz;
7156 for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7157 read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign);
7158 fn(tcg_rn, tcg_rn, a->imm);
7159 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize);
7160 }
7161
7162 write_vec_element(s, tcg_rd, a->rd, a->q, MO_64);
7163 clear_vec_high(s, a->q, a->rd);
7164 return true;
7165 }
7166
7167 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7168 {
7169 tcg_gen_sari_i64(d, s, i);
7170 tcg_gen_ext16u_i64(d, d);
7171 gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7172 }
7173
7174 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7175 {
7176 tcg_gen_sari_i64(d, s, i);
7177 tcg_gen_ext32u_i64(d, d);
7178 gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7179 }
7180
7181 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7182 {
7183 gen_sshr_d(d, s, i);
7184 gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7185 }
7186
7187 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7188 {
7189 tcg_gen_shri_i64(d, s, i);
7190 gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7191 }
7192
7193 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7194 {
7195 tcg_gen_shri_i64(d, s, i);
7196 gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7197 }
7198
7199 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7200 {
7201 gen_ushr_d(d, s, i);
7202 gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7203 }
7204
7205 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7206 {
7207 tcg_gen_sari_i64(d, s, i);
7208 tcg_gen_ext16u_i64(d, d);
7209 gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7210 }
7211
7212 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7213 {
7214 tcg_gen_sari_i64(d, s, i);
7215 tcg_gen_ext32u_i64(d, d);
7216 gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7217 }
7218
7219 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7220 {
7221 gen_sshr_d(d, s, i);
7222 gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7223 }
7224
7225 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7226 {
7227 gen_srshr_bhs(d, s, i);
7228 tcg_gen_ext16u_i64(d, d);
7229 gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7230 }
7231
7232 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7233 {
7234 gen_srshr_bhs(d, s, i);
7235 tcg_gen_ext32u_i64(d, d);
7236 gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7237 }
7238
7239 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7240 {
7241 gen_srshr_d(d, s, i);
7242 gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7243 }
7244
7245 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7246 {
7247 gen_urshr_bhs(d, s, i);
7248 gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7249 }
7250
7251 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7252 {
7253 gen_urshr_bhs(d, s, i);
7254 gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7255 }
7256
7257 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7258 {
7259 gen_urshr_d(d, s, i);
7260 gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7261 }
7262
7263 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7264 {
7265 gen_srshr_bhs(d, s, i);
7266 tcg_gen_ext16u_i64(d, d);
7267 gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7268 }
7269
7270 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7271 {
7272 gen_srshr_bhs(d, s, i);
7273 tcg_gen_ext32u_i64(d, d);
7274 gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7275 }
7276
7277 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7278 {
7279 gen_srshr_d(d, s, i);
7280 gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7281 }
7282
7283 static WideShiftImmFn * const shrn_fns[] = {
7284 tcg_gen_shri_i64,
7285 tcg_gen_shri_i64,
7286 gen_ushr_d,
7287 };
7288 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0)
7289
7290 static WideShiftImmFn * const rshrn_fns[] = {
7291 gen_urshr_bhs,
7292 gen_urshr_bhs,
7293 gen_urshr_d,
7294 };
7295 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0)
7296
7297 static WideShiftImmFn * const sqshrn_fns[] = {
7298 gen_sqshrn_b,
7299 gen_sqshrn_h,
7300 gen_sqshrn_s,
7301 };
7302 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN)
7303
7304 static WideShiftImmFn * const uqshrn_fns[] = {
7305 gen_uqshrn_b,
7306 gen_uqshrn_h,
7307 gen_uqshrn_s,
7308 };
7309 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0)
7310
7311 static WideShiftImmFn * const sqshrun_fns[] = {
7312 gen_sqshrun_b,
7313 gen_sqshrun_h,
7314 gen_sqshrun_s,
7315 };
7316 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN)
7317
7318 static WideShiftImmFn * const sqrshrn_fns[] = {
7319 gen_sqrshrn_b,
7320 gen_sqrshrn_h,
7321 gen_sqrshrn_s,
7322 };
7323 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN)
7324
7325 static WideShiftImmFn * const uqrshrn_fns[] = {
7326 gen_uqrshrn_b,
7327 gen_uqrshrn_h,
7328 gen_uqrshrn_s,
7329 };
7330 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0)
7331
7332 static WideShiftImmFn * const sqrshrun_fns[] = {
7333 gen_sqrshrun_b,
7334 gen_sqrshrun_h,
7335 gen_sqrshrun_s,
7336 };
7337 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN)
7338
7339 /*
7340 * Advanced SIMD Scalar Shift by Immediate
7341 */
7342
7343 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a,
7344 WideShiftImmFn *fn, bool accumulate,
7345 MemOp sign)
7346 {
7347 if (fp_access_check(s)) {
7348 TCGv_i64 rd = tcg_temp_new_i64();
7349 TCGv_i64 rn = tcg_temp_new_i64();
7350
7351 read_vec_element(s, rn, a->rn, 0, a->esz | sign);
7352 if (accumulate) {
7353 read_vec_element(s, rd, a->rd, 0, a->esz | sign);
7354 }
7355 fn(rd, rn, a->imm);
7356 write_fp_dreg(s, a->rd, rd);
7357 }
7358 return true;
7359 }
7360
7361 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0)
7362 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0)
7363 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0)
7364 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0)
7365 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0)
7366 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0)
7367 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0)
7368 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0)
7369 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0)
7370
7371 TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0)
7372 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0)
7373
7374 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i,
7375 NeonGenTwoOpEnvFn *fn)
7376 {
7377 TCGv_i32 t = tcg_temp_new_i32();
7378 tcg_gen_extrl_i64_i32(t, s);
7379 fn(t, tcg_env, t, tcg_constant_i32(i));
7380 tcg_gen_extu_i32_i64(d, t);
7381 }
7382
7383 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7384 {
7385 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8);
7386 }
7387
7388 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7389 {
7390 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16);
7391 }
7392
7393 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7394 {
7395 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32);
7396 }
7397
7398 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7399 {
7400 gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i));
7401 }
7402
7403 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7404 {
7405 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8);
7406 }
7407
7408 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7409 {
7410 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16);
7411 }
7412
7413 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7414 {
7415 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32);
7416 }
7417
7418 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7419 {
7420 gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i));
7421 }
7422
7423 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7424 {
7425 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8);
7426 }
7427
7428 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7429 {
7430 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16);
7431 }
7432
7433 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7434 {
7435 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32);
7436 }
7437
7438 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7439 {
7440 gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i));
7441 }
7442
7443 static WideShiftImmFn * const f_scalar_sqshli[] = {
7444 gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d
7445 };
7446
7447 static WideShiftImmFn * const f_scalar_uqshli[] = {
7448 gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d
7449 };
7450
7451 static WideShiftImmFn * const f_scalar_sqshlui[] = {
7452 gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d
7453 };
7454
7455 /* Note that the helpers sign-extend their inputs, so don't do it here. */
7456 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0)
7457 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0)
7458 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0)
7459
7460 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a,
7461 WideShiftImmFn * const fns[3],
7462 MemOp sign, bool zext)
7463 {
7464 MemOp esz = a->esz;
7465
7466 tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7467
7468 if (fp_access_check(s)) {
7469 TCGv_i64 rd = tcg_temp_new_i64();
7470 TCGv_i64 rn = tcg_temp_new_i64();
7471
7472 read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign);
7473 fns[esz](rd, rn, a->imm);
7474 if (zext) {
7475 tcg_gen_ext_i64(rd, rd, esz);
7476 }
7477 write_fp_dreg(s, a->rd, rd);
7478 }
7479 return true;
7480 }
7481
7482 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true)
7483 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true)
7484 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false)
7485 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false)
7486 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false)
7487 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false)
7488
7489 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
7490 * Note that it is the caller's responsibility to ensure that the
7491 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
7492 * mandated semantics for out of range shifts.
7493 */
7494 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
7495 enum a64_shift_type shift_type, TCGv_i64 shift_amount)
7496 {
7497 switch (shift_type) {
7498 case A64_SHIFT_TYPE_LSL:
7499 tcg_gen_shl_i64(dst, src, shift_amount);
7500 break;
7501 case A64_SHIFT_TYPE_LSR:
7502 tcg_gen_shr_i64(dst, src, shift_amount);
7503 break;
7504 case A64_SHIFT_TYPE_ASR:
7505 if (!sf) {
7506 tcg_gen_ext32s_i64(dst, src);
7507 }
7508 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
7509 break;
7510 case A64_SHIFT_TYPE_ROR:
7511 if (sf) {
7512 tcg_gen_rotr_i64(dst, src, shift_amount);
7513 } else {
7514 TCGv_i32 t0, t1;
7515 t0 = tcg_temp_new_i32();
7516 t1 = tcg_temp_new_i32();
7517 tcg_gen_extrl_i64_i32(t0, src);
7518 tcg_gen_extrl_i64_i32(t1, shift_amount);
7519 tcg_gen_rotr_i32(t0, t0, t1);
7520 tcg_gen_extu_i32_i64(dst, t0);
7521 }
7522 break;
7523 default:
7524 assert(FALSE); /* all shift types should be handled */
7525 break;
7526 }
7527
7528 if (!sf) { /* zero extend final result */
7529 tcg_gen_ext32u_i64(dst, dst);
7530 }
7531 }
7532
7533 /* Shift a TCGv src by immediate, put result in dst.
7534 * The shift amount must be in range (this should always be true as the
7535 * relevant instructions will UNDEF on bad shift immediates).
7536 */
7537 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
7538 enum a64_shift_type shift_type, unsigned int shift_i)
7539 {
7540 assert(shift_i < (sf ? 64 : 32));
7541
7542 if (shift_i == 0) {
7543 tcg_gen_mov_i64(dst, src);
7544 } else {
7545 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
7546 }
7547 }
7548
7549 /* Logical (shifted register)
7550 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
7551 * +----+-----+-----------+-------+---+------+--------+------+------+
7552 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd |
7553 * +----+-----+-----------+-------+---+------+--------+------+------+
7554 */
7555 static void disas_logic_reg(DisasContext *s, uint32_t insn)
7556 {
7557 TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
7558 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
7559
7560 sf = extract32(insn, 31, 1);
7561 opc = extract32(insn, 29, 2);
7562 shift_type = extract32(insn, 22, 2);
7563 invert = extract32(insn, 21, 1);
7564 rm = extract32(insn, 16, 5);
7565 shift_amount = extract32(insn, 10, 6);
7566 rn = extract32(insn, 5, 5);
7567 rd = extract32(insn, 0, 5);
7568
7569 if (!sf && (shift_amount & (1 << 5))) {
7570 unallocated_encoding(s);
7571 return;
7572 }
7573
7574 tcg_rd = cpu_reg(s, rd);
7575
7576 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
7577 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
7578 * register-register MOV and MVN, so it is worth special casing.
7579 */
7580 tcg_rm = cpu_reg(s, rm);
7581 if (invert) {
7582 tcg_gen_not_i64(tcg_rd, tcg_rm);
7583 if (!sf) {
7584 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7585 }
7586 } else {
7587 if (sf) {
7588 tcg_gen_mov_i64(tcg_rd, tcg_rm);
7589 } else {
7590 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
7591 }
7592 }
7593 return;
7594 }
7595
7596 tcg_rm = read_cpu_reg(s, rm, sf);
7597
7598 if (shift_amount) {
7599 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
7600 }
7601
7602 tcg_rn = cpu_reg(s, rn);
7603
7604 switch (opc | (invert << 2)) {
7605 case 0: /* AND */
7606 case 3: /* ANDS */
7607 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
7608 break;
7609 case 1: /* ORR */
7610 tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
7611 break;
7612 case 2: /* EOR */
7613 tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
7614 break;
7615 case 4: /* BIC */
7616 case 7: /* BICS */
7617 tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
7618 break;
7619 case 5: /* ORN */
7620 tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
7621 break;
7622 case 6: /* EON */
7623 tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
7624 break;
7625 default:
7626 assert(FALSE);
7627 break;
7628 }
7629
7630 if (!sf) {
7631 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7632 }
7633
7634 if (opc == 3) {
7635 gen_logic_CC(sf, tcg_rd);
7636 }
7637 }
7638
7639 /*
7640 * Add/subtract (extended register)
7641 *
7642 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0|
7643 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
7644 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd |
7645 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
7646 *
7647 * sf: 0 -> 32bit, 1 -> 64bit
7648 * op: 0 -> add , 1 -> sub
7649 * S: 1 -> set flags
7650 * opt: 00
7651 * option: extension type (see DecodeRegExtend)
7652 * imm3: optional left shift (0-4) applied to the extended Rm
7653 *
7654 * Rd = Rn + LSL(extend(Rm), amount)
7655 */
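/*
 * For example, ADD X0, SP, W1, UXTW #2 computes X0 = SP + (ZeroExtend(W1) << 2);
 * ext_and_shift_reg() performs the extend-then-shift on Rm.
 */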
7656 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
7657 {
7658 int rd = extract32(insn, 0, 5);
7659 int rn = extract32(insn, 5, 5);
7660 int imm3 = extract32(insn, 10, 3);
7661 int option = extract32(insn, 13, 3);
7662 int rm = extract32(insn, 16, 5);
7663 int opt = extract32(insn, 22, 2);
7664 bool setflags = extract32(insn, 29, 1);
7665 bool sub_op = extract32(insn, 30, 1);
7666 bool sf = extract32(insn, 31, 1);
7667
7668 TCGv_i64 tcg_rm, tcg_rn; /* temps */
7669 TCGv_i64 tcg_rd;
7670 TCGv_i64 tcg_result;
7671
7672 if (imm3 > 4 || opt != 0) {
7673 unallocated_encoding(s);
7674 return;
7675 }
7676
7677 /* non-flag setting ops may use SP */
7678 if (!setflags) {
7679 tcg_rd = cpu_reg_sp(s, rd);
7680 } else {
7681 tcg_rd = cpu_reg(s, rd);
7682 }
7683 tcg_rn = read_cpu_reg_sp(s, rn, sf);
7684
7685 tcg_rm = read_cpu_reg(s, rm, sf);
7686 ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
7687
7688 tcg_result = tcg_temp_new_i64();
7689
7690 if (!setflags) {
7691 if (sub_op) {
7692 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
7693 } else {
7694 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
7695 }
7696 } else {
7697 if (sub_op) {
7698 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
7699 } else {
7700 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
7701 }
7702 }
7703
7704 if (sf) {
7705 tcg_gen_mov_i64(tcg_rd, tcg_result);
7706 } else {
7707 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
7708 }
7709 }
7710
7711 /*
7712 * Add/subtract (shifted register)
7713 *
7714 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
7715 * +--+--+--+-----------+-----+--+-------+---------+------+------+
7716 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd |
7717 * +--+--+--+-----------+-----+--+-------+---------+------+------+
7718 *
7719 * sf: 0 -> 32bit, 1 -> 64bit
7720 * op: 0 -> add , 1 -> sub
7721 * S: 1 -> set flags
7722 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
7723 * imm6: Shift amount to apply to Rm before the add/sub
7724 */
7725 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
7726 {
7727 int rd = extract32(insn, 0, 5);
7728 int rn = extract32(insn, 5, 5);
7729 int imm6 = extract32(insn, 10, 6);
7730 int rm = extract32(insn, 16, 5);
7731 int shift_type = extract32(insn, 22, 2);
7732 bool setflags = extract32(insn, 29, 1);
7733 bool sub_op = extract32(insn, 30, 1);
7734 bool sf = extract32(insn, 31, 1);
7735
7736 TCGv_i64 tcg_rd = cpu_reg(s, rd);
7737 TCGv_i64 tcg_rn, tcg_rm;
7738 TCGv_i64 tcg_result;
7739
7740 if ((shift_type == 3) || (!sf && (imm6 > 31))) {
7741 unallocated_encoding(s);
7742 return;
7743 }
7744
7745 tcg_rn = read_cpu_reg(s, rn, sf);
7746 tcg_rm = read_cpu_reg(s, rm, sf);
7747
7748 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
7749
7750 tcg_result = tcg_temp_new_i64();
7751
7752 if (!setflags) {
7753 if (sub_op) {
7754 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
7755 } else {
7756 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
7757 }
7758 } else {
7759 if (sub_op) {
7760 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
7761 } else {
7762 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
7763 }
7764 }
7765
7766 if (sf) {
7767 tcg_gen_mov_i64(tcg_rd, tcg_result);
7768 } else {
7769 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
7770 }
7771 }
7772
7773 /* Data-processing (3 source)
7774 *
7775 * 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0
7776 * +--+------+-----------+------+------+----+------+------+------+
7777 * |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd |
7778 * +--+------+-----------+------+------+----+------+------+------+
7779 */
7780 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
7781 {
7782 int rd = extract32(insn, 0, 5);
7783 int rn = extract32(insn, 5, 5);
7784 int ra = extract32(insn, 10, 5);
7785 int rm = extract32(insn, 16, 5);
7786 int op_id = (extract32(insn, 29, 3) << 4) |
7787 (extract32(insn, 21, 3) << 1) |
7788 extract32(insn, 15, 1);
7789 bool sf = extract32(insn, 31, 1);
7790 bool is_sub = extract32(op_id, 0, 1);
7791 bool is_high = extract32(op_id, 2, 1);
7792 bool is_signed = false;
7793 TCGv_i64 tcg_op1;
7794 TCGv_i64 tcg_op2;
7795 TCGv_i64 tcg_tmp;
7796
7797 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
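/* For example SMULH has sf=1, op54=00, op31=010, o0=0, giving op_id 0x44. */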
7798 switch (op_id) {
7799 case 0x42: /* SMADDL */
7800 case 0x43: /* SMSUBL */
7801 case 0x44: /* SMULH */
7802 is_signed = true;
7803 break;
7804 case 0x0: /* MADD (32bit) */
7805 case 0x1: /* MSUB (32bit) */
7806 case 0x40: /* MADD (64bit) */
7807 case 0x41: /* MSUB (64bit) */
7808 case 0x4a: /* UMADDL */
7809 case 0x4b: /* UMSUBL */
7810 case 0x4c: /* UMULH */
7811 break;
7812 default:
7813 unallocated_encoding(s);
7814 return;
7815 }
7816
7817 if (is_high) {
7818 TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
7819 TCGv_i64 tcg_rd = cpu_reg(s, rd);
7820 TCGv_i64 tcg_rn = cpu_reg(s, rn);
7821 TCGv_i64 tcg_rm = cpu_reg(s, rm);
7822
7823 if (is_signed) {
7824 tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
7825 } else {
7826 tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
7827 }
7828 return;
7829 }
7830
7831 tcg_op1 = tcg_temp_new_i64();
7832 tcg_op2 = tcg_temp_new_i64();
7833 tcg_tmp = tcg_temp_new_i64();
7834
7835 if (op_id < 0x42) {
7836 tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
7837 tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
7838 } else {
7839 if (is_signed) {
7840 tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
7841 tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
7842 } else {
7843 tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
7844 tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
7845 }
7846 }
7847
7848 if (ra == 31 && !is_sub) {
7849 /* Special-case MADD with rA == XZR; it is the standard MUL alias */
7850 tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
7851 } else {
7852 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
7853 if (is_sub) {
7854 tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
7855 } else {
7856 tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
7857 }
7858 }
7859
7860 if (!sf) {
7861 tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
7862 }
7863 }
7864
7865 /* Add/subtract (with carry)
7866 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0
7867 * +--+--+--+------------------------+------+-------------+------+-----+
7868 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd |
7869 * +--+--+--+------------------------+------+-------------+------+-----+
7870 */
7871
7872 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
7873 {
7874 unsigned int sf, op, setflags, rm, rn, rd;
7875 TCGv_i64 tcg_y, tcg_rn, tcg_rd;
7876
7877 sf = extract32(insn, 31, 1);
7878 op = extract32(insn, 30, 1);
7879 setflags = extract32(insn, 29, 1);
7880 rm = extract32(insn, 16, 5);
7881 rn = extract32(insn, 5, 5);
7882 rd = extract32(insn, 0, 5);
7883
7884 tcg_rd = cpu_reg(s, rd);
7885 tcg_rn = cpu_reg(s, rn);
7886
7887 if (op) {
7888 tcg_y = tcg_temp_new_i64();
7889 tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
7890 } else {
7891 tcg_y = cpu_reg(s, rm);
7892 }
7893
7894 if (setflags) {
7895 gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
7896 } else {
7897 gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
7898 }
7899 }
7900
7901 /*
7902 * Rotate right into flags
7903 * 31 30 29 21 15 10 5 4 0
7904 * +--+--+--+-----------------+--------+-----------+------+--+------+
7905 * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask |
7906 * +--+--+--+-----------------+--------+-----------+------+--+------+
7907 */
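/*
 * This is the FEAT_FlagM RMIF instruction: rotate Xn right by imm6 and
 * insert the selected bits of the low nibble of the result into
 * PSTATE.{N,Z,C,V} according to the mask.
 */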
7908 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
7909 {
7910 int mask = extract32(insn, 0, 4);
7911 int o2 = extract32(insn, 4, 1);
7912 int rn = extract32(insn, 5, 5);
7913 int imm6 = extract32(insn, 15, 6);
7914 int sf_op_s = extract32(insn, 29, 3);
7915 TCGv_i64 tcg_rn;
7916 TCGv_i32 nzcv;
7917
7918 if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
7919 unallocated_encoding(s);
7920 return;
7921 }
7922
7923 tcg_rn = read_cpu_reg(s, rn, 1);
7924 tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
7925
7926 nzcv = tcg_temp_new_i32();
7927 tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
7928
7929 if (mask & 8) { /* N */
7930 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
7931 }
7932 if (mask & 4) { /* Z */
7933 tcg_gen_not_i32(cpu_ZF, nzcv);
7934 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
7935 }
7936 if (mask & 2) { /* C */
7937 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
7938 }
7939 if (mask & 1) { /* V */
7940 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
7941 }
7942 }
7943
7944 /*
7945 * Evaluate into flags
7946 * 31 30 29 21 15 14 10 5 4 0
7947 * +--+--+--+-----------------+---------+----+---------+------+--+------+
7948 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 | Rn |o3| mask |
7949 * +--+--+--+-----------------+---------+----+---------+------+--+------+
7950 */
7951 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
7952 {
7953 int o3_mask = extract32(insn, 0, 5);
7954 int rn = extract32(insn, 5, 5);
7955 int o2 = extract32(insn, 15, 6);
7956 int sz = extract32(insn, 14, 1);
7957 int sf_op_s = extract32(insn, 29, 3);
7958 TCGv_i32 tmp;
7959 int shift;
7960
7961 if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
7962 !dc_isar_feature(aa64_condm_4, s)) {
7963 unallocated_encoding(s);
7964 return;
7965 }
7966 shift = sz ? 16 : 24; /* SETF16 or SETF8 */
7967
7968 tmp = tcg_temp_new_i32();
7969 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
7970 tcg_gen_shli_i32(cpu_NF, tmp, shift);
7971 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
7972 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
7973 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
7974 }
7975
7976 /* Conditional compare (immediate / register)
7977 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
7978 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
7979 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv |
7980 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
7981 * [1] y [0] [0]
7982 */
7983 static void disas_cc(DisasContext *s, uint32_t insn)
7984 {
7985 unsigned int sf, op, y, cond, rn, nzcv, is_imm;
7986 TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
7987 TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
7988 DisasCompare c;
7989
7990 if (!extract32(insn, 29, 1)) {
7991 unallocated_encoding(s);
7992 return;
7993 }
7994 if (insn & (1 << 10 | 1 << 4)) {
7995 unallocated_encoding(s);
7996 return;
7997 }
7998 sf = extract32(insn, 31, 1);
7999 op = extract32(insn, 30, 1);
8000 is_imm = extract32(insn, 11, 1);
8001 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
8002 cond = extract32(insn, 12, 4);
8003 rn = extract32(insn, 5, 5);
8004 nzcv = extract32(insn, 0, 4);
8005
8006 /* Set T0 = !COND. */
8007 tcg_t0 = tcg_temp_new_i32();
8008 arm_test_cc(&c, cond);
8009 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
8010
8011 /* Load the arguments for the new comparison. */
8012 if (is_imm) {
8013 tcg_y = tcg_temp_new_i64();
8014 tcg_gen_movi_i64(tcg_y, y);
8015 } else {
8016 tcg_y = cpu_reg(s, y);
8017 }
8018 tcg_rn = cpu_reg(s, rn);
8019
8020 /* Set the flags for the new comparison. */
8021 tcg_tmp = tcg_temp_new_i64();
8022 if (op) {
8023 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
8024 } else {
8025 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
8026 }
8027
8028 /* If COND was false, force the flags to #nzcv. Compute two masks
8029 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
8030 * For tcg hosts that support ANDC, we can make do with just T1.
8031 * In either case, allow the tcg optimizer to delete any unused mask.
8032 */
8033 tcg_t1 = tcg_temp_new_i32();
8034 tcg_t2 = tcg_temp_new_i32();
8035 tcg_gen_neg_i32(tcg_t1, tcg_t0);
8036 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
8037
8038 if (nzcv & 8) { /* N */
8039 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
8040 } else {
8041 if (TCG_TARGET_HAS_andc_i32) {
8042 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
8043 } else {
8044 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
8045 }
8046 }
8047 if (nzcv & 4) { /* Z */
8048 if (TCG_TARGET_HAS_andc_i32) {
8049 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
8050 } else {
8051 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
8052 }
8053 } else {
8054 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
8055 }
8056 if (nzcv & 2) { /* C */
8057 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
8058 } else {
8059 if (TCG_TARGET_HAS_andc_i32) {
8060 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
8061 } else {
8062 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
8063 }
8064 }
8065 if (nzcv & 1) { /* V */
8066 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
8067 } else {
8068 if (TCG_TARGET_HAS_andc_i32) {
8069 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
8070 } else {
8071 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
8072 }
8073 }
8074 }
8075
8076 /* Conditional select
8077 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0
8078 * +----+----+---+-----------------+------+------+-----+------+------+
8079 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd |
8080 * +----+----+---+-----------------+------+------+-----+------+------+
8081 */
8082 static void disas_cond_select(DisasContext *s, uint32_t insn)
8083 {
8084 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
8085 TCGv_i64 tcg_rd, zero;
8086 DisasCompare64 c;
8087
8088 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
8089 /* S == 1 or op2<1> == 1 */
8090 unallocated_encoding(s);
8091 return;
8092 }
8093 sf = extract32(insn, 31, 1);
8094 else_inv = extract32(insn, 30, 1);
8095 rm = extract32(insn, 16, 5);
8096 cond = extract32(insn, 12, 4);
8097 else_inc = extract32(insn, 10, 1);
8098 rn = extract32(insn, 5, 5);
8099 rd = extract32(insn, 0, 5);
8100
8101 tcg_rd = cpu_reg(s, rd);
8102
8103 a64_test_cc(&c, cond);
8104 zero = tcg_constant_i64(0);
8105
8106 if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
8107 /* CSET & CSETM. */
8108 if (else_inv) {
8109 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
8110 tcg_rd, c.value, zero);
8111 } else {
8112 tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
8113 tcg_rd, c.value, zero);
8114 }
8115 } else {
8116 TCGv_i64 t_true = cpu_reg(s, rn);
8117 TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
8118 if (else_inv && else_inc) {
8119 tcg_gen_neg_i64(t_false, t_false);
8120 } else if (else_inv) {
8121 tcg_gen_not_i64(t_false, t_false);
8122 } else if (else_inc) {
8123 tcg_gen_addi_i64(t_false, t_false, 1);
8124 }
8125 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
8126 }
8127
8128 if (!sf) {
8129 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8130 }
8131 }
8132
8133 static void handle_clz(DisasContext *s, unsigned int sf,
8134 unsigned int rn, unsigned int rd)
8135 {
8136 TCGv_i64 tcg_rd, tcg_rn;
8137 tcg_rd = cpu_reg(s, rd);
8138 tcg_rn = cpu_reg(s, rn);
8139
8140 if (sf) {
8141 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
8142 } else {
8143 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
8144 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
8145 tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
8146 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
8147 }
8148 }
8149
8150 static void handle_cls(DisasContext *s, unsigned int sf,
8151 unsigned int rn, unsigned int rd)
8152 {
8153 TCGv_i64 tcg_rd, tcg_rn;
8154 tcg_rd = cpu_reg(s, rd);
8155 tcg_rn = cpu_reg(s, rn);
8156
8157 if (sf) {
8158 tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
8159 } else {
8160 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
8161 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
8162 tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
8163 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
8164 }
8165 }
8166
8167 static void handle_rbit(DisasContext *s, unsigned int sf,
8168 unsigned int rn, unsigned int rd)
8169 {
8170 TCGv_i64 tcg_rd, tcg_rn;
8171 tcg_rd = cpu_reg(s, rd);
8172 tcg_rn = cpu_reg(s, rn);
8173
8174 if (sf) {
8175 gen_helper_rbit64(tcg_rd, tcg_rn);
8176 } else {
8177 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
8178 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
8179 gen_helper_rbit(tcg_tmp32, tcg_tmp32);
8180 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
8181 }
8182 }
8183
8184 /* REV with sf==1, opcode==3 ("REV64") */
8185 static void handle_rev64(DisasContext *s, unsigned int sf,
8186 unsigned int rn, unsigned int rd)
8187 {
8188 if (!sf) {
8189 unallocated_encoding(s);
8190 return;
8191 }
8192 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
8193 }
8194
8195 /* REV with sf==0, opcode==2
8196 * REV32 (sf==1, opcode==2)
8197 */
8198 static void handle_rev32(DisasContext *s, unsigned int sf,
8199 unsigned int rn, unsigned int rd)
8200 {
8201 TCGv_i64 tcg_rd = cpu_reg(s, rd);
8202 TCGv_i64 tcg_rn = cpu_reg(s, rn);
8203
8204 if (sf) {
8205 tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
8206 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
8207 } else {
8208 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
8209 }
8210 }
8211
8212 /* REV16 (opcode==1) */
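/*
 * Swap the bytes within each 16-bit halfword: the even bytes are
 * masked and shifted up by 8, the odd bytes are shifted down by 8 and
 * masked, and the two halves are then ORed together.
 */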
8213 static void handle_rev16(DisasContext *s, unsigned int sf,
8214 unsigned int rn, unsigned int rd)
8215 {
8216 TCGv_i64 tcg_rd = cpu_reg(s, rd);
8217 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8218 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
8219 TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
8220
8221 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
8222 tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
8223 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
8224 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
8225 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
8226 }
8227
8228 /* Data-processing (1 source)
8229 * 31 30 29 28 21 20 16 15 10 9 5 4 0
8230 * +----+---+---+-----------------+---------+--------+------+------+
8231 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd |
8232 * +----+---+---+-----------------+---------+--------+------+------+
8233 */
8234 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
8235 {
8236 unsigned int sf, opcode, opcode2, rn, rd;
8237 TCGv_i64 tcg_rd;
8238
8239 if (extract32(insn, 29, 1)) {
8240 unallocated_encoding(s);
8241 return;
8242 }
8243
8244 sf = extract32(insn, 31, 1);
8245 opcode = extract32(insn, 10, 6);
8246 opcode2 = extract32(insn, 16, 5);
8247 rn = extract32(insn, 5, 5);
8248 rd = extract32(insn, 0, 5);
8249
8250 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
8251
8252 switch (MAP(sf, opcode2, opcode)) {
8253 case MAP(0, 0x00, 0x00): /* RBIT */
8254 case MAP(1, 0x00, 0x00):
8255 handle_rbit(s, sf, rn, rd);
8256 break;
8257 case MAP(0, 0x00, 0x01): /* REV16 */
8258 case MAP(1, 0x00, 0x01):
8259 handle_rev16(s, sf, rn, rd);
8260 break;
8261 case MAP(0, 0x00, 0x02): /* REV/REV32 */
8262 case MAP(1, 0x00, 0x02):
8263 handle_rev32(s, sf, rn, rd);
8264 break;
8265 case MAP(1, 0x00, 0x03): /* REV64 */
8266 handle_rev64(s, sf, rn, rd);
8267 break;
8268 case MAP(0, 0x00, 0x04): /* CLZ */
8269 case MAP(1, 0x00, 0x04):
8270 handle_clz(s, sf, rn, rd);
8271 break;
8272 case MAP(0, 0x00, 0x05): /* CLS */
8273 case MAP(1, 0x00, 0x05):
8274 handle_cls(s, sf, rn, rd);
8275 break;
8276 case MAP(1, 0x01, 0x00): /* PACIA */
8277 if (s->pauth_active) {
8278 tcg_rd = cpu_reg(s, rd);
8279 gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
8280 } else if (!dc_isar_feature(aa64_pauth, s)) {
8281 goto do_unallocated;
8282 }
8283 break;
8284 case MAP(1, 0x01, 0x01): /* PACIB */
8285 if (s->pauth_active) {
8286 tcg_rd = cpu_reg(s, rd);
8287 gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
8288 } else if (!dc_isar_feature(aa64_pauth, s)) {
8289 goto do_unallocated;
8290 }
8291 break;
8292 case MAP(1, 0x01, 0x02): /* PACDA */
8293 if (s->pauth_active) {
8294 tcg_rd = cpu_reg(s, rd);
8295 gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
8296 } else if (!dc_isar_feature(aa64_pauth, s)) {
8297 goto do_unallocated;
8298 }
8299 break;
8300 case MAP(1, 0x01, 0x03): /* PACDB */
8301 if (s->pauth_active) {
8302 tcg_rd = cpu_reg(s, rd);
8303 gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
8304 } else if (!dc_isar_feature(aa64_pauth, s)) {
8305 goto do_unallocated;
8306 }
8307 break;
8308 case MAP(1, 0x01, 0x04): /* AUTIA */
8309 if (s->pauth_active) {
8310 tcg_rd = cpu_reg(s, rd);
8311 gen_helper_autia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
8312 } else if (!dc_isar_feature(aa64_pauth, s)) {
8313 goto do_unallocated;
8314 }
8315 break;
8316 case MAP(1, 0x01, 0x05): /* AUTIB */
8317 if (s->pauth_active) {
8318 tcg_rd = cpu_reg(s, rd);
8319 gen_helper_autib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
8320 } else if (!dc_isar_feature(aa64_pauth, s)) {
8321 goto do_unallocated;
8322 }
8323 break;
8324 case MAP(1, 0x01, 0x06): /* AUTDA */
8325 if (s->pauth_active) {
8326 tcg_rd = cpu_reg(s, rd);
8327 gen_helper_autda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
8328 } else if (!dc_isar_feature(aa64_pauth, s)) {
8329 goto do_unallocated;
8330 }
8331 break;
8332 case MAP(1, 0x01, 0x07): /* AUTDB */
8333 if (s->pauth_active) {
8334 tcg_rd = cpu_reg(s, rd);
8335 gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn));
8336 } else if (!dc_isar_feature(aa64_pauth, s)) {
8337 goto do_unallocated;
8338 }
8339 break;
8340 case MAP(1, 0x01, 0x08): /* PACIZA */
8341 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8342 goto do_unallocated;
8343 } else if (s->pauth_active) {
8344 tcg_rd = cpu_reg(s, rd);
8345 gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
8346 }
8347 break;
8348 case MAP(1, 0x01, 0x09): /* PACIZB */
8349 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8350 goto do_unallocated;
8351 } else if (s->pauth_active) {
8352 tcg_rd = cpu_reg(s, rd);
8353 gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
8354 }
8355 break;
8356 case MAP(1, 0x01, 0x0a): /* PACDZA */
8357 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8358 goto do_unallocated;
8359 } else if (s->pauth_active) {
8360 tcg_rd = cpu_reg(s, rd);
8361 gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
8362 }
8363 break;
8364 case MAP(1, 0x01, 0x0b): /* PACDZB */
8365 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8366 goto do_unallocated;
8367 } else if (s->pauth_active) {
8368 tcg_rd = cpu_reg(s, rd);
8369 gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
8370 }
8371 break;
8372 case MAP(1, 0x01, 0x0c): /* AUTIZA */
8373 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8374 goto do_unallocated;
8375 } else if (s->pauth_active) {
8376 tcg_rd = cpu_reg(s, rd);
8377 gen_helper_autia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
8378 }
8379 break;
8380 case MAP(1, 0x01, 0x0d): /* AUTIZB */
8381 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8382 goto do_unallocated;
8383 } else if (s->pauth_active) {
8384 tcg_rd = cpu_reg(s, rd);
8385 gen_helper_autib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
8386 }
8387 break;
8388 case MAP(1, 0x01, 0x0e): /* AUTDZA */
8389 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8390 goto do_unallocated;
8391 } else if (s->pauth_active) {
8392 tcg_rd = cpu_reg(s, rd);
8393 gen_helper_autda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
8394 }
8395 break;
8396 case MAP(1, 0x01, 0x0f): /* AUTDZB */
8397 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8398 goto do_unallocated;
8399 } else if (s->pauth_active) {
8400 tcg_rd = cpu_reg(s, rd);
8401 gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0));
8402 }
8403 break;
8404 case MAP(1, 0x01, 0x10): /* XPACI */
8405 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8406 goto do_unallocated;
8407 } else if (s->pauth_active) {
8408 tcg_rd = cpu_reg(s, rd);
8409 gen_helper_xpaci(tcg_rd, tcg_env, tcg_rd);
8410 }
8411 break;
8412 case MAP(1, 0x01, 0x11): /* XPACD */
8413 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
8414 goto do_unallocated;
8415 } else if (s->pauth_active) {
8416 tcg_rd = cpu_reg(s, rd);
8417 gen_helper_xpacd(tcg_rd, tcg_env, tcg_rd);
8418 }
8419 break;
8420 default:
8421 do_unallocated:
8422 unallocated_encoding(s);
8423 break;
8424 }
8425
8426 #undef MAP
8427 }
8428
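/*
 * UDIV/SDIV: for the 32-bit signed case the operands are sign-extended
 * to 64 bits so that a single 64-bit helper covers both widths; the
 * result is zero-extended back to 32 bits when !sf.
 */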
8429 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
8430 unsigned int rm, unsigned int rn, unsigned int rd)
8431 {
8432 TCGv_i64 tcg_n, tcg_m, tcg_rd;
8433 tcg_rd = cpu_reg(s, rd);
8434
8435 if (!sf && is_signed) {
8436 tcg_n = tcg_temp_new_i64();
8437 tcg_m = tcg_temp_new_i64();
8438 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
8439 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
8440 } else {
8441 tcg_n = read_cpu_reg(s, rn, sf);
8442 tcg_m = read_cpu_reg(s, rm, sf);
8443 }
8444
8445 if (is_signed) {
8446 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
8447 } else {
8448 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
8449 }
8450
8451 if (!sf) { /* zero extend final result */
8452 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8453 }
8454 }
8455
8456 /* LSLV, LSRV, ASRV, RORV */
8457 static void handle_shift_reg(DisasContext *s,
8458 enum a64_shift_type shift_type, unsigned int sf,
8459 unsigned int rm, unsigned int rn, unsigned int rd)
8460 {
8461 TCGv_i64 tcg_shift = tcg_temp_new_i64();
8462 TCGv_i64 tcg_rd = cpu_reg(s, rd);
8463 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
8464
8465 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
8466 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
8467 }
8468
8469 /* CRC32[BHWX], CRC32C[BHWX] */
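/*
 * The accumulator comes from Rn; the value from Rm is masked down to
 * 1, 2 or 4 bytes as selected by sz before being passed, together with
 * the byte count, to the crc32/crc32c helpers.  sf=1 is only valid for
 * the X (8-byte) form.
 */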
8470 static void handle_crc32(DisasContext *s,
8471 unsigned int sf, unsigned int sz, bool crc32c,
8472 unsigned int rm, unsigned int rn, unsigned int rd)
8473 {
8474 TCGv_i64 tcg_acc, tcg_val;
8475 TCGv_i32 tcg_bytes;
8476
8477 if (!dc_isar_feature(aa64_crc32, s)
8478 || (sf == 1 && sz != 3)
8479 || (sf == 0 && sz == 3)) {
8480 unallocated_encoding(s);
8481 return;
8482 }
8483
8484 if (sz == 3) {
8485 tcg_val = cpu_reg(s, rm);
8486 } else {
8487 uint64_t mask;
8488 switch (sz) {
8489 case 0:
8490 mask = 0xFF;
8491 break;
8492 case 1:
8493 mask = 0xFFFF;
8494 break;
8495 case 2:
8496 mask = 0xFFFFFFFF;
8497 break;
8498 default:
8499 g_assert_not_reached();
8500 }
8501 tcg_val = tcg_temp_new_i64();
8502 tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
8503 }
8504
8505 tcg_acc = cpu_reg(s, rn);
8506 tcg_bytes = tcg_constant_i32(1 << sz);
8507
8508 if (crc32c) {
8509 gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
8510 } else {
8511 gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
8512 }
8513 }
8514
8515 /* Data-processing (2 source)
8516 * 31 30 29 28 21 20 16 15 10 9 5 4 0
8517 * +----+---+---+-----------------+------+--------+------+------+
8518 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd |
8519 * +----+---+---+-----------------+------+--------+------+------+
8520 */
8521 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
8522 {
8523 unsigned int sf, rm, opcode, rn, rd, setflag;
8524 sf = extract32(insn, 31, 1);
8525 setflag = extract32(insn, 29, 1);
8526 rm = extract32(insn, 16, 5);
8527 opcode = extract32(insn, 10, 6);
8528 rn = extract32(insn, 5, 5);
8529 rd = extract32(insn, 0, 5);
8530
8531 if (setflag && opcode != 0) {
8532 unallocated_encoding(s);
8533 return;
8534 }
8535
8536 switch (opcode) {
8537 case 0: /* SUBP(S) */
8538 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
8539 goto do_unallocated;
8540 } else {
8541 TCGv_i64 tcg_n, tcg_m, tcg_d;
8542
8543 tcg_n = read_cpu_reg_sp(s, rn, true);
8544 tcg_m = read_cpu_reg_sp(s, rm, true);
8545 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
8546 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
8547 tcg_d = cpu_reg(s, rd);
8548
8549 if (setflag) {
8550 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
8551 } else {
8552 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
8553 }
8554 }
8555 break;
8556 case 2: /* UDIV */
8557 handle_div(s, false, sf, rm, rn, rd);
8558 break;
8559 case 3: /* SDIV */
8560 handle_div(s, true, sf, rm, rn, rd);
8561 break;
8562 case 4: /* IRG */
8563 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
8564 goto do_unallocated;
8565 }
8566 if (s->ata[0]) {
8567 gen_helper_irg(cpu_reg_sp(s, rd), tcg_env,
8568 cpu_reg_sp(s, rn), cpu_reg(s, rm));
8569 } else {
8570 gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
8571 cpu_reg_sp(s, rn));
8572 }
8573 break;
8574 case 5: /* GMI */
8575 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
8576 goto do_unallocated;
8577 } else {
8578 TCGv_i64 t = tcg_temp_new_i64();
8579
8580 tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
8581 tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
8582 tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);
8583 }
8584 break;
8585 case 8: /* LSLV */
8586 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
8587 break;
8588 case 9: /* LSRV */
8589 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
8590 break;
8591 case 10: /* ASRV */
8592 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
8593 break;
8594 case 11: /* RORV */
8595 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
8596 break;
8597 case 12: /* PACGA */
8598 if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
8599 goto do_unallocated;
8600 }
8601 gen_helper_pacga(cpu_reg(s, rd), tcg_env,
8602 cpu_reg(s, rn), cpu_reg_sp(s, rm));
8603 break;
8604 case 16:
8605 case 17:
8606 case 18:
8607 case 19:
8608 case 20:
8609 case 21:
8610 case 22:
8611 case 23: /* CRC32 */
8612 {
8613 int sz = extract32(opcode, 0, 2);
8614 bool crc32c = extract32(opcode, 2, 1);
8615 handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
8616 break;
8617 }
8618 default:
8619 do_unallocated:
8620 unallocated_encoding(s);
8621 break;
8622 }
8623 }
8624
8625 /*
8626 * Data processing - register
8627 * 31 30 29 28 25 21 20 16 10 0
8628 * +--+---+--+---+-------+-----+-------+-------+---------+
8629 * | |op0| |op1| 1 0 1 | op2 | | op3 | |
8630 * +--+---+--+---+-------+-----+-------+-------+---------+
8631 */
8632 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
8633 {
8634 int op0 = extract32(insn, 30, 1);
8635 int op1 = extract32(insn, 28, 1);
8636 int op2 = extract32(insn, 21, 4);
8637 int op3 = extract32(insn, 10, 6);
8638
8639 if (!op1) {
8640 if (op2 & 8) {
8641 if (op2 & 1) {
8642 /* Add/sub (extended register) */
8643 disas_add_sub_ext_reg(s, insn);
8644 } else {
8645 /* Add/sub (shifted register) */
8646 disas_add_sub_reg(s, insn);
8647 }
8648 } else {
8649 /* Logical (shifted register) */
8650 disas_logic_reg(s, insn);
8651 }
8652 return;
8653 }
8654
8655 switch (op2) {
8656 case 0x0:
8657 switch (op3) {
8658 case 0x00: /* Add/subtract (with carry) */
8659 disas_adc_sbc(s, insn);
8660 break;
8661
8662 case 0x01: /* Rotate right into flags */
8663 case 0x21:
8664 disas_rotate_right_into_flags(s, insn);
8665 break;
8666
8667 case 0x02: /* Evaluate into flags */
8668 case 0x12:
8669 case 0x22:
8670 case 0x32:
8671 disas_evaluate_into_flags(s, insn);
8672 break;
8673
8674 default:
8675 goto do_unallocated;
8676 }
8677 break;
8678
8679 case 0x2: /* Conditional compare */
8680 disas_cc(s, insn); /* both imm and reg forms */
8681 break;
8682
8683 case 0x4: /* Conditional select */
8684 disas_cond_select(s, insn);
8685 break;
8686
8687 case 0x6: /* Data-processing */
8688 if (op0) { /* (1 source) */
8689 disas_data_proc_1src(s, insn);
8690 } else { /* (2 source) */
8691 disas_data_proc_2src(s, insn);
8692 }
8693 break;
8694 case 0x8 ... 0xf: /* (3 source) */
8695 disas_data_proc_3src(s, insn);
8696 break;
8697
8698 default:
8699 do_unallocated:
8700 unallocated_encoding(s);
8701 break;
8702 }
8703 }
8704
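/*
 * Compare Vn against Vm (or against zero) with either the quiet or the
 * signalling compare helper for the given element size, then copy the
 * resulting NZCV value into PSTATE via gen_set_nzcv().
 */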
8705 static void handle_fp_compare(DisasContext *s, int size,
8706 unsigned int rn, unsigned int rm,
8707 bool cmp_with_zero, bool signal_all_nans)
8708 {
8709 TCGv_i64 tcg_flags = tcg_temp_new_i64();
8710 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8711
8712 if (size == MO_64) {
8713 TCGv_i64 tcg_vn, tcg_vm;
8714
8715 tcg_vn = read_fp_dreg(s, rn);
8716 if (cmp_with_zero) {
8717 tcg_vm = tcg_constant_i64(0);
8718 } else {
8719 tcg_vm = read_fp_dreg(s, rm);
8720 }
8721 if (signal_all_nans) {
8722 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
8723 } else {
8724 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
8725 }
8726 } else {
8727 TCGv_i32 tcg_vn = tcg_temp_new_i32();
8728 TCGv_i32 tcg_vm = tcg_temp_new_i32();
8729
8730 read_vec_element_i32(s, tcg_vn, rn, 0, size);
8731 if (cmp_with_zero) {
8732 tcg_gen_movi_i32(tcg_vm, 0);
8733 } else {
8734 read_vec_element_i32(s, tcg_vm, rm, 0, size);
8735 }
8736
8737 switch (size) {
8738 case MO_32:
8739 if (signal_all_nans) {
8740 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
8741 } else {
8742 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
8743 }
8744 break;
8745 case MO_16:
8746 if (signal_all_nans) {
8747 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
8748 } else {
8749 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
8750 }
8751 break;
8752 default:
8753 g_assert_not_reached();
8754 }
8755 }
8756
8757 gen_set_nzcv(tcg_flags);
8758 }
8759
8760 /* Floating point compare
8761 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0
8762 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
8763 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 |
8764 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
8765 */
8766 static void disas_fp_compare(DisasContext *s, uint32_t insn)
8767 {
8768 unsigned int mos, type, rm, op, rn, opc, op2r;
8769 int size;
8770
8771 mos = extract32(insn, 29, 3);
8772 type = extract32(insn, 22, 2);
8773 rm = extract32(insn, 16, 5);
8774 op = extract32(insn, 14, 2);
8775 rn = extract32(insn, 5, 5);
8776 opc = extract32(insn, 3, 2);
8777 op2r = extract32(insn, 0, 3);
8778
8779 if (mos || op || op2r) {
8780 unallocated_encoding(s);
8781 return;
8782 }
8783
8784 switch (type) {
8785 case 0:
8786 size = MO_32;
8787 break;
8788 case 1:
8789 size = MO_64;
8790 break;
8791 case 3:
8792 size = MO_16;
8793 if (dc_isar_feature(aa64_fp16, s)) {
8794 break;
8795 }
8796 /* fallthru */
8797 default:
8798 unallocated_encoding(s);
8799 return;
8800 }
8801
8802 if (!fp_access_check(s)) {
8803 return;
8804 }
8805
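    /* opc<0> selects comparison against zero (FCMP{E} #0.0) and opc<1>
     * selects the signalling form (FCMPE), matching the cmp_with_zero
     * and signal_all_nans arguments of handle_fp_compare().
     */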
8806 handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
8807 }
8808
8809 /* Floating point conditional compare
8810 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
8811 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
8812 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv |
8813 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
8814 */
8815 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
8816 {
8817 unsigned int mos, type, rm, cond, rn, op, nzcv;
8818 TCGLabel *label_continue = NULL;
8819 int size;
8820
8821 mos = extract32(insn, 29, 3);
8822 type = extract32(insn, 22, 2);
8823 rm = extract32(insn, 16, 5);
8824 cond = extract32(insn, 12, 4);
8825 rn = extract32(insn, 5, 5);
8826 op = extract32(insn, 4, 1);
8827 nzcv = extract32(insn, 0, 4);
8828
8829 if (mos) {
8830 unallocated_encoding(s);
8831 return;
8832 }
8833
8834 switch (type) {
8835 case 0:
8836 size = MO_32;
8837 break;
8838 case 1:
8839 size = MO_64;
8840 break;
8841 case 3:
8842 size = MO_16;
8843 if (dc_isar_feature(aa64_fp16, s)) {
8844 break;
8845 }
8846 /* fallthru */
8847 default:
8848 unallocated_encoding(s);
8849 return;
8850 }
8851
8852 if (!fp_access_check(s)) {
8853 return;
8854 }
8855
8856 if (cond < 0x0e) { /* not always */
8857 TCGLabel *label_match = gen_new_label();
8858 label_continue = gen_new_label();
8859 arm_gen_test_cc(cond, label_match);
8860 /* nomatch: */
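        /* Condition failed: load the immediate nzcv field directly into
         * the flags, which live in bits [31:28] of the value passed to
         * gen_set_nzcv().
         */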
8861 gen_set_nzcv(tcg_constant_i64(nzcv << 28));
8862 tcg_gen_br(label_continue);
8863 gen_set_label(label_match);
8864 }
8865
8866 handle_fp_compare(s, size, rn, rm, false, op);
8867
8868 if (cond < 0x0e) {
8869 gen_set_label(label_continue);
8870 }
8871 }
8872
8873 /* Floating-point data-processing (1 source) - half precision */
8874 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
8875 {
8876 TCGv_ptr fpst = NULL;
8877 TCGv_i32 tcg_op = read_fp_hreg(s, rn);
8878 TCGv_i32 tcg_res = tcg_temp_new_i32();
8879
8880 switch (opcode) {
8881 case 0x0: /* FMOV */
8882 tcg_gen_mov_i32(tcg_res, tcg_op);
8883 break;
8884 case 0x1: /* FABS */
8885 gen_vfp_absh(tcg_res, tcg_op);
8886 break;
8887 case 0x2: /* FNEG */
8888 gen_vfp_negh(tcg_res, tcg_op);
8889 break;
8890 case 0x3: /* FSQRT */
8891 fpst = fpstatus_ptr(FPST_FPCR_F16);
8892 gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
8893 break;
8894 case 0x8: /* FRINTN */
8895 case 0x9: /* FRINTP */
8896 case 0xa: /* FRINTM */
8897 case 0xb: /* FRINTZ */
8898 case 0xc: /* FRINTA */
8899 {
8900 TCGv_i32 tcg_rmode;
8901
8902 fpst = fpstatus_ptr(FPST_FPCR_F16);
8903 tcg_rmode = gen_set_rmode(opcode & 7, fpst);
8904 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
8905 gen_restore_rmode(tcg_rmode, fpst);
8906 break;
8907 }
8908 case 0xe: /* FRINTX */
8909 fpst = fpstatus_ptr(FPST_FPCR_F16);
8910 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
8911 break;
8912 case 0xf: /* FRINTI */
8913 fpst = fpstatus_ptr(FPST_FPCR_F16);
8914 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
8915 break;
8916 default:
8917 g_assert_not_reached();
8918 }
8919
8920 write_fp_sreg(s, rd, tcg_res);
8921 }
8922
8923 /* Floating-point data-processing (1 source) - single precision */
8924 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
8925 {
8926 void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
8927 TCGv_i32 tcg_op, tcg_res;
8928 TCGv_ptr fpst;
8929 int rmode = -1;
8930
8931 tcg_op = read_fp_sreg(s, rn);
8932 tcg_res = tcg_temp_new_i32();
8933
8934 switch (opcode) {
8935 case 0x0: /* FMOV */
8936 tcg_gen_mov_i32(tcg_res, tcg_op);
8937 goto done;
8938 case 0x1: /* FABS */
8939 gen_vfp_abss(tcg_res, tcg_op);
8940 goto done;
8941 case 0x2: /* FNEG */
8942 gen_vfp_negs(tcg_res, tcg_op);
8943 goto done;
8944 case 0x3: /* FSQRT */
8945 gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
8946 goto done;
8947 case 0x6: /* BFCVT */
8948 gen_fpst = gen_helper_bfcvt;
8949 break;
8950 case 0x8: /* FRINTN */
8951 case 0x9: /* FRINTP */
8952 case 0xa: /* FRINTM */
8953 case 0xb: /* FRINTZ */
8954 case 0xc: /* FRINTA */
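        /* For FRINTN/P/M/Z/A the low three opcode bits match the
         * FPROUNDING_* encoding (TIEEVEN, POSINF, NEGINF, ZERO, TIEAWAY),
         * so they can be passed to gen_set_rmode() directly.
         */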
8955 rmode = opcode & 7;
8956 gen_fpst = gen_helper_rints;
8957 break;
8958 case 0xe: /* FRINTX */
8959 gen_fpst = gen_helper_rints_exact;
8960 break;
8961 case 0xf: /* FRINTI */
8962 gen_fpst = gen_helper_rints;
8963 break;
8964 case 0x10: /* FRINT32Z */
8965 rmode = FPROUNDING_ZERO;
8966 gen_fpst = gen_helper_frint32_s;
8967 break;
8968 case 0x11: /* FRINT32X */
8969 gen_fpst = gen_helper_frint32_s;
8970 break;
8971 case 0x12: /* FRINT64Z */
8972 rmode = FPROUNDING_ZERO;
8973 gen_fpst = gen_helper_frint64_s;
8974 break;
8975 case 0x13: /* FRINT64X */
8976 gen_fpst = gen_helper_frint64_s;
8977 break;
8978 default:
8979 g_assert_not_reached();
8980 }
8981
8982 fpst = fpstatus_ptr(FPST_FPCR);
8983 if (rmode >= 0) {
8984 TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
8985 gen_fpst(tcg_res, tcg_op, fpst);
8986 gen_restore_rmode(tcg_rmode, fpst);
8987 } else {
8988 gen_fpst(tcg_res, tcg_op, fpst);
8989 }
8990
8991 done:
8992 write_fp_sreg(s, rd, tcg_res);
8993 }
8994
8995 /* Floating-point data-processing (1 source) - double precision */
8996 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
8997 {
8998 void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
8999 TCGv_i64 tcg_op, tcg_res;
9000 TCGv_ptr fpst;
9001 int rmode = -1;
9002
9003 switch (opcode) {
9004 case 0x0: /* FMOV */
9005 gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
9006 return;
9007 }
9008
9009 tcg_op = read_fp_dreg(s, rn);
9010 tcg_res = tcg_temp_new_i64();
9011
9012 switch (opcode) {
9013 case 0x1: /* FABS */
9014 gen_vfp_absd(tcg_res, tcg_op);
9015 goto done;
9016 case 0x2: /* FNEG */
9017 gen_vfp_negd(tcg_res, tcg_op);
9018 goto done;
9019 case 0x3: /* FSQRT */
9020 gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env);
9021 goto done;
9022 case 0x8: /* FRINTN */
9023 case 0x9: /* FRINTP */
9024 case 0xa: /* FRINTM */
9025 case 0xb: /* FRINTZ */
9026 case 0xc: /* FRINTA */
9027 rmode = opcode & 7;
9028 gen_fpst = gen_helper_rintd;
9029 break;
9030 case 0xe: /* FRINTX */
9031 gen_fpst = gen_helper_rintd_exact;
9032 break;
9033 case 0xf: /* FRINTI */
9034 gen_fpst = gen_helper_rintd;
9035 break;
9036 case 0x10: /* FRINT32Z */
9037 rmode = FPROUNDING_ZERO;
9038 gen_fpst = gen_helper_frint32_d;
9039 break;
9040 case 0x11: /* FRINT32X */
9041 gen_fpst = gen_helper_frint32_d;
9042 break;
9043 case 0x12: /* FRINT64Z */
9044 rmode = FPROUNDING_ZERO;
9045 gen_fpst = gen_helper_frint64_d;
9046 break;
9047 case 0x13: /* FRINT64X */
9048 gen_fpst = gen_helper_frint64_d;
9049 break;
9050 default:
9051 g_assert_not_reached();
9052 }
9053
9054 fpst = fpstatus_ptr(FPST_FPCR);
9055 if (rmode >= 0) {
9056 TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst);
9057 gen_fpst(tcg_res, tcg_op, fpst);
9058 gen_restore_rmode(tcg_rmode, fpst);
9059 } else {
9060 gen_fpst(tcg_res, tcg_op, fpst);
9061 }
9062
9063 done:
9064 write_fp_dreg(s, rd, tcg_res);
9065 }
9066
9067 static void handle_fp_fcvt(DisasContext *s, int opcode,
9068 int rd, int rn, int dtype, int ntype)
9069 {
9070 switch (ntype) {
9071 case 0x0:
9072 {
9073 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
9074 if (dtype == 1) {
9075 /* Single to double */
9076 TCGv_i64 tcg_rd = tcg_temp_new_i64();
9077 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env);
9078 write_fp_dreg(s, rd, tcg_rd);
9079 } else {
9080 /* Single to half */
9081 TCGv_i32 tcg_rd = tcg_temp_new_i32();
9082 TCGv_i32 ahp = get_ahp_flag();
9083 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9084
9085 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
9086 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
9087 write_fp_sreg(s, rd, tcg_rd);
9088 }
9089 break;
9090 }
9091 case 0x1:
9092 {
9093 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9094 TCGv_i32 tcg_rd = tcg_temp_new_i32();
9095 if (dtype == 0) {
9096 /* Double to single */
9097 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env);
9098 } else {
9099 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
9100 TCGv_i32 ahp = get_ahp_flag();
9101 /* Double to half */
9102 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
9103 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
9104 }
9105 write_fp_sreg(s, rd, tcg_rd);
9106 break;
9107 }
9108 case 0x3:
9109 {
9110 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
9111 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
9112 TCGv_i32 tcg_ahp = get_ahp_flag();
9113 tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
9114 if (dtype == 0) {
9115 /* Half to single */
9116 TCGv_i32 tcg_rd = tcg_temp_new_i32();
9117 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9118 write_fp_sreg(s, rd, tcg_rd);
9119 } else {
9120 /* Half to double */
9121 TCGv_i64 tcg_rd = tcg_temp_new_i64();
9122 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9123 write_fp_dreg(s, rd, tcg_rd);
9124 }
9125 break;
9126 }
9127 default:
9128 g_assert_not_reached();
9129 }
9130 }
9131
9132 /* Floating point data-processing (1 source)
9133 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0
9134 * +---+---+---+-----------+------+---+--------+-----------+------+------+
9135 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd |
9136 * +---+---+---+-----------+------+---+--------+-----------+------+------+
9137 */
9138 static void disas_fp_1src(DisasContext *s, uint32_t insn)
9139 {
9140 int mos = extract32(insn, 29, 3);
9141 int type = extract32(insn, 22, 2);
9142 int opcode = extract32(insn, 15, 6);
9143 int rn = extract32(insn, 5, 5);
9144 int rd = extract32(insn, 0, 5);
9145
9146 if (mos) {
9147 goto do_unallocated;
9148 }
9149
9150 switch (opcode) {
9151 case 0x4: case 0x5: case 0x7:
9152 {
9153 /* FCVT between half, single and double precision */
9154 int dtype = extract32(opcode, 0, 2);
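        /* type encodes the source precision and dtype the destination:
         * 0 = single, 1 = double, 3 = half.  type == 2 is reserved and a
         * conversion to the same precision is unallocated.
         */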
9155 if (type == 2 || dtype == type) {
9156 goto do_unallocated;
9157 }
9158 if (!fp_access_check(s)) {
9159 return;
9160 }
9161
9162 handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
9163 break;
9164 }
9165
9166 case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
9167 if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
9168 goto do_unallocated;
9169 }
9170 /* fall through */
9171 case 0x0 ... 0x3:
9172 case 0x8 ... 0xc:
9173 case 0xe ... 0xf:
9174 /* 32-to-32 and 64-to-64 ops */
9175 switch (type) {
9176 case 0:
9177 if (!fp_access_check(s)) {
9178 return;
9179 }
9180 handle_fp_1src_single(s, opcode, rd, rn);
9181 break;
9182 case 1:
9183 if (!fp_access_check(s)) {
9184 return;
9185 }
9186 handle_fp_1src_double(s, opcode, rd, rn);
9187 break;
9188 case 3:
9189 if (!dc_isar_feature(aa64_fp16, s)) {
9190 goto do_unallocated;
9191 }
9192
9193 if (!fp_access_check(s)) {
9194 return;
9195 }
9196 handle_fp_1src_half(s, opcode, rd, rn);
9197 break;
9198 default:
9199 goto do_unallocated;
9200 }
9201 break;
9202
9203 case 0x6:
9204 switch (type) {
9205 case 1: /* BFCVT */
9206 if (!dc_isar_feature(aa64_bf16, s)) {
9207 goto do_unallocated;
9208 }
9209 if (!fp_access_check(s)) {
9210 return;
9211 }
9212 handle_fp_1src_single(s, opcode, rd, rn);
9213 break;
9214 default:
9215 goto do_unallocated;
9216 }
9217 break;
9218
9219 default:
9220 do_unallocated:
9221 unallocated_encoding(s);
9222 break;
9223 }
9224 }
9225
9226 /* Handle floating point <=> fixed point conversions. Note that we can
9227 * also deal with fp <=> integer conversions as a special case (scale == 64)
9228 * OPTME: consider handling that special case specially or at least skipping
9229 * the call to scalbn in the helpers for zero shifts.
9230 */
9231 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
9232 bool itof, int rmode, int scale, int sf, int type)
9233 {
9234 bool is_signed = !(opcode & 1);
9235 TCGv_ptr tcg_fpstatus;
9236 TCGv_i32 tcg_shift, tcg_single;
9237 TCGv_i64 tcg_double;
9238
9239 tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);
9240
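    /* The helpers take the number of fractional bits; for the fixed-point
     * forms that is 64 - scale, and the integer forms pass scale == 64 so
     * the shift is zero.
     */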
9241 tcg_shift = tcg_constant_i32(64 - scale);
9242
9243 if (itof) {
9244 TCGv_i64 tcg_int = cpu_reg(s, rn);
9245 if (!sf) {
9246 TCGv_i64 tcg_extend = tcg_temp_new_i64();
9247
9248 if (is_signed) {
9249 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
9250 } else {
9251 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
9252 }
9253
9254 tcg_int = tcg_extend;
9255 }
9256
9257 switch (type) {
9258 case 1: /* float64 */
9259 tcg_double = tcg_temp_new_i64();
9260 if (is_signed) {
9261 gen_helper_vfp_sqtod(tcg_double, tcg_int,
9262 tcg_shift, tcg_fpstatus);
9263 } else {
9264 gen_helper_vfp_uqtod(tcg_double, tcg_int,
9265 tcg_shift, tcg_fpstatus);
9266 }
9267 write_fp_dreg(s, rd, tcg_double);
9268 break;
9269
9270 case 0: /* float32 */
9271 tcg_single = tcg_temp_new_i32();
9272 if (is_signed) {
9273 gen_helper_vfp_sqtos(tcg_single, tcg_int,
9274 tcg_shift, tcg_fpstatus);
9275 } else {
9276 gen_helper_vfp_uqtos(tcg_single, tcg_int,
9277 tcg_shift, tcg_fpstatus);
9278 }
9279 write_fp_sreg(s, rd, tcg_single);
9280 break;
9281
9282 case 3: /* float16 */
9283 tcg_single = tcg_temp_new_i32();
9284 if (is_signed) {
9285 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
9286 tcg_shift, tcg_fpstatus);
9287 } else {
9288 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
9289 tcg_shift, tcg_fpstatus);
9290 }
9291 write_fp_sreg(s, rd, tcg_single);
9292 break;
9293
9294 default:
9295 g_assert_not_reached();
9296 }
9297 } else {
9298 TCGv_i64 tcg_int = cpu_reg(s, rd);
9299 TCGv_i32 tcg_rmode;
9300
9301 if (extract32(opcode, 2, 1)) {
9302 /* There are too many rounding modes to all fit into rmode,
9303 * so FCVTA[US] is a special case.
9304 */
9305 rmode = FPROUNDING_TIEAWAY;
9306 }
9307
9308 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
9309
9310 switch (type) {
9311 case 1: /* float64 */
9312 tcg_double = read_fp_dreg(s, rn);
9313 if (is_signed) {
9314 if (!sf) {
9315 gen_helper_vfp_tosld(tcg_int, tcg_double,
9316 tcg_shift, tcg_fpstatus);
9317 } else {
9318 gen_helper_vfp_tosqd(tcg_int, tcg_double,
9319 tcg_shift, tcg_fpstatus);
9320 }
9321 } else {
9322 if (!sf) {
9323 gen_helper_vfp_tould(tcg_int, tcg_double,
9324 tcg_shift, tcg_fpstatus);
9325 } else {
9326 gen_helper_vfp_touqd(tcg_int, tcg_double,
9327 tcg_shift, tcg_fpstatus);
9328 }
9329 }
9330 if (!sf) {
9331 tcg_gen_ext32u_i64(tcg_int, tcg_int);
9332 }
9333 break;
9334
9335 case 0: /* float32 */
9336 tcg_single = read_fp_sreg(s, rn);
9337 if (sf) {
9338 if (is_signed) {
9339 gen_helper_vfp_tosqs(tcg_int, tcg_single,
9340 tcg_shift, tcg_fpstatus);
9341 } else {
9342 gen_helper_vfp_touqs(tcg_int, tcg_single,
9343 tcg_shift, tcg_fpstatus);
9344 }
9345 } else {
9346 TCGv_i32 tcg_dest = tcg_temp_new_i32();
9347 if (is_signed) {
9348 gen_helper_vfp_tosls(tcg_dest, tcg_single,
9349 tcg_shift, tcg_fpstatus);
9350 } else {
9351 gen_helper_vfp_touls(tcg_dest, tcg_single,
9352 tcg_shift, tcg_fpstatus);
9353 }
9354 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
9355 }
9356 break;
9357
9358 case 3: /* float16 */
9359 tcg_single = read_fp_sreg(s, rn);
9360 if (sf) {
9361 if (is_signed) {
9362 gen_helper_vfp_tosqh(tcg_int, tcg_single,
9363 tcg_shift, tcg_fpstatus);
9364 } else {
9365 gen_helper_vfp_touqh(tcg_int, tcg_single,
9366 tcg_shift, tcg_fpstatus);
9367 }
9368 } else {
9369 TCGv_i32 tcg_dest = tcg_temp_new_i32();
9370 if (is_signed) {
9371 gen_helper_vfp_toslh(tcg_dest, tcg_single,
9372 tcg_shift, tcg_fpstatus);
9373 } else {
9374 gen_helper_vfp_toulh(tcg_dest, tcg_single,
9375 tcg_shift, tcg_fpstatus);
9376 }
9377 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
9378 }
9379 break;
9380
9381 default:
9382 g_assert_not_reached();
9383 }
9384
9385 gen_restore_rmode(tcg_rmode, tcg_fpstatus);
9386 }
9387 }
9388
9389 /* Floating point <-> fixed point conversions
9390 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
9391 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
9392 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd |
9393 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
9394 */
9395 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
9396 {
9397 int rd = extract32(insn, 0, 5);
9398 int rn = extract32(insn, 5, 5);
9399 int scale = extract32(insn, 10, 6);
9400 int opcode = extract32(insn, 16, 3);
9401 int rmode = extract32(insn, 19, 2);
9402 int type = extract32(insn, 22, 2);
9403 bool sbit = extract32(insn, 29, 1);
9404 bool sf = extract32(insn, 31, 1);
9405 bool itof;
9406
9407 if (sbit || (!sf && scale < 32)) {
9408 unallocated_encoding(s);
9409 return;
9410 }
9411
9412 switch (type) {
9413 case 0: /* float32 */
9414 case 1: /* float64 */
9415 break;
9416 case 3: /* float16 */
9417 if (dc_isar_feature(aa64_fp16, s)) {
9418 break;
9419 }
9420 /* fallthru */
9421 default:
9422 unallocated_encoding(s);
9423 return;
9424 }
9425
9426 switch ((rmode << 3) | opcode) {
9427 case 0x2: /* SCVTF */
9428 case 0x3: /* UCVTF */
9429 itof = true;
9430 break;
9431 case 0x18: /* FCVTZS */
9432 case 0x19: /* FCVTZU */
9433 itof = false;
9434 break;
9435 default:
9436 unallocated_encoding(s);
9437 return;
9438 }
9439
9440 if (!fp_access_check(s)) {
9441 return;
9442 }
9443
9444 handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
9445 }
9446
9447 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
9448 {
9449 /* FMOV: gpr to or from float, double, or top half of quad fp reg,
9450 * without conversion.
9451 */
9452
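    /* type selects the register view: 0 = 32-bit (Sn), 1 = 64-bit (Dn),
     * 2 = upper 64 bits of the 128-bit Qn register, 3 = 16-bit (Hn),
     * as handled by the switches below.
     */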
9453 if (itof) {
9454 TCGv_i64 tcg_rn = cpu_reg(s, rn);
9455 TCGv_i64 tmp;
9456
9457 switch (type) {
9458 case 0:
9459 /* 32 bit */
9460 tmp = tcg_temp_new_i64();
9461 tcg_gen_ext32u_i64(tmp, tcg_rn);
9462 write_fp_dreg(s, rd, tmp);
9463 break;
9464 case 1:
9465 /* 64 bit */
9466 write_fp_dreg(s, rd, tcg_rn);
9467 break;
9468 case 2:
9469 /* 64 bit to top half. */
9470 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, rd));
9471 clear_vec_high(s, true, rd);
9472 break;
9473 case 3:
9474 /* 16 bit */
9475 tmp = tcg_temp_new_i64();
9476 tcg_gen_ext16u_i64(tmp, tcg_rn);
9477 write_fp_dreg(s, rd, tmp);
9478 break;
9479 default:
9480 g_assert_not_reached();
9481 }
9482 } else {
9483 TCGv_i64 tcg_rd = cpu_reg(s, rd);
9484
9485 switch (type) {
9486 case 0:
9487 /* 32 bit */
9488 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_32));
9489 break;
9490 case 1:
9491 /* 64 bit */
9492 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_64));
9493 break;
9494 case 2:
9495 /* 64 bits from top half */
9496 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, rn));
9497 break;
9498 case 3:
9499 /* 16 bit */
9500 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_16));
9501 break;
9502 default:
9503 g_assert_not_reached();
9504 }
9505 }
9506 }
9507
9508 static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
9509 {
9510 TCGv_i64 t = read_fp_dreg(s, rn);
9511 TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
9512
9513 gen_helper_fjcvtzs(t, t, fpstatus);
9514
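    /* The helper packs the 32-bit result into the low half of its return
     * value and the cpu_ZF representation of Z (conversion was exact) into
     * the high half; N, C and V are always cleared.
     */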
9515 tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
9516 tcg_gen_extrh_i64_i32(cpu_ZF, t);
9517 tcg_gen_movi_i32(cpu_CF, 0);
9518 tcg_gen_movi_i32(cpu_NF, 0);
9519 tcg_gen_movi_i32(cpu_VF, 0);
9520 }
9521
9522 /* Floating point <-> integer conversions
9523 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
9524 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
9525 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
9526 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
9527 */
9528 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
9529 {
9530 int rd = extract32(insn, 0, 5);
9531 int rn = extract32(insn, 5, 5);
9532 int opcode = extract32(insn, 16, 3);
9533 int rmode = extract32(insn, 19, 2);
9534 int type = extract32(insn, 22, 2);
9535 bool sbit = extract32(insn, 29, 1);
9536 bool sf = extract32(insn, 31, 1);
9537 bool itof = false;
9538
9539 if (sbit) {
9540 goto do_unallocated;
9541 }
9542
9543 switch (opcode) {
9544 case 2: /* SCVTF */
9545 case 3: /* UCVTF */
9546 itof = true;
9547 /* fallthru */
9548 case 4: /* FCVTAS */
9549 case 5: /* FCVTAU */
9550 if (rmode != 0) {
9551 goto do_unallocated;
9552 }
9553 /* fallthru */
9554 case 0: /* FCVT[NPMZ]S */
9555 case 1: /* FCVT[NPMZ]U */
9556 switch (type) {
9557 case 0: /* float32 */
9558 case 1: /* float64 */
9559 break;
9560 case 3: /* float16 */
9561 if (!dc_isar_feature(aa64_fp16, s)) {
9562 goto do_unallocated;
9563 }
9564 break;
9565 default:
9566 goto do_unallocated;
9567 }
9568 if (!fp_access_check(s)) {
9569 return;
9570 }
9571 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
9572 break;
9573
9574 default:
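        /* Composite key: sf in bit 7, type in bits [6:5], rmode in
         * bits [4:3], opcode in bits [2:0].
         */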
9575 switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
9576 case 0b01100110: /* FMOV half <-> 32-bit int */
9577 case 0b01100111:
9578 case 0b11100110: /* FMOV half <-> 64-bit int */
9579 case 0b11100111:
9580 if (!dc_isar_feature(aa64_fp16, s)) {
9581 goto do_unallocated;
9582 }
9583 /* fallthru */
9584 case 0b00000110: /* FMOV 32-bit */
9585 case 0b00000111:
9586 case 0b10100110: /* FMOV 64-bit */
9587 case 0b10100111:
9588 case 0b11001110: /* FMOV top half of 128-bit */
9589 case 0b11001111:
9590 if (!fp_access_check(s)) {
9591 return;
9592 }
9593 itof = opcode & 1;
9594 handle_fmov(s, rd, rn, type, itof);
9595 break;
9596
9597 case 0b00111110: /* FJCVTZS */
9598 if (!dc_isar_feature(aa64_jscvt, s)) {
9599 goto do_unallocated;
9600 } else if (fp_access_check(s)) {
9601 handle_fjcvtzs(s, rd, rn);
9602 }
9603 break;
9604
9605 default:
9606 do_unallocated:
9607 unallocated_encoding(s);
9608 return;
9609 }
9610 break;
9611 }
9612 }
9613
9614 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
9615 * 31 30 29 28 25 24 0
9616 * +---+---+---+---------+-----------------------------+
9617 * | | 0 | | 1 1 1 1 | |
9618 * +---+---+---+---------+-----------------------------+
9619 */
9620 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
9621 {
9622 if (extract32(insn, 24, 1)) {
9623 unallocated_encoding(s); /* in decodetree */
9624 } else if (extract32(insn, 21, 1) == 0) {
9625 /* Floating point to fixed point conversions */
9626 disas_fp_fixed_conv(s, insn);
9627 } else {
9628 switch (extract32(insn, 10, 2)) {
9629 case 1:
9630 /* Floating point conditional compare */
9631 disas_fp_ccomp(s, insn);
9632 break;
9633 case 2:
9634 /* Floating point data-processing (2 source) */
9635 unallocated_encoding(s); /* in decodetree */
9636 break;
9637 case 3:
9638 /* Floating point conditional select */
9639 unallocated_encoding(s); /* in decodetree */
9640 break;
9641 case 0:
9642 switch (ctz32(extract32(insn, 12, 4))) {
9643 case 0: /* [15:12] == xxx1 */
9644 /* Floating point immediate */
9645 unallocated_encoding(s); /* in decodetree */
9646 break;
9647 case 1: /* [15:12] == xx10 */
9648 /* Floating point compare */
9649 disas_fp_compare(s, insn);
9650 break;
9651 case 2: /* [15:12] == x100 */
9652 /* Floating point data-processing (1 source) */
9653 disas_fp_1src(s, insn);
9654 break;
9655 case 3: /* [15:12] == 1000 */
9656 unallocated_encoding(s);
9657 break;
9658 default: /* [15:12] == 0000 */
9659 /* Floating point <-> integer conversions */
9660 disas_fp_int_conv(s, insn);
9661 break;
9662 }
9663 break;
9664 }
9665 }
9666 }
9667
9668 /* Common vector code for handling integer to FP conversion */
9669 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
9670 int elements, int is_signed,
9671 int fracbits, int size)
9672 {
9673 TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9674 TCGv_i32 tcg_shift = NULL;
9675
9676 MemOp mop = size | (is_signed ? MO_SIGN : 0);
9677 int pass;
9678
9679 if (fracbits || size == MO_64) {
9680 tcg_shift = tcg_constant_i32(fracbits);
9681 }
9682
9683 if (size == MO_64) {
9684 TCGv_i64 tcg_int64 = tcg_temp_new_i64();
9685 TCGv_i64 tcg_double = tcg_temp_new_i64();
9686
9687 for (pass = 0; pass < elements; pass++) {
9688 read_vec_element(s, tcg_int64, rn, pass, mop);
9689
9690 if (is_signed) {
9691 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
9692 tcg_shift, tcg_fpst);
9693 } else {
9694 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
9695 tcg_shift, tcg_fpst);
9696 }
9697 if (elements == 1) {
9698 write_fp_dreg(s, rd, tcg_double);
9699 } else {
9700 write_vec_element(s, tcg_double, rd, pass, MO_64);
9701 }
9702 }
9703 } else {
9704 TCGv_i32 tcg_int32 = tcg_temp_new_i32();
9705 TCGv_i32 tcg_float = tcg_temp_new_i32();
9706
9707 for (pass = 0; pass < elements; pass++) {
9708 read_vec_element_i32(s, tcg_int32, rn, pass, mop);
9709
9710 switch (size) {
9711 case MO_32:
9712 if (fracbits) {
9713 if (is_signed) {
9714 gen_helper_vfp_sltos(tcg_float, tcg_int32,
9715 tcg_shift, tcg_fpst);
9716 } else {
9717 gen_helper_vfp_ultos(tcg_float, tcg_int32,
9718 tcg_shift, tcg_fpst);
9719 }
9720 } else {
9721 if (is_signed) {
9722 gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
9723 } else {
9724 gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
9725 }
9726 }
9727 break;
9728 case MO_16:
9729 if (fracbits) {
9730 if (is_signed) {
9731 gen_helper_vfp_sltoh(tcg_float, tcg_int32,
9732 tcg_shift, tcg_fpst);
9733 } else {
9734 gen_helper_vfp_ultoh(tcg_float, tcg_int32,
9735 tcg_shift, tcg_fpst);
9736 }
9737 } else {
9738 if (is_signed) {
9739 gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
9740 } else {
9741 gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
9742 }
9743 }
9744 break;
9745 default:
9746 g_assert_not_reached();
9747 }
9748
9749 if (elements == 1) {
9750 write_fp_sreg(s, rd, tcg_float);
9751 } else {
9752 write_vec_element_i32(s, tcg_float, rd, pass, size);
9753 }
9754 }
9755 }
9756
9757 clear_vec_high(s, (elements << size) == 16, rd);
9758 }
9759
9760 /* UCVTF/SCVTF - Integer to FP conversion */
9761 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
9762 bool is_q, bool is_u,
9763 int immh, int immb, int opcode,
9764 int rn, int rd)
9765 {
9766 int size, elements, fracbits;
9767 int immhb = immh << 3 | immb;
9768
9769 if (immh & 8) {
9770 size = MO_64;
9771 if (!is_scalar && !is_q) {
9772 unallocated_encoding(s);
9773 return;
9774 }
9775 } else if (immh & 4) {
9776 size = MO_32;
9777 } else if (immh & 2) {
9778 size = MO_16;
9779 if (!dc_isar_feature(aa64_fp16, s)) {
9780 unallocated_encoding(s);
9781 return;
9782 }
9783 } else {
9784 /* immh == 0 would be a failure of the decode logic */
9785 g_assert(immh == 1);
9786 unallocated_encoding(s);
9787 return;
9788 }
9789
9790 if (is_scalar) {
9791 elements = 1;
9792 } else {
9793 elements = (8 << is_q) >> size;
9794 }
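    /* The fixed-point fraction width is (2 * esize) - (immh:immb). */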
9795 fracbits = (16 << size) - immhb;
9796
9797 if (!fp_access_check(s)) {
9798 return;
9799 }
9800
9801 handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
9802 }
9803
9804 /* FCVTZS, FCVTZU - FP to fixed-point conversion */
9805 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
9806 bool is_q, bool is_u,
9807 int immh, int immb, int rn, int rd)
9808 {
9809 int immhb = immh << 3 | immb;
9810 int pass, size, fracbits;
9811 TCGv_ptr tcg_fpstatus;
9812 TCGv_i32 tcg_rmode, tcg_shift;
9813
9814 if (immh & 0x8) {
9815 size = MO_64;
9816 if (!is_scalar && !is_q) {
9817 unallocated_encoding(s);
9818 return;
9819 }
9820 } else if (immh & 0x4) {
9821 size = MO_32;
9822 } else if (immh & 0x2) {
9823 size = MO_16;
9824 if (!dc_isar_feature(aa64_fp16, s)) {
9825 unallocated_encoding(s);
9826 return;
9827 }
9828 } else {
9829 /* Should have split out AdvSIMD modified immediate earlier. */
9830 assert(immh == 1);
9831 unallocated_encoding(s);
9832 return;
9833 }
9834
9835 if (!fp_access_check(s)) {
9836 return;
9837 }
9838
9839 assert(!(is_scalar && is_q));
9840
9841 tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
9842 tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
9843 fracbits = (16 << size) - immhb;
9844 tcg_shift = tcg_constant_i32(fracbits);
9845
9846 if (size == MO_64) {
9847 int maxpass = is_scalar ? 1 : 2;
9848
9849 for (pass = 0; pass < maxpass; pass++) {
9850 TCGv_i64 tcg_op = tcg_temp_new_i64();
9851
9852 read_vec_element(s, tcg_op, rn, pass, MO_64);
9853 if (is_u) {
9854 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9855 } else {
9856 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9857 }
9858 write_vec_element(s, tcg_op, rd, pass, MO_64);
9859 }
9860 clear_vec_high(s, is_q, rd);
9861 } else {
9862 void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
9863 int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
9864
9865 switch (size) {
9866 case MO_16:
9867 if (is_u) {
9868 fn = gen_helper_vfp_touhh;
9869 } else {
9870 fn = gen_helper_vfp_toshh;
9871 }
9872 break;
9873 case MO_32:
9874 if (is_u) {
9875 fn = gen_helper_vfp_touls;
9876 } else {
9877 fn = gen_helper_vfp_tosls;
9878 }
9879 break;
9880 default:
9881 g_assert_not_reached();
9882 }
9883
9884 for (pass = 0; pass < maxpass; pass++) {
9885 TCGv_i32 tcg_op = tcg_temp_new_i32();
9886
9887 read_vec_element_i32(s, tcg_op, rn, pass, size);
9888 fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9889 if (is_scalar) {
9890 if (size == MO_16 && !is_u) {
9891 tcg_gen_ext16u_i32(tcg_op, tcg_op);
9892 }
9893 write_fp_sreg(s, rd, tcg_op);
9894 } else {
9895 write_vec_element_i32(s, tcg_op, rd, pass, size);
9896 }
9897 }
9898 if (!is_scalar) {
9899 clear_vec_high(s, is_q, rd);
9900 }
9901 }
9902
9903 gen_restore_rmode(tcg_rmode, tcg_fpstatus);
9904 }
9905
9906 /* AdvSIMD scalar shift by immediate
9907 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
9908 * +-----+---+-------------+------+------+--------+---+------+------+
9909 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
9910 * +-----+---+-------------+------+------+--------+---+------+------+
9911 *
9912  * This is the scalar version, so it operates on fixed-size registers.
9913 */
9914 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
9915 {
9916 int rd = extract32(insn, 0, 5);
9917 int rn = extract32(insn, 5, 5);
9918 int opcode = extract32(insn, 11, 5);
9919 int immb = extract32(insn, 16, 3);
9920 int immh = extract32(insn, 19, 4);
9921 bool is_u = extract32(insn, 29, 1);
9922
9923 if (immh == 0) {
9924 unallocated_encoding(s);
9925 return;
9926 }
9927
9928 switch (opcode) {
9929 case 0x1c: /* SCVTF, UCVTF */
9930 handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
9931 opcode, rn, rd);
9932 break;
9933 case 0x1f: /* FCVTZS, FCVTZU */
9934 handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
9935 break;
9936 default:
9937 case 0x00: /* SSHR / USHR */
9938 case 0x02: /* SSRA / USRA */
9939 case 0x04: /* SRSHR / URSHR */
9940 case 0x06: /* SRSRA / URSRA */
9941 case 0x08: /* SRI */
9942 case 0x0a: /* SHL / SLI */
9943 case 0x0c: /* SQSHLU */
9944 case 0x0e: /* SQSHL, UQSHL */
9945 case 0x10: /* SQSHRUN */
9946 case 0x11: /* SQRSHRUN */
9947 case 0x12: /* SQSHRN, UQSHRN */
9948 case 0x13: /* SQRSHRN, UQRSHRN */
9949 unallocated_encoding(s);
9950 break;
9951 }
9952 }
9953
9954 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9955 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9956 TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9957 {
9958 /* Handle 64->64 opcodes which are shared between the scalar and
9959 * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9960 * is valid in either group and also the double-precision fp ops.
9961 * The caller need only provide tcg_rmode and tcg_fpstatus if the op
9962 * requires them.
9963 */
9964 TCGCond cond;
9965
9966 switch (opcode) {
9967 case 0x4: /* CLS, CLZ */
9968 if (u) {
9969 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9970 } else {
9971 tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9972 }
9973 break;
9974 case 0x5: /* NOT */
9975 /* This opcode is shared with CNT and RBIT but we have earlier
9976 * enforced that size == 3 if and only if this is the NOT insn.
9977 */
9978 tcg_gen_not_i64(tcg_rd, tcg_rn);
9979 break;
9980 case 0x7: /* SQABS, SQNEG */
9981 if (u) {
9982 gen_helper_neon_qneg_s64(tcg_rd, tcg_env, tcg_rn);
9983 } else {
9984 gen_helper_neon_qabs_s64(tcg_rd, tcg_env, tcg_rn);
9985 }
9986 break;
9987 case 0xa: /* CMLT */
9988 cond = TCG_COND_LT;
9989 do_cmop:
9990 /* 64 bit integer comparison against zero, result is test ? -1 : 0. */
9991 tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0));
9992 break;
9993 case 0x8: /* CMGT, CMGE */
9994 cond = u ? TCG_COND_GE : TCG_COND_GT;
9995 goto do_cmop;
9996 case 0x9: /* CMEQ, CMLE */
9997 cond = u ? TCG_COND_LE : TCG_COND_EQ;
9998 goto do_cmop;
9999 case 0xb: /* ABS, NEG */
10000 if (u) {
10001 tcg_gen_neg_i64(tcg_rd, tcg_rn);
10002 } else {
10003 tcg_gen_abs_i64(tcg_rd, tcg_rn);
10004 }
10005 break;
10006 case 0x2f: /* FABS */
10007 gen_vfp_absd(tcg_rd, tcg_rn);
10008 break;
10009 case 0x6f: /* FNEG */
10010 gen_vfp_negd(tcg_rd, tcg_rn);
10011 break;
10012 case 0x7f: /* FSQRT */
10013 gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env);
10014 break;
10015 case 0x1a: /* FCVTNS */
10016 case 0x1b: /* FCVTMS */
10017 case 0x1c: /* FCVTAS */
10018 case 0x3a: /* FCVTPS */
10019 case 0x3b: /* FCVTZS */
10020 gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
10021 break;
10022 case 0x5a: /* FCVTNU */
10023 case 0x5b: /* FCVTMU */
10024 case 0x5c: /* FCVTAU */
10025 case 0x7a: /* FCVTPU */
10026 case 0x7b: /* FCVTZU */
10027 gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
10028 break;
10029 case 0x18: /* FRINTN */
10030 case 0x19: /* FRINTM */
10031 case 0x38: /* FRINTP */
10032 case 0x39: /* FRINTZ */
10033 case 0x58: /* FRINTA */
10034 case 0x79: /* FRINTI */
10035 gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
10036 break;
10037 case 0x59: /* FRINTX */
10038 gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
10039 break;
10040 case 0x1e: /* FRINT32Z */
10041 case 0x5e: /* FRINT32X */
10042 gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
10043 break;
10044 case 0x1f: /* FRINT64Z */
10045 case 0x5f: /* FRINT64X */
10046 gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
10047 break;
10048 default:
10049 g_assert_not_reached();
10050 }
10051 }
10052
10053 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
10054 bool is_scalar, bool is_u, bool is_q,
10055 int size, int rn, int rd)
10056 {
10057 bool is_double = (size == MO_64);
10058 TCGv_ptr fpst;
10059
10060 if (!fp_access_check(s)) {
10061 return;
10062 }
10063
10064 fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
10065
10066 if (is_double) {
10067 TCGv_i64 tcg_op = tcg_temp_new_i64();
10068 TCGv_i64 tcg_zero = tcg_constant_i64(0);
10069 TCGv_i64 tcg_res = tcg_temp_new_i64();
10070 NeonGenTwoDoubleOpFn *genfn;
10071 bool swap = false;
10072 int pass;
10073
10074 switch (opcode) {
10075 case 0x2e: /* FCMLT (zero) */
10076 swap = true;
10077 /* fallthrough */
10078 case 0x2c: /* FCMGT (zero) */
10079 genfn = gen_helper_neon_cgt_f64;
10080 break;
10081 case 0x2d: /* FCMEQ (zero) */
10082 genfn = gen_helper_neon_ceq_f64;
10083 break;
10084 case 0x6d: /* FCMLE (zero) */
10085 swap = true;
10086 /* fall through */
10087 case 0x6c: /* FCMGE (zero) */
10088 genfn = gen_helper_neon_cge_f64;
10089 break;
10090 default:
10091 g_assert_not_reached();
10092 }
10093
10094 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10095 read_vec_element(s, tcg_op, rn, pass, MO_64);
10096 if (swap) {
10097 genfn(tcg_res, tcg_zero, tcg_op, fpst);
10098 } else {
10099 genfn(tcg_res, tcg_op, tcg_zero, fpst);
10100 }
10101 write_vec_element(s, tcg_res, rd, pass, MO_64);
10102 }
10103
10104 clear_vec_high(s, !is_scalar, rd);
10105 } else {
10106 TCGv_i32 tcg_op = tcg_temp_new_i32();
10107 TCGv_i32 tcg_zero = tcg_constant_i32(0);
10108 TCGv_i32 tcg_res = tcg_temp_new_i32();
10109 NeonGenTwoSingleOpFn *genfn;
10110 bool swap = false;
10111 int pass, maxpasses;
10112
10113 if (size == MO_16) {
10114 switch (opcode) {
10115 case 0x2e: /* FCMLT (zero) */
10116 swap = true;
10117 /* fall through */
10118 case 0x2c: /* FCMGT (zero) */
10119 genfn = gen_helper_advsimd_cgt_f16;
10120 break;
10121 case 0x2d: /* FCMEQ (zero) */
10122 genfn = gen_helper_advsimd_ceq_f16;
10123 break;
10124 case 0x6d: /* FCMLE (zero) */
10125 swap = true;
10126 /* fall through */
10127 case 0x6c: /* FCMGE (zero) */
10128 genfn = gen_helper_advsimd_cge_f16;
10129 break;
10130 default:
10131 g_assert_not_reached();
10132 }
10133 } else {
10134 switch (opcode) {
10135 case 0x2e: /* FCMLT (zero) */
10136 swap = true;
10137 /* fall through */
10138 case 0x2c: /* FCMGT (zero) */
10139 genfn = gen_helper_neon_cgt_f32;
10140 break;
10141 case 0x2d: /* FCMEQ (zero) */
10142 genfn = gen_helper_neon_ceq_f32;
10143 break;
10144 case 0x6d: /* FCMLE (zero) */
10145 swap = true;
10146 /* fall through */
10147 case 0x6c: /* FCMGE (zero) */
10148 genfn = gen_helper_neon_cge_f32;
10149 break;
10150 default:
10151 g_assert_not_reached();
10152 }
10153 }
10154
10155 if (is_scalar) {
10156 maxpasses = 1;
10157 } else {
10158 int vector_size = 8 << is_q;
10159 maxpasses = vector_size >> size;
10160 }
10161
10162 for (pass = 0; pass < maxpasses; pass++) {
10163 read_vec_element_i32(s, tcg_op, rn, pass, size);
10164 if (swap) {
10165 genfn(tcg_res, tcg_zero, tcg_op, fpst);
10166 } else {
10167 genfn(tcg_res, tcg_op, tcg_zero, fpst);
10168 }
10169 if (is_scalar) {
10170 write_fp_sreg(s, rd, tcg_res);
10171 } else {
10172 write_vec_element_i32(s, tcg_res, rd, pass, size);
10173 }
10174 }
10175
10176 if (!is_scalar) {
10177 clear_vec_high(s, is_q, rd);
10178 }
10179 }
10180 }
10181
10182 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
10183 bool is_scalar, bool is_u, bool is_q,
10184 int size, int rn, int rd)
10185 {
10186 bool is_double = (size == 3);
10187 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10188
10189 if (is_double) {
10190 TCGv_i64 tcg_op = tcg_temp_new_i64();
10191 TCGv_i64 tcg_res = tcg_temp_new_i64();
10192 int pass;
10193
10194 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10195 read_vec_element(s, tcg_op, rn, pass, MO_64);
10196 switch (opcode) {
10197 case 0x3d: /* FRECPE */
10198 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
10199 break;
10200 case 0x3f: /* FRECPX */
10201 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
10202 break;
10203 case 0x7d: /* FRSQRTE */
10204 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
10205 break;
10206 default:
10207 g_assert_not_reached();
10208 }
10209 write_vec_element(s, tcg_res, rd, pass, MO_64);
10210 }
10211 clear_vec_high(s, !is_scalar, rd);
10212 } else {
10213 TCGv_i32 tcg_op = tcg_temp_new_i32();
10214 TCGv_i32 tcg_res = tcg_temp_new_i32();
10215 int pass, maxpasses;
10216
10217 if (is_scalar) {
10218 maxpasses = 1;
10219 } else {
10220 maxpasses = is_q ? 4 : 2;
10221 }
10222
10223 for (pass = 0; pass < maxpasses; pass++) {
10224 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10225
10226 switch (opcode) {
10227 case 0x3c: /* URECPE */
10228 gen_helper_recpe_u32(tcg_res, tcg_op);
10229 break;
10230 case 0x3d: /* FRECPE */
10231 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
10232 break;
10233 case 0x3f: /* FRECPX */
10234 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
10235 break;
10236 case 0x7d: /* FRSQRTE */
10237 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
10238 break;
10239 default:
10240 g_assert_not_reached();
10241 }
10242
10243 if (is_scalar) {
10244 write_fp_sreg(s, rd, tcg_res);
10245 } else {
10246 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10247 }
10248 }
10249 if (!is_scalar) {
10250 clear_vec_high(s, is_q, rd);
10251 }
10252 }
10253 }
10254
10255 static void handle_2misc_narrow(DisasContext *s, bool scalar,
10256 int opcode, bool u, bool is_q,
10257 int size, int rn, int rd)
10258 {
10259 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
10260 * in the source becomes a size element in the destination).
10261 */
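    /* The "2" forms (is_q) deposit the narrowed results into the upper
     * half of Rd and leave the lower half intact; the non-"2" forms write
     * the lower half and clear the upper half.
     */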
10262 int pass;
10263 TCGv_i64 tcg_res[2];
10264 int destelt = is_q ? 2 : 0;
10265 int passes = scalar ? 1 : 2;
10266
10267 if (scalar) {
10268 tcg_res[1] = tcg_constant_i64(0);
10269 }
10270
10271 for (pass = 0; pass < passes; pass++) {
10272 TCGv_i64 tcg_op = tcg_temp_new_i64();
10273 NeonGenOne64OpFn *genfn = NULL;
10274 NeonGenOne64OpEnvFn *genenvfn = NULL;
10275
10276 if (scalar) {
10277 read_vec_element(s, tcg_op, rn, pass, size + 1);
10278 } else {
10279 read_vec_element(s, tcg_op, rn, pass, MO_64);
10280 }
10281 tcg_res[pass] = tcg_temp_new_i64();
10282
10283 switch (opcode) {
10284 case 0x12: /* XTN, SQXTUN */
10285 {
10286 static NeonGenOne64OpFn * const xtnfns[3] = {
10287 gen_helper_neon_narrow_u8,
10288 gen_helper_neon_narrow_u16,
10289 tcg_gen_ext32u_i64,
10290 };
10291 static NeonGenOne64OpEnvFn * const sqxtunfns[3] = {
10292 gen_helper_neon_unarrow_sat8,
10293 gen_helper_neon_unarrow_sat16,
10294 gen_helper_neon_unarrow_sat32,
10295 };
10296 if (u) {
10297 genenvfn = sqxtunfns[size];
10298 } else {
10299 genfn = xtnfns[size];
10300 }
10301 break;
10302 }
10303 case 0x14: /* SQXTN, UQXTN */
10304 {
10305 static NeonGenOne64OpEnvFn * const fns[3][2] = {
10306 { gen_helper_neon_narrow_sat_s8,
10307 gen_helper_neon_narrow_sat_u8 },
10308 { gen_helper_neon_narrow_sat_s16,
10309 gen_helper_neon_narrow_sat_u16 },
10310 { gen_helper_neon_narrow_sat_s32,
10311 gen_helper_neon_narrow_sat_u32 },
10312 };
10313 genenvfn = fns[size][u];
10314 break;
10315 }
10316 case 0x16: /* FCVTN, FCVTN2 */
10317 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
10318 if (size == 2) {
10319 TCGv_i32 tmp = tcg_temp_new_i32();
10320 gen_helper_vfp_fcvtsd(tmp, tcg_op, tcg_env);
10321 tcg_gen_extu_i32_i64(tcg_res[pass], tmp);
10322 } else {
10323 TCGv_i32 tcg_lo = tcg_temp_new_i32();
10324 TCGv_i32 tcg_hi = tcg_temp_new_i32();
10325 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10326 TCGv_i32 ahp = get_ahp_flag();
10327
10328 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
10329 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
10330 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
10331 tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16);
10332 tcg_gen_extu_i32_i64(tcg_res[pass], tcg_lo);
10333 }
10334 break;
10335 case 0x36: /* BFCVTN, BFCVTN2 */
10336 {
10337 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10338 TCGv_i32 tmp = tcg_temp_new_i32();
10339 gen_helper_bfcvt_pair(tmp, tcg_op, fpst);
10340 tcg_gen_extu_i32_i64(tcg_res[pass], tmp);
10341 }
10342 break;
10343 case 0x56: /* FCVTXN, FCVTXN2 */
10344 {
10345 /*
10346 * 64 bit to 32 bit float conversion
10347 * with von Neumann rounding (round to odd)
10348 */
10349 TCGv_i32 tmp = tcg_temp_new_i32();
10350 assert(size == 2);
10351 gen_helper_fcvtx_f64_to_f32(tmp, tcg_op, tcg_env);
10352 tcg_gen_extu_i32_i64(tcg_res[pass], tmp);
10353 }
10354 break;
10355 default:
10356 g_assert_not_reached();
10357 }
10358
10359 if (genfn) {
10360 genfn(tcg_res[pass], tcg_op);
10361 } else if (genenvfn) {
10362 genenvfn(tcg_res[pass], tcg_env, tcg_op);
10363 }
10364 }
10365
10366 for (pass = 0; pass < 2; pass++) {
10367 write_vec_element(s, tcg_res[pass], rd, destelt + pass, MO_32);
10368 }
10369 clear_vec_high(s, is_q, rd);
10370 }
10371
10372 /* AdvSIMD scalar two reg misc
10373 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
10374 * +-----+---+-----------+------+-----------+--------+-----+------+------+
10375 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
10376 * +-----+---+-----------+------+-----------+--------+-----+------+------+
10377 */
10378 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
10379 {
10380 int rd = extract32(insn, 0, 5);
10381 int rn = extract32(insn, 5, 5);
10382 int opcode = extract32(insn, 12, 5);
10383 int size = extract32(insn, 22, 2);
10384 bool u = extract32(insn, 29, 1);
10385 bool is_fcvt = false;
10386 int rmode;
10387 TCGv_i32 tcg_rmode;
10388 TCGv_ptr tcg_fpstatus;
10389
10390 switch (opcode) {
10391 case 0x7: /* SQABS / SQNEG */
10392 break;
10393 case 0xa: /* CMLT */
10394 if (u) {
10395 unallocated_encoding(s);
10396 return;
10397 }
10398 /* fall through */
10399 case 0x8: /* CMGT, CMGE */
10400 case 0x9: /* CMEQ, CMLE */
10401 case 0xb: /* ABS, NEG */
10402 if (size != 3) {
10403 unallocated_encoding(s);
10404 return;
10405 }
10406 break;
10407 case 0x12: /* SQXTUN */
10408 if (!u) {
10409 unallocated_encoding(s);
10410 return;
10411 }
10412 /* fall through */
10413 case 0x14: /* SQXTN, UQXTN */
10414 if (size == 3) {
10415 unallocated_encoding(s);
10416 return;
10417 }
10418 if (!fp_access_check(s)) {
10419 return;
10420 }
10421 handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
10422 return;
10423 case 0xc ... 0xf:
10424 case 0x16 ... 0x1d:
10425 case 0x1f:
10426 /* Floating point: U, size[1] and opcode indicate operation;
10427 * size[0] indicates single or double precision.
10428 */
10429 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10430 size = extract32(size, 0, 1) ? 3 : 2;
10431 switch (opcode) {
10432 case 0x2c: /* FCMGT (zero) */
10433 case 0x2d: /* FCMEQ (zero) */
10434 case 0x2e: /* FCMLT (zero) */
10435 case 0x6c: /* FCMGE (zero) */
10436 case 0x6d: /* FCMLE (zero) */
10437 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
10438 return;
10439 case 0x1d: /* SCVTF */
10440 case 0x5d: /* UCVTF */
10441 {
10442 bool is_signed = (opcode == 0x1d);
10443 if (!fp_access_check(s)) {
10444 return;
10445 }
10446 handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
10447 return;
10448 }
10449 case 0x3d: /* FRECPE */
10450 case 0x3f: /* FRECPX */
10451 case 0x7d: /* FRSQRTE */
10452 if (!fp_access_check(s)) {
10453 return;
10454 }
10455 handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
10456 return;
10457 case 0x1a: /* FCVTNS */
10458 case 0x1b: /* FCVTMS */
10459 case 0x3a: /* FCVTPS */
10460 case 0x3b: /* FCVTZS */
10461 case 0x5a: /* FCVTNU */
10462 case 0x5b: /* FCVTMU */
10463 case 0x7a: /* FCVTPU */
10464 case 0x7b: /* FCVTZU */
10465 is_fcvt = true;
10466 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10467 break;
10468 case 0x1c: /* FCVTAS */
10469 case 0x5c: /* FCVTAU */
10470 /* TIEAWAY doesn't fit in the usual rounding mode encoding */
10471 is_fcvt = true;
10472 rmode = FPROUNDING_TIEAWAY;
10473 break;
10474 case 0x56: /* FCVTXN, FCVTXN2 */
10475 if (size == 2) {
10476 unallocated_encoding(s);
10477 return;
10478 }
10479 if (!fp_access_check(s)) {
10480 return;
10481 }
10482 handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
10483 return;
10484 default:
10485 unallocated_encoding(s);
10486 return;
10487 }
10488 break;
10489 default:
10490 case 0x3: /* USQADD / SUQADD */
10491 unallocated_encoding(s);
10492 return;
10493 }
10494
10495 if (!fp_access_check(s)) {
10496 return;
10497 }
10498
10499 if (is_fcvt) {
10500 tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
10501 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
10502 } else {
10503 tcg_fpstatus = NULL;
10504 tcg_rmode = NULL;
10505 }
10506
10507 if (size == 3) {
10508 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
10509 TCGv_i64 tcg_rd = tcg_temp_new_i64();
10510
10511 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
10512 write_fp_dreg(s, rd, tcg_rd);
10513 } else {
10514 TCGv_i32 tcg_rn = tcg_temp_new_i32();
10515 TCGv_i32 tcg_rd = tcg_temp_new_i32();
10516
10517 read_vec_element_i32(s, tcg_rn, rn, 0, size);
10518
10519 switch (opcode) {
10520 case 0x7: /* SQABS, SQNEG */
10521 {
10522 NeonGenOneOpEnvFn *genfn;
10523 static NeonGenOneOpEnvFn * const fns[3][2] = {
10524 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10525 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10526 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
10527 };
10528 genfn = fns[size][u];
10529 genfn(tcg_rd, tcg_env, tcg_rn);
10530 break;
10531 }
10532 case 0x1a: /* FCVTNS */
10533 case 0x1b: /* FCVTMS */
10534 case 0x1c: /* FCVTAS */
10535 case 0x3a: /* FCVTPS */
10536 case 0x3b: /* FCVTZS */
10537 gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10538 tcg_fpstatus);
10539 break;
10540 case 0x5a: /* FCVTNU */
10541 case 0x5b: /* FCVTMU */
10542 case 0x5c: /* FCVTAU */
10543 case 0x7a: /* FCVTPU */
10544 case 0x7b: /* FCVTZU */
10545 gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
10546 tcg_fpstatus);
10547 break;
10548 default:
10549 g_assert_not_reached();
10550 }
10551
10552 write_fp_sreg(s, rd, tcg_rd);
10553 }
10554
10555 if (is_fcvt) {
10556 gen_restore_rmode(tcg_rmode, tcg_fpstatus);
10557 }
10558 }
10559
10560 /* AdvSIMD shift by immediate
10561 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
10562 * +---+---+---+-------------+------+------+--------+---+------+------+
10563 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
10564 * +---+---+---+-------------+------+------+--------+---+------+------+
10565 */
10566 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10567 {
10568 int rd = extract32(insn, 0, 5);
10569 int rn = extract32(insn, 5, 5);
10570 int opcode = extract32(insn, 11, 5);
10571 int immb = extract32(insn, 16, 3);
10572 int immh = extract32(insn, 19, 4);
10573 bool is_u = extract32(insn, 29, 1);
10574 bool is_q = extract32(insn, 30, 1);
10575
10576 if (immh == 0) {
10577 unallocated_encoding(s);
10578 return;
10579 }
10580
10581 switch (opcode) {
10582 case 0x1c: /* SCVTF / UCVTF */
10583 handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10584 opcode, rn, rd);
10585 break;
10586 case 0x1f: /* FCVTZS/ FCVTZU */
10587 handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10588 return;
10589 default:
10590 case 0x00: /* SSHR / USHR */
10591 case 0x02: /* SSRA / USRA (accumulate) */
10592 case 0x04: /* SRSHR / URSHR (rounding) */
10593 case 0x06: /* SRSRA / URSRA (accum + rounding) */
10594 case 0x08: /* SRI */
10595 case 0x0a: /* SHL / SLI */
10596 case 0x0c: /* SQSHLU */
10597 case 0x0e: /* SQSHL, UQSHL */
10598 case 0x10: /* SHRN / SQSHRUN */
10599 case 0x11: /* RSHRN / SQRSHRUN */
10600 case 0x12: /* SQSHRN / UQSHRN */
10601 case 0x13: /* SQRSHRN / UQRSHRN */
10602 case 0x14: /* SSHLL / USHLL */
10603 unallocated_encoding(s);
10604 return;
10605 }
10606 }
10607
10608 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
10609 int size, int rn, int rd)
10610 {
10611 /* Handle 2-reg-misc ops which are widening (so each size element
10612 * in the source becomes a 2*size element in the destination).
10613 * The only instruction like this is FCVTL.
10614 */
10615 int pass;
10616
10617 if (size == 3) {
10618 /* 32 -> 64 bit fp conversion */
10619 TCGv_i64 tcg_res[2];
10620 int srcelt = is_q ? 2 : 0;
10621
10622 for (pass = 0; pass < 2; pass++) {
10623 TCGv_i32 tcg_op = tcg_temp_new_i32();
10624 tcg_res[pass] = tcg_temp_new_i64();
10625
10626 read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
10627 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env);
10628 }
10629 for (pass = 0; pass < 2; pass++) {
10630 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10631 }
10632 } else {
10633 /* 16 -> 32 bit fp conversion */
10634 int srcelt = is_q ? 4 : 0;
10635 TCGv_i32 tcg_res[4];
10636 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
10637 TCGv_i32 ahp = get_ahp_flag();
10638
10639 for (pass = 0; pass < 4; pass++) {
10640 tcg_res[pass] = tcg_temp_new_i32();
10641
10642 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
10643 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
10644 fpst, ahp);
10645 }
10646 for (pass = 0; pass < 4; pass++) {
10647 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10648 }
10649 }
10650 }
10651
10652 static void handle_rev(DisasContext *s, int opcode, bool u,
10653 bool is_q, int size, int rn, int rd)
10654 {
10655 int op = (opcode << 1) | u;
10656 int opsz = op + size;
10657 int grp_size = 3 - opsz;
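    /* The reversal container is 64 >> op bits wide (REV64, REV32, REV16)
     * and holds 1 << grp_size elements of 8 << size bits each.
     */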
10658 int dsize = is_q ? 128 : 64;
10659 int i;
10660
10661 if (opsz >= 3) {
10662 unallocated_encoding(s);
10663 return;
10664 }
10665
10666 if (!fp_access_check(s)) {
10667 return;
10668 }
10669
10670 if (size == 0) {
10671 /* Special case bytes, use bswap op on each group of elements */
10672 int groups = dsize / (8 << grp_size);
10673
10674 for (i = 0; i < groups; i++) {
10675 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
10676
10677 read_vec_element(s, tcg_tmp, rn, i, grp_size);
10678 switch (grp_size) {
10679 case MO_16:
10680 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
10681 break;
10682 case MO_32:
10683 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
10684 break;
10685 case MO_64:
10686 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
10687 break;
10688 default:
10689 g_assert_not_reached();
10690 }
10691 write_vec_element(s, tcg_tmp, rd, i, grp_size);
10692 }
10693 clear_vec_high(s, is_q, rd);
10694 } else {
10695 int revmask = (1 << grp_size) - 1;
10696 int esize = 8 << size;
10697 int elements = dsize / esize;
10698 TCGv_i64 tcg_rn = tcg_temp_new_i64();
10699 TCGv_i64 tcg_rd[2];
10700
10701 for (i = 0; i < 2; i++) {
10702 tcg_rd[i] = tcg_temp_new_i64();
10703 tcg_gen_movi_i64(tcg_rd[i], 0);
10704 }
10705
10706 for (i = 0; i < elements; i++) {
10707 int e_rev = (i & 0xf) ^ revmask;
10708 int w = (e_rev * esize) / 64;
10709 int o = (e_rev * esize) % 64;
10710
10711 read_vec_element(s, tcg_rn, rn, i, size);
10712 tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize);
10713 }
10714
10715 for (i = 0; i < 2; i++) {
10716 write_vec_element(s, tcg_rd[i], rd, i, MO_64);
10717 }
10718 clear_vec_high(s, true, rd);
10719 }
10720 }
10721
10722 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
10723 bool is_q, int size, int rn, int rd)
10724 {
10725 /* Implement the pairwise operations from 2-misc:
10726 * SADDLP, UADDLP, SADALP, UADALP.
10727 * These all add pairs of elements in the input to produce a
10728 * double-width result element in the output (possibly accumulating).
10729 */
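    /*
     * For example, UADDLP Vd.4S, Vn.8H adds each adjacent pair of 16-bit
     * source elements into one 32-bit result element.
     */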
10730 bool accum = (opcode == 0x6);
10731 int maxpass = is_q ? 2 : 1;
10732 int pass;
10733 TCGv_i64 tcg_res[2];
10734
10735 if (size == 2) {
10736 /* 32 + 32 -> 64 op */
10737 MemOp memop = size + (u ? 0 : MO_SIGN);
10738
10739 for (pass = 0; pass < maxpass; pass++) {
10740 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10741 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10742
10743 tcg_res[pass] = tcg_temp_new_i64();
10744
10745 read_vec_element(s, tcg_op1, rn, pass * 2, memop);
10746 read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
10747 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10748 if (accum) {
10749 read_vec_element(s, tcg_op1, rd, pass, MO_64);
10750 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
10751 }
10752 }
10753 } else {
10754 for (pass = 0; pass < maxpass; pass++) {
10755 TCGv_i64 tcg_op = tcg_temp_new_i64();
10756 NeonGenOne64OpFn *genfn;
10757 static NeonGenOne64OpFn * const fns[2][2] = {
10758 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
10759 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
10760 };
10761
10762 genfn = fns[size][u];
10763
10764 tcg_res[pass] = tcg_temp_new_i64();
10765
10766 read_vec_element(s, tcg_op, rn, pass, MO_64);
10767 genfn(tcg_res[pass], tcg_op);
10768
10769 if (accum) {
10770 read_vec_element(s, tcg_op, rd, pass, MO_64);
10771 if (size == 0) {
10772 gen_helper_neon_addl_u16(tcg_res[pass],
10773 tcg_res[pass], tcg_op);
10774 } else {
10775 gen_helper_neon_addl_u32(tcg_res[pass],
10776 tcg_res[pass], tcg_op);
10777 }
10778 }
10779 }
10780 }
10781 if (!is_q) {
10782 tcg_res[1] = tcg_constant_i64(0);
10783 }
10784 for (pass = 0; pass < 2; pass++) {
10785 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10786 }
10787 }
10788
10789 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
10790 {
10791 /* Implement SHLL and SHLL2 */
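    /*
     * The shift amount is not encoded in the instruction: it is always
     * the source element size (8 << size).
     */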
10792 int pass;
10793 int part = is_q ? 2 : 0;
10794 TCGv_i64 tcg_res[2];
10795
10796 for (pass = 0; pass < 2; pass++) {
10797 static NeonGenWidenFn * const widenfns[3] = {
10798 gen_helper_neon_widen_u8,
10799 gen_helper_neon_widen_u16,
10800 tcg_gen_extu_i32_i64,
10801 };
10802 NeonGenWidenFn *widenfn = widenfns[size];
10803 TCGv_i32 tcg_op = tcg_temp_new_i32();
10804
10805 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
10806 tcg_res[pass] = tcg_temp_new_i64();
10807 widenfn(tcg_res[pass], tcg_op);
10808 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
10809 }
10810
10811 for (pass = 0; pass < 2; pass++) {
10812 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10813 }
10814 }
10815
10816 /* AdvSIMD two reg misc
10817 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
10818 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
10819 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
10820 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
10821 */
10822 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
10823 {
10824 int size = extract32(insn, 22, 2);
10825 int opcode = extract32(insn, 12, 5);
10826 bool u = extract32(insn, 29, 1);
10827 bool is_q = extract32(insn, 30, 1);
10828 int rn = extract32(insn, 5, 5);
10829 int rd = extract32(insn, 0, 5);
10830 bool need_fpstatus = false;
10831 int rmode = -1;
10832 TCGv_i32 tcg_rmode;
10833 TCGv_ptr tcg_fpstatus;
10834
10835 switch (opcode) {
10836 case 0x0: /* REV64, REV32 */
10837 case 0x1: /* REV16 */
10838 handle_rev(s, opcode, u, is_q, size, rn, rd);
10839 return;
10840 case 0x5: /* CNT, NOT, RBIT */
10841 if (u && size == 0) {
10842 /* NOT */
10843 break;
10844 } else if (u && size == 1) {
10845 /* RBIT */
10846 break;
10847 } else if (!u && size == 0) {
10848 /* CNT */
10849 break;
10850 }
10851 unallocated_encoding(s);
10852 return;
10853 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
10854 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
10855 if (size == 3) {
10856 unallocated_encoding(s);
10857 return;
10858 }
10859 if (!fp_access_check(s)) {
10860 return;
10861 }
10862
10863 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
10864 return;
10865 case 0x4: /* CLS, CLZ */
10866 if (size == 3) {
10867 unallocated_encoding(s);
10868 return;
10869 }
10870 break;
10871 case 0x2: /* SADDLP, UADDLP */
10872 case 0x6: /* SADALP, UADALP */
10873 if (size == 3) {
10874 unallocated_encoding(s);
10875 return;
10876 }
10877 if (!fp_access_check(s)) {
10878 return;
10879 }
10880 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
10881 return;
10882 case 0x13: /* SHLL, SHLL2 */
10883 if (u == 0 || size == 3) {
10884 unallocated_encoding(s);
10885 return;
10886 }
10887 if (!fp_access_check(s)) {
10888 return;
10889 }
10890 handle_shll(s, is_q, size, rn, rd);
10891 return;
10892 case 0xa: /* CMLT */
10893 if (u == 1) {
10894 unallocated_encoding(s);
10895 return;
10896 }
10897 /* fall through */
10898 case 0x8: /* CMGT, CMGE */
10899 case 0x9: /* CMEQ, CMLE */
10900 case 0xb: /* ABS, NEG */
10901 if (size == 3 && !is_q) {
10902 unallocated_encoding(s);
10903 return;
10904 }
10905 break;
10906 case 0x7: /* SQABS, SQNEG */
10907 if (size == 3 && !is_q) {
10908 unallocated_encoding(s);
10909 return;
10910 }
10911 break;
10912 case 0xc ... 0xf:
10913 case 0x16 ... 0x1f:
10914 {
10915 /* Floating point: U, size[1] and opcode indicate operation;
10916 * size[0] indicates single or double precision.
10917 */
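        /*
         * From here on "opcode" is a 7-bit value: bits [4:0] from the
         * instruction, bit 5 from size[1] and bit 6 from U. For example
         * FABS becomes 0x2f and FNEG (U == 1) becomes 0x6f.
         */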
10918 int is_double = extract32(size, 0, 1);
10919 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10920 size = is_double ? 3 : 2;
10921 switch (opcode) {
10922 case 0x2f: /* FABS */
10923 case 0x6f: /* FNEG */
10924 if (size == 3 && !is_q) {
10925 unallocated_encoding(s);
10926 return;
10927 }
10928 break;
10929 case 0x1d: /* SCVTF */
10930 case 0x5d: /* UCVTF */
10931 {
10932 bool is_signed = (opcode == 0x1d) ? true : false;
10933 int elements = is_double ? 2 : is_q ? 4 : 2;
10934 if (is_double && !is_q) {
10935 unallocated_encoding(s);
10936 return;
10937 }
10938 if (!fp_access_check(s)) {
10939 return;
10940 }
10941 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
10942 return;
10943 }
10944 case 0x2c: /* FCMGT (zero) */
10945 case 0x2d: /* FCMEQ (zero) */
10946 case 0x2e: /* FCMLT (zero) */
10947 case 0x6c: /* FCMGE (zero) */
10948 case 0x6d: /* FCMLE (zero) */
10949 if (size == 3 && !is_q) {
10950 unallocated_encoding(s);
10951 return;
10952 }
10953 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
10954 return;
10955 case 0x7f: /* FSQRT */
10956 if (size == 3 && !is_q) {
10957 unallocated_encoding(s);
10958 return;
10959 }
10960 break;
10961 case 0x1a: /* FCVTNS */
10962 case 0x1b: /* FCVTMS */
10963 case 0x3a: /* FCVTPS */
10964 case 0x3b: /* FCVTZS */
10965 case 0x5a: /* FCVTNU */
10966 case 0x5b: /* FCVTMU */
10967 case 0x7a: /* FCVTPU */
10968 case 0x7b: /* FCVTZU */
10969 need_fpstatus = true;
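            /*
             * The FCVT{N,M,P,Z}{S,U} rounding mode is implied by the
             * encoding: bit 0 and bit 5 (originally size[1]) of the
             * expanded opcode select to-nearest-even, toward minus
             * infinity, toward plus infinity or toward zero.
             */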
10970 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10971 if (size == 3 && !is_q) {
10972 unallocated_encoding(s);
10973 return;
10974 }
10975 break;
10976 case 0x5c: /* FCVTAU */
10977 case 0x1c: /* FCVTAS */
10978 need_fpstatus = true;
10979 rmode = FPROUNDING_TIEAWAY;
10980 if (size == 3 && !is_q) {
10981 unallocated_encoding(s);
10982 return;
10983 }
10984 break;
10985 case 0x3c: /* URECPE */
10986 if (size == 3) {
10987 unallocated_encoding(s);
10988 return;
10989 }
10990 /* fall through */
10991 case 0x3d: /* FRECPE */
10992 case 0x7d: /* FRSQRTE */
10993 if (size == 3 && !is_q) {
10994 unallocated_encoding(s);
10995 return;
10996 }
10997 if (!fp_access_check(s)) {
10998 return;
10999 }
11000 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
11001 return;
11002 case 0x56: /* FCVTXN, FCVTXN2 */
11003 if (size == 2) {
11004 unallocated_encoding(s);
11005 return;
11006 }
11007 /* fall through */
11008 case 0x16: /* FCVTN, FCVTN2 */
11009 /* handle_2misc_narrow does a 2*size -> size operation, but these
11010 * instructions encode the source size rather than dest size.
11011 */
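            /*
             * size has already been rewritten to the source element size
             * (MO_32 or MO_64 here), so size - 1 is the destination
             * element size that handle_2misc_narrow() expects.
             */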
11012 if (!fp_access_check(s)) {
11013 return;
11014 }
11015 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
11016 return;
11017 case 0x36: /* BFCVTN, BFCVTN2 */
11018 if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
11019 unallocated_encoding(s);
11020 return;
11021 }
11022 if (!fp_access_check(s)) {
11023 return;
11024 }
11025 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
11026 return;
11027 case 0x17: /* FCVTL, FCVTL2 */
11028 if (!fp_access_check(s)) {
11029 return;
11030 }
11031 handle_2misc_widening(s, opcode, is_q, size, rn, rd);
11032 return;
11033 case 0x18: /* FRINTN */
11034 case 0x19: /* FRINTM */
11035 case 0x38: /* FRINTP */
11036 case 0x39: /* FRINTZ */
11037 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
11038 /* fall through */
11039 case 0x59: /* FRINTX */
11040 case 0x79: /* FRINTI */
11041 need_fpstatus = true;
11042 if (size == 3 && !is_q) {
11043 unallocated_encoding(s);
11044 return;
11045 }
11046 break;
11047 case 0x58: /* FRINTA */
11048 rmode = FPROUNDING_TIEAWAY;
11049 need_fpstatus = true;
11050 if (size == 3 && !is_q) {
11051 unallocated_encoding(s);
11052 return;
11053 }
11054 break;
11055 case 0x7c: /* URSQRTE */
11056 if (size == 3) {
11057 unallocated_encoding(s);
11058 return;
11059 }
11060 break;
11061 case 0x1e: /* FRINT32Z */
11062 case 0x1f: /* FRINT64Z */
11063 rmode = FPROUNDING_ZERO;
11064 /* fall through */
11065 case 0x5e: /* FRINT32X */
11066 case 0x5f: /* FRINT64X */
11067 need_fpstatus = true;
11068 if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
11069 unallocated_encoding(s);
11070 return;
11071 }
11072 break;
11073 default:
11074 unallocated_encoding(s);
11075 return;
11076 }
11077 break;
11078 }
11079 default:
11080 case 0x3: /* SUQADD, USQADD */
11081 unallocated_encoding(s);
11082 return;
11083 }
11084
11085 if (!fp_access_check(s)) {
11086 return;
11087 }
11088
11089 if (need_fpstatus || rmode >= 0) {
11090 tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
11091 } else {
11092 tcg_fpstatus = NULL;
11093 }
11094 if (rmode >= 0) {
11095 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
11096 } else {
11097 tcg_rmode = NULL;
11098 }
11099
11100 switch (opcode) {
11101 case 0x5:
11102 if (u && size == 0) { /* NOT */
11103 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
11104 return;
11105 }
11106 break;
11107 case 0x8: /* CMGT, CMGE */
11108 if (u) {
11109 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
11110 } else {
11111 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
11112 }
11113 return;
11114 case 0x9: /* CMEQ, CMLE */
11115 if (u) {
11116 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
11117 } else {
11118 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
11119 }
11120 return;
11121 case 0xa: /* CMLT */
11122 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
11123 return;
11124 case 0xb:
11125 if (u) { /* ABS, NEG */
11126 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
11127 } else {
11128 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
11129 }
11130 return;
11131 }
11132
11133 if (size == 3) {
11134 /* All 64-bit element operations can be shared with scalar 2misc */
11135 int pass;
11136
11137 /* Coverity claims (size == 3 && !is_q) has been eliminated
11138 * from all paths leading to here.
11139 */
11140 tcg_debug_assert(is_q);
11141 for (pass = 0; pass < 2; pass++) {
11142 TCGv_i64 tcg_op = tcg_temp_new_i64();
11143 TCGv_i64 tcg_res = tcg_temp_new_i64();
11144
11145 read_vec_element(s, tcg_op, rn, pass, MO_64);
11146
11147 handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
11148 tcg_rmode, tcg_fpstatus);
11149
11150 write_vec_element(s, tcg_res, rd, pass, MO_64);
11151 }
11152 } else {
11153 int pass;
11154
11155 for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11156 TCGv_i32 tcg_op = tcg_temp_new_i32();
11157 TCGv_i32 tcg_res = tcg_temp_new_i32();
11158
11159 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
11160
11161 if (size == 2) {
11162 /* Special cases for 32 bit elements */
11163 switch (opcode) {
11164 case 0x4: /* CLS */
11165 if (u) {
11166 tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
11167 } else {
11168 tcg_gen_clrsb_i32(tcg_res, tcg_op);
11169 }
11170 break;
11171 case 0x7: /* SQABS, SQNEG */
11172 if (u) {
11173 gen_helper_neon_qneg_s32(tcg_res, tcg_env, tcg_op);
11174 } else {
11175 gen_helper_neon_qabs_s32(tcg_res, tcg_env, tcg_op);
11176 }
11177 break;
11178 case 0x2f: /* FABS */
11179 gen_vfp_abss(tcg_res, tcg_op);
11180 break;
11181 case 0x6f: /* FNEG */
11182 gen_vfp_negs(tcg_res, tcg_op);
11183 break;
11184 case 0x7f: /* FSQRT */
11185 gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
11186 break;
11187 case 0x1a: /* FCVTNS */
11188 case 0x1b: /* FCVTMS */
11189 case 0x1c: /* FCVTAS */
11190 case 0x3a: /* FCVTPS */
11191 case 0x3b: /* FCVTZS */
11192 gen_helper_vfp_tosls(tcg_res, tcg_op,
11193 tcg_constant_i32(0), tcg_fpstatus);
11194 break;
11195 case 0x5a: /* FCVTNU */
11196 case 0x5b: /* FCVTMU */
11197 case 0x5c: /* FCVTAU */
11198 case 0x7a: /* FCVTPU */
11199 case 0x7b: /* FCVTZU */
11200 gen_helper_vfp_touls(tcg_res, tcg_op,
11201 tcg_constant_i32(0), tcg_fpstatus);
11202 break;
11203 case 0x18: /* FRINTN */
11204 case 0x19: /* FRINTM */
11205 case 0x38: /* FRINTP */
11206 case 0x39: /* FRINTZ */
11207 case 0x58: /* FRINTA */
11208 case 0x79: /* FRINTI */
11209 gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
11210 break;
11211 case 0x59: /* FRINTX */
11212 gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
11213 break;
11214 case 0x7c: /* URSQRTE */
11215 gen_helper_rsqrte_u32(tcg_res, tcg_op);
11216 break;
11217 case 0x1e: /* FRINT32Z */
11218 case 0x5e: /* FRINT32X */
11219 gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
11220 break;
11221 case 0x1f: /* FRINT64Z */
11222 case 0x5f: /* FRINT64X */
11223 gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
11224 break;
11225 default:
11226 g_assert_not_reached();
11227 }
11228 } else {
11229 /* Use helpers for 8 and 16 bit elements */
11230 switch (opcode) {
11231 case 0x5: /* CNT, RBIT */
11232 /* For these two insns size is part of the opcode specifier
11233 * (handled earlier); they always operate on byte elements.
11234 */
11235 if (u) {
11236 gen_helper_neon_rbit_u8(tcg_res, tcg_op);
11237 } else {
11238 gen_helper_neon_cnt_u8(tcg_res, tcg_op);
11239 }
11240 break;
11241 case 0x7: /* SQABS, SQNEG */
11242 {
11243 NeonGenOneOpEnvFn *genfn;
11244 static NeonGenOneOpEnvFn * const fns[2][2] = {
11245 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
11246 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
11247 };
11248 genfn = fns[size][u];
11249 genfn(tcg_res, tcg_env, tcg_op);
11250 break;
11251 }
11252 case 0x4: /* CLS, CLZ */
11253 if (u) {
11254 if (size == 0) {
11255 gen_helper_neon_clz_u8(tcg_res, tcg_op);
11256 } else {
11257 gen_helper_neon_clz_u16(tcg_res, tcg_op);
11258 }
11259 } else {
11260 if (size == 0) {
11261 gen_helper_neon_cls_s8(tcg_res, tcg_op);
11262 } else {
11263 gen_helper_neon_cls_s16(tcg_res, tcg_op);
11264 }
11265 }
11266 break;
11267 default:
11268 g_assert_not_reached();
11269 }
11270 }
11271
11272 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11273 }
11274 }
11275 clear_vec_high(s, is_q, rd);
11276
11277 if (tcg_rmode) {
11278 gen_restore_rmode(tcg_rmode, tcg_fpstatus);
11279 }
11280 }
11281
11282 /* AdvSIMD [scalar] two register miscellaneous (FP16)
11283 *
11284 * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0
11285 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
11286 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd |
11287 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
11288 * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
11289 * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
11290 *
11291 * This actually covers two groups where scalar access is governed by
11292 * bit 28. A bunch of the instructions (float to integral) only exist
11293 * in the vector form and are un-allocated for the scalar decode. Also
11294 * in the scalar decode Q is always 1.
11295 */
11296 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
11297 {
11298 int fpop, opcode, a, u;
11299 int rn, rd;
11300 bool is_q;
11301 bool is_scalar;
11302 bool only_in_vector = false;
11303
11304 int pass;
11305 TCGv_i32 tcg_rmode = NULL;
11306 TCGv_ptr tcg_fpstatus = NULL;
11307 bool need_fpst = true;
11308 int rmode = -1;
11309
11310 if (!dc_isar_feature(aa64_fp16, s)) {
11311 unallocated_encoding(s);
11312 return;
11313 }
11314
11315 rd = extract32(insn, 0, 5);
11316 rn = extract32(insn, 5, 5);
11317
11318 a = extract32(insn, 23, 1);
11319 u = extract32(insn, 29, 1);
11320 is_scalar = extract32(insn, 28, 1);
11321 is_q = extract32(insn, 30, 1);
11322
11323 opcode = extract32(insn, 12, 5);
11324 fpop = deposit32(opcode, 5, 1, a);
11325 fpop = deposit32(fpop, 6, 1, u);
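    /*
     * fpop mirrors the expanded opcode used by disas_simd_two_reg_misc():
     * bits [4:0] are the opcode field, bit 5 is "a" and bit 6 is U, so the
     * same case values (0x2f FABS, 0x6f FNEG, ...) apply below.
     */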
11326
11327 switch (fpop) {
11328 case 0x1d: /* SCVTF */
11329 case 0x5d: /* UCVTF */
11330 {
11331 int elements;
11332
11333 if (is_scalar) {
11334 elements = 1;
11335 } else {
11336 elements = (is_q ? 8 : 4);
11337 }
11338
11339 if (!fp_access_check(s)) {
11340 return;
11341 }
11342 handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
11343 return;
11344 }
11345 break;
11346 case 0x2c: /* FCMGT (zero) */
11347 case 0x2d: /* FCMEQ (zero) */
11348 case 0x2e: /* FCMLT (zero) */
11349 case 0x6c: /* FCMGE (zero) */
11350 case 0x6d: /* FCMLE (zero) */
11351 handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
11352 return;
11353 case 0x3d: /* FRECPE */
11354 case 0x3f: /* FRECPX */
11355 break;
11356 case 0x18: /* FRINTN */
11357 only_in_vector = true;
11358 rmode = FPROUNDING_TIEEVEN;
11359 break;
11360 case 0x19: /* FRINTM */
11361 only_in_vector = true;
11362 rmode = FPROUNDING_NEGINF;
11363 break;
11364 case 0x38: /* FRINTP */
11365 only_in_vector = true;
11366 rmode = FPROUNDING_POSINF;
11367 break;
11368 case 0x39: /* FRINTZ */
11369 only_in_vector = true;
11370 rmode = FPROUNDING_ZERO;
11371 break;
11372 case 0x58: /* FRINTA */
11373 only_in_vector = true;
11374 rmode = FPROUNDING_TIEAWAY;
11375 break;
11376 case 0x59: /* FRINTX */
11377 case 0x79: /* FRINTI */
11378 only_in_vector = true;
11379 /* current rounding mode */
11380 break;
11381 case 0x1a: /* FCVTNS */
11382 rmode = FPROUNDING_TIEEVEN;
11383 break;
11384 case 0x1b: /* FCVTMS */
11385 rmode = FPROUNDING_NEGINF;
11386 break;
11387 case 0x1c: /* FCVTAS */
11388 rmode = FPROUNDING_TIEAWAY;
11389 break;
11390 case 0x3a: /* FCVTPS */
11391 rmode = FPROUNDING_POSINF;
11392 break;
11393 case 0x3b: /* FCVTZS */
11394 rmode = FPROUNDING_ZERO;
11395 break;
11396 case 0x5a: /* FCVTNU */
11397 rmode = FPROUNDING_TIEEVEN;
11398 break;
11399 case 0x5b: /* FCVTMU */
11400 rmode = FPROUNDING_NEGINF;
11401 break;
11402 case 0x5c: /* FCVTAU */
11403 rmode = FPROUNDING_TIEAWAY;
11404 break;
11405 case 0x7a: /* FCVTPU */
11406 rmode = FPROUNDING_POSINF;
11407 break;
11408 case 0x7b: /* FCVTZU */
11409 rmode = FPROUNDING_ZERO;
11410 break;
11411 case 0x2f: /* FABS */
11412 case 0x6f: /* FNEG */
11413 need_fpst = false;
11414 break;
11415 case 0x7d: /* FRSQRTE */
11416 case 0x7f: /* FSQRT (vector) */
11417 break;
11418 default:
11419 unallocated_encoding(s);
11420 return;
11421 }
11422
11423
11424 /* Check additional constraints for the scalar encoding */
11425 if (is_scalar) {
11426 if (!is_q) {
11427 unallocated_encoding(s);
11428 return;
11429 }
11430 /* FRINTxx is only in the vector form */
11431 if (only_in_vector) {
11432 unallocated_encoding(s);
11433 return;
11434 }
11435 }
11436
11437 if (!fp_access_check(s)) {
11438 return;
11439 }
11440
11441 if (rmode >= 0 || need_fpst) {
11442 tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
11443 }
11444
11445 if (rmode >= 0) {
11446 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
11447 }
11448
11449 if (is_scalar) {
11450 TCGv_i32 tcg_op = read_fp_hreg(s, rn);
11451 TCGv_i32 tcg_res = tcg_temp_new_i32();
11452
11453 switch (fpop) {
11454 case 0x1a: /* FCVTNS */
11455 case 0x1b: /* FCVTMS */
11456 case 0x1c: /* FCVTAS */
11457 case 0x3a: /* FCVTPS */
11458 case 0x3b: /* FCVTZS */
11459 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
11460 break;
11461 case 0x3d: /* FRECPE */
11462 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
11463 break;
11464 case 0x3f: /* FRECPX */
11465 gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
11466 break;
11467 case 0x5a: /* FCVTNU */
11468 case 0x5b: /* FCVTMU */
11469 case 0x5c: /* FCVTAU */
11470 case 0x7a: /* FCVTPU */
11471 case 0x7b: /* FCVTZU */
11472 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
11473 break;
11474 case 0x6f: /* FNEG */
11475 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
11476 break;
11477 case 0x7d: /* FRSQRTE */
11478 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
11479 break;
11480 default:
11481 g_assert_not_reached();
11482 }
11483
11484 /* limit any sign extension going on */
11485 tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
11486 write_fp_sreg(s, rd, tcg_res);
11487 } else {
11488 for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
11489 TCGv_i32 tcg_op = tcg_temp_new_i32();
11490 TCGv_i32 tcg_res = tcg_temp_new_i32();
11491
11492 read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
11493
11494 switch (fpop) {
11495 case 0x1a: /* FCVTNS */
11496 case 0x1b: /* FCVTMS */
11497 case 0x1c: /* FCVTAS */
11498 case 0x3a: /* FCVTPS */
11499 case 0x3b: /* FCVTZS */
11500 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
11501 break;
11502 case 0x3d: /* FRECPE */
11503 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
11504 break;
11505 case 0x5a: /* FCVTNU */
11506 case 0x5b: /* FCVTMU */
11507 case 0x5c: /* FCVTAU */
11508 case 0x7a: /* FCVTPU */
11509 case 0x7b: /* FCVTZU */
11510 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
11511 break;
11512 case 0x18: /* FRINTN */
11513 case 0x19: /* FRINTM */
11514 case 0x38: /* FRINTP */
11515 case 0x39: /* FRINTZ */
11516 case 0x58: /* FRINTA */
11517 case 0x79: /* FRINTI */
11518 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
11519 break;
11520 case 0x59: /* FRINTX */
11521 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
11522 break;
11523 case 0x2f: /* FABS */
11524 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
11525 break;
11526 case 0x6f: /* FNEG */
11527 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
11528 break;
11529 case 0x7d: /* FRSQRTE */
11530 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
11531 break;
11532 case 0x7f: /* FSQRT */
11533 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
11534 break;
11535 default:
11536 g_assert_not_reached();
11537 }
11538
11539 write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11540 }
11541
11542 clear_vec_high(s, is_q, rd);
11543 }
11544
11545 if (tcg_rmode) {
11546 gen_restore_rmode(tcg_rmode, tcg_fpstatus);
11547 }
11548 }
11549
11550 /* C3.6 Data processing - SIMD, inc Crypto
11551 *
11552 * As the decode gets a little complex we are using a table based
11553 * approach for this part of the decode.
11554 */
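/*
 * lookup_disas_fn() scans this table in order and returns the disas_fn of
 * the first entry where (insn & mask) == pattern; the catch-all zero entry
 * matches everything and yields NULL, which the caller turns into an
 * unallocated encoding.
 */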
11555 static const AArch64DecodeTable data_proc_simd[] = {
11556 /* pattern , mask , fn */
11557 { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
11558 { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
11559 { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
11560 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
11561 { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
11562 { 0x00000000, 0x00000000, NULL }
11563 };
11564
11565 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
11566 {
11567 /* Note that this is called with all non-FP cases from
11568 * table C3-6 so it must UNDEF for entries not specifically
11569 * allocated to instructions in that table.
11570 */
11571 AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
11572 if (fn) {
11573 fn(s, insn);
11574 } else {
11575 unallocated_encoding(s);
11576 }
11577 }
11578
11579 /* C3.6 Data processing - SIMD and floating point */
11580 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
11581 {
11582 if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
11583 disas_data_proc_fp(s, insn);
11584 } else {
11585 /* SIMD, including crypto */
11586 disas_data_proc_simd(s, insn);
11587 }
11588 }
11589
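/*
 * These are the two outcomes of the generated SME FA64 decoder (invoked via
 * disas_sme_fa64() below): instructions that remain legal in streaming SVE
 * mode decode to trans_OK, everything else to trans_FAIL, which marks the
 * insn as non-streaming so the SME checks can raise the appropriate trap.
 */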
11590 static bool trans_OK(DisasContext *s, arg_OK *a)
11591 {
11592 return true;
11593 }
11594
11595 static bool trans_FAIL(DisasContext *s, arg_OK *a)
11596 {
11597 s->is_nonstreaming = true;
11598 return true;
11599 }
11600
11601 /**
11602 * btype_destination_ok:
11603 * @insn: The instruction at the branch destination
11604 * @bt: SCTLR_ELx.BT
11605 * @btype: PSTATE.BTYPE, and is non-zero
11606 *
11607 * On a guarded page, there are a limited number of insns
11608 * that may be present at the branch target:
11609 * - branch target identifiers,
11610 * - paciasp, pacibsp,
11611 * - BRK insn
11612 * - HLT insn
11613 * Anything else causes a Branch Target Exception.
11614 *
11615 * Return true if the branch is compatible, false to raise BTITRAP.
11616 */
11617 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
11618 {
11619 if ((insn & 0xfffff01fu) == 0xd503201fu) {
11620 /* HINT space */
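        /* extract32(insn, 5, 7) is the HINT immediate (CRm:op2) */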
11621 switch (extract32(insn, 5, 7)) {
11622 case 0b011001: /* PACIASP */
11623 case 0b011011: /* PACIBSP */
11624 /*
11625 * If SCTLR_ELx.BT, then PACI*SP are not compatible
11626 * with btype == 3. Otherwise all btype are ok.
11627 */
11628 return !bt || btype != 3;
11629 case 0b100000: /* BTI */
11630 /* Not compatible with any btype. */
11631 return false;
11632 case 0b100010: /* BTI c */
11633 /* Not compatible with btype == 3 */
11634 return btype != 3;
11635 case 0b100100: /* BTI j */
11636 /* Not compatible with btype == 2 */
11637 return btype != 2;
11638 case 0b100110: /* BTI jc */
11639 /* Compatible with any btype. */
11640 return true;
11641 }
11642 } else {
11643 switch (insn & 0xffe0001fu) {
11644 case 0xd4200000u: /* BRK */
11645 case 0xd4400000u: /* HLT */
11646 /* Give priority to the breakpoint exception. */
11647 return true;
11648 }
11649 }
11650 return false;
11651 }
11652
11653 /* C3.1 A64 instruction index by encoding */
11654 static void disas_a64_legacy(DisasContext *s, uint32_t insn)
11655 {
11656 switch (extract32(insn, 25, 4)) {
11657 case 0x5:
11658 case 0xd: /* Data processing - register */
11659 disas_data_proc_reg(s, insn);
11660 break;
11661 case 0x7:
11662 case 0xf: /* Data processing - SIMD and floating point */
11663 disas_data_proc_simd_fp(s, insn);
11664 break;
11665 default:
11666 unallocated_encoding(s);
11667 break;
11668 }
11669 }
11670
11671 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
11672 CPUState *cpu)
11673 {
11674 DisasContext *dc = container_of(dcbase, DisasContext, base);
11675 CPUARMState *env = cpu_env(cpu);
11676 ARMCPU *arm_cpu = env_archcpu(env);
11677 CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
11678 int bound, core_mmu_idx;
11679
11680 dc->isar = &arm_cpu->isar;
11681 dc->condjmp = 0;
11682 dc->pc_save = dc->base.pc_first;
11683 dc->aarch64 = true;
11684 dc->thumb = false;
11685 dc->sctlr_b = 0;
11686 dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
11687 dc->condexec_mask = 0;
11688 dc->condexec_cond = 0;
11689 core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
11690 dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
11691 dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
11692 dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
11693 dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
11694 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11695 #if !defined(CONFIG_USER_ONLY)
11696 dc->user = (dc->current_el == 0);
11697 #endif
11698 dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
11699 dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
11700 dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
11701 dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
11702 dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
11703 dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
11704 dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
11705 dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
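    /* The VL/SVL tb-flag fields hold the vector length in quadwords, minus 1 */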
11706 dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
11707 dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
11708 dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
11709 dc->bt = EX_TBFLAG_A64(tb_flags, BT);
11710 dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
11711 dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
11712 dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
11713 dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
11714 dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
11715 dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
11716 dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
11717 dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
11718 dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
11719 dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
11720 dc->nv = EX_TBFLAG_A64(tb_flags, NV);
11721 dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
11722 dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
11723 dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
11724 dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
11725 dc->vec_len = 0;
11726 dc->vec_stride = 0;
11727 dc->cp_regs = arm_cpu->cp_regs;
11728 dc->features = env->features;
11729 dc->dcz_blocksize = arm_cpu->dcz_blocksize;
11730 dc->gm_blocksize = arm_cpu->gm_blocksize;
11731
11732 #ifdef CONFIG_USER_ONLY
11733 /* In sve_probe_page, we assume TBI is enabled. */
11734 tcg_debug_assert(dc->tbid & 1);
11735 #endif
11736
11737 dc->lse2 = dc_isar_feature(aa64_lse2, dc);
11738
11739 /* Single step state. The code-generation logic here is:
11740 * SS_ACTIVE == 0:
11741 * generate code with no special handling for single-stepping (except
11742 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11743 * this happens anyway because those changes are all system register or
11744 * PSTATE writes).
11745 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11746 * emit code for one insn
11747 * emit code to clear PSTATE.SS
11748 * emit code to generate software step exception for completed step
11749 * end TB (as usual for having generated an exception)
11750 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11751 * emit code to generate a software step exception
11752 * end the TB
11753 */
11754 dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
11755 dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
11756 dc->is_ldex = false;
11757
11758 /* Bound the number of insns to execute to those left on the page. */
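    /*
     * -(pc_first | TARGET_PAGE_MASK) is the number of bytes from pc_first
     * to the end of its page, and every A64 insn is 4 bytes long.
     */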
11759 bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
11760
11761 /* If architectural single step active, limit to 1. */
11762 if (dc->ss_active) {
11763 bound = 1;
11764 }
11765 dc->base.max_insns = MIN(dc->base.max_insns, bound);
11766 }
11767
11768 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
11769 {
11770 }
11771
11772 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
11773 {
11774 DisasContext *dc = container_of(dcbase, DisasContext, base);
11775 target_ulong pc_arg = dc->base.pc_next;
11776
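    /*
     * With CF_PCREL the translated block may execute at more than one
     * virtual address, so record only the offset within the page here.
     */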
11777 if (tb_cflags(dcbase->tb) & CF_PCREL) {
11778 pc_arg &= ~TARGET_PAGE_MASK;
11779 }
11780 tcg_gen_insn_start(pc_arg, 0, 0);
11781 dc->insn_start_updated = false;
11782 }
11783
11784 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11785 {
11786 DisasContext *s = container_of(dcbase, DisasContext, base);
11787 CPUARMState *env = cpu_env(cpu);
11788 uint64_t pc = s->base.pc_next;
11789 uint32_t insn;
11790
11791 /* Singlestep exceptions have the highest priority. */
11792 if (s->ss_active && !s->pstate_ss) {
11793 /* Singlestep state is Active-pending.
11794 * If we're in this state at the start of a TB then either
11795 * a) we just took an exception to an EL which is being debugged
11796 * and this is the first insn in the exception handler
11797 * b) debug exceptions were masked and we just unmasked them
11798 * without changing EL (eg by clearing PSTATE.D)
11799 * In either case we're going to take a swstep exception in the
11800 * "did not step an insn" case, and so the syndrome ISV and EX
11801 * bits should be zero.
11802 */
11803 assert(s->base.num_insns == 1);
11804 gen_swstep_exception(s, 0, 0);
11805 s->base.is_jmp = DISAS_NORETURN;
11806 s->base.pc_next = pc + 4;
11807 return;
11808 }
11809
11810 if (pc & 3) {
11811 /*
11812 * PC alignment fault. This has priority over the instruction abort
11813 * that we would receive from a translation fault via arm_ldl_code.
11814 * This should only be possible after an indirect branch, at the
11815 * start of the TB.
11816 */
11817 assert(s->base.num_insns == 1);
11818 gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
11819 s->base.is_jmp = DISAS_NORETURN;
11820 s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
11821 return;
11822 }
11823
11824 s->pc_curr = pc;
11825 insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
11826 s->insn = insn;
11827 s->base.pc_next = pc + 4;
11828
11829 s->fp_access_checked = 0;
11830 s->sve_access_checked = 0;
11831
11832 if (s->pstate_il) {
11833 /*
11834 * Illegal execution state. This has priority over BTI
11835 * exceptions, but comes after instruction abort exceptions.
11836 */
11837 gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
11838 return;
11839 }
11840
11841 if (dc_isar_feature(aa64_bti, s)) {
11842 if (s->base.num_insns == 1) {
11843 /* First insn can have btype set to non-zero. */
11844 tcg_debug_assert(s->btype >= 0);
11845
11846 /*
11847 * Note that the Branch Target Exception has fairly high
11848 * priority -- below debugging exceptions but above most
11849 * everything else. This allows us to handle this now
11850 * instead of waiting until the insn is otherwise decoded.
11851 *
11852 * We can check all but the guarded page check here;
11853 * defer the latter to a helper.
11854 */
11855 if (s->btype != 0
11856 && !btype_destination_ok(insn, s->bt, s->btype)) {
11857 gen_helper_guarded_page_check(tcg_env);
11858 }
11859 } else {
11860 /* Not the first insn: btype must be 0. */
11861 tcg_debug_assert(s->btype == 0);
11862 }
11863 }
11864
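    /*
     * If non-streaming insns trap in streaming SVE mode, run the FA64
     * decoder first: it sets s->is_nonstreaming (via trans_FAIL above)
     * for any insn that is not allowed while streaming.
     */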
11865 s->is_nonstreaming = false;
11866 if (s->sme_trap_nonstreaming) {
11867 disas_sme_fa64(s, insn);
11868 }
11869
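    /*
     * Try the generated decodetree decoders first; anything they do not
     * claim falls through to the legacy hand-written decoder.
     */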
11870 if (!disas_a64(s, insn) &&
11871 !disas_sme(s, insn) &&
11872 !disas_sve(s, insn)) {
11873 disas_a64_legacy(s, insn);
11874 }
11875
11876 /*
11877 * After execution of most insns, btype is reset to 0.
11878 * Note that we set btype == -1 when the insn sets btype.
11879 */
11880 if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
11881 reset_btype(s);
11882 }
11883 }
11884
11885 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
11886 {
11887 DisasContext *dc = container_of(dcbase, DisasContext, base);
11888
11889 if (unlikely(dc->ss_active)) {
11890 /* Note that this means single stepping WFI doesn't halt the CPU.
11891 * For conditional branch insns this is harmless unreachable code as
11892 * gen_goto_tb() has already handled emitting the debug exception
11893 * (and thus a tb-jump is not possible when singlestepping).
11894 */
11895 switch (dc->base.is_jmp) {
11896 default:
11897 gen_a64_update_pc(dc, 4);
11898 /* fall through */
11899 case DISAS_EXIT:
11900 case DISAS_JUMP:
11901 gen_step_complete_exception(dc);
11902 break;
11903 case DISAS_NORETURN:
11904 break;
11905 }
11906 } else {
11907 switch (dc->base.is_jmp) {
11908 case DISAS_NEXT:
11909 case DISAS_TOO_MANY:
11910 gen_goto_tb(dc, 1, 4);
11911 break;
11912 default:
11913 case DISAS_UPDATE_EXIT:
11914 gen_a64_update_pc(dc, 4);
11915 /* fall through */
11916 case DISAS_EXIT:
11917 tcg_gen_exit_tb(NULL, 0);
11918 break;
11919 case DISAS_UPDATE_NOCHAIN:
11920 gen_a64_update_pc(dc, 4);
11921 /* fall through */
11922 case DISAS_JUMP:
11923 tcg_gen_lookup_and_goto_ptr();
11924 break;
11925 case DISAS_NORETURN:
11926 case DISAS_SWI:
11927 break;
11928 case DISAS_WFE:
11929 gen_a64_update_pc(dc, 4);
11930 gen_helper_wfe(tcg_env);
11931 break;
11932 case DISAS_YIELD:
11933 gen_a64_update_pc(dc, 4);
11934 gen_helper_yield(tcg_env);
11935 break;
11936 case DISAS_WFI:
11937 /*
11938 * This is a special case because we don't want to just halt
11939 * the CPU if trying to debug across a WFI.
11940 */
11941 gen_a64_update_pc(dc, 4);
11942 gen_helper_wfi(tcg_env, tcg_constant_i32(4));
11943 /*
11944 * The helper doesn't necessarily throw an exception, but we
11945 * must go back to the main loop to check for interrupts anyway.
11946 */
11947 tcg_gen_exit_tb(NULL, 0);
11948 break;
11949 }
11950 }
11951 }
11952
11953 const TranslatorOps aarch64_translator_ops = {
11954 .init_disas_context = aarch64_tr_init_disas_context,
11955 .tb_start = aarch64_tr_tb_start,
11956 .insn_start = aarch64_tr_insn_start,
11957 .translate_insn = aarch64_tr_translate_insn,
11958 .tb_stop = aarch64_tr_tb_stop,
11959 };
11960