1 /*
2 * AArch64 translation
3 *
4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20
21 #include "exec/exec-all.h"
22 #include "translate.h"
23 #include "translate-a64.h"
24 #include "qemu/log.h"
25 #include "arm_ldst.h"
26 #include "semihosting/semihost.h"
27 #include "cpregs.h"
28
29 static TCGv_i64 cpu_X[32];
30 static TCGv_i64 cpu_pc;
31
32 /* Load/store exclusive handling */
33 static TCGv_i64 cpu_exclusive_high;
34
35 static const char *regnames[] = {
36 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
37 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
38 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
39 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
40 };
41
42 enum a64_shift_type {
43 A64_SHIFT_TYPE_LSL = 0,
44 A64_SHIFT_TYPE_LSR = 1,
45 A64_SHIFT_TYPE_ASR = 2,
46 A64_SHIFT_TYPE_ROR = 3
47 };
48
49 /*
50 * Helpers for extracting complex instruction fields
51 */
52
53 /*
54 * For load/store with an unsigned 12 bit immediate scaled by the element
55 * size. The input has the immediate field in bits [14:3] and the element
56 * size in [2:0].
57 */
58 static int uimm_scaled(DisasContext *s, int x)
59 {
60 unsigned imm = x >> 3;
61 unsigned scale = extract32(x, 0, 3);
62 return imm << scale;
63 }
64
65 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
66 static int scale_by_log2_tag_granule(DisasContext *s, int x)
67 {
68 return x << LOG2_TAG_GRANULE;
69 }
70
71 /*
72 * Include the generated decoders.
73 */
74
75 #include "decode-sme-fa64.c.inc"
76 #include "decode-a64.c.inc"
77
78 /* initialize TCG globals. */
79 void a64_translate_init(void)
80 {
81 int i;
82
83 cpu_pc = tcg_global_mem_new_i64(tcg_env,
84 offsetof(CPUARMState, pc),
85 "pc");
86 for (i = 0; i < 32; i++) {
87 cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
88 offsetof(CPUARMState, xregs[i]),
89 regnames[i]);
90 }
91
92 cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
93 offsetof(CPUARMState, exclusive_high), "exclusive_high");
94 }
95
96 /*
97 * Return the core mmu_idx to use for A64 load/store insns which
98 * have a "unprivileged load/store" variant. Those insns access
99 * EL0 if executed from an EL which has control over EL0 (usually
100 * EL1) but behave like normal loads and stores if executed from
101 * elsewhere (eg EL3).
102 *
103 * @unpriv : true for the unprivileged encoding; false for the
104 * normal encoding (in which case we will return the same
105 * thing as get_mem_index()).
106 */
107 static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
108 {
109 /*
110 * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
111 * which is the usual mmu_idx for this cpu state.
112 */
113 ARMMMUIdx useridx = s->mmu_idx;
114
115 if (unpriv && s->unpriv) {
116 /*
117 * We have pre-computed the condition for AccType_UNPRIV.
118 * Therefore we should never get here with a mmu_idx for
119 * which we do not know the corresponding user mmu_idx.
120 */
121 switch (useridx) {
122 case ARMMMUIdx_E10_1:
123 case ARMMMUIdx_E10_1_PAN:
124 useridx = ARMMMUIdx_E10_0;
125 break;
126 case ARMMMUIdx_E20_2:
127 case ARMMMUIdx_E20_2_PAN:
128 useridx = ARMMMUIdx_E20_0;
129 break;
130 default:
131 g_assert_not_reached();
132 }
133 }
134 return arm_to_core_mmu_idx(useridx);
135 }
136
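/*
 * Helpers for managing PSTATE.BTYPE (FEAT_BTI).  set_btype_raw() stores
 * the value directly into the CPU state; set_btype() additionally marks
 * the translation-time cached value as unknown, and reset_btype() only
 * emits a store when the cached value might be non-zero.
 */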
137 static void set_btype_raw(int val)
138 {
139 tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
140 offsetof(CPUARMState, btype));
141 }
142
143 static void set_btype(DisasContext *s, int val)
144 {
145 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
146 tcg_debug_assert(val >= 1 && val <= 3);
147 set_btype_raw(val);
148 s->btype = -1;
149 }
150
151 static void reset_btype(DisasContext *s)
152 {
153 if (s->btype != 0) {
154 set_btype_raw(0);
155 s->btype = 0;
156 }
157 }
158
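/*
 * Compute pc_curr + diff into dest.  With CF_PCREL the generated code
 * must be position independent, so the value is derived from the
 * run-time cpu_pc rather than from an absolute constant.
 */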
159 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
160 {
161 assert(s->pc_save != -1);
162 if (tb_cflags(s->base.tb) & CF_PCREL) {
163 tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
164 } else {
165 tcg_gen_movi_i64(dest, s->pc_curr + diff);
166 }
167 }
168
169 void gen_a64_update_pc(DisasContext *s, target_long diff)
170 {
171 gen_pc_plus_diff(s, cpu_pc, diff);
172 s->pc_save = s->pc_curr + diff;
173 }
174
175 /*
176 * Handle Top Byte Ignore (TBI) bits.
177 *
178 * If address tagging is enabled via the TCR TBI bits:
179 * + for EL2 and EL3 there is only one TBI bit, and if it is set
180 * then the address is zero-extended, clearing bits [63:56]
181 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
182 * and TBI1 controls addresses with bit 55 == 1.
183 * If the appropriate TBI bit is set for the address then
184 * the address is sign-extended from bit 55 into bits [63:56]
185 *
186 * Here we have concatenated TBI{1,0} into tbi.
187 */
188 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
189 TCGv_i64 src, int tbi)
190 {
191 if (tbi == 0) {
192 /* Load unmodified address */
193 tcg_gen_mov_i64(dst, src);
194 } else if (!regime_has_2_ranges(s->mmu_idx)) {
195 /* Force tag byte to all zero */
196 tcg_gen_extract_i64(dst, src, 0, 56);
197 } else {
198 /* Sign-extend from bit 55. */
199 tcg_gen_sextract_i64(dst, src, 0, 56);
200
201 switch (tbi) {
202 case 1:
203 /* tbi0 but !tbi1: only use the extension if positive */
204 tcg_gen_and_i64(dst, dst, src);
205 break;
206 case 2:
207 /* !tbi0 but tbi1: only use the extension if negative */
208 tcg_gen_or_i64(dst, dst, src);
209 break;
210 case 3:
211 /* tbi0 and tbi1: always use the extension */
212 break;
213 default:
214 g_assert_not_reached();
215 }
216 }
217 }
218
219 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
220 {
221 /*
222 * If address tagging is enabled for instructions via the TCR TBI bits,
223 * then loading an address into the PC will clear out any tag.
224 */
225 gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
226 s->pc_save = -1;
227 }
228
229 /*
230 * Handle MTE and/or TBI.
231 *
232 * For TBI, ideally, we would do nothing. Proper behaviour on fault is
233 * for the tag to be present in the FAR_ELx register. But for user-only
234 * mode we do not have a TLB with which to implement this, so we must
235 * remove the top byte now.
236 *
237 * Always return a fresh temporary that we can increment independently
238 * of the write-back address.
239 */
240
241 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
242 {
243 TCGv_i64 clean = tcg_temp_new_i64();
244 #ifdef CONFIG_USER_ONLY
245 gen_top_byte_ignore(s, clean, addr, s->tbid);
246 #else
247 tcg_gen_mov_i64(clean, addr);
248 #endif
249 return clean;
250 }
251
252 /* Insert a zero tag into src, with the result at dst. */
253 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
254 {
255 tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
256 }
257
258 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
259 MMUAccessType acc, int log2_size)
260 {
261 gen_helper_probe_access(tcg_env, ptr,
262 tcg_constant_i32(acc),
263 tcg_constant_i32(get_mem_index(s)),
264 tcg_constant_i32(1 << log2_size));
265 }
266
267 /*
268 * For MTE, check a single logical or atomic access. This probes a single
269 * address, the exact one specified. The size and alignment of the access
270 * is not relevant to MTE, per se, but watchpoints do require the size,
271 * and we want to recognize those before making any other changes to state.
272 */
273 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
274 bool is_write, bool tag_checked,
275 MemOp memop, bool is_unpriv,
276 int core_idx)
277 {
278 if (tag_checked && s->mte_active[is_unpriv]) {
279 TCGv_i64 ret;
280 int desc = 0;
281
282 desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
283 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
284 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
285 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
286 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
287 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);
288
289 ret = tcg_temp_new_i64();
290 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
291
292 return ret;
293 }
294 return clean_data_tbi(s, addr);
295 }
296
297 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
298 bool tag_checked, MemOp memop)
299 {
300 return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
301 false, get_mem_index(s));
302 }
303
304 /*
305 * For MTE, check multiple logical sequential accesses.
306 */
307 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
308 bool tag_checked, int total_size, MemOp single_mop)
309 {
310 if (tag_checked && s->mte_active[0]) {
311 TCGv_i64 ret;
312 int desc = 0;
313
314 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
315 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
316 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
317 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
318 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
319 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
320
321 ret = tcg_temp_new_i64();
322 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
323
324 return ret;
325 }
326 return clean_data_tbi(s, addr);
327 }
328
329 /*
330 * Generate the special alignment check that applies to AccType_ATOMIC
331 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
332 * naturally aligned, but it must not cross a 16-byte boundary.
333 * See AArch64.CheckAlignment().
334 */
335 static void check_lse2_align(DisasContext *s, int rn, int imm,
336 bool is_write, MemOp mop)
337 {
338 TCGv_i32 tmp;
339 TCGv_i64 addr;
340 TCGLabel *over_label;
341 MMUAccessType type;
342 int mmu_idx;
343
344 tmp = tcg_temp_new_i32();
345 tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
346 tcg_gen_addi_i32(tmp, tmp, imm & 15);
347 tcg_gen_andi_i32(tmp, tmp, 15);
348 tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
349
350 over_label = gen_new_label();
351 tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
352
353 addr = tcg_temp_new_i64();
354 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
355
356 type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
357 mmu_idx = get_mem_index(s);
358 gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
359 tcg_constant_i32(mmu_idx));
360
361 gen_set_label(over_label);
362
363 }
364
365 /* Handle the alignment check for AccType_ATOMIC instructions. */
366 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
367 {
368 MemOp size = mop & MO_SIZE;
369
370 if (size == MO_8) {
371 return mop;
372 }
373
374 /*
375 * If size == MO_128, this is a LDXP, and the operation is single-copy
376 * atomic for each doubleword, not the entire quadword; it still must
377 * be quadword aligned.
378 */
379 if (size == MO_128) {
380 return finalize_memop_atom(s, MO_128 | MO_ALIGN,
381 MO_ATOM_IFALIGN_PAIR);
382 }
383 if (dc_isar_feature(aa64_lse2, s)) {
384 check_lse2_align(s, rn, 0, true, mop);
385 } else {
386 mop |= MO_ALIGN;
387 }
388 return finalize_memop(s, mop);
389 }
390
391 /* Handle the alignment check for AccType_ORDERED instructions. */
392 static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
393 bool is_write, MemOp mop)
394 {
395 MemOp size = mop & MO_SIZE;
396
397 if (size == MO_8) {
398 return mop;
399 }
400 if (size == MO_128) {
401 return finalize_memop_atom(s, MO_128 | MO_ALIGN,
402 MO_ATOM_IFALIGN_PAIR);
403 }
404 if (!dc_isar_feature(aa64_lse2, s)) {
405 mop |= MO_ALIGN;
406 } else if (!s->naa) {
407 check_lse2_align(s, rn, imm, is_write, mop);
408 }
409 return finalize_memop(s, mop);
410 }
411
412 typedef struct DisasCompare64 {
413 TCGCond cond;
414 TCGv_i64 value;
415 } DisasCompare64;
416
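/* Build a 64-bit comparison (condition and value) for A64 condition code cc. */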
417 static void a64_test_cc(DisasCompare64 *c64, int cc)
418 {
419 DisasCompare c32;
420
421 arm_test_cc(&c32, cc);
422
423 /*
424 * Sign-extend the 32-bit value so that the GE/LT comparisons work
425 * properly. The NE/EQ comparisons are also fine with this choice.
426 */
427 c64->cond = c32.cond;
428 c64->value = tcg_temp_new_i64();
429 tcg_gen_ext_i32_i64(c64->value, c32.value);
430 }
431
432 static void gen_rebuild_hflags(DisasContext *s)
433 {
434 gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
435 }
436
437 static void gen_exception_internal(int excp)
438 {
439 assert(excp_is_internal(excp));
440 gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
441 }
442
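/* Update the PC to the current insn and raise a QEMU-internal exception. */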
443 static void gen_exception_internal_insn(DisasContext *s, int excp)
444 {
445 gen_a64_update_pc(s, 0);
446 gen_exception_internal(excp);
447 s->base.is_jmp = DISAS_NORETURN;
448 }
449
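/* Raise a breakpoint exception (e.g. for BRK) with the given syndrome. */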
450 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
451 {
452 gen_a64_update_pc(s, 0);
453 gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
454 s->base.is_jmp = DISAS_NORETURN;
455 }
456
457 static void gen_step_complete_exception(DisasContext *s)
458 {
459 /* We just completed a step of an insn. Move from Active-not-pending
460 * to Active-pending, and then also take the swstep exception.
461 * This corresponds to making the (IMPDEF) choice to prioritize
462 * swstep exceptions over asynchronous exceptions taken to an exception
463 * level where debug is disabled. This choice has the advantage that
464 * we do not need to maintain internal state corresponding to the
465 * ISV/EX syndrome bits between completion of the step and generation
466 * of the exception, and our syndrome information is always correct.
467 */
468 gen_ss_advance(s);
469 gen_swstep_exception(s, 1, s->is_ldex);
470 s->base.is_jmp = DISAS_NORETURN;
471 }
472
473 static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
474 {
475 if (s->ss_active) {
476 return false;
477 }
478 return translator_use_goto_tb(&s->base, dest);
479 }
480
481 static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
482 {
483 if (use_goto_tb(s, s->pc_curr + diff)) {
484 /*
485 * For pcrel, the pc must always be up-to-date on entry to
486 * the linked TB, so that it can use simple additions for all
487 * further adjustments. For !pcrel, the linked TB is compiled
488 * to know its full virtual address, so we can delay the
489 * update to pc to the unlinked path. A long chain of links
490 * can thus avoid many updates to the PC.
491 */
492 if (tb_cflags(s->base.tb) & CF_PCREL) {
493 gen_a64_update_pc(s, diff);
494 tcg_gen_goto_tb(n);
495 } else {
496 tcg_gen_goto_tb(n);
497 gen_a64_update_pc(s, diff);
498 }
499 tcg_gen_exit_tb(s->base.tb, n);
500 s->base.is_jmp = DISAS_NORETURN;
501 } else {
502 gen_a64_update_pc(s, diff);
503 if (s->ss_active) {
504 gen_step_complete_exception(s);
505 } else {
506 tcg_gen_lookup_and_goto_ptr();
507 s->base.is_jmp = DISAS_NORETURN;
508 }
509 }
510 }
511
512 /*
513 * Register access functions
514 *
515 * These functions are used for directly accessing a register where
516 * changes to the final register value are likely to be made. If you
517 * need to use a register for temporary calculation (e.g. index type
518 * operations) use the read_* form.
519 *
520 * B1.2.1 Register mappings
521 *
522 * In instruction register encoding 31 can refer to ZR (zero register) or
523 * the SP (stack pointer) depending on context. In QEMU's case we map SP
524 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
525 * This is the point of the _sp forms.
526 */
527 TCGv_i64 cpu_reg(DisasContext *s, int reg)
528 {
529 if (reg == 31) {
530 TCGv_i64 t = tcg_temp_new_i64();
531 tcg_gen_movi_i64(t, 0);
532 return t;
533 } else {
534 return cpu_X[reg];
535 }
536 }
537
538 /* register access for when 31 == SP */
539 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
540 {
541 return cpu_X[reg];
542 }
543
544 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
545 * representing the register contents. This TCGv is an auto-freed
546 * temporary so it need not be explicitly freed, and may be modified.
547 */
548 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
549 {
550 TCGv_i64 v = tcg_temp_new_i64();
551 if (reg != 31) {
552 if (sf) {
553 tcg_gen_mov_i64(v, cpu_X[reg]);
554 } else {
555 tcg_gen_ext32u_i64(v, cpu_X[reg]);
556 }
557 } else {
558 tcg_gen_movi_i64(v, 0);
559 }
560 return v;
561 }
562
563 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
564 {
565 TCGv_i64 v = tcg_temp_new_i64();
566 if (sf) {
567 tcg_gen_mov_i64(v, cpu_X[reg]);
568 } else {
569 tcg_gen_ext32u_i64(v, cpu_X[reg]);
570 }
571 return v;
572 }
573
574 /* Return the offset into CPUARMState of a slice (from
575 * the least significant end) of FP register Qn (ie
576 * Dn, Sn, Hn or Bn).
577 * (Note that this is not the same mapping as for A32; see cpu.h)
578 */
579 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
580 {
581 return vec_reg_offset(s, regno, 0, size);
582 }
583
584 /* Offset of the high half of the 128 bit vector Qn */
585 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
586 {
587 return vec_reg_offset(s, regno, 1, MO_64);
588 }
589
590 /* Convenience accessors for reading and writing single and double
591 * FP registers. Writing clears the upper parts of the associated
592 * 128 bit vector register, as required by the architecture.
593 * Note that unlike the GP register accessors, the values returned
594 * by the read functions must be manually freed.
595 */
596 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
597 {
598 TCGv_i64 v = tcg_temp_new_i64();
599
600 tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
601 return v;
602 }
603
604 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
605 {
606 TCGv_i32 v = tcg_temp_new_i32();
607
608 tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
609 return v;
610 }
611
612 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
613 {
614 TCGv_i32 v = tcg_temp_new_i32();
615
616 tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
617 return v;
618 }
619
620 static void clear_vec(DisasContext *s, int rd)
621 {
622 unsigned ofs = fp_reg_offset(s, rd, MO_64);
623 unsigned vsz = vec_full_reg_size(s);
624
625 tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0);
626 }
627
628 /*
629 * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
630 * If SVE is not enabled, then there are only 128 bits in the vector.
631 */
632 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
633 {
634 unsigned ofs = fp_reg_offset(s, rd, MO_64);
635 unsigned vsz = vec_full_reg_size(s);
636
637 /* Nop move, with side effect of clearing the tail. */
638 tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
639 }
640
641 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
642 {
643 unsigned ofs = fp_reg_offset(s, reg, MO_64);
644
645 tcg_gen_st_i64(v, tcg_env, ofs);
646 clear_vec_high(s, false, reg);
647 }
648
649 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
650 {
651 TCGv_i64 tmp = tcg_temp_new_i64();
652
653 tcg_gen_extu_i32_i64(tmp, v);
654 write_fp_dreg(s, reg, tmp);
655 }
656
657 /*
658 * Write a double result to 128 bit vector register reg, honouring FPCR.NEP:
659 * - if FPCR.NEP == 0, clear the high elements of reg
660 * - if FPCR.NEP == 1, set the high elements of reg from mergereg
661 * (i.e. merge the result with those high elements)
662 * In either case, SVE register bits above 128 are zeroed (per R_WKYLB).
663 */
664 static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg,
665 TCGv_i64 v)
666 {
667 if (!s->fpcr_nep) {
668 write_fp_dreg(s, reg, v);
669 return;
670 }
671
672 /*
673 * Move from mergereg to reg; this sets the high elements and
674 * clears the bits above 128 as a side effect.
675 */
676 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
677 vec_full_reg_offset(s, mergereg),
678 16, vec_full_reg_size(s));
679 tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg));
680 }
681
682 /*
683 * Write a single-prec result, but only clear the higher elements
684 * of the destination register if FPCR.NEP is 0; otherwise preserve them.
685 */
686 static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg,
687 TCGv_i32 v)
688 {
689 if (!s->fpcr_nep) {
690 write_fp_sreg(s, reg, v);
691 return;
692 }
693
694 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
695 vec_full_reg_offset(s, mergereg),
696 16, vec_full_reg_size(s));
697 tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
698 }
699
700 /*
701 * Write a half-prec result, but only clear the higher elements
702 * of the destination register if FPCR.NEP is 0; otherwise preserve them.
703 * The caller must ensure that the top 16 bits of v are zero.
704 */
705 static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg,
706 TCGv_i32 v)
707 {
708 if (!s->fpcr_nep) {
709 write_fp_sreg(s, reg, v);
710 return;
711 }
712
713 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
714 vec_full_reg_offset(s, mergereg),
715 16, vec_full_reg_size(s));
716 tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
717 }
718
719 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */
720 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
721 GVecGen2Fn *gvec_fn, int vece)
722 {
723 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
724 is_q ? 16 : 8, vec_full_reg_size(s));
725 }
726
727 /* Expand a 2-operand + immediate AdvSIMD vector operation using
728 * an expander function.
729 */
730 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
731 int64_t imm, GVecGen2iFn *gvec_fn, int vece)
732 {
733 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
734 imm, is_q ? 16 : 8, vec_full_reg_size(s));
735 }
736
737 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */
738 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
739 GVecGen3Fn *gvec_fn, int vece)
740 {
741 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
742 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
743 }
744
745 /* Expand a 4-operand AdvSIMD vector operation using an expander function. */
746 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
747 int rx, GVecGen4Fn *gvec_fn, int vece)
748 {
749 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
750 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
751 is_q ? 16 : 8, vec_full_reg_size(s));
752 }
753
754 /* Expand a 2-operand operation using an out-of-line helper. */
755 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
756 int rn, int data, gen_helper_gvec_2 *fn)
757 {
758 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
759 vec_full_reg_offset(s, rn),
760 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
761 }
762
763 /* Expand a 3-operand operation using an out-of-line helper. */
764 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
765 int rn, int rm, int data, gen_helper_gvec_3 *fn)
766 {
767 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
768 vec_full_reg_offset(s, rn),
769 vec_full_reg_offset(s, rm),
770 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
771 }
772
773 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
774 * an out-of-line helper.
775 */
776 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
777 int rm, ARMFPStatusFlavour fpsttype, int data,
778 gen_helper_gvec_3_ptr *fn)
779 {
780 TCGv_ptr fpst = fpstatus_ptr(fpsttype);
781 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
782 vec_full_reg_offset(s, rn),
783 vec_full_reg_offset(s, rm), fpst,
784 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
785 }
786
787 /* Expand a 4-operand operation using an out-of-line helper. */
788 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
789 int rm, int ra, int data, gen_helper_gvec_4 *fn)
790 {
791 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
792 vec_full_reg_offset(s, rn),
793 vec_full_reg_offset(s, rm),
794 vec_full_reg_offset(s, ra),
795 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
796 }
797
798 /*
799 * Expand a 4-operand operation using an out-of-line helper that takes
800 * a pointer to the CPU env.
801 */
802 static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
803 int rm, int ra, int data,
804 gen_helper_gvec_4_ptr *fn)
805 {
806 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
807 vec_full_reg_offset(s, rn),
808 vec_full_reg_offset(s, rm),
809 vec_full_reg_offset(s, ra),
810 tcg_env,
811 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
812 }
813
814 /*
815 * Expand a 4-operand + fpstatus pointer + simd data value operation using
816 * an out-of-line helper.
817 */
818 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
819 int rm, int ra, ARMFPStatusFlavour fpsttype,
820 int data,
821 gen_helper_gvec_4_ptr *fn)
822 {
823 TCGv_ptr fpst = fpstatus_ptr(fpsttype);
824 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
825 vec_full_reg_offset(s, rn),
826 vec_full_reg_offset(s, rm),
827 vec_full_reg_offset(s, ra), fpst,
828 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
829 }
830
831 /*
832 * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN.
833 * These functions implement
834 * d = floatN_is_any_nan(s) ? s : floatN_chs(s)
835 * which for float32 is
836 * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
837 * and similarly for the other float sizes.
838 */
839 static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s)
840 {
841 TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
842
843 gen_vfp_negh(chs_s, s);
844 gen_vfp_absh(abs_s, s);
845 tcg_gen_movcond_i32(TCG_COND_GTU, d,
846 abs_s, tcg_constant_i32(0x7c00),
847 s, chs_s);
848 }
849
850 static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s)
851 {
852 TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
853
854 gen_vfp_negs(chs_s, s);
855 gen_vfp_abss(abs_s, s);
856 tcg_gen_movcond_i32(TCG_COND_GTU, d,
857 abs_s, tcg_constant_i32(0x7f800000UL),
858 s, chs_s);
859 }
860
861 static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
862 {
863 TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64();
864
865 gen_vfp_negd(chs_s, s);
866 gen_vfp_absd(abs_s, s);
867 tcg_gen_movcond_i64(TCG_COND_GTU, d,
868 abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
869 s, chs_s);
870 }
871
872 /*
873 * These functions implement
874 * d = floatN_is_any_nan(s) ? s : floatN_abs(s)
875 * which for float32 is
876 * d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31))
877 * and similarly for the other float sizes.
878 */
879 static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s)
880 {
881 TCGv_i32 abs_s = tcg_temp_new_i32();
882
883 gen_vfp_absh(abs_s, s);
884 tcg_gen_movcond_i32(TCG_COND_GTU, d,
885 abs_s, tcg_constant_i32(0x7c00),
886 s, abs_s);
887 }
888
889 static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s)
890 {
891 TCGv_i32 abs_s = tcg_temp_new_i32();
892
893 gen_vfp_abss(abs_s, s);
894 tcg_gen_movcond_i32(TCG_COND_GTU, d,
895 abs_s, tcg_constant_i32(0x7f800000UL),
896 s, abs_s);
897 }
898
899 static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s)
900 {
901 TCGv_i64 abs_s = tcg_temp_new_i64();
902
903 gen_vfp_absd(abs_s, s);
904 tcg_gen_movcond_i64(TCG_COND_GTU, d,
905 abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
906 s, abs_s);
907 }
908
909 static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
910 {
911 if (dc->fpcr_ah) {
912 gen_vfp_ah_negh(d, s);
913 } else {
914 gen_vfp_negh(d, s);
915 }
916 }
917
918 static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
919 {
920 if (dc->fpcr_ah) {
921 gen_vfp_ah_negs(d, s);
922 } else {
923 gen_vfp_negs(d, s);
924 }
925 }
926
927 static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s)
928 {
929 if (dc->fpcr_ah) {
930 gen_vfp_ah_negd(d, s);
931 } else {
932 gen_vfp_negd(d, s);
933 }
934 }
935
936 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
937 * than the 32 bit equivalent.
938 */
939 static inline void gen_set_NZ64(TCGv_i64 result)
940 {
941 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
942 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
943 }
944
945 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
946 static inline void gen_logic_CC(int sf, TCGv_i64 result)
947 {
948 if (sf) {
949 gen_set_NZ64(result);
950 } else {
951 tcg_gen_extrl_i64_i32(cpu_ZF, result);
952 tcg_gen_mov_i32(cpu_NF, cpu_ZF);
953 }
954 tcg_gen_movi_i32(cpu_CF, 0);
955 tcg_gen_movi_i32(cpu_VF, 0);
956 }
957
958 /* dest = T0 + T1; compute C, N, V and Z flags */
959 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
960 {
961 TCGv_i64 result, flag, tmp;
962 result = tcg_temp_new_i64();
963 flag = tcg_temp_new_i64();
964 tmp = tcg_temp_new_i64();
965
966 tcg_gen_movi_i64(tmp, 0);
967 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
968
969 tcg_gen_extrl_i64_i32(cpu_CF, flag);
970
971 gen_set_NZ64(result);
972
973 tcg_gen_xor_i64(flag, result, t0);
974 tcg_gen_xor_i64(tmp, t0, t1);
975 tcg_gen_andc_i64(flag, flag, tmp);
976 tcg_gen_extrh_i64_i32(cpu_VF, flag);
977
978 tcg_gen_mov_i64(dest, result);
979 }
980
981 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
982 {
983 TCGv_i32 t0_32 = tcg_temp_new_i32();
984 TCGv_i32 t1_32 = tcg_temp_new_i32();
985 TCGv_i32 tmp = tcg_temp_new_i32();
986
987 tcg_gen_movi_i32(tmp, 0);
988 tcg_gen_extrl_i64_i32(t0_32, t0);
989 tcg_gen_extrl_i64_i32(t1_32, t1);
990 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
991 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
992 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
993 tcg_gen_xor_i32(tmp, t0_32, t1_32);
994 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
995 tcg_gen_extu_i32_i64(dest, cpu_NF);
996 }
997
998 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
999 {
1000 if (sf) {
1001 gen_add64_CC(dest, t0, t1);
1002 } else {
1003 gen_add32_CC(dest, t0, t1);
1004 }
1005 }
1006
1007 /* dest = T0 - T1; compute C, N, V and Z flags */
1008 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1009 {
1010 /* 64 bit arithmetic */
1011 TCGv_i64 result, flag, tmp;
1012
1013 result = tcg_temp_new_i64();
1014 flag = tcg_temp_new_i64();
1015 tcg_gen_sub_i64(result, t0, t1);
1016
1017 gen_set_NZ64(result);
1018
1019 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
1020 tcg_gen_extrl_i64_i32(cpu_CF, flag);
1021
1022 tcg_gen_xor_i64(flag, result, t0);
1023 tmp = tcg_temp_new_i64();
1024 tcg_gen_xor_i64(tmp, t0, t1);
1025 tcg_gen_and_i64(flag, flag, tmp);
1026 tcg_gen_extrh_i64_i32(cpu_VF, flag);
1027 tcg_gen_mov_i64(dest, result);
1028 }
1029
1030 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1031 {
1032 /* 32 bit arithmetic */
1033 TCGv_i32 t0_32 = tcg_temp_new_i32();
1034 TCGv_i32 t1_32 = tcg_temp_new_i32();
1035 TCGv_i32 tmp;
1036
1037 tcg_gen_extrl_i64_i32(t0_32, t0);
1038 tcg_gen_extrl_i64_i32(t1_32, t1);
1039 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
1040 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1041 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
1042 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
1043 tmp = tcg_temp_new_i32();
1044 tcg_gen_xor_i32(tmp, t0_32, t1_32);
1045 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
1046 tcg_gen_extu_i32_i64(dest, cpu_NF);
1047 }
1048
1049 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1050 {
1051 if (sf) {
1052 gen_sub64_CC(dest, t0, t1);
1053 } else {
1054 gen_sub32_CC(dest, t0, t1);
1055 }
1056 }
1057
1058 /* dest = T0 + T1 + CF; do not compute flags. */
1059 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1060 {
1061 TCGv_i64 flag = tcg_temp_new_i64();
1062 tcg_gen_extu_i32_i64(flag, cpu_CF);
1063 tcg_gen_add_i64(dest, t0, t1);
1064 tcg_gen_add_i64(dest, dest, flag);
1065
1066 if (!sf) {
1067 tcg_gen_ext32u_i64(dest, dest);
1068 }
1069 }
1070
1071 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
1072 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1073 {
1074 if (sf) {
1075 TCGv_i64 result = tcg_temp_new_i64();
1076 TCGv_i64 cf_64 = tcg_temp_new_i64();
1077 TCGv_i64 vf_64 = tcg_temp_new_i64();
1078 TCGv_i64 tmp = tcg_temp_new_i64();
1079 TCGv_i64 zero = tcg_constant_i64(0);
1080
1081 tcg_gen_extu_i32_i64(cf_64, cpu_CF);
1082 tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
1083 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
1084 tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
1085 gen_set_NZ64(result);
1086
1087 tcg_gen_xor_i64(vf_64, result, t0);
1088 tcg_gen_xor_i64(tmp, t0, t1);
1089 tcg_gen_andc_i64(vf_64, vf_64, tmp);
1090 tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
1091
1092 tcg_gen_mov_i64(dest, result);
1093 } else {
1094 TCGv_i32 t0_32 = tcg_temp_new_i32();
1095 TCGv_i32 t1_32 = tcg_temp_new_i32();
1096 TCGv_i32 tmp = tcg_temp_new_i32();
1097 TCGv_i32 zero = tcg_constant_i32(0);
1098
1099 tcg_gen_extrl_i64_i32(t0_32, t0);
1100 tcg_gen_extrl_i64_i32(t1_32, t1);
1101 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
1102 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);
1103
1104 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1105 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
1106 tcg_gen_xor_i32(tmp, t0_32, t1_32);
1107 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
1108 tcg_gen_extu_i32_i64(dest, cpu_NF);
1109 }
1110 }
1111
1112 /*
1113 * Load/Store generators
1114 */
1115
1116 /*
1117 * Store from GPR register to memory.
1118 */
1119 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
1120 TCGv_i64 tcg_addr, MemOp memop, int memidx,
1121 bool iss_valid,
1122 unsigned int iss_srt,
1123 bool iss_sf, bool iss_ar)
1124 {
1125 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
1126
1127 if (iss_valid) {
1128 uint32_t syn;
1129
1130 syn = syn_data_abort_with_iss(0,
1131 (memop & MO_SIZE),
1132 false,
1133 iss_srt,
1134 iss_sf,
1135 iss_ar,
1136 0, 0, 0, 0, 0, false);
1137 disas_set_insn_syndrome(s, syn);
1138 }
1139 }
1140
1141 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
1142 TCGv_i64 tcg_addr, MemOp memop,
1143 bool iss_valid,
1144 unsigned int iss_srt,
1145 bool iss_sf, bool iss_ar)
1146 {
1147 do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
1148 iss_valid, iss_srt, iss_sf, iss_ar);
1149 }
1150
1151 /*
1152 * Load from memory to GPR register
1153 */
1154 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1155 MemOp memop, bool extend, int memidx,
1156 bool iss_valid, unsigned int iss_srt,
1157 bool iss_sf, bool iss_ar)
1158 {
1159 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
1160
1161 if (extend && (memop & MO_SIGN)) {
1162 g_assert((memop & MO_SIZE) <= MO_32);
1163 tcg_gen_ext32u_i64(dest, dest);
1164 }
1165
1166 if (iss_valid) {
1167 uint32_t syn;
1168
1169 syn = syn_data_abort_with_iss(0,
1170 (memop & MO_SIZE),
1171 (memop & MO_SIGN) != 0,
1172 iss_srt,
1173 iss_sf,
1174 iss_ar,
1175 0, 0, 0, 0, 0, false);
1176 disas_set_insn_syndrome(s, syn);
1177 }
1178 }
1179
1180 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1181 MemOp memop, bool extend,
1182 bool iss_valid, unsigned int iss_srt,
1183 bool iss_sf, bool iss_ar)
1184 {
1185 do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
1186 iss_valid, iss_srt, iss_sf, iss_ar);
1187 }
1188
1189 /*
1190 * Store from FP register to memory
1191 */
1192 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
1193 {
1194 /* This writes the bottom N bits of a 128 bit wide vector to memory */
1195 TCGv_i64 tmplo = tcg_temp_new_i64();
1196
1197 tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));
1198
1199 if ((mop & MO_SIZE) < MO_128) {
1200 tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1201 } else {
1202 TCGv_i64 tmphi = tcg_temp_new_i64();
1203 TCGv_i128 t16 = tcg_temp_new_i128();
1204
1205 tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
1206 tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
1207
1208 tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
1209 }
1210 }
1211
1212 /*
1213 * Load from memory to FP register
1214 */
1215 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
1216 {
1217 /* This always zero-extends and writes to a full 128 bit wide vector */
1218 TCGv_i64 tmplo = tcg_temp_new_i64();
1219 TCGv_i64 tmphi = NULL;
1220
1221 if ((mop & MO_SIZE) < MO_128) {
1222 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1223 } else {
1224 TCGv_i128 t16 = tcg_temp_new_i128();
1225
1226 tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
1227
1228 tmphi = tcg_temp_new_i64();
1229 tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
1230 }
1231
1232 tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));
1233
1234 if (tmphi) {
1235 tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
1236 }
1237 clear_vec_high(s, tmphi != NULL, destidx);
1238 }
1239
1240 /*
1241 * Vector load/store helpers.
1242 *
1243 * The principal difference between this and a FP load is that we don't
1244 * zero extend as we are filling a partial chunk of the vector register.
1245 * These functions don't support 128 bit loads/stores, which would be
1246 * normal load/store operations.
1247 *
1248 * The _i32 versions are useful when operating on 32 bit quantities
1249 * (eg for floating point single or using Neon helper functions).
1250 */
1251
1252 /* Get value of an element within a vector register */
1253 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1254 int element, MemOp memop)
1255 {
1256 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1257 switch ((unsigned)memop) {
1258 case MO_8:
1259 tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
1260 break;
1261 case MO_16:
1262 tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
1263 break;
1264 case MO_32:
1265 tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
1266 break;
1267 case MO_8|MO_SIGN:
1268 tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
1269 break;
1270 case MO_16|MO_SIGN:
1271 tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
1272 break;
1273 case MO_32|MO_SIGN:
1274 tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
1275 break;
1276 case MO_64:
1277 case MO_64|MO_SIGN:
1278 tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
1279 break;
1280 default:
1281 g_assert_not_reached();
1282 }
1283 }
1284
1285 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1286 int element, MemOp memop)
1287 {
1288 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1289 switch (memop) {
1290 case MO_8:
1291 tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
1292 break;
1293 case MO_16:
1294 tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
1295 break;
1296 case MO_8|MO_SIGN:
1297 tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
1298 break;
1299 case MO_16|MO_SIGN:
1300 tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
1301 break;
1302 case MO_32:
1303 case MO_32|MO_SIGN:
1304 tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
1305 break;
1306 default:
1307 g_assert_not_reached();
1308 }
1309 }
1310
1311 /* Set value of an element within a vector register */
1312 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1313 int element, MemOp memop)
1314 {
1315 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1316 switch (memop) {
1317 case MO_8:
1318 tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
1319 break;
1320 case MO_16:
1321 tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
1322 break;
1323 case MO_32:
1324 tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
1325 break;
1326 case MO_64:
1327 tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
1328 break;
1329 default:
1330 g_assert_not_reached();
1331 }
1332 }
1333
1334 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1335 int destidx, int element, MemOp memop)
1336 {
1337 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1338 switch (memop) {
1339 case MO_8:
1340 tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
1341 break;
1342 case MO_16:
1343 tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
1344 break;
1345 case MO_32:
1346 tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
1347 break;
1348 default:
1349 g_assert_not_reached();
1350 }
1351 }
1352
1353 /* Store from vector register to memory */
1354 static void do_vec_st(DisasContext *s, int srcidx, int element,
1355 TCGv_i64 tcg_addr, MemOp mop)
1356 {
1357 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1358
1359 read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
1360 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1361 }
1362
1363 /* Load from memory to vector register */
1364 static void do_vec_ld(DisasContext *s, int destidx, int element,
1365 TCGv_i64 tcg_addr, MemOp mop)
1366 {
1367 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1368
1369 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1370 write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
1371 }
1372
1373 /* Check that FP/Neon access is enabled. If it is, return
1374 * true. If not, emit code to generate an appropriate exception,
1375 * and return false; the caller should not emit any code for
1376 * the instruction. Note that this check must happen after all
1377 * unallocated-encoding checks (otherwise the syndrome information
1378 * for the resulting exception will be incorrect).
1379 */
1380 static bool fp_access_check_only(DisasContext *s)
1381 {
1382 if (s->fp_excp_el) {
1383 assert(!s->fp_access_checked);
1384 s->fp_access_checked = -1;
1385
1386 gen_exception_insn_el(s, 0, EXCP_UDEF,
1387 syn_fp_access_trap(1, 0xe, false, 0),
1388 s->fp_excp_el);
1389 return false;
1390 }
1391 s->fp_access_checked = 1;
1392 return true;
1393 }
1394
1395 static bool fp_access_check(DisasContext *s)
1396 {
1397 if (!fp_access_check_only(s)) {
1398 return false;
1399 }
1400 if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
1401 gen_exception_insn(s, 0, EXCP_UDEF,
1402 syn_smetrap(SME_ET_Streaming, false));
1403 return false;
1404 }
1405 return true;
1406 }
1407
1408 /*
1409 * Return <0 for non-supported element sizes, with MO_16 controlled by
1410 * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
1411 */
1412 static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz)
1413 {
1414 switch (esz) {
1415 case MO_64:
1416 case MO_32:
1417 break;
1418 case MO_16:
1419 if (!dc_isar_feature(aa64_fp16, s)) {
1420 return -1;
1421 }
1422 break;
1423 default:
1424 return -1;
1425 }
1426 return fp_access_check(s);
1427 }
1428
1429 /* Likewise, but vector MO_64 must have two elements. */
1430 static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
1431 {
1432 switch (esz) {
1433 case MO_64:
1434 if (!is_q) {
1435 return -1;
1436 }
1437 break;
1438 case MO_32:
1439 break;
1440 case MO_16:
1441 if (!dc_isar_feature(aa64_fp16, s)) {
1442 return -1;
1443 }
1444 break;
1445 default:
1446 return -1;
1447 }
1448 return fp_access_check(s);
1449 }
1450
1451 /*
1452 * Check that SVE access is enabled. If it is, return true.
1453 * If not, emit code to generate an appropriate exception and return false.
1454 * This function corresponds to CheckSVEEnabled().
1455 */
1456 bool sve_access_check(DisasContext *s)
1457 {
1458 if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
1459 bool ret;
1460
1461 assert(dc_isar_feature(aa64_sme, s));
1462 ret = sme_sm_enabled_check(s);
1463 s->sve_access_checked = (ret ? 1 : -1);
1464 return ret;
1465 }
1466 if (s->sve_excp_el) {
1467 /* Assert that we only raise one exception per instruction. */
1468 assert(!s->sve_access_checked);
1469 gen_exception_insn_el(s, 0, EXCP_UDEF,
1470 syn_sve_access_trap(), s->sve_excp_el);
1471 s->sve_access_checked = -1;
1472 return false;
1473 }
1474 s->sve_access_checked = 1;
1475 return fp_access_check(s);
1476 }
1477
1478 /*
1479 * Check that SME access is enabled, raise an exception if not.
1480 * Note that this function corresponds to CheckSMEAccess and is
1481 * only used directly for cpregs.
1482 */
1483 static bool sme_access_check(DisasContext *s)
1484 {
1485 if (s->sme_excp_el) {
1486 gen_exception_insn_el(s, 0, EXCP_UDEF,
1487 syn_smetrap(SME_ET_AccessTrap, false),
1488 s->sme_excp_el);
1489 return false;
1490 }
1491 return true;
1492 }
1493
1494 /* This function corresponds to CheckSMEEnabled. */
1495 bool sme_enabled_check(DisasContext *s)
1496 {
1497 /*
1498 * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1499 * to be zero when fp_excp_el has priority. This is because we need
1500 * sme_excp_el by itself for cpregs access checks.
1501 */
1502 if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
1503 bool ret = sme_access_check(s);
1504 s->fp_access_checked = (ret ? 1 : -1);
1505 return ret;
1506 }
1507 return fp_access_check_only(s);
1508 }
1509
1510 /* Common subroutine for CheckSMEAnd*Enabled. */
1511 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
1512 {
1513 if (!sme_enabled_check(s)) {
1514 return false;
1515 }
1516 if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
1517 gen_exception_insn(s, 0, EXCP_UDEF,
1518 syn_smetrap(SME_ET_NotStreaming, false));
1519 return false;
1520 }
1521 if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
1522 gen_exception_insn(s, 0, EXCP_UDEF,
1523 syn_smetrap(SME_ET_InactiveZA, false));
1524 return false;
1525 }
1526 return true;
1527 }
1528
1529 /*
1530 * Expanders for AdvSIMD translation functions.
1531 */
1532
1533 static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
1534 gen_helper_gvec_2 *fn)
1535 {
1536 if (!a->q && a->esz == MO_64) {
1537 return false;
1538 }
1539 if (fp_access_check(s)) {
1540 gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
1541 }
1542 return true;
1543 }
1544
1545 static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
1546 gen_helper_gvec_3 *fn)
1547 {
1548 if (!a->q && a->esz == MO_64) {
1549 return false;
1550 }
1551 if (fp_access_check(s)) {
1552 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
1553 }
1554 return true;
1555 }
1556
1557 static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1558 {
1559 if (!a->q && a->esz == MO_64) {
1560 return false;
1561 }
1562 if (fp_access_check(s)) {
1563 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1564 }
1565 return true;
1566 }
1567
1568 static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1569 {
1570 if (a->esz == MO_64) {
1571 return false;
1572 }
1573 if (fp_access_check(s)) {
1574 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1575 }
1576 return true;
1577 }
1578
1579 static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1580 {
1581 if (a->esz == MO_8) {
1582 return false;
1583 }
1584 return do_gvec_fn3_no64(s, a, fn);
1585 }
1586
1587 static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
1588 {
1589 if (!a->q && a->esz == MO_64) {
1590 return false;
1591 }
1592 if (fp_access_check(s)) {
1593 gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
1594 }
1595 return true;
1596 }
1597
1598 /*
1599 * This utility function is for doing register extension with an
1600 * optional shift. You will likely want to pass a temporary for the
1601 * destination register. See DecodeRegExtend() in the ARM ARM.
1602 */
1603 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1604 int option, unsigned int shift)
1605 {
1606 int extsize = extract32(option, 0, 2);
1607 bool is_signed = extract32(option, 2, 1);
1608
1609 tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
1610 tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1611 }
1612
1613 static inline void gen_check_sp_alignment(DisasContext *s)
1614 {
1615 /* The AArch64 architecture mandates that (if enabled via PSTATE
1616 * or SCTLR bits) there is a check that SP is 16-aligned on every
1617 * SP-relative load or store (with an exception generated if it is not).
1618 * In line with general QEMU practice regarding misaligned accesses,
1619 * we omit these checks for the sake of guest program performance.
1620 * This function is provided as a hook so we can more easily add these
1621 * checks in future (possibly as a "favour catching guest program bugs
1622 * over speed" user selectable option).
1623 */
1624 }
1625
1626 /*
1627 * The instruction disassembly implemented here matches
1628 * the instruction encoding classifications in chapter C4
1629 * of the ARM Architecture Reference Manual (DDI0487B_a);
1630 * classification names and decode diagrams here should generally
1631 * match up with those in the manual.
1632 */
1633
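/* B: unconditional branch (immediate). */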
1634 static bool trans_B(DisasContext *s, arg_i *a)
1635 {
1636 reset_btype(s);
1637 gen_goto_tb(s, 0, a->imm);
1638 return true;
1639 }
1640
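/* BL: branch with link; the return address is written to LR (x30). */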
1641 static bool trans_BL(DisasContext *s, arg_i *a)
1642 {
1643 gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
1644 reset_btype(s);
1645 gen_goto_tb(s, 0, a->imm);
1646 return true;
1647 }
1648
1649
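/* CBZ, CBNZ: compare register against zero and branch. */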
1650 static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1651 {
1652 DisasLabel match;
1653 TCGv_i64 tcg_cmp;
1654
1655 tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1656 reset_btype(s);
1657
1658 match = gen_disas_label(s);
1659 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1660 tcg_cmp, 0, match.label);
1661 gen_goto_tb(s, 0, 4);
1662 set_disas_label(s, match);
1663 gen_goto_tb(s, 1, a->imm);
1664 return true;
1665 }
1666
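/* TBZ, TBNZ: test a single register bit and branch. */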
1667 static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1668 {
1669 DisasLabel match;
1670 TCGv_i64 tcg_cmp;
1671
1672 tcg_cmp = tcg_temp_new_i64();
1673 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1674
1675 reset_btype(s);
1676
1677 match = gen_disas_label(s);
1678 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1679 tcg_cmp, 0, match.label);
1680 gen_goto_tb(s, 0, 4);
1681 set_disas_label(s, match);
1682 gen_goto_tb(s, 1, a->imm);
1683 return true;
1684 }
1685
1686 static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1687 {
1688 /* BC.cond is only present with FEAT_HBC */
1689 if (a->c && !dc_isar_feature(aa64_hbc, s)) {
1690 return false;
1691 }
1692 reset_btype(s);
1693 if (a->cond < 0x0e) {
1694 /* genuinely conditional branches */
1695 DisasLabel match = gen_disas_label(s);
1696 arm_gen_test_cc(a->cond, match.label);
1697 gen_goto_tb(s, 0, 4);
1698 set_disas_label(s, match);
1699 gen_goto_tb(s, 1, a->imm);
1700 } else {
1701 /* 0xe and 0xf are both "always" conditions */
1702 gen_goto_tb(s, 0, a->imm);
1703 }
1704 return true;
1705 }
1706
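/*
 * PSTATE.BTYPE records the kind of indirect branch taken so that FEAT_BTI
 * can check it against the BTI instruction at the target: the BR family
 * sets 1 or 3 depending on the register used and whether the source page
 * is guarded, while the BLR family always sets 2.
 */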
1707 static void set_btype_for_br(DisasContext *s, int rn)
1708 {
1709 if (dc_isar_feature(aa64_bti, s)) {
1710 /* BR to {x16,x17} or !guard -> 1, else 3. */
1711 if (rn == 16 || rn == 17) {
1712 set_btype(s, 1);
1713 } else {
1714 TCGv_i64 pc = tcg_temp_new_i64();
1715 gen_pc_plus_diff(s, pc, 0);
1716 gen_helper_guarded_page_br(tcg_env, pc);
1717 s->btype = -1;
1718 }
1719 }
1720 }
1721
1722 static void set_btype_for_blr(DisasContext *s)
1723 {
1724 if (dc_isar_feature(aa64_bti, s)) {
1725 /* BLR sets BTYPE to 2, regardless of source guarded page. */
1726 set_btype(s, 2);
1727 }
1728 }
1729
1730 static bool trans_BR(DisasContext *s, arg_r *a)
1731 {
1732 set_btype_for_br(s, a->rn);
1733 gen_a64_set_pc(s, cpu_reg(s, a->rn));
1734 s->base.is_jmp = DISAS_JUMP;
1735 return true;
1736 }
1737
1738 static bool trans_BLR(DisasContext *s, arg_r *a)
1739 {
1740 TCGv_i64 dst = cpu_reg(s, a->rn);
1741 TCGv_i64 lr = cpu_reg(s, 30);
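    /* If Rn is the link register, copy it first: LR is about to be clobbered. */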
1742 if (dst == lr) {
1743 TCGv_i64 tmp = tcg_temp_new_i64();
1744 tcg_gen_mov_i64(tmp, dst);
1745 dst = tmp;
1746 }
1747 gen_pc_plus_diff(s, lr, curr_insn_len(s));
1748 gen_a64_set_pc(s, dst);
1749 set_btype_for_blr(s);
1750 s->base.is_jmp = DISAS_JUMP;
1751 return true;
1752 }
1753
1754 static bool trans_RET(DisasContext *s, arg_r *a)
1755 {
1756 gen_a64_set_pc(s, cpu_reg(s, a->rn));
1757 s->base.is_jmp = DISAS_JUMP;
1758 return true;
1759 }
1760
1761 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1762 TCGv_i64 modifier, bool use_key_a)
1763 {
1764 TCGv_i64 truedst;
1765 /*
1766 * Return the branch target for a BRAA/RETA/etc, which is either
1767 * just the destination dst, or that value with the pauth check
1768 * done and the code removed from the high bits.
1769 */
1770 if (!s->pauth_active) {
1771 return dst;
1772 }
1773
1774 truedst = tcg_temp_new_i64();
1775 if (use_key_a) {
1776 gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
1777 } else {
1778 gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
1779 }
1780 return truedst;
1781 }
1782
1783 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1784 {
1785 TCGv_i64 dst;
1786
1787 if (!dc_isar_feature(aa64_pauth, s)) {
1788 return false;
1789 }
1790
1791 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1792 set_btype_for_br(s, a->rn);
1793 gen_a64_set_pc(s, dst);
1794 s->base.is_jmp = DISAS_JUMP;
1795 return true;
1796 }
1797
1798 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1799 {
1800 TCGv_i64 dst, lr;
1801
1802 if (!dc_isar_feature(aa64_pauth, s)) {
1803 return false;
1804 }
1805
1806 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1807 lr = cpu_reg(s, 30);
1808 if (dst == lr) {
1809 TCGv_i64 tmp = tcg_temp_new_i64();
1810 tcg_gen_mov_i64(tmp, dst);
1811 dst = tmp;
1812 }
1813 gen_pc_plus_diff(s, lr, curr_insn_len(s));
1814 gen_a64_set_pc(s, dst);
1815 set_btype_for_blr(s);
1816 s->base.is_jmp = DISAS_JUMP;
1817 return true;
1818 }
1819
1820 static bool trans_RETA(DisasContext *s, arg_reta *a)
1821 {
1822 TCGv_i64 dst;
1823
1824 dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1825 gen_a64_set_pc(s, dst);
1826 s->base.is_jmp = DISAS_JUMP;
1827 return true;
1828 }
1829
1830 static bool trans_BRA(DisasContext *s, arg_bra *a)
1831 {
1832 TCGv_i64 dst;
1833
1834 if (!dc_isar_feature(aa64_pauth, s)) {
1835 return false;
1836 }
1837 dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1838 gen_a64_set_pc(s, dst);
1839 set_btype_for_br(s, a->rn);
1840 s->base.is_jmp = DISAS_JUMP;
1841 return true;
1842 }
1843
1844 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1845 {
1846 TCGv_i64 dst, lr;
1847
1848 if (!dc_isar_feature(aa64_pauth, s)) {
1849 return false;
1850 }
1851 dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1852 lr = cpu_reg(s, 30);
1853 if (dst == lr) {
1854 TCGv_i64 tmp = tcg_temp_new_i64();
1855 tcg_gen_mov_i64(tmp, dst);
1856 dst = tmp;
1857 }
1858 gen_pc_plus_diff(s, lr, curr_insn_len(s));
1859 gen_a64_set_pc(s, dst);
1860 set_btype_for_blr(s);
1861 s->base.is_jmp = DISAS_JUMP;
1862 return true;
1863 }
1864
1865 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1866 {
1867 TCGv_i64 dst;
1868
1869 if (s->current_el == 0) {
1870 return false;
1871 }
1872 if (s->trap_eret) {
1873 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1874 return true;
1875 }
1876 dst = tcg_temp_new_i64();
1877 tcg_gen_ld_i64(dst, tcg_env,
1878 offsetof(CPUARMState, elr_el[s->current_el]));
1879
1880 translator_io_start(&s->base);
1881
1882 gen_helper_exception_return(tcg_env, dst);
1883 /* Must exit loop to check un-masked IRQs */
1884 s->base.is_jmp = DISAS_EXIT;
1885 return true;
1886 }
1887
1888 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1889 {
1890 TCGv_i64 dst;
1891
1892 if (!dc_isar_feature(aa64_pauth, s)) {
1893 return false;
1894 }
1895 if (s->current_el == 0) {
1896 return false;
1897 }
1898 /* The FGT trap takes precedence over an auth trap. */
1899 if (s->trap_eret) {
1900 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1901 return true;
1902 }
1903 dst = tcg_temp_new_i64();
1904 tcg_gen_ld_i64(dst, tcg_env,
1905 offsetof(CPUARMState, elr_el[s->current_el]));
1906
1907 dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1908
1909 translator_io_start(&s->base);
1910
1911 gen_helper_exception_return(tcg_env, dst);
1912 /* Must exit loop to check un-masked IRQs */
1913 s->base.is_jmp = DISAS_EXIT;
1914 return true;
1915 }
1916
1917 static bool trans_NOP(DisasContext *s, arg_NOP *a)
1918 {
1919 return true;
1920 }
1921
1922 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1923 {
1924 /*
1925 * When running in MTTCG we don't generate jumps to the yield and
1926 * WFE helpers as it won't affect the scheduling of other vCPUs.
1927 * If we wanted to more completely model WFE/SEV so we don't busy
1928 * spin unnecessarily we would need to do something more involved.
1929 */
1930 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1931 s->base.is_jmp = DISAS_YIELD;
1932 }
1933 return true;
1934 }
1935
1936 static bool trans_WFI(DisasContext *s, arg_WFI *a)
1937 {
1938 s->base.is_jmp = DISAS_WFI;
1939 return true;
1940 }
1941
1942 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1943 {
1944 /*
1945 * When running in MTTCG we don't generate jumps to the yield and
1946 * WFE helpers as it won't affect the scheduling of other vCPUs.
1947 * If we wanted to more completely model WFE/SEV so we don't busy
1948 * spin unnecessarily we would need to do something more involved.
1949 */
1950 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1951 s->base.is_jmp = DISAS_WFE;
1952 }
1953 return true;
1954 }
1955
1956 static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
1957 {
1958 if (!dc_isar_feature(aa64_wfxt, s)) {
1959 return false;
1960 }
1961
1962 /*
1963 * Because we need to pass the register value to the helper,
1964 * it's easier to emit the code now, unlike trans_WFI which
1965 * defers it to aarch64_tr_tb_stop(). That means we need to
1966 * check ss_active so that single-stepping a WFIT doesn't halt.
1967 */
1968 if (s->ss_active) {
1969 /* Act like a NOP under architectural singlestep */
1970 return true;
1971 }
1972
1973 gen_a64_update_pc(s, 4);
1974 gen_helper_wfit(tcg_env, cpu_reg(s, a->rd));
1975 /* Go back to the main loop to check for interrupts */
1976 s->base.is_jmp = DISAS_EXIT;
1977 return true;
1978 }
1979
1980 static bool trans_WFET(DisasContext *s, arg_WFET *a)
1981 {
1982 if (!dc_isar_feature(aa64_wfxt, s)) {
1983 return false;
1984 }
1985
1986 /*
1987 * We rely here on our WFE implementation being a NOP, so we
1988 * don't need to do anything different to handle the WFET timeout
1989 * from what trans_WFE does.
1990 */
1991 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1992 s->base.is_jmp = DISAS_WFE;
1993 }
1994 return true;
1995 }
1996
1997 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
1998 {
1999 if (s->pauth_active) {
2000 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
2001 }
2002 return true;
2003 }
2004
2005 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
2006 {
2007 if (s->pauth_active) {
2008 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2009 }
2010 return true;
2011 }
2012
2013 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
2014 {
2015 if (s->pauth_active) {
2016 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2017 }
2018 return true;
2019 }
2020
2021 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
2022 {
2023 if (s->pauth_active) {
2024 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2025 }
2026 return true;
2027 }
2028
2029 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
2030 {
2031 if (s->pauth_active) {
2032 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2033 }
2034 return true;
2035 }
2036
2037 static bool trans_ESB(DisasContext *s, arg_ESB *a)
2038 {
2039 /* Without RAS, we must implement this as NOP. */
2040 if (dc_isar_feature(aa64_ras, s)) {
2041 /*
2042 * QEMU does not have a source of physical SErrors,
2043 * so we are only concerned with virtual SErrors.
2044 * The pseudocode in the Arm ARM for this case is
2045 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
2046 * AArch64.vESBOperation();
2047 * Most of the condition can be evaluated at translation time.
2048 * Test for EL2 present, and defer test for SEL2 to runtime.
2049 */
2050 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
2051 gen_helper_vesb(tcg_env);
2052 }
2053 }
2054 return true;
2055 }
2056
2057 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
2058 {
2059 if (s->pauth_active) {
2060 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2061 }
2062 return true;
2063 }
2064
2065 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
2066 {
2067 if (s->pauth_active) {
2068 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2069 }
2070 return true;
2071 }
2072
2073 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
2074 {
2075 if (s->pauth_active) {
2076 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2077 }
2078 return true;
2079 }
2080
2081 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
2082 {
2083 if (s->pauth_active) {
2084 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2085 }
2086 return true;
2087 }
2088
2089 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
2090 {
2091 if (s->pauth_active) {
2092 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2093 }
2094 return true;
2095 }
2096
2097 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
2098 {
2099 if (s->pauth_active) {
2100 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2101 }
2102 return true;
2103 }
2104
2105 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
2106 {
2107 if (s->pauth_active) {
2108 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2109 }
2110 return true;
2111 }
2112
2113 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
2114 {
2115 if (s->pauth_active) {
2116 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2117 }
2118 return true;
2119 }
2120
2121 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
2122 {
2123 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2124 return true;
2125 }
2126
2127 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
2128 {
2129 /* We handle DSB and DMB the same way */
2130 TCGBar bar;
2131
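    /*
     * a->types is the access-type half of the barrier's CRm option field;
     * the shareability domain half is not used here, since TCG memory
     * barriers always act system-wide.
     */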
2132 switch (a->types) {
2133 case 1: /* MBReqTypes_Reads */
2134 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
2135 break;
2136 case 2: /* MBReqTypes_Writes */
2137 bar = TCG_BAR_SC | TCG_MO_ST_ST;
2138 break;
2139 default: /* MBReqTypes_All */
2140 bar = TCG_BAR_SC | TCG_MO_ALL;
2141 break;
2142 }
2143 tcg_gen_mb(bar);
2144 return true;
2145 }
2146
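/*
 * DSB nXS: QEMU does not model the XS memory attribute, so once the
 * FEAT_XS presence check passes this behaves exactly like a plain
 * full-system DSB.
 */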
2147 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a)
2148 {
2149 if (!dc_isar_feature(aa64_xs, s)) {
2150 return false;
2151 }
2152 tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
2153 return true;
2154 }
2155
2156 static bool trans_ISB(DisasContext *s, arg_ISB *a)
2157 {
2158 /*
2159 * We need to break the TB after this insn to execute
2160 * self-modifying code correctly and also to take
2161 * any pending interrupts immediately.
2162 */
2163 reset_btype(s);
2164 gen_goto_tb(s, 0, 4);
2165 return true;
2166 }
2167
2168 static bool trans_SB(DisasContext *s, arg_SB *a)
2169 {
2170 if (!dc_isar_feature(aa64_sb, s)) {
2171 return false;
2172 }
2173 /*
2174 * TODO: There is no speculation barrier opcode for TCG;
2175 * MB and end the TB instead.
2176 */
2177 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
2178 gen_goto_tb(s, 0, 4);
2179 return true;
2180 }
2181
2182 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
2183 {
2184 if (!dc_isar_feature(aa64_condm_4, s)) {
2185 return false;
2186 }
2187 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
2188 return true;
2189 }
2190
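/*
 * XAFLAG (FEAT_FlagM2): convert the floating-point condition flags from
 * the "external" format produced by AXFLAG back into the Arm
 * PSTATE.{N,Z,C,V} format; the bit manipulations below follow from that
 * mapping.
 */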
2191 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
2192 {
2193 TCGv_i32 z;
2194
2195 if (!dc_isar_feature(aa64_condm_5, s)) {
2196 return false;
2197 }
2198
2199 z = tcg_temp_new_i32();
2200
2201 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
2202
2203 /*
2204 * (!C & !Z) << 31
2205 * (!(C | Z)) << 31
2206 * ~((C | Z) << 31)
2207 * ~-(C | Z)
2208 * (C | Z) - 1
2209 */
2210 tcg_gen_or_i32(cpu_NF, cpu_CF, z);
2211 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
2212
2213 /* !(Z & C) */
2214 tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
2215 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
2216
2217 /* (!C & Z) << 31 -> -(Z & ~C) */
2218 tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
2219 tcg_gen_neg_i32(cpu_VF, cpu_VF);
2220
2221 /* C | Z */
2222 tcg_gen_or_i32(cpu_CF, cpu_CF, z);
2223
2224 return true;
2225 }
2226
2227 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
2228 {
2229 if (!dc_isar_feature(aa64_condm_5, s)) {
2230 return false;
2231 }
2232
2233 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */
2234 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */
2235
2236 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
2237 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
2238
2239 tcg_gen_movi_i32(cpu_NF, 0);
2240 tcg_gen_movi_i32(cpu_VF, 0);
2241
2242 return true;
2243 }
2244
2245 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
2246 {
2247 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
2248 return false;
2249 }
2250 if (a->imm & 1) {
2251 set_pstate_bits(PSTATE_UAO);
2252 } else {
2253 clear_pstate_bits(PSTATE_UAO);
2254 }
2255 gen_rebuild_hflags(s);
2256 s->base.is_jmp = DISAS_TOO_MANY;
2257 return true;
2258 }
2259
2260 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
2261 {
2262 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
2263 return false;
2264 }
2265 if (a->imm & 1) {
2266 set_pstate_bits(PSTATE_PAN);
2267 } else {
2268 clear_pstate_bits(PSTATE_PAN);
2269 }
2270 gen_rebuild_hflags(s);
2271 s->base.is_jmp = DISAS_TOO_MANY;
2272 return true;
2273 }
2274
2275 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
2276 {
2277 if (s->current_el == 0) {
2278 return false;
2279 }
2280 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
2281 s->base.is_jmp = DISAS_TOO_MANY;
2282 return true;
2283 }
2284
2285 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
2286 {
2287 if (!dc_isar_feature(aa64_ssbs, s)) {
2288 return false;
2289 }
2290 if (a->imm & 1) {
2291 set_pstate_bits(PSTATE_SSBS);
2292 } else {
2293 clear_pstate_bits(PSTATE_SSBS);
2294 }
2295 /* Don't need to rebuild hflags since SSBS is a nop */
2296 s->base.is_jmp = DISAS_TOO_MANY;
2297 return true;
2298 }
2299
2300 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
2301 {
2302 if (!dc_isar_feature(aa64_dit, s)) {
2303 return false;
2304 }
2305 if (a->imm & 1) {
2306 set_pstate_bits(PSTATE_DIT);
2307 } else {
2308 clear_pstate_bits(PSTATE_DIT);
2309 }
2310 /* There's no need to rebuild hflags because DIT is a nop */
2311 s->base.is_jmp = DISAS_TOO_MANY;
2312 return true;
2313 }
2314
2315 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2316 {
2317 if (dc_isar_feature(aa64_mte, s)) {
2318 /* Full MTE is enabled -- set the TCO bit as directed. */
2319 if (a->imm & 1) {
2320 set_pstate_bits(PSTATE_TCO);
2321 } else {
2322 clear_pstate_bits(PSTATE_TCO);
2323 }
2324 gen_rebuild_hflags(s);
2325 /* Many factors, including TCO, go into MTE_ACTIVE. */
2326 s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2327 return true;
2328 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2329 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */
2330 return true;
2331 } else {
2332 /* Insn not present */
2333 return false;
2334 }
2335 }
2336
2337 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2338 {
2339 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2340 s->base.is_jmp = DISAS_TOO_MANY;
2341 return true;
2342 }
2343
2344 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2345 {
2346 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2347 /* Exit the cpu loop to re-evaluate pending IRQs. */
2348 s->base.is_jmp = DISAS_UPDATE_EXIT;
2349 return true;
2350 }
2351
2352 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a)
2353 {
2354 if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) {
2355 return false;
2356 }
2357
2358 if (a->imm == 0) {
2359 clear_pstate_bits(PSTATE_ALLINT);
2360 } else if (s->current_el > 1) {
2361 set_pstate_bits(PSTATE_ALLINT);
2362 } else {
2363 gen_helper_msr_set_allint_el1(tcg_env);
2364 }
2365
2366 /* Exit the cpu loop to re-evaluate pending IRQs. */
2367 s->base.is_jmp = DISAS_UPDATE_EXIT;
2368 return true;
2369 }
2370
2371 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2372 {
2373 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2374 return false;
2375 }
2376 if (sme_access_check(s)) {
2377 int old = s->pstate_sm | (s->pstate_za << 1);
2378 int new = a->imm * 3;
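        /*
         * a->imm is a single bit, so multiplying by 3 yields a candidate
         * value for both SVCR.SM (bit 0) and SVCR.ZA (bit 1); a->mask then
         * selects which of the two this SMSTART/SMSTOP variant touches.
         */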
2379
2380 if ((old ^ new) & a->mask) {
2381 /* At least one bit changes. */
2382 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2383 tcg_constant_i32(a->mask));
2384 s->base.is_jmp = DISAS_TOO_MANY;
2385 }
2386 }
2387 return true;
2388 }
2389
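/*
 * Build the value returned by "MRS Xt, NZCV": N, Z, C and V in bits
 * 31..28 respectively, all other bits zero.  gen_set_nzcv below is the
 * inverse, scattering bits 31..28 of Xt back into the TCG flag variables.
 */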
2390 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2391 {
2392 TCGv_i32 tmp = tcg_temp_new_i32();
2393 TCGv_i32 nzcv = tcg_temp_new_i32();
2394
2395 /* build bit 31, N */
2396 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2397 /* build bit 30, Z */
2398 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2399 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2400 /* build bit 29, C */
2401 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2402 /* build bit 28, V */
2403 tcg_gen_shri_i32(tmp, cpu_VF, 31);
2404 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2405 /* generate result */
2406 tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2407 }
2408
2409 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2410 {
2411 TCGv_i32 nzcv = tcg_temp_new_i32();
2412
2413 /* take NZCV from R[t] */
2414 tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2415
2416 /* bit 31, N */
2417 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2418 /* bit 30, Z */
2419 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2420 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2421 /* bit 29, C */
2422 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2423 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2424 /* bit 28, V */
2425 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2426 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2427 }
2428
2429 static void gen_sysreg_undef(DisasContext *s, bool isread,
2430 uint8_t op0, uint8_t op1, uint8_t op2,
2431 uint8_t crn, uint8_t crm, uint8_t rt)
2432 {
2433 /*
2434 * Generate code to emit an UNDEF with correct syndrome
2435 * information for a failed system register access.
2436 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2437 * but if FEAT_IDST is implemented then read accesses to registers
2438 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2439 * syndrome.
2440 */
2441 uint32_t syndrome;
2442
2443 if (isread && dc_isar_feature(aa64_ids, s) &&
2444 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2445 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2446 } else {
2447 syndrome = syn_uncategorized();
2448 }
2449 gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2450 }
2451
2452 /* MRS - move from system register
2453 * MSR (register) - move to system register
2454 * SYS
2455 * SYSL
2456 * These are all essentially the same insn in 'read' and 'write'
2457 * versions, with varying op0 fields.
2458 */
2459 static void handle_sys(DisasContext *s, bool isread,
2460 unsigned int op0, unsigned int op1, unsigned int op2,
2461 unsigned int crn, unsigned int crm, unsigned int rt)
2462 {
2463 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2464 crn, crm, op0, op1, op2);
2465 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2466 bool need_exit_tb = false;
2467 bool nv_trap_to_el2 = false;
2468 bool nv_redirect_reg = false;
2469 bool skip_fp_access_checks = false;
2470 bool nv2_mem_redirect = false;
2471 TCGv_ptr tcg_ri = NULL;
2472 TCGv_i64 tcg_rt;
2473 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2474
2475 if (crn == 11 || crn == 15) {
2476 /*
2477 * Check for TIDCP trap, which must take precedence over
2478 * the UNDEF for "no such register" etc.
2479 */
2480 switch (s->current_el) {
2481 case 0:
2482 if (dc_isar_feature(aa64_tidcp1, s)) {
2483 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
2484 }
2485 break;
2486 case 1:
2487 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
2488 break;
2489 }
2490 }
2491
2492 if (!ri) {
2493 /* Unknown register; this might be a guest error or a QEMU
2494 * unimplemented feature.
2495 */
2496 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2497 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2498 isread ? "read" : "write", op0, op1, crn, crm, op2);
2499 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2500 return;
2501 }
2502
2503 if (s->nv2 && ri->nv2_redirect_offset) {
2504 /*
2505 * Some registers always redirect to memory; some only do so if
2506 * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
2507 * pairs which share an offset; see the table in R_CSRPQ).
2508 */
2509 if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
2510 nv2_mem_redirect = s->nv1;
2511 } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
2512 nv2_mem_redirect = !s->nv1;
2513 } else {
2514 nv2_mem_redirect = true;
2515 }
2516 }
2517
2518 /* Check access permissions */
2519 if (!cp_access_ok(s->current_el, ri, isread)) {
2520 /*
2521 * FEAT_NV/NV2 handling does not do the usual FP access checks
2522 * for registers only accessible at EL2 (though it *does* do them
2523 * for registers accessible at EL1).
2524 */
2525 skip_fp_access_checks = true;
2526 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
2527 /*
2528 * This is one of the few EL2 registers which should redirect
2529 * to the equivalent EL1 register. We do that after running
2530 * the EL2 register's accessfn.
2531 */
2532 nv_redirect_reg = true;
2533 assert(!nv2_mem_redirect);
2534 } else if (nv2_mem_redirect) {
2535 /*
2536 * NV2 redirect-to-memory takes precedence over trap to EL2 or
2537 * UNDEF to EL1.
2538 */
2539 } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
2540 /*
2541 * This register / instruction exists and is an EL2 register, so
2542 * we must trap to EL2 if accessed in nested virtualization EL1
2543 * instead of UNDEFing. We'll do that after the usual access checks.
2544 * (This makes a difference only for a couple of registers like
2545 * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
2546 * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
2547 * an accessfn which does nothing when called from EL1, because
2548 * the trap-to-EL3 controls which would apply to that register
2549 * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
2550 */
2551 nv_trap_to_el2 = true;
2552 } else {
2553 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2554 return;
2555 }
2556 }
2557
2558 if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2559 /* Emit code to perform further access permissions checks at
2560 * runtime; this may result in an exception.
2561 */
2562 gen_a64_update_pc(s, 0);
2563 tcg_ri = tcg_temp_new_ptr();
2564 gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
2565 tcg_constant_i32(key),
2566 tcg_constant_i32(syndrome),
2567 tcg_constant_i32(isread));
2568 } else if (ri->type & ARM_CP_RAISES_EXC) {
2569 /*
2570 * The readfn or writefn might raise an exception;
2571 * synchronize the CPU state in case it does.
2572 */
2573 gen_a64_update_pc(s, 0);
2574 }
2575
2576 if (!skip_fp_access_checks) {
2577 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2578 return;
2579 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2580 return;
2581 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2582 return;
2583 }
2584 }
2585
2586 if (nv_trap_to_el2) {
2587 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2588 return;
2589 }
2590
2591 if (nv_redirect_reg) {
2592 /*
2593 * FEAT_NV2 redirection of an EL2 register to an EL1 register.
2594 * Conveniently in all cases the encoding of the EL1 register is
2595 * identical to the EL2 register except that opc1 is 0.
2596 * Get the reginfo for the EL1 register to use for the actual access.
2597 * We don't use the EL1 register's access function, and
2598 * fine-grained-traps on EL1 also do not apply here.
2599 */
2600 key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2601 crn, crm, op0, 0, op2);
2602 ri = get_arm_cp_reginfo(s->cp_regs, key);
2603 assert(ri);
2604 assert(cp_access_ok(s->current_el, ri, isread));
2605 /*
2606 * We might not have done an update_pc earlier, so check we don't
2607 * need it. We could support this in future if necessary.
2608 */
2609 assert(!(ri->type & ARM_CP_RAISES_EXC));
2610 }
2611
2612 if (nv2_mem_redirect) {
2613 /*
2614 * This system register is being redirected into an EL2 memory access.
2615 * This means it is not an IO operation, doesn't change hflags,
2616 * and need not end the TB, because it has no side effects.
2617 *
2618 * The access is 64-bit single copy atomic, guaranteed aligned because
2619 * of the definition of VNCR_EL2. Its endianness depends on
2620 * SCTLR_EL2.EE, not on the data endianness of EL1.
2621 * It is done under either the EL2 translation regime or the EL2&0
2622 * translation regime, depending on HCR_EL2.E2H. It behaves as if
2623 * PSTATE.PAN is 0.
2624 */
2625 TCGv_i64 ptr = tcg_temp_new_i64();
2626 MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
2627 ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
2628 int memidx = arm_to_core_mmu_idx(armmemidx);
2629 uint32_t syn;
2630
2631 mop |= (s->nv2_mem_be ? MO_BE : MO_LE);
2632
2633 tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
2634 tcg_gen_addi_i64(ptr, ptr,
2635 (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
2636 tcg_rt = cpu_reg(s, rt);
2637
2638 syn = syn_data_abort_vncr(0, !isread, 0);
2639 disas_set_insn_syndrome(s, syn);
2640 if (isread) {
2641 tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
2642 } else {
2643 tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
2644 }
2645 return;
2646 }
2647
2648 /* Handle special cases first */
2649 switch (ri->type & ARM_CP_SPECIAL_MASK) {
2650 case 0:
2651 break;
2652 case ARM_CP_NOP:
2653 return;
2654 case ARM_CP_NZCV:
2655 tcg_rt = cpu_reg(s, rt);
2656 if (isread) {
2657 gen_get_nzcv(tcg_rt);
2658 } else {
2659 gen_set_nzcv(tcg_rt);
2660 }
2661 return;
2662 case ARM_CP_CURRENTEL:
2663 {
2664 /*
2665 * Reads the current EL value from pstate, which is
2666 * guaranteed to be constant by the tb flags.
2667 * For nested virt we should report EL2.
2668 */
2669 int el = s->nv ? 2 : s->current_el;
2670 tcg_rt = cpu_reg(s, rt);
2671 tcg_gen_movi_i64(tcg_rt, el << 2);
2672 return;
2673 }
2674 case ARM_CP_DC_ZVA:
2675 /* Writes clear the aligned block of memory which rt points into. */
2676 if (s->mte_active[0]) {
2677 int desc = 0;
2678
2679 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2680 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2681 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2682
2683 tcg_rt = tcg_temp_new_i64();
2684 gen_helper_mte_check_zva(tcg_rt, tcg_env,
2685 tcg_constant_i32(desc), cpu_reg(s, rt));
2686 } else {
2687 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2688 }
2689 gen_helper_dc_zva(tcg_env, tcg_rt);
2690 return;
2691 case ARM_CP_DC_GVA:
2692 {
2693 TCGv_i64 clean_addr, tag;
2694
2695 /*
2696 * DC_GVA, like DC_ZVA, requires that we supply the original
2697 * pointer for an invalid page. Probe that address first.
2698 */
2699 tcg_rt = cpu_reg(s, rt);
2700 clean_addr = clean_data_tbi(s, tcg_rt);
2701 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2702
2703 if (s->ata[0]) {
2704 /* Extract the tag from the register to match STZGM. */
2705 tag = tcg_temp_new_i64();
2706 tcg_gen_shri_i64(tag, tcg_rt, 56);
2707 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2708 }
2709 }
2710 return;
2711 case ARM_CP_DC_GZVA:
2712 {
2713 TCGv_i64 clean_addr, tag;
2714
2715 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2716 tcg_rt = cpu_reg(s, rt);
2717 clean_addr = clean_data_tbi(s, tcg_rt);
2718 gen_helper_dc_zva(tcg_env, clean_addr);
2719
2720 if (s->ata[0]) {
2721 /* Extract the tag from the register to match STZGM. */
2722 tag = tcg_temp_new_i64();
2723 tcg_gen_shri_i64(tag, tcg_rt, 56);
2724 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2725 }
2726 }
2727 return;
2728 default:
2729 g_assert_not_reached();
2730 }
2731
2732 if (ri->type & ARM_CP_IO) {
2733 /* I/O operations must end the TB here (whether read or write) */
2734 need_exit_tb = translator_io_start(&s->base);
2735 }
2736
2737 tcg_rt = cpu_reg(s, rt);
2738
2739 if (isread) {
2740 if (ri->type & ARM_CP_CONST) {
2741 tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2742 } else if (ri->readfn) {
2743 if (!tcg_ri) {
2744 tcg_ri = gen_lookup_cp_reg(key);
2745 }
2746 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
2747 } else {
2748 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
2749 }
2750 } else {
2751 if (ri->type & ARM_CP_CONST) {
2752 /* If not forbidden by access permissions, treat as WI */
2753 return;
2754 } else if (ri->writefn) {
2755 if (!tcg_ri) {
2756 tcg_ri = gen_lookup_cp_reg(key);
2757 }
2758 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
2759 } else {
2760 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
2761 }
2762 }
2763
2764 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2765 /*
2766 * A write to any coprocessor register that ends a TB
2767 * must rebuild the hflags for the next TB.
2768 */
2769 gen_rebuild_hflags(s);
2770 /*
2771 * We default to ending the TB on a coprocessor register write,
2772 * but allow this to be suppressed by the register definition
2773 * (usually only necessary to work around guest bugs).
2774 */
2775 need_exit_tb = true;
2776 }
2777 if (need_exit_tb) {
2778 s->base.is_jmp = DISAS_UPDATE_EXIT;
2779 }
2780 }
2781
2782 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2783 {
2784 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2785 return true;
2786 }
2787
2788 static bool trans_SVC(DisasContext *s, arg_i *a)
2789 {
2790 /*
2791 * For SVC, HVC and SMC we advance the single-step state
2792 * machine before taking the exception. This is architecturally
2793 * mandated, to ensure that single-stepping a system call
2794 * instruction works properly.
2795 */
2796 uint32_t syndrome = syn_aa64_svc(a->imm);
2797 if (s->fgt_svc) {
2798 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2799 return true;
2800 }
2801 gen_ss_advance(s);
2802 gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2803 return true;
2804 }
2805
2806 static bool trans_HVC(DisasContext *s, arg_i *a)
2807 {
2808 int target_el = s->current_el == 3 ? 3 : 2;
2809
2810 if (s->current_el == 0) {
2811 unallocated_encoding(s);
2812 return true;
2813 }
2814 /*
2815 * The pre HVC helper handles cases when HVC gets trapped
2816 * as an undefined insn by runtime configuration.
2817 */
2818 gen_a64_update_pc(s, 0);
2819 gen_helper_pre_hvc(tcg_env);
2820 /* Architecture requires ss advance before we do the actual work */
2821 gen_ss_advance(s);
2822 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2823 return true;
2824 }
2825
2826 static bool trans_SMC(DisasContext *s, arg_i *a)
2827 {
2828 if (s->current_el == 0) {
2829 unallocated_encoding(s);
2830 return true;
2831 }
2832 gen_a64_update_pc(s, 0);
2833 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2834 /* Architecture requires ss advance before we do the actual work */
2835 gen_ss_advance(s);
2836 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2837 return true;
2838 }
2839
2840 static bool trans_BRK(DisasContext *s, arg_i *a)
2841 {
2842 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2843 return true;
2844 }
2845
2846 static bool trans_HLT(DisasContext *s, arg_i *a)
2847 {
2848 /*
2849 * HLT. This has two purposes.
2850 * Architecturally, it is an external halting debug instruction.
2851 * Since QEMU doesn't implement external debug, we treat this as
2852 * required when halting debug is disabled: it will UNDEF.
2853 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2854 */
2855 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2856 gen_exception_internal_insn(s, EXCP_SEMIHOST);
2857 } else {
2858 unallocated_encoding(s);
2859 }
2860 return true;
2861 }
2862
2863 /*
2864 * Load/Store exclusive instructions are implemented by remembering
2865 * the value/address loaded, and seeing if these are the same
2866 * when the store is performed. This is not actually the architecturally
2867 * mandated semantics, but it works for typical guest code sequences
2868 * and avoids having to monitor regular stores.
2869 *
2870 * The store exclusive uses the atomic cmpxchg primitives to avoid
2871 * races in multi-threaded linux-user and when MTTCG softmmu is
2872 * enabled.
2873 */
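/*
 * For illustration only, a typical guest sequence this scheme is aimed at
 * (a sketch, not lifted from any particular guest):
 *
 *   retry:
 *     ldaxr   x0, [x1]       // monitor records address and loaded value
 *     add     x0, x0, #1
 *     stlxr   w2, x0, [x1]   // cmpxchg: succeeds only if [x1] is unchanged
 *     cbnz    w2, retry      // w2 == 0 on success, 1 on failure
 */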
2874 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2875 int size, bool is_pair)
2876 {
2877 int idx = get_mem_index(s);
2878 TCGv_i64 dirty_addr, clean_addr;
2879 MemOp memop = check_atomic_align(s, rn, size + is_pair);
2880
2881 s->is_ldex = true;
2882 dirty_addr = cpu_reg_sp(s, rn);
2883 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2884
2885 g_assert(size <= 3);
2886 if (is_pair) {
2887 g_assert(size >= 2);
2888 if (size == 2) {
2889 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2890 if (s->be_data == MO_LE) {
2891 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2892 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2893 } else {
2894 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2895 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2896 }
2897 } else {
2898 TCGv_i128 t16 = tcg_temp_new_i128();
2899
2900 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2901
2902 if (s->be_data == MO_LE) {
2903 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2904 cpu_exclusive_high, t16);
2905 } else {
2906 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2907 cpu_exclusive_val, t16);
2908 }
2909 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2910 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2911 }
2912 } else {
2913 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2914 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2915 }
2916 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2917 }
2918
2919 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2920 int rn, int size, int is_pair)
2921 {
2922 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2923 * && (!is_pair || env->exclusive_high == [addr + datasize])) {
2924 * [addr] = {Rt};
2925 * if (is_pair) {
2926 * [addr + datasize] = {Rt2};
2927 * }
2928 * {Rd} = 0;
2929 * } else {
2930 * {Rd} = 1;
2931 * }
2932 * env->exclusive_addr = -1;
2933 */
2934 TCGLabel *fail_label = gen_new_label();
2935 TCGLabel *done_label = gen_new_label();
2936 TCGv_i64 tmp, clean_addr;
2937 MemOp memop;
2938
2939 /*
2940 * FIXME: We are out of spec here. We have recorded only the address
2941 * from load_exclusive, not the entire range, and we assume that the
2942 * size of the access on both sides match. The architecture allows the
2943 * store to be smaller than the load, so long as the stored bytes are
2944 * within the range recorded by the load.
2945 */
2946
2947 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
2948 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2949 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2950
2951 /*
2952 * The write, and any associated faults, only happen if the virtual
2953 * and physical addresses pass the exclusive monitor check. These
2954 * faults are exceedingly unlikely, because normally the guest uses
2955 * the exact same address register for the load_exclusive, and we
2956 * would have recognized these faults there.
2957 *
2958 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
2959 * unaligned 4-byte write within the range of an aligned 8-byte load.
2960 * With LSE2, the store would need to cross a 16-byte boundary when the
2961 * load did not, which would mean the store is outside the range
2962 * recorded for the monitor, which would have failed a corrected monitor
2963 * check above. For now, we assume no size change and retain the
2964 * MO_ALIGN to let tcg know what we checked in the load_exclusive.
2965 *
2966 * It is possible to trigger an MTE fault, by performing the load with
2967 * a virtual address with a valid tag and performing the store with the
2968 * same virtual address and a different invalid tag.
2969 */
2970 memop = size + is_pair;
2971 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
2972 memop |= MO_ALIGN;
2973 }
2974 memop = finalize_memop(s, memop);
2975 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2976
2977 tmp = tcg_temp_new_i64();
2978 if (is_pair) {
2979 if (size == 2) {
2980 if (s->be_data == MO_LE) {
2981 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2982 } else {
2983 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2984 }
2985 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2986 cpu_exclusive_val, tmp,
2987 get_mem_index(s), memop);
2988 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2989 } else {
2990 TCGv_i128 t16 = tcg_temp_new_i128();
2991 TCGv_i128 c16 = tcg_temp_new_i128();
2992 TCGv_i64 a, b;
2993
2994 if (s->be_data == MO_LE) {
2995 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
2996 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
2997 cpu_exclusive_high);
2998 } else {
2999 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
3000 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
3001 cpu_exclusive_val);
3002 }
3003
3004 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
3005 get_mem_index(s), memop);
3006
3007 a = tcg_temp_new_i64();
3008 b = tcg_temp_new_i64();
3009 if (s->be_data == MO_LE) {
3010 tcg_gen_extr_i128_i64(a, b, t16);
3011 } else {
3012 tcg_gen_extr_i128_i64(b, a, t16);
3013 }
3014
3015 tcg_gen_xor_i64(a, a, cpu_exclusive_val);
3016 tcg_gen_xor_i64(b, b, cpu_exclusive_high);
3017 tcg_gen_or_i64(tmp, a, b);
3018
3019 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
3020 }
3021 } else {
3022 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
3023 cpu_reg(s, rt), get_mem_index(s), memop);
3024 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
3025 }
3026 tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
3027 tcg_gen_br(done_label);
3028
3029 gen_set_label(fail_label);
3030 tcg_gen_movi_i64(cpu_reg(s, rd), 1);
3031 gen_set_label(done_label);
3032 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
3033 }
3034
3035 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
3036 int rn, int size)
3037 {
3038 TCGv_i64 tcg_rs = cpu_reg(s, rs);
3039 TCGv_i64 tcg_rt = cpu_reg(s, rt);
3040 int memidx = get_mem_index(s);
3041 TCGv_i64 clean_addr;
3042 MemOp memop;
3043
3044 if (rn == 31) {
3045 gen_check_sp_alignment(s);
3046 }
3047 memop = check_atomic_align(s, rn, size);
3048 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3049 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
3050 memidx, memop);
3051 }
3052
3053 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
3054 int rn, int size)
3055 {
3056 TCGv_i64 s1 = cpu_reg(s, rs);
3057 TCGv_i64 s2 = cpu_reg(s, rs + 1);
3058 TCGv_i64 t1 = cpu_reg(s, rt);
3059 TCGv_i64 t2 = cpu_reg(s, rt + 1);
3060 TCGv_i64 clean_addr;
3061 int memidx = get_mem_index(s);
3062 MemOp memop;
3063
3064 if (rn == 31) {
3065 gen_check_sp_alignment(s);
3066 }
3067
3068 /* This is a single atomic access, despite the "pair". */
3069 memop = check_atomic_align(s, rn, size + 1);
3070 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3071
3072 if (size == 2) {
3073 TCGv_i64 cmp = tcg_temp_new_i64();
3074 TCGv_i64 val = tcg_temp_new_i64();
3075
3076 if (s->be_data == MO_LE) {
3077 tcg_gen_concat32_i64(val, t1, t2);
3078 tcg_gen_concat32_i64(cmp, s1, s2);
3079 } else {
3080 tcg_gen_concat32_i64(val, t2, t1);
3081 tcg_gen_concat32_i64(cmp, s2, s1);
3082 }
3083
3084 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
3085
3086 if (s->be_data == MO_LE) {
3087 tcg_gen_extr32_i64(s1, s2, cmp);
3088 } else {
3089 tcg_gen_extr32_i64(s2, s1, cmp);
3090 }
3091 } else {
3092 TCGv_i128 cmp = tcg_temp_new_i128();
3093 TCGv_i128 val = tcg_temp_new_i128();
3094
3095 if (s->be_data == MO_LE) {
3096 tcg_gen_concat_i64_i128(val, t1, t2);
3097 tcg_gen_concat_i64_i128(cmp, s1, s2);
3098 } else {
3099 tcg_gen_concat_i64_i128(val, t2, t1);
3100 tcg_gen_concat_i64_i128(cmp, s2, s1);
3101 }
3102
3103 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
3104
3105 if (s->be_data == MO_LE) {
3106 tcg_gen_extr_i128_i64(s1, s2, cmp);
3107 } else {
3108 tcg_gen_extr_i128_i64(s2, s1, cmp);
3109 }
3110 }
3111 }
3112
3113 /*
3114 * Compute the ISS.SF bit for syndrome information if an exception
3115 * is taken on a load or store. This indicates whether the instruction
3116 * is accessing a 32-bit or 64-bit register. This logic is derived
3117 * from the ARMv8 specs for LDR (Shared decode for all encodings).
3118 */
3119 static bool ldst_iss_sf(int size, bool sign, bool ext)
3120 {
3121
3122 if (sign) {
3123 /*
3124 * Signed loads are 64 bit results if we are not going to
3125 * do a zero-extend from 32 to 64 after the load.
3126 * (For a store, sign and ext are always false.)
3127 */
3128 return !ext;
3129 } else {
3130 /* Unsigned loads/stores work at the specified size */
3131 return size == MO_64;
3132 }
3133 }
3134
3135 static bool trans_STXR(DisasContext *s, arg_stxr *a)
3136 {
3137 if (a->rn == 31) {
3138 gen_check_sp_alignment(s);
3139 }
3140 if (a->lasr) {
3141 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3142 }
3143 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
3144 return true;
3145 }
3146
3147 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
3148 {
3149 if (a->rn == 31) {
3150 gen_check_sp_alignment(s);
3151 }
3152 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
3153 if (a->lasr) {
3154 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3155 }
3156 return true;
3157 }
3158
3159 static bool trans_STLR(DisasContext *s, arg_stlr *a)
3160 {
3161 TCGv_i64 clean_addr;
3162 MemOp memop;
3163 bool iss_sf = ldst_iss_sf(a->sz, false, false);
3164
3165 /*
3166 * StoreLORelease is the same as Store-Release for QEMU, but
3167 * needs the feature-test.
3168 */
3169 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3170 return false;
3171 }
3172 /* Generate ISS for non-exclusive accesses including LASR. */
3173 if (a->rn == 31) {
3174 gen_check_sp_alignment(s);
3175 }
3176 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3177 memop = check_ordered_align(s, a->rn, 0, true, a->sz);
3178 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3179 true, a->rn != 31, memop);
3180 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
3181 iss_sf, a->lasr);
3182 return true;
3183 }
3184
3185 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
3186 {
3187 TCGv_i64 clean_addr;
3188 MemOp memop;
3189 bool iss_sf = ldst_iss_sf(a->sz, false, false);
3190
3191 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */
3192 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3193 return false;
3194 }
3195 /* Generate ISS for non-exclusive accesses including LASR. */
3196 if (a->rn == 31) {
3197 gen_check_sp_alignment(s);
3198 }
3199 memop = check_ordered_align(s, a->rn, 0, false, a->sz);
3200 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3201 false, a->rn != 31, memop);
3202 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
3203 a->rt, iss_sf, a->lasr);
3204 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3205 return true;
3206 }
3207
3208 static bool trans_STXP(DisasContext *s, arg_stxr *a)
3209 {
3210 if (a->rn == 31) {
3211 gen_check_sp_alignment(s);
3212 }
3213 if (a->lasr) {
3214 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3215 }
3216 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
3217 return true;
3218 }
3219
3220 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
3221 {
3222 if (a->rn == 31) {
3223 gen_check_sp_alignment(s);
3224 }
3225 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
3226 if (a->lasr) {
3227 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3228 }
3229 return true;
3230 }
3231
3232 static bool trans_CASP(DisasContext *s, arg_CASP *a)
3233 {
3234 if (!dc_isar_feature(aa64_atomics, s)) {
3235 return false;
3236 }
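    /* CASP operates on an even/odd register pair: Rs and Rt must be even. */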
3237 if (((a->rt | a->rs) & 1) != 0) {
3238 return false;
3239 }
3240
3241 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
3242 return true;
3243 }
3244
3245 static bool trans_CAS(DisasContext *s, arg_CAS *a)
3246 {
3247 if (!dc_isar_feature(aa64_atomics, s)) {
3248 return false;
3249 }
3250 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
3251 return true;
3252 }
3253
3254 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
3255 {
3256 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
3257 TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
3258 TCGv_i64 clean_addr = tcg_temp_new_i64();
3259 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3260
3261 gen_pc_plus_diff(s, clean_addr, a->imm);
3262 do_gpr_ld(s, tcg_rt, clean_addr, memop,
3263 false, true, a->rt, iss_sf, false);
3264 return true;
3265 }
3266
3267 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
3268 {
3269 /* Load register (literal), vector version */
3270 TCGv_i64 clean_addr;
3271 MemOp memop;
3272
3273 if (!fp_access_check(s)) {
3274 return true;
3275 }
3276 memop = finalize_memop_asimd(s, a->sz);
3277 clean_addr = tcg_temp_new_i64();
3278 gen_pc_plus_diff(s, clean_addr, a->imm);
3279 do_fp_ld(s, a->rt, clean_addr, memop);
3280 return true;
3281 }
3282
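/*
 * Shared address setup for the LDP/STP family: apply the immediate before
 * the access unless this is a post-indexed form (a->p), and perform the
 * MTE check over the whole 2 << sz byte pair.  The matching _post helper
 * below writes the final address back when a->w is set.
 */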
3283 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
3284 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3285 uint64_t offset, bool is_store, MemOp mop)
3286 {
3287 if (a->rn == 31) {
3288 gen_check_sp_alignment(s);
3289 }
3290
3291 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3292 if (!a->p) {
3293 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3294 }
3295
3296 *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
3297 (a->w || a->rn != 31), 2 << a->sz, mop);
3298 }
3299
3300 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
3301 TCGv_i64 dirty_addr, uint64_t offset)
3302 {
3303 if (a->w) {
3304 if (a->p) {
3305 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3306 }
3307 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3308 }
3309 }
3310
3311 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
3312 {
3313 uint64_t offset = a->imm << a->sz;
3314 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3315 MemOp mop = finalize_memop(s, a->sz);
3316
3317 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3318 tcg_rt = cpu_reg(s, a->rt);
3319 tcg_rt2 = cpu_reg(s, a->rt2);
3320 /*
3321 * We built mop above for the single logical access -- rebuild it
3322 * now for the paired operation.
3323 *
3324 * With LSE2, non-sign-extending pairs are treated atomically if
3325 * aligned, and if unaligned one of the pair will be completely
3326 * within a 16-byte block and that element will be atomic.
3327 * Otherwise each element is separately atomic.
3328 * In all cases, issue one operation with the correct atomicity.
3329 */
3330 mop = a->sz + 1;
3331 if (s->align_mem) {
3332 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3333 }
3334 mop = finalize_memop_pair(s, mop);
3335 if (a->sz == 2) {
3336 TCGv_i64 tmp = tcg_temp_new_i64();
3337
3338 if (s->be_data == MO_LE) {
3339 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3340 } else {
3341 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3342 }
3343 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3344 } else {
3345 TCGv_i128 tmp = tcg_temp_new_i128();
3346
3347 if (s->be_data == MO_LE) {
3348 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3349 } else {
3350 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3351 }
3352 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3353 }
3354 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3355 return true;
3356 }
3357
3358 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3359 {
3360 uint64_t offset = a->imm << a->sz;
3361 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3362 MemOp mop = finalize_memop(s, a->sz);
3363
3364 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3365 tcg_rt = cpu_reg(s, a->rt);
3366 tcg_rt2 = cpu_reg(s, a->rt2);
3367
3368 /*
3369 * We built mop above for the single logical access -- rebuild it
3370 * now for the paired operation.
3371 *
3372 * With LSE2, non-sign-extending pairs are treated atomically if
3373 * aligned, and if unaligned one of the pair will be completely
3374 * within a 16-byte block and that element will be atomic.
3375 * Otherwise each element is separately atomic.
3376 * In all cases, issue one operation with the correct atomicity.
3377 *
3378 * This treats sign-extending loads like zero-extending loads,
3379 * since that reuses the most code below.
3380 */
3381 mop = a->sz + 1;
3382 if (s->align_mem) {
3383 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3384 }
3385 mop = finalize_memop_pair(s, mop);
3386 if (a->sz == 2) {
3387 int o2 = s->be_data == MO_LE ? 32 : 0;
3388 int o1 = o2 ^ 32;
3389
3390 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3391 if (a->sign) {
3392 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3393 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3394 } else {
3395 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3396 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3397 }
3398 } else {
3399 TCGv_i128 tmp = tcg_temp_new_i128();
3400
3401 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3402 if (s->be_data == MO_LE) {
3403 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3404 } else {
3405 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3406 }
3407 }
3408 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3409 return true;
3410 }
3411
3412 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3413 {
3414 uint64_t offset = a->imm << a->sz;
3415 TCGv_i64 clean_addr, dirty_addr;
3416 MemOp mop;
3417
3418 if (!fp_access_check(s)) {
3419 return true;
3420 }
3421
3422 /* LSE2 does not merge FP pairs; leave these as separate operations. */
3423 mop = finalize_memop_asimd(s, a->sz);
3424 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3425 do_fp_st(s, a->rt, clean_addr, mop);
3426 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3427 do_fp_st(s, a->rt2, clean_addr, mop);
3428 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3429 return true;
3430 }
3431
3432 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3433 {
3434 uint64_t offset = a->imm << a->sz;
3435 TCGv_i64 clean_addr, dirty_addr;
3436 MemOp mop;
3437
3438 if (!fp_access_check(s)) {
3439 return true;
3440 }
3441
3442 /* LSE2 does not merge FP pairs; leave these as separate operations. */
3443 mop = finalize_memop_asimd(s, a->sz);
3444 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3445 do_fp_ld(s, a->rt, clean_addr, mop);
3446 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3447 do_fp_ld(s, a->rt2, clean_addr, mop);
3448 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3449 return true;
3450 }
3451
3452 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3453 {
3454 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3455 uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3456 MemOp mop;
3457 TCGv_i128 tmp;
3458
3459 /* STGP only comes in one size. */
3460 tcg_debug_assert(a->sz == MO_64);
3461
3462 if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3463 return false;
3464 }
3465
3466 if (a->rn == 31) {
3467 gen_check_sp_alignment(s);
3468 }
3469
3470 dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3471 if (!a->p) {
3472 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3473 }
3474
3475 clean_addr = clean_data_tbi(s, dirty_addr);
3476 tcg_rt = cpu_reg(s, a->rt);
3477 tcg_rt2 = cpu_reg(s, a->rt2);
3478
3479 /*
3480 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3481 * and one tag operation. We implement it as one single aligned 16-byte
3482 * memory operation for convenience. Note that the alignment ensures
3483 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3484 */
3485 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3486
3487 tmp = tcg_temp_new_i128();
3488 if (s->be_data == MO_LE) {
3489 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3490 } else {
3491 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3492 }
3493 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3494
3495 /* Perform the tag store, if tag access enabled. */
3496 if (s->ata[0]) {
3497 if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3498 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3499 } else {
3500 gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3501 }
3502 }
3503
3504 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3505 return true;
3506 }
3507
3508 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3509 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3510 uint64_t offset, bool is_store, MemOp mop)
3511 {
3512 int memidx;
3513
3514 if (a->rn == 31) {
3515 gen_check_sp_alignment(s);
3516 }
3517
3518 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3519 if (!a->p) {
3520 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3521 }
3522 memidx = get_a64_user_mem_index(s, a->unpriv);
3523 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3524 a->w || a->rn != 31,
3525 mop, a->unpriv, memidx);
3526 }
3527
3528 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3529 TCGv_i64 dirty_addr, uint64_t offset)
3530 {
3531 if (a->w) {
3532 if (a->p) {
3533 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3534 }
3535 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3536 }
3537 }
3538
3539 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3540 {
3541 bool iss_sf, iss_valid = !a->w;
3542 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3543 int memidx = get_a64_user_mem_index(s, a->unpriv);
3544 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3545
3546 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3547
3548 tcg_rt = cpu_reg(s, a->rt);
3549 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3550
3551 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3552 iss_valid, a->rt, iss_sf, false);
3553 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3554 return true;
3555 }
3556
3557 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3558 {
3559 bool iss_sf, iss_valid = !a->w;
3560 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3561 int memidx = get_a64_user_mem_index(s, a->unpriv);
3562 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3563
3564 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3565
3566 tcg_rt = cpu_reg(s, a->rt);
3567 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3568
3569 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3570 a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3571 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3572 return true;
3573 }
3574
3575 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3576 {
3577 TCGv_i64 clean_addr, dirty_addr;
3578 MemOp mop;
3579
3580 if (!fp_access_check(s)) {
3581 return true;
3582 }
3583 mop = finalize_memop_asimd(s, a->sz);
3584 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3585 do_fp_st(s, a->rt, clean_addr, mop);
3586 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3587 return true;
3588 }
3589
3590 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3591 {
3592 TCGv_i64 clean_addr, dirty_addr;
3593 MemOp mop;
3594
3595 if (!fp_access_check(s)) {
3596 return true;
3597 }
3598 mop = finalize_memop_asimd(s, a->sz);
3599 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3600 do_fp_ld(s, a->rt, clean_addr, mop);
3601 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3602 return true;
3603 }
3604
3605 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3606 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3607 bool is_store, MemOp memop)
3608 {
3609 TCGv_i64 tcg_rm;
3610
3611 if (a->rn == 31) {
3612 gen_check_sp_alignment(s);
3613 }
3614 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3615
3616 tcg_rm = read_cpu_reg(s, a->rm, 1);
3617 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3618
3619 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3620 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3621 }
3622
3623 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3624 {
3625 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3626 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3627 MemOp memop;
3628
3629 if (extract32(a->opt, 1, 1) == 0) {
3630 return false;
3631 }
3632
3633 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3634 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3635 tcg_rt = cpu_reg(s, a->rt);
3636 do_gpr_ld(s, tcg_rt, clean_addr, memop,
3637 a->ext, true, a->rt, iss_sf, false);
3638 return true;
3639 }
3640
3641 static bool trans_STR(DisasContext *s, arg_ldst *a)
3642 {
3643 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3644 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3645 MemOp memop;
3646
3647 if (extract32(a->opt, 1, 1) == 0) {
3648 return false;
3649 }
3650
3651 memop = finalize_memop(s, a->sz);
3652 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3653 tcg_rt = cpu_reg(s, a->rt);
3654 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3655 return true;
3656 }
3657
3658 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3659 {
3660 TCGv_i64 clean_addr, dirty_addr;
3661 MemOp memop;
3662
3663 if (extract32(a->opt, 1, 1) == 0) {
3664 return false;
3665 }
3666
3667 if (!fp_access_check(s)) {
3668 return true;
3669 }
3670
3671 memop = finalize_memop_asimd(s, a->sz);
3672 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3673 do_fp_ld(s, a->rt, clean_addr, memop);
3674 return true;
3675 }
3676
3677 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3678 {
3679 TCGv_i64 clean_addr, dirty_addr;
3680 MemOp memop;
3681
3682 if (extract32(a->opt, 1, 1) == 0) {
3683 return false;
3684 }
3685
3686 if (!fp_access_check(s)) {
3687 return true;
3688 }
3689
3690 memop = finalize_memop_asimd(s, a->sz);
3691 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3692 do_fp_st(s, a->rt, clean_addr, memop);
3693 return true;
3694 }
3695
3696
3697 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3698 int sign, bool invert)
3699 {
3700 MemOp mop = a->sz | sign;
3701 TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3702
3703 if (a->rn == 31) {
3704 gen_check_sp_alignment(s);
3705 }
3706 mop = check_atomic_align(s, a->rn, mop);
3707 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3708 a->rn != 31, mop);
3709 tcg_rs = read_cpu_reg(s, a->rs, true);
3710 tcg_rt = cpu_reg(s, a->rt);
3711 if (invert) {
3712 tcg_gen_not_i64(tcg_rs, tcg_rs);
3713 }
3714 /*
3715 * The tcg atomic primitives are all full barriers. Therefore we
3716 * can ignore the Acquire and Release bits of this instruction.
3717 */
3718 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3719
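    /*
     * MO_SIGN is only set so that the signed min/max comparison inside the
     * atomic helper sees sign-extended operands; the architectural result
     * written back to Rt is the old memory value zero-extended.
     */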
3720 if (mop & MO_SIGN) {
3721 switch (a->sz) {
3722 case MO_8:
3723 tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3724 break;
3725 case MO_16:
3726 tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3727 break;
3728 case MO_32:
3729 tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3730 break;
3731 case MO_64:
3732 break;
3733 default:
3734 g_assert_not_reached();
3735 }
3736 }
3737 return true;
3738 }
3739
3740 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3741 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3742 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3743 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3744 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3745 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3746 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3747 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3748 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3749
3750 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3751 {
3752 bool iss_sf = ldst_iss_sf(a->sz, false, false);
3753 TCGv_i64 clean_addr;
3754 MemOp mop;
3755
3756 if (!dc_isar_feature(aa64_atomics, s) ||
3757 !dc_isar_feature(aa64_rcpc_8_3, s)) {
3758 return false;
3759 }
3760 if (a->rn == 31) {
3761 gen_check_sp_alignment(s);
3762 }
3763 mop = check_ordered_align(s, a->rn, 0, false, a->sz);
3764 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3765 a->rn != 31, mop);
3766 /*
3767 * LDAPR* are a special case because they are a simple load, not a
3768 * fetch-and-do-something op.
3769 * The architectural consistency requirements here are weaker than
3770 * full load-acquire (we only need "load-acquire processor consistent"),
3771 * but we choose to implement them as full LDAQ.
3772 */
3773 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3774 true, a->rt, iss_sf, true);
3775 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3776 return true;
3777 }
3778
3779 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3780 {
3781 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3782 MemOp memop;
3783
3784 /* Load with pointer authentication */
3785 if (!dc_isar_feature(aa64_pauth, s)) {
3786 return false;
3787 }
3788
3789 if (a->rn == 31) {
3790 gen_check_sp_alignment(s);
3791 }
3792 dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3793
3794 if (s->pauth_active) {
3795 if (!a->m) {
3796 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3797 tcg_constant_i64(0));
3798 } else {
3799 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3800 tcg_constant_i64(0));
3801 }
3802 }
3803
3804 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3805
3806 memop = finalize_memop(s, MO_64);
3807
3808 /* Note that "clean" and "dirty" here refer to TBI not PAC. */
3809 clean_addr = gen_mte_check1(s, dirty_addr, false,
3810 a->w || a->rn != 31, memop);
3811
3812 tcg_rt = cpu_reg(s, a->rt);
3813 do_gpr_ld(s, tcg_rt, clean_addr, memop,
3814 /* extend */ false, /* iss_valid */ !a->w,
3815 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3816
3817 if (a->w) {
3818 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3819 }
3820 return true;
3821 }
3822
3823 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3824 {
3825 TCGv_i64 clean_addr, dirty_addr;
3826 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3827 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3828
3829 if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3830 return false;
3831 }
3832
3833 if (a->rn == 31) {
3834 gen_check_sp_alignment(s);
3835 }
3836
3837 mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3838 dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3839 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3840 clean_addr = clean_data_tbi(s, dirty_addr);
3841
3842 /*
3843 * Load-AcquirePC semantics; we implement as the slightly more
3844 * restrictive Load-Acquire.
3845 */
3846 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3847 a->rt, iss_sf, true);
3848 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3849 return true;
3850 }
3851
3852 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3853 {
3854 TCGv_i64 clean_addr, dirty_addr;
3855 MemOp mop = a->sz;
3856 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3857
3858 if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3859 return false;
3860 }
3861
3862 /* TODO: ARMv8.4-LSE SCTLR.nAA */
3863
3864 if (a->rn == 31) {
3865 gen_check_sp_alignment(s);
3866 }
3867
3868 mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3869 dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3870 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3871 clean_addr = clean_data_tbi(s, dirty_addr);
3872
3873 /* Store-Release semantics */
3874 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3875 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3876 return true;
3877 }
3878
3879 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3880 {
3881 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3882 MemOp endian, align, mop;
3883
3884 int total; /* total bytes */
3885 int elements; /* elements per vector */
3886 int r;
3887 int size = a->sz;
3888
3889 if (!a->p && a->rm != 0) {
3890 /* For non-postindexed accesses the Rm field must be 0 */
3891 return false;
3892 }
3893 if (size == 3 && !a->q && a->selem != 1) {
3894 return false;
3895 }
3896 if (!fp_access_check(s)) {
3897 return true;
3898 }
3899
3900 if (a->rn == 31) {
3901 gen_check_sp_alignment(s);
3902 }
3903
3904 /* For our purposes, bytes are always little-endian. */
3905 endian = s->be_data;
3906 if (size == 0) {
3907 endian = MO_LE;
3908 }
3909
3910 total = a->rpt * a->selem * (a->q ? 16 : 8);
3911 tcg_rn = cpu_reg_sp(s, a->rn);
3912
3913 /*
3914 * Issue the MTE check vs the logical repeat count, before we
3915 * promote consecutive little-endian elements below.
3916 */
3917 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3918 finalize_memop_asimd(s, size));
3919
3920 /*
3921 * Consecutive little-endian elements from a single register
3922 * can be promoted to a larger little-endian operation.
3923 */
3924 align = MO_ALIGN;
3925 if (a->selem == 1 && endian == MO_LE) {
3926 align = pow2_align(size);
3927 size = 3;
3928 }
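    /*
     * Thus, for example, LD1 {Vt.16B}, [Xn] (selem == 1, size == 0) is
     * performed as two little-endian MO_64 loads per register instead of
     * sixteen single-byte loads.
     */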
3929 if (!s->align_mem) {
3930 align = 0;
3931 }
3932 mop = endian | size | align;
3933
3934 elements = (a->q ? 16 : 8) >> size;
3935 tcg_ebytes = tcg_constant_i64(1 << size);
3936 for (r = 0; r < a->rpt; r++) {
3937 int e;
3938 for (e = 0; e < elements; e++) {
3939 int xs;
3940 for (xs = 0; xs < a->selem; xs++) {
3941 int tt = (a->rt + r + xs) % 32;
3942 do_vec_ld(s, tt, e, clean_addr, mop);
3943 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3944 }
3945 }
3946 }
3947
3948 /*
3949 * For non-quad operations, setting a slice of the low 64 bits of
3950 * the register clears the high 64 bits (in the ARM ARM pseudocode
3951 * this is implicit in the fact that 'rval' is a 64 bit wide
3952 * variable). For quad operations, we might still need to zero
3953 * the high bits of SVE.
3954 */
3955 for (r = 0; r < a->rpt * a->selem; r++) {
3956 int tt = (a->rt + r) % 32;
3957 clear_vec_high(s, a->q, tt);
3958 }
3959
3960 if (a->p) {
3961 if (a->rm == 31) {
3962 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3963 } else {
3964 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3965 }
3966 }
3967 return true;
3968 }
3969
3970 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3971 {
3972 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3973 MemOp endian, align, mop;
3974
3975 int total; /* total bytes */
3976 int elements; /* elements per vector */
3977 int r;
3978 int size = a->sz;
3979
3980 if (!a->p && a->rm != 0) {
3981 /* For non-postindexed accesses the Rm field must be 0 */
3982 return false;
3983 }
3984 if (size == 3 && !a->q && a->selem != 1) {
3985 return false;
3986 }
3987 if (!fp_access_check(s)) {
3988 return true;
3989 }
3990
3991 if (a->rn == 31) {
3992 gen_check_sp_alignment(s);
3993 }
3994
3995 /* For our purposes, bytes are always little-endian. */
3996 endian = s->be_data;
3997 if (size == 0) {
3998 endian = MO_LE;
3999 }
4000
4001 total = a->rpt * a->selem * (a->q ? 16 : 8);
4002 tcg_rn = cpu_reg_sp(s, a->rn);
4003
4004 /*
4005 * Issue the MTE check vs the logical repeat count, before we
4006 * promote consecutive little-endian elements below.
4007 */
4008 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
4009 finalize_memop_asimd(s, size));
4010
4011 /*
4012 * Consecutive little-endian elements from a single register
4013 * can be promoted to a larger little-endian operation.
4014 */
4015 align = MO_ALIGN;
4016 if (a->selem == 1 && endian == MO_LE) {
4017 align = pow2_align(size);
4018 size = 3;
4019 }
4020 if (!s->align_mem) {
4021 align = 0;
4022 }
4023 mop = endian | size | align;
4024
4025 elements = (a->q ? 16 : 8) >> size;
4026 tcg_ebytes = tcg_constant_i64(1 << size);
4027 for (r = 0; r < a->rpt; r++) {
4028 int e;
4029 for (e = 0; e < elements; e++) {
4030 int xs;
4031 for (xs = 0; xs < a->selem; xs++) {
4032 int tt = (a->rt + r + xs) % 32;
4033 do_vec_st(s, tt, e, clean_addr, mop);
4034 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4035 }
4036 }
4037 }
4038
4039 if (a->p) {
4040 if (a->rm == 31) {
4041 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4042 } else {
4043 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4044 }
4045 }
4046 return true;
4047 }
4048
4049 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
4050 {
4051 int xs, total, rt;
4052 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4053 MemOp mop;
4054
4055 if (!a->p && a->rm != 0) {
4056 return false;
4057 }
4058 if (!fp_access_check(s)) {
4059 return true;
4060 }
4061
4062 if (a->rn == 31) {
4063 gen_check_sp_alignment(s);
4064 }
4065
4066 total = a->selem << a->scale;
4067 tcg_rn = cpu_reg_sp(s, a->rn);
4068
4069 mop = finalize_memop_asimd(s, a->scale);
4070 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
4071 total, mop);
4072
4073 tcg_ebytes = tcg_constant_i64(1 << a->scale);
4074 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4075 do_vec_st(s, rt, a->index, clean_addr, mop);
4076 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4077 }
4078
4079 if (a->p) {
4080 if (a->rm == 31) {
4081 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4082 } else {
4083 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4084 }
4085 }
4086 return true;
4087 }
4088
4089 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
4090 {
4091 int xs, total, rt;
4092 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4093 MemOp mop;
4094
4095 if (!a->p && a->rm != 0) {
4096 return false;
4097 }
4098 if (!fp_access_check(s)) {
4099 return true;
4100 }
4101
4102 if (a->rn == 31) {
4103 gen_check_sp_alignment(s);
4104 }
4105
4106 total = a->selem << a->scale;
4107 tcg_rn = cpu_reg_sp(s, a->rn);
4108
4109 mop = finalize_memop_asimd(s, a->scale);
4110 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
4111 total, mop);
4112
4113 tcg_ebytes = tcg_constant_i64(1 << a->scale);
4114 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4115 do_vec_ld(s, rt, a->index, clean_addr, mop);
4116 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4117 }
4118
4119 if (a->p) {
4120 if (a->rm == 31) {
4121 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4122 } else {
4123 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4124 }
4125 }
4126 return true;
4127 }
4128
4129 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
4130 {
4131 int xs, total, rt;
4132 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4133 MemOp mop;
4134
4135 if (!a->p && a->rm != 0) {
4136 return false;
4137 }
4138 if (!fp_access_check(s)) {
4139 return true;
4140 }
4141
4142 if (a->rn == 31) {
4143 gen_check_sp_alignment(s);
4144 }
4145
4146 total = a->selem << a->scale;
4147 tcg_rn = cpu_reg_sp(s, a->rn);
4148
4149 mop = finalize_memop_asimd(s, a->scale);
4150 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
4151 total, mop);
4152
4153 tcg_ebytes = tcg_constant_i64(1 << a->scale);
4154 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4155 /* Load and replicate to all elements */
4156 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4157
4158 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
4159 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
4160 (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
4161 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4162 }
4163
4164 if (a->p) {
4165 if (a->rm == 31) {
4166 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4167 } else {
4168 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4169 }
4170 }
4171 return true;
4172 }
4173
4174 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
4175 {
4176 TCGv_i64 addr, clean_addr, tcg_rt;
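    /* dcz_blocksize is the log2 of the DC ZVA block size in 4-byte words. */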
4177 int size = 4 << s->dcz_blocksize;
4178
4179 if (!dc_isar_feature(aa64_mte, s)) {
4180 return false;
4181 }
4182 if (s->current_el == 0) {
4183 return false;
4184 }
4185
4186 if (a->rn == 31) {
4187 gen_check_sp_alignment(s);
4188 }
4189
4190 addr = read_cpu_reg_sp(s, a->rn, true);
4191 tcg_gen_addi_i64(addr, addr, a->imm);
4192 tcg_rt = cpu_reg(s, a->rt);
4193
4194 if (s->ata[0]) {
4195 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
4196 }
4197 /*
4198 * The non-tags portion of STZGM is mostly like DC_ZVA,
4199 * except the alignment happens before the access.
4200 */
4201 clean_addr = clean_data_tbi(s, addr);
4202 tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4203 gen_helper_dc_zva(tcg_env, clean_addr);
4204 return true;
4205 }
4206
4207 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
4208 {
4209 TCGv_i64 addr, clean_addr, tcg_rt;
4210
4211 if (!dc_isar_feature(aa64_mte, s)) {
4212 return false;
4213 }
4214 if (s->current_el == 0) {
4215 return false;
4216 }
4217
4218 if (a->rn == 31) {
4219 gen_check_sp_alignment(s);
4220 }
4221
4222 addr = read_cpu_reg_sp(s, a->rn, true);
4223 tcg_gen_addi_i64(addr, addr, a->imm);
4224 tcg_rt = cpu_reg(s, a->rt);
4225
4226 if (s->ata[0]) {
4227 gen_helper_stgm(tcg_env, addr, tcg_rt);
4228 } else {
4229 MMUAccessType acc = MMU_DATA_STORE;
4230 int size = 4 << s->gm_blocksize;
4231
4232 clean_addr = clean_data_tbi(s, addr);
4233 tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4234 gen_probe_access(s, clean_addr, acc, size);
4235 }
4236 return true;
4237 }
4238
4239 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
4240 {
4241 TCGv_i64 addr, clean_addr, tcg_rt;
4242
4243 if (!dc_isar_feature(aa64_mte, s)) {
4244 return false;
4245 }
4246 if (s->current_el == 0) {
4247 return false;
4248 }
4249
4250 if (a->rn == 31) {
4251 gen_check_sp_alignment(s);
4252 }
4253
4254 addr = read_cpu_reg_sp(s, a->rn, true);
4255 tcg_gen_addi_i64(addr, addr, a->imm);
4256 tcg_rt = cpu_reg(s, a->rt);
4257
4258 if (s->ata[0]) {
4259 gen_helper_ldgm(tcg_rt, tcg_env, addr);
4260 } else {
4261 MMUAccessType acc = MMU_DATA_LOAD;
4262 int size = 4 << s->gm_blocksize;
4263
4264 clean_addr = clean_data_tbi(s, addr);
4265 tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4266 gen_probe_access(s, clean_addr, acc, size);
4267 /* The result tags are zeros. */
4268 tcg_gen_movi_i64(tcg_rt, 0);
4269 }
4270 return true;
4271 }
4272
4273 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
4274 {
4275 TCGv_i64 addr, clean_addr, tcg_rt;
4276
4277 if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
4278 return false;
4279 }
4280
4281 if (a->rn == 31) {
4282 gen_check_sp_alignment(s);
4283 }
4284
4285 addr = read_cpu_reg_sp(s, a->rn, true);
4286 if (!a->p) {
4287 /* pre-index or signed offset */
4288 tcg_gen_addi_i64(addr, addr, a->imm);
4289 }
4290
4291 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4292 tcg_rt = cpu_reg(s, a->rt);
4293 if (s->ata[0]) {
4294 gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
4295 } else {
4296 /*
4297 * Tag access disabled: we must check for aborts on the load
4298 * from [rn+offset], and then insert a 0 tag into rt.
4299 */
4300 clean_addr = clean_data_tbi(s, addr);
4301 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4302 gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
4303 }
4304
4305 if (a->w) {
4306 /* pre-index or post-index */
4307 if (a->p) {
4308 /* post-index */
4309 tcg_gen_addi_i64(addr, addr, a->imm);
4310 }
4311 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4312 }
4313 return true;
4314 }
4315
4316 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
4317 {
4318 TCGv_i64 addr, tcg_rt;
4319
4320 if (a->rn == 31) {
4321 gen_check_sp_alignment(s);
4322 }
4323
4324 addr = read_cpu_reg_sp(s, a->rn, true);
4325 if (!a->p) {
4326 /* pre-index or signed offset */
4327 tcg_gen_addi_i64(addr, addr, a->imm);
4328 }
4329 tcg_rt = cpu_reg_sp(s, a->rt);
4330 if (!s->ata[0]) {
4331 /*
4332 * For STG and ST2G, we need to check alignment and probe memory.
4333 * TODO: For STZG and STZ2G, we could rely on the stores below,
4334 * at least for system mode; user-only won't enforce alignment.
4335 */
4336 if (is_pair) {
4337 gen_helper_st2g_stub(tcg_env, addr);
4338 } else {
4339 gen_helper_stg_stub(tcg_env, addr);
4340 }
4341 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4342 if (is_pair) {
4343 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4344 } else {
4345 gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4346 }
4347 } else {
4348 if (is_pair) {
4349 gen_helper_st2g(tcg_env, addr, tcg_rt);
4350 } else {
4351 gen_helper_stg(tcg_env, addr, tcg_rt);
4352 }
4353 }
4354
4355 if (is_zero) {
4356 TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4357 TCGv_i64 zero64 = tcg_constant_i64(0);
4358 TCGv_i128 zero128 = tcg_temp_new_i128();
4359 int mem_index = get_mem_index(s);
4360 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4361
4362 tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4363
4364 /* This is 1 or 2 atomic 16-byte operations. */
4365 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4366 if (is_pair) {
4367 tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4368 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4369 }
4370 }
4371
4372 if (a->w) {
4373 /* pre-index or post-index */
4374 if (a->p) {
4375 /* post-index */
4376 tcg_gen_addi_i64(addr, addr, a->imm);
4377 }
4378 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4379 }
4380 return true;
4381 }
4382
4383 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
4384 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
4385 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
4386 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
4387
4388 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4389
4390 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4391 bool is_setg, SetFn fn)
4392 {
4393 int memidx;
4394 uint32_t syndrome, desc = 0;
4395
4396 if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4397 return false;
4398 }
4399
4400 /*
4401 * UNPREDICTABLE cases: we choose to UNDEF, which allows
4402 * us to pull this check before the CheckMOPSEnabled() test
4403 * (which we do in the helper function)
4404 */
4405 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4406 a->rd == 31 || a->rn == 31) {
4407 return false;
4408 }
4409
4410 memidx = get_a64_user_mem_index(s, a->unpriv);
4411
4412 /*
4413 * We pass option_a == true, matching our implementation;
4414 * we pass wrong_option == false: helper function may set that bit.
4415 */
4416 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4417 is_epilogue, false, true, a->rd, a->rs, a->rn);
4418
4419 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4420 /* We may need to do MTE tag checking, so assemble the descriptor */
4421 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4422 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4423 desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4424 /* SIZEM1 and ALIGN we leave 0 (byte write) */
4425 }
4426 /* The helper function always needs the memidx even with MTE disabled */
4427 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4428
4429 /*
4430 * The helper needs the register numbers, but since they're in
4431 * the syndrome anyway, we let it extract them from there rather
4432 * than passing in an extra three integer arguments.
4433 */
4434 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4435 return true;
4436 }
4437
4438 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
4439 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
4440 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
4441 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
4442 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
4443 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
4444
4445 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4446
4447 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4448 {
4449 int rmemidx, wmemidx;
4450 uint32_t syndrome, rdesc = 0, wdesc = 0;
4451 bool wunpriv = extract32(a->options, 0, 1);
4452 bool runpriv = extract32(a->options, 1, 1);
4453
4454 /*
4455 * UNPREDICTABLE cases: we choose to UNDEF, which allows
4456 * us to pull this check before the CheckMOPSEnabled() test
4457 * (which we do in the helper function)
4458 */
4459 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4460 a->rd == 31 || a->rs == 31 || a->rn == 31) {
4461 return false;
4462 }
4463
4464 rmemidx = get_a64_user_mem_index(s, runpriv);
4465 wmemidx = get_a64_user_mem_index(s, wunpriv);
4466
4467 /*
4468 * We pass option_a == true, matching our implementation;
4469 * we pass wrong_option == false: helper function may set that bit.
4470 */
4471 syndrome = syn_mop(false, false, a->options, is_epilogue,
4472 false, true, a->rd, a->rs, a->rn);
4473
4474 /* If we need to do MTE tag checking, assemble the descriptors */
4475 if (s->mte_active[runpriv]) {
4476 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4477 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4478 }
4479 if (s->mte_active[wunpriv]) {
4480 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4481 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4482 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4483 }
4484 /* The helper function needs these parts of the descriptor regardless */
4485 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4486 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4487
4488 /*
4489 * The helper needs the register numbers, but since they're in
4490 * the syndrome anyway, we let it extract them from there rather
4491 * than passing in an extra three integer arguments.
4492 */
4493 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4494 tcg_constant_i32(rdesc));
4495 return true;
4496 }
4497
4498 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4499 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4500 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4501 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4502 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4503 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
4504
4505 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4506
4507 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4508 bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4509 {
4510 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4511 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4512 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4513
4514 fn(tcg_rd, tcg_rn, tcg_imm);
4515 if (!a->sf) {
4516 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4517 }
4518 return true;
4519 }
4520
4521 /*
4522 * PC-rel. addressing
4523 */
4524
4525 static bool trans_ADR(DisasContext *s, arg_ri *a)
4526 {
4527 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4528 return true;
4529 }
4530
4531 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4532 {
4533 int64_t offset = (int64_t)a->imm << 12;
4534
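    /*
     * ADRP computes (PC & ~0xfff) + (imm << 12); folding the low bits of
     * pc_curr into the offset lets gen_pc_plus_diff add a plain constant.
     */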
4535 /* The page offset is ok for CF_PCREL. */
4536 offset -= s->pc_curr & 0xfff;
4537 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4538 return true;
4539 }
4540
4541 /*
4542 * Add/subtract (immediate)
4543 */
4544 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4545 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4546 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4547 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4548
4549 /*
4550 * Add/subtract (immediate, with tags)
4551 */
4552
4553 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4554 bool sub_op)
4555 {
4556 TCGv_i64 tcg_rn, tcg_rd;
4557 int imm;
4558
4559 imm = a->uimm6 << LOG2_TAG_GRANULE;
4560 if (sub_op) {
4561 imm = -imm;
4562 }
4563
4564 tcg_rn = cpu_reg_sp(s, a->rn);
4565 tcg_rd = cpu_reg_sp(s, a->rd);
4566
4567 if (s->ata[0]) {
4568 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4569 tcg_constant_i32(imm),
4570 tcg_constant_i32(a->uimm4));
4571 } else {
4572 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4573 gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4574 }
4575 return true;
4576 }
4577
4578 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4579 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4580
4581 /* The input should be a value in the bottom e bits (with higher
4582 * bits zero); returns that value replicated into every element
4583 * of size e in a 64 bit integer.
4584 */
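/* For example, bitfield_replicate(0x1, 2) returns 0x5555555555555555. */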
4585 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4586 {
4587 assert(e != 0);
4588 while (e < 64) {
4589 mask |= mask << e;
4590 e *= 2;
4591 }
4592 return mask;
4593 }
4594
4595 /*
4596 * Logical (immediate)
4597 */
4598
4599 /*
4600 * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4601 * only require the wmask. Returns false if the imms/immr/immn are a reserved
4602 * value (ie should cause a guest UNDEF exception), and true if they are
4603 * valid, in which case the decoded bit pattern is written to result.
4604 */
4605 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4606 unsigned int imms, unsigned int immr)
4607 {
4608 uint64_t mask;
4609 unsigned e, levels, s, r;
4610 int len;
4611
4612 assert(immn < 2 && imms < 64 && immr < 64);
4613
4614 /* The bit patterns we create here are 64 bit patterns which
4615 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4616 * 64 bits each. Each element contains the same value: a run
4617 * of between 1 and e-1 non-zero bits, rotated within the
4618 * element by between 0 and e-1 bits.
4619 *
4620 * The element size and run length are encoded into immn (1 bit)
4621 * and imms (6 bits) as follows:
4622 * 64 bit elements: immn = 1, imms = <length of run - 1>
4623 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4624 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4625 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4626 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4627 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4628 * Notice that immn = 0, imms = 11111x is the only combination
4629 * not covered by one of the above options; this is reserved.
4630 * Further, <length of run - 1> all-ones is a reserved pattern.
4631 *
4632 * In all cases the rotation is by immr % e (and immr is 6 bits).
4633 */
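    /*
     * Worked example: immn == 0, imms == 0b000111, immr == 8 selects
     * e == 32 with a run of 8 ones rotated right by 8, giving the element
     * 0xff000000 and the result 0xff000000ff000000.
     */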
4634
4635 /* First determine the element size */
4636 len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4637 if (len < 1) {
4638 /* This is the immn == 0, imms == 0b11111x case */
4639 return false;
4640 }
4641 e = 1 << len;
4642
4643 levels = e - 1;
4644 s = imms & levels;
4645 r = immr & levels;
4646
4647 if (s == levels) {
4648 /* <length of run - 1> mustn't be all-ones. */
4649 return false;
4650 }
4651
4652 /* Create the value of one element: s+1 set bits rotated
4653 * by r within the element (which is e bits wide)...
4654 */
4655 mask = MAKE_64BIT_MASK(0, s + 1);
4656 if (r) {
4657 mask = (mask >> r) | (mask << (e - r));
4658 mask &= MAKE_64BIT_MASK(0, e);
4659 }
4660 /* ...then replicate the element over the whole 64 bit value */
4661 mask = bitfield_replicate(mask, e);
4662 *result = mask;
4663 return true;
4664 }
4665
4666 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4667 void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4668 {
4669 TCGv_i64 tcg_rd, tcg_rn;
4670 uint64_t imm;
4671
4672 /* Some immediate field values are reserved. */
4673 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4674 extract32(a->dbm, 0, 6),
4675 extract32(a->dbm, 6, 6))) {
4676 return false;
4677 }
4678 if (!a->sf) {
4679 imm &= 0xffffffffull;
4680 }
4681
4682 tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4683 tcg_rn = cpu_reg(s, a->rn);
4684
4685 fn(tcg_rd, tcg_rn, imm);
4686 if (set_cc) {
4687 gen_logic_CC(a->sf, tcg_rd);
4688 }
4689 if (!a->sf) {
4690 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4691 }
4692 return true;
4693 }
4694
4695 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4696 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4697 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4698 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4699
4700 /*
4701 * Move wide (immediate)
4702 */
4703
4704 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4705 {
4706 int pos = a->hw << 4;
4707 tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4708 return true;
4709 }
4710
4711 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4712 {
4713 int pos = a->hw << 4;
4714 uint64_t imm = a->imm;
4715
4716 imm = ~(imm << pos);
4717 if (!a->sf) {
4718 imm = (uint32_t)imm;
4719 }
4720 tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4721 return true;
4722 }
4723
4724 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4725 {
4726 int pos = a->hw << 4;
4727 TCGv_i64 tcg_rd, tcg_im;
4728
4729 tcg_rd = cpu_reg(s, a->rd);
4730 tcg_im = tcg_constant_i64(a->imm);
4731 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4732 if (!a->sf) {
4733 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4734 }
4735 return true;
4736 }
4737
4738 /*
4739 * Bitfield
4740 */
4741
4742 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4743 {
4744 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4745 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4746 unsigned int bitsize = a->sf ? 64 : 32;
4747 unsigned int ri = a->immr;
4748 unsigned int si = a->imms;
4749 unsigned int pos, len;
4750
4751 if (si >= ri) {
4752 /* Wd<s-r:0> = Wn<s:r> */
4753 len = (si - ri) + 1;
4754 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4755 if (!a->sf) {
4756 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4757 }
4758 } else {
4759 /* Wd<32+s-r,32-r> = Wn<s:0> */
4760 len = si + 1;
4761 pos = (bitsize - ri) & (bitsize - 1);
4762
4763 if (len < ri) {
4764 /*
4765 * Sign extend the destination field from len to fill the
4766 * balance of the word. Let the deposit below insert all
4767 * of those sign bits.
4768 */
4769 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4770 len = ri;
4771 }
4772
4773 /*
4774 * We start with zero, and we haven't modified any bits outside
4775 * bitsize, therefore no final zero-extension is needed for !sf.
4776 */
4777 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4778 }
4779 return true;
4780 }
4781
4782 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4783 {
4784 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4785 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4786 unsigned int bitsize = a->sf ? 64 : 32;
4787 unsigned int ri = a->immr;
4788 unsigned int si = a->imms;
4789 unsigned int pos, len;
4790
4793
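    /*
     * For example, LSL Wd, Wn, #n (n > 0) is UBFM with immr == 32 - n and
     * imms == 31 - n, which takes the si < ri path below and deposits
     * Wn<31-n:0> at bit position n.
     */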
4794 if (si >= ri) {
4795 /* Wd<s-r:0> = Wn<s:r> */
4796 len = (si - ri) + 1;
4797 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4798 } else {
4799 /* Wd<32+s-r,32-r> = Wn<s:0> */
4800 len = si + 1;
4801 pos = (bitsize - ri) & (bitsize - 1);
4802 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4803 }
4804 return true;
4805 }
4806
4807 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4808 {
4809 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4810 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4811 unsigned int bitsize = a->sf ? 64 : 32;
4812 unsigned int ri = a->immr;
4813 unsigned int si = a->imms;
4814 unsigned int pos, len;
4815
4818
4819 if (si >= ri) {
4820 /* Wd<s-r:0> = Wn<s:r> */
4821 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4822 len = (si - ri) + 1;
4823 pos = 0;
4824 } else {
4825 /* Wd<32+s-r,32-r> = Wn<s:0> */
4826 len = si + 1;
4827 pos = (bitsize - ri) & (bitsize - 1);
4828 }
4829
4830 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4831 if (!a->sf) {
4832 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4833 }
4834 return true;
4835 }
4836
4837 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4838 {
4839 TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4840
4841 tcg_rd = cpu_reg(s, a->rd);
4842
4843 if (unlikely(a->imm == 0)) {
4844 /*
4845 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4846 * so an extract from bit 0 is a special case.
4847 */
4848 if (a->sf) {
4849 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4850 } else {
4851 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4852 }
4853 } else {
4854 tcg_rm = cpu_reg(s, a->rm);
4855 tcg_rn = cpu_reg(s, a->rn);
4856
4857 if (a->sf) {
4858 /* Specialization to ROR happens in EXTRACT2. */
4859 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4860 } else {
4861 TCGv_i32 t0 = tcg_temp_new_i32();
4862
4863 tcg_gen_extrl_i64_i32(t0, tcg_rm);
4864 if (a->rm == a->rn) {
4865 tcg_gen_rotri_i32(t0, t0, a->imm);
4866 } else {
4867 TCGv_i32 t1 = tcg_temp_new_i32();
4868 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4869 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4870 }
4871 tcg_gen_extu_i32_i64(tcg_rd, t0);
4872 }
4873 }
4874 return true;
4875 }
4876
4877 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a)
4878 {
4879 if (fp_access_check(s)) {
4880 int len = (a->len + 1) * 16;
4881
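        /*
         * The helper's simd_data packs the table length in bytes, the TBX
         * flag and the table base register number: (len << 6) | (tbx << 5) | rn.
         */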
4882 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4883 vec_full_reg_offset(s, a->rm), tcg_env,
4884 a->q ? 16 : 8, vec_full_reg_size(s),
4885 (len << 6) | (a->tbx << 5) | a->rn,
4886 gen_helper_simd_tblx);
4887 }
4888 return true;
4889 }
4890
4891 typedef int simd_permute_idx_fn(int i, int part, int elements);
4892
4893 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
4894 simd_permute_idx_fn *fn, int part)
4895 {
4896 MemOp esz = a->esz;
4897 int datasize = a->q ? 16 : 8;
4898 int elements = datasize >> esz;
4899 TCGv_i64 tcg_res[2], tcg_ele;
4900
4901 if (esz == MO_64 && !a->q) {
4902 return false;
4903 }
4904 if (!fp_access_check(s)) {
4905 return true;
4906 }
4907
4908 tcg_res[0] = tcg_temp_new_i64();
4909 tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
4910 tcg_ele = tcg_temp_new_i64();
4911
4912 for (int i = 0; i < elements; i++) {
4913 int o, w, idx;
4914
4915 idx = fn(i, part, elements);
4916 read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn),
4917 idx & (elements - 1), esz);
4918
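        /*
         * (i << (esz + 3)) is the destination bit index of element i;
         * split it into a 64-bit result word (w) and a bit offset within
         * that word (o).
         */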
4919 w = (i << (esz + 3)) / 64;
4920 o = (i << (esz + 3)) % 64;
4921 if (o == 0) {
4922 tcg_gen_mov_i64(tcg_res[w], tcg_ele);
4923 } else {
4924 tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz);
4925 }
4926 }
4927
4928 for (int i = a->q; i >= 0; --i) {
4929 write_vec_element(s, tcg_res[i], a->rd, i, MO_64);
4930 }
4931 clear_vec_high(s, a->q, a->rd);
4932 return true;
4933 }
4934
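/*
 * UZP{1,2}: select the even (part == 0) or odd (part == 1) numbered
 * elements of the concatenation of rn (low half) and rm (high half).
 */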
4935 static int permute_load_uzp(int i, int part, int elements)
4936 {
4937 return 2 * i + part;
4938 }
4939
4940 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0)
4941 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1)
4942
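/*
 * TRN{1,2}: interleave the even (part == 0) or odd (part == 1) numbered
 * elements of rn and rm as adjacent pairs.
 */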
4943 static int permute_load_trn(int i, int part, int elements)
4944 {
4945 return (i & 1) * elements + (i & ~1) + part;
4946 }
4947
4948 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0)
4949 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1)
4950
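/*
 * ZIP{1,2}: interleave corresponding elements from the low (part == 0)
 * or high (part == 1) halves of rn and rm.
 */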
4951 static int permute_load_zip(int i, int part, int elements)
4952 {
4953 return (i & 1) * elements + ((part * elements + i) >> 1);
4954 }
4955
4956 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0)
4957 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1)
4958
4959 /*
4960 * Cryptographic AES, SHA, SHA512
4961 */
4962
4963 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
4964 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
4965 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
4966 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
4967
4968 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
4969 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
4970 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
4971 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)
4972
4973 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
4974 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
4975 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
4976
4977 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
4978 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
4979 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
4980
4981 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
4982 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
4983 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
4984 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
4985 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
4986 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
4987 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
4988
4989 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
4990 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
4991
4992 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
4993 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
4994
4995 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
4996 {
4997 if (!dc_isar_feature(aa64_sm3, s)) {
4998 return false;
4999 }
5000 if (fp_access_check(s)) {
5001 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
5002 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
5003 TCGv_i32 tcg_op3 = tcg_temp_new_i32();
5004 TCGv_i32 tcg_res = tcg_temp_new_i32();
5005
5006 read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
5007 read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
5008 read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
5009
5010 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
5011 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
5012 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
5013 tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
5014
5015 /* Clear the whole register first, then store bits [127:96]. */
5016 clear_vec(s, a->rd);
5017 write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
5018 }
5019 return true;
5020 }
5021
5022 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
5023 {
5024 if (fp_access_check(s)) {
5025 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
5026 }
5027 return true;
5028 }
5029 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
5030 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
5031 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
5032 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
5033
5034 static bool trans_XAR(DisasContext *s, arg_XAR *a)
5035 {
5036 if (!dc_isar_feature(aa64_sha3, s)) {
5037 return false;
5038 }
5039 if (fp_access_check(s)) {
5040 gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
5041 vec_full_reg_offset(s, a->rn),
5042 vec_full_reg_offset(s, a->rm), a->imm, 16,
5043 vec_full_reg_size(s));
5044 }
5045 return true;
5046 }
5047
5048 /*
5049 * Advanced SIMD copy
5050 */
5051
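/*
 * The position of the lowest set bit in imm selects the element size;
 * the bits above it give the element index. Reserved encodings (no size
 * bit within MO_8..MO_64) are rejected.
 */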
5052 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
5053 {
5054 unsigned esz = ctz32(imm);
5055 if (esz <= MO_64) {
5056 *pesz = esz;
5057 *pidx = imm >> (esz + 1);
5058 return true;
5059 }
5060 return false;
5061 }
5062
5063 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
5064 {
5065 MemOp esz;
5066 unsigned idx;
5067
5068 if (!decode_esz_idx(a->imm, &esz, &idx)) {
5069 return false;
5070 }
5071 if (fp_access_check(s)) {
5072 /*
5073 * This instruction just extracts the specified element and
5074 * zero-extends it into the bottom of the destination register.
5075 */
5076 TCGv_i64 tmp = tcg_temp_new_i64();
5077 read_vec_element(s, tmp, a->rn, idx, esz);
5078 write_fp_dreg(s, a->rd, tmp);
5079 }
5080 return true;
5081 }
5082
5083 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
5084 {
5085 MemOp esz;
5086 unsigned idx;
5087
5088 if (!decode_esz_idx(a->imm, &esz, &idx)) {
5089 return false;
5090 }
5091 if (esz == MO_64 && !a->q) {
5092 return false;
5093 }
5094 if (fp_access_check(s)) {
5095 tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
5096 vec_reg_offset(s, a->rn, idx, esz),
5097 a->q ? 16 : 8, vec_full_reg_size(s));
5098 }
5099 return true;
5100 }
5101
5102 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
5103 {
5104 MemOp esz;
5105 unsigned idx;
5106
5107 if (!decode_esz_idx(a->imm, &esz, &idx)) {
5108 return false;
5109 }
5110 if (esz == MO_64 && !a->q) {
5111 return false;
5112 }
5113 if (fp_access_check(s)) {
5114 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5115 a->q ? 16 : 8, vec_full_reg_size(s),
5116 cpu_reg(s, a->rn));
5117 }
5118 return true;
5119 }
5120
5121 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
5122 {
5123 MemOp esz;
5124 unsigned idx;
5125
5126 if (!decode_esz_idx(a->imm, &esz, &idx)) {
5127 return false;
5128 }
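    /*
     * SMOV sign-extends, so 8/16-bit elements may go to W or X and 32-bit
     * only to X; UMOV must match sizes exactly: 8/16/32-bit to W, 64-bit to X.
     */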
5129 if (is_signed) {
5130 if (esz == MO_64 || (esz == MO_32 && !a->q)) {
5131 return false;
5132 }
5133 } else {
5134 if (esz == MO_64 ? !a->q : a->q) {
5135 return false;
5136 }
5137 }
5138 if (fp_access_check(s)) {
5139 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
5140 read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
5141 if (is_signed && !a->q) {
5142 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5143 }
5144 }
5145 return true;
5146 }
5147
5148 TRANS(SMOV, do_smov_umov, a, MO_SIGN)
5149 TRANS(UMOV, do_smov_umov, a, 0)
5150
5151 static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
5152 {
5153 MemOp esz;
5154 unsigned idx;
5155
5156 if (!decode_esz_idx(a->imm, &esz, &idx)) {
5157 return false;
5158 }
5159 if (fp_access_check(s)) {
5160 write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
5161 clear_vec_high(s, true, a->rd);
5162 }
5163 return true;
5164 }
5165
5166 static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
5167 {
5168 MemOp esz;
5169 unsigned didx, sidx;
5170
5171 if (!decode_esz_idx(a->di, &esz, &didx)) {
5172 return false;
5173 }
5174 sidx = a->si >> esz;
5175 if (fp_access_check(s)) {
5176 TCGv_i64 tmp = tcg_temp_new_i64();
5177
5178 read_vec_element(s, tmp, a->rn, sidx, esz);
5179 write_vec_element(s, tmp, a->rd, didx, esz);
5180
5181 /* INS is considered a 128-bit write for SVE. */
5182 clear_vec_high(s, true, a->rd);
5183 }
5184 return true;
5185 }
5186
5187 /*
5188 * Advanced SIMD three same
5189 */
5190
5191 typedef struct FPScalar {
5192 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5193 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5194 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
5195 } FPScalar;
5196
5197 static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
5198 const FPScalar *f, int mergereg,
5199 ARMFPStatusFlavour fpsttype)
5200 {
5201 switch (a->esz) {
5202 case MO_64:
5203 if (fp_access_check(s)) {
5204 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5205 TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5206 f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype));
5207 write_fp_dreg_merging(s, a->rd, mergereg, t0);
5208 }
5209 break;
5210 case MO_32:
5211 if (fp_access_check(s)) {
5212 TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5213 TCGv_i32 t1 = read_fp_sreg(s, a->rm);
5214 f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype));
5215 write_fp_sreg_merging(s, a->rd, mergereg, t0);
5216 }
5217 break;
5218 case MO_16:
5219 if (!dc_isar_feature(aa64_fp16, s)) {
5220 return false;
5221 }
5222 if (fp_access_check(s)) {
5223 TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5224 TCGv_i32 t1 = read_fp_hreg(s, a->rm);
5225 f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype));
5226 write_fp_hreg_merging(s, a->rd, mergereg, t0);
5227 }
5228 break;
5229 default:
5230 return false;
5231 }
5232 return true;
5233 }
5234
5235 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
5236 int mergereg)
5237 {
5238 return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
5239 a->esz == MO_16 ?
5240 FPST_A64_F16 : FPST_A64);
5241 }
5242
5243 static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a,
5244 const FPScalar *fnormal, const FPScalar *fah,
5245 int mergereg)
5246 {
5247 return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal,
5248 mergereg, select_ah_fpst(s, a->esz));
5249 }
5250
5251 /* Some insns need to call different helpers when FPCR.AH == 1 */
5252 static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a,
5253 const FPScalar *fnormal,
5254 const FPScalar *fah,
5255 int mergereg)
5256 {
5257 return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg);
5258 }
5259
5260 static const FPScalar f_scalar_fadd = {
5261 gen_helper_vfp_addh,
5262 gen_helper_vfp_adds,
5263 gen_helper_vfp_addd,
5264 };
5265 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn)
5266
5267 static const FPScalar f_scalar_fsub = {
5268 gen_helper_vfp_subh,
5269 gen_helper_vfp_subs,
5270 gen_helper_vfp_subd,
5271 };
5272 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn)
5273
5274 static const FPScalar f_scalar_fdiv = {
5275 gen_helper_vfp_divh,
5276 gen_helper_vfp_divs,
5277 gen_helper_vfp_divd,
5278 };
5279 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn)
5280
5281 static const FPScalar f_scalar_fmul = {
5282 gen_helper_vfp_mulh,
5283 gen_helper_vfp_muls,
5284 gen_helper_vfp_muld,
5285 };
5286 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn)
5287
5288 static const FPScalar f_scalar_fmax = {
5289 gen_helper_vfp_maxh,
5290 gen_helper_vfp_maxs,
5291 gen_helper_vfp_maxd,
5292 };
5293 static const FPScalar f_scalar_fmax_ah = {
5294 gen_helper_vfp_ah_maxh,
5295 gen_helper_vfp_ah_maxs,
5296 gen_helper_vfp_ah_maxd,
5297 };
5298 TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn)
5299
5300 static const FPScalar f_scalar_fmin = {
5301 gen_helper_vfp_minh,
5302 gen_helper_vfp_mins,
5303 gen_helper_vfp_mind,
5304 };
5305 static const FPScalar f_scalar_fmin_ah = {
5306 gen_helper_vfp_ah_minh,
5307 gen_helper_vfp_ah_mins,
5308 gen_helper_vfp_ah_mind,
5309 };
5310 TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn)
5311
5312 static const FPScalar f_scalar_fmaxnm = {
5313 gen_helper_vfp_maxnumh,
5314 gen_helper_vfp_maxnums,
5315 gen_helper_vfp_maxnumd,
5316 };
5317 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn)
5318
5319 static const FPScalar f_scalar_fminnm = {
5320 gen_helper_vfp_minnumh,
5321 gen_helper_vfp_minnums,
5322 gen_helper_vfp_minnumd,
5323 };
5324 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn)
5325
5326 static const FPScalar f_scalar_fmulx = {
5327 gen_helper_advsimd_mulxh,
5328 gen_helper_vfp_mulxs,
5329 gen_helper_vfp_mulxd,
5330 };
5331 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn)
5332
5333 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5334 {
5335 gen_helper_vfp_mulh(d, n, m, s);
5336 gen_vfp_negh(d, d);
5337 }
5338
5339 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5340 {
5341 gen_helper_vfp_muls(d, n, m, s);
5342 gen_vfp_negs(d, d);
5343 }
5344
5345 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5346 {
5347 gen_helper_vfp_muld(d, n, m, s);
5348 gen_vfp_negd(d, d);
5349 }
5350
5351 static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5352 {
5353 gen_helper_vfp_mulh(d, n, m, s);
5354 gen_vfp_ah_negh(d, d);
5355 }
5356
5357 static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5358 {
5359 gen_helper_vfp_muls(d, n, m, s);
5360 gen_vfp_ah_negs(d, d);
5361 }
5362
5363 static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5364 {
5365 gen_helper_vfp_muld(d, n, m, s);
5366 gen_vfp_ah_negd(d, d);
5367 }
5368
5369 static const FPScalar f_scalar_fnmul = {
5370 gen_fnmul_h,
5371 gen_fnmul_s,
5372 gen_fnmul_d,
5373 };
5374 static const FPScalar f_scalar_ah_fnmul = {
5375 gen_fnmul_ah_h,
5376 gen_fnmul_ah_s,
5377 gen_fnmul_ah_d,
5378 };
5379 TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn)
5380
5381 static const FPScalar f_scalar_fcmeq = {
5382 gen_helper_advsimd_ceq_f16,
5383 gen_helper_neon_ceq_f32,
5384 gen_helper_neon_ceq_f64,
5385 };
5386 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm)
5387
5388 static const FPScalar f_scalar_fcmge = {
5389 gen_helper_advsimd_cge_f16,
5390 gen_helper_neon_cge_f32,
5391 gen_helper_neon_cge_f64,
5392 };
5393 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm)
5394
5395 static const FPScalar f_scalar_fcmgt = {
5396 gen_helper_advsimd_cgt_f16,
5397 gen_helper_neon_cgt_f32,
5398 gen_helper_neon_cgt_f64,
5399 };
5400 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm)
5401
5402 static const FPScalar f_scalar_facge = {
5403 gen_helper_advsimd_acge_f16,
5404 gen_helper_neon_acge_f32,
5405 gen_helper_neon_acge_f64,
5406 };
5407 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm)
5408
5409 static const FPScalar f_scalar_facgt = {
5410 gen_helper_advsimd_acgt_f16,
5411 gen_helper_neon_acgt_f32,
5412 gen_helper_neon_acgt_f64,
5413 };
5414 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm)
5415
5416 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5417 {
5418 gen_helper_vfp_subh(d, n, m, s);
5419 gen_vfp_absh(d, d);
5420 }
5421
5422 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5423 {
5424 gen_helper_vfp_subs(d, n, m, s);
5425 gen_vfp_abss(d, d);
5426 }
5427
5428 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5429 {
5430 gen_helper_vfp_subd(d, n, m, s);
5431 gen_vfp_absd(d, d);
5432 }
5433
5434 static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5435 {
5436 gen_helper_vfp_subh(d, n, m, s);
5437 gen_vfp_ah_absh(d, d);
5438 }
5439
5440 static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5441 {
5442 gen_helper_vfp_subs(d, n, m, s);
5443 gen_vfp_ah_abss(d, d);
5444 }
5445
5446 static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5447 {
5448 gen_helper_vfp_subd(d, n, m, s);
5449 gen_vfp_ah_absd(d, d);
5450 }
5451
5452 static const FPScalar f_scalar_fabd = {
5453 gen_fabd_h,
5454 gen_fabd_s,
5455 gen_fabd_d,
5456 };
5457 static const FPScalar f_scalar_ah_fabd = {
5458 gen_fabd_ah_h,
5459 gen_fabd_ah_s,
5460 gen_fabd_ah_d,
5461 };
5462 TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn)
5463
5464 static const FPScalar f_scalar_frecps = {
5465 gen_helper_recpsf_f16,
5466 gen_helper_recpsf_f32,
5467 gen_helper_recpsf_f64,
5468 };
5469 static const FPScalar f_scalar_ah_frecps = {
5470 gen_helper_recpsf_ah_f16,
5471 gen_helper_recpsf_ah_f32,
5472 gen_helper_recpsf_ah_f64,
5473 };
5474 TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a,
5475 &f_scalar_frecps, &f_scalar_ah_frecps, a->rn)
5476
5477 static const FPScalar f_scalar_frsqrts = {
5478 gen_helper_rsqrtsf_f16,
5479 gen_helper_rsqrtsf_f32,
5480 gen_helper_rsqrtsf_f64,
5481 };
5482 static const FPScalar f_scalar_ah_frsqrts = {
5483 gen_helper_rsqrtsf_ah_f16,
5484 gen_helper_rsqrtsf_ah_f32,
5485 gen_helper_rsqrtsf_ah_f64,
5486 };
5487 TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a,
5488 &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn)
5489
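/*
 * Floating-point compare of a scalar against zero.  With @swap the zero
 * becomes the first operand, so FCMLT0/FCMLE0 can reuse the FCMGT/FCMGE
 * helpers.
 */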
5490 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
5491 const FPScalar *f, bool swap)
5492 {
5493 switch (a->esz) {
5494 case MO_64:
5495 if (fp_access_check(s)) {
5496 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5497 TCGv_i64 t1 = tcg_constant_i64(0);
5498 if (swap) {
5499 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64));
5500 } else {
5501 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
5502 }
5503 write_fp_dreg(s, a->rd, t0);
5504 }
5505 break;
5506 case MO_32:
5507 if (fp_access_check(s)) {
5508 TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5509 TCGv_i32 t1 = tcg_constant_i32(0);
5510 if (swap) {
5511 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64));
5512 } else {
5513 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
5514 }
5515 write_fp_sreg(s, a->rd, t0);
5516 }
5517 break;
5518 case MO_16:
5519 if (!dc_isar_feature(aa64_fp16, s)) {
5520 return false;
5521 }
5522 if (fp_access_check(s)) {
5523 TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5524 TCGv_i32 t1 = tcg_constant_i32(0);
5525 if (swap) {
5526 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16));
5527 } else {
5528 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
5529 }
5530 write_fp_sreg(s, a->rd, t0);
5531 }
5532 break;
5533 default:
5534 return false;
5535 }
5536 return true;
5537 }
5538
5539 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false)
5540 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false)
5541 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false)
5542 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true)
5543 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true)
5544
5545 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a,
5546 MemOp sgn_n, MemOp sgn_m,
5547 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp),
5548 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
5549 {
5550 TCGv_i64 t0, t1, t2, qc;
5551 MemOp esz = a->esz;
5552
5553 if (!fp_access_check(s)) {
5554 return true;
5555 }
5556
5557 t0 = tcg_temp_new_i64();
5558 t1 = tcg_temp_new_i64();
5559 t2 = tcg_temp_new_i64();
5560 qc = tcg_temp_new_i64();
5561 read_vec_element(s, t1, a->rn, 0, esz | sgn_n);
5562 read_vec_element(s, t2, a->rm, 0, esz | sgn_m);
5563 tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5564
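/*
 * 64-bit elements have a dedicated helper; the byte/half/word helper
 * computes in 64 bits, so narrow the result back to the element size.
 */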
5565 if (esz == MO_64) {
5566 gen_d(t0, qc, t1, t2);
5567 } else {
5568 gen_bhs(t0, qc, t1, t2, esz);
5569 tcg_gen_ext_i64(t0, t0, esz);
5570 }
5571
5572 write_fp_dreg(s, a->rd, t0);
5573 tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5574 return true;
5575 }
5576
5577 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d)
5578 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d)
5579 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d)
5580 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d)
5581 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d)
5582 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d)
5583
5584 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a,
5585 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
5586 {
5587 if (fp_access_check(s)) {
5588 TCGv_i64 t0 = tcg_temp_new_i64();
5589 TCGv_i64 t1 = tcg_temp_new_i64();
5590
5591 read_vec_element(s, t0, a->rn, 0, MO_64);
5592 read_vec_element(s, t1, a->rm, 0, MO_64);
5593 fn(t0, t0, t1);
5594 write_fp_dreg(s, a->rd, t0);
5595 }
5596 return true;
5597 }
5598
5599 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64)
5600 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64)
5601 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64)
5602 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64)
5603 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64)
5604 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64)
5605
5606 typedef struct ENVScalar2 {
5607 NeonGenTwoOpEnvFn *gen_bhs[3];
5608 NeonGenTwo64OpEnvFn *gen_d;
5609 } ENVScalar2;
5610
5611 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f)
5612 {
5613 if (!fp_access_check(s)) {
5614 return true;
5615 }
5616 if (a->esz == MO_64) {
5617 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5618 TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5619 f->gen_d(t0, tcg_env, t0, t1);
5620 write_fp_dreg(s, a->rd, t0);
5621 } else {
5622 TCGv_i32 t0 = tcg_temp_new_i32();
5623 TCGv_i32 t1 = tcg_temp_new_i32();
5624
5625 read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5626 read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5627 f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
5628 write_fp_sreg(s, a->rd, t0);
5629 }
5630 return true;
5631 }
5632
5633 static const ENVScalar2 f_scalar_sqshl = {
5634 { gen_helper_neon_qshl_s8,
5635 gen_helper_neon_qshl_s16,
5636 gen_helper_neon_qshl_s32 },
5637 gen_helper_neon_qshl_s64,
5638 };
5639 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl)
5640
5641 static const ENVScalar2 f_scalar_uqshl = {
5642 { gen_helper_neon_qshl_u8,
5643 gen_helper_neon_qshl_u16,
5644 gen_helper_neon_qshl_u32 },
5645 gen_helper_neon_qshl_u64,
5646 };
5647 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl)
5648
5649 static const ENVScalar2 f_scalar_sqrshl = {
5650 { gen_helper_neon_qrshl_s8,
5651 gen_helper_neon_qrshl_s16,
5652 gen_helper_neon_qrshl_s32 },
5653 gen_helper_neon_qrshl_s64,
5654 };
5655 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl)
5656
5657 static const ENVScalar2 f_scalar_uqrshl = {
5658 { gen_helper_neon_qrshl_u8,
5659 gen_helper_neon_qrshl_u16,
5660 gen_helper_neon_qrshl_u32 },
5661 gen_helper_neon_qrshl_u64,
5662 };
5663 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl)
5664
5665 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a,
5666 const ENVScalar2 *f)
5667 {
5668 if (a->esz == MO_16 || a->esz == MO_32) {
5669 return do_env_scalar2(s, a, f);
5670 }
5671 return false;
5672 }
5673
5674 static const ENVScalar2 f_scalar_sqdmulh = {
5675 { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 }
5676 };
5677 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh)
5678
5679 static const ENVScalar2 f_scalar_sqrdmulh = {
5680 { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 }
5681 };
5682 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
5683
5684 typedef struct ENVScalar3 {
5685 NeonGenThreeOpEnvFn *gen_hs[2];
5686 } ENVScalar3;
5687
5688 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
5689 const ENVScalar3 *f)
5690 {
5691 TCGv_i32 t0, t1, t2;
5692
5693 if (a->esz != MO_16 && a->esz != MO_32) {
5694 return false;
5695 }
5696 if (!fp_access_check(s)) {
5697 return true;
5698 }
5699
5700 t0 = tcg_temp_new_i32();
5701 t1 = tcg_temp_new_i32();
5702 t2 = tcg_temp_new_i32();
5703 read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5704 read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5705 read_vec_element_i32(s, t2, a->rd, 0, a->esz);
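/* gen_hs[] holds the 16-bit and 32-bit helpers, indexed by esz - MO_16. */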
5706 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
5707 write_fp_sreg(s, a->rd, t0);
5708 return true;
5709 }
5710
5711 static const ENVScalar3 f_scalar_sqrdmlah = {
5712 { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
5713 };
5714 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
5715
5716 static const ENVScalar3 f_scalar_sqrdmlsh = {
5717 { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
5718 };
5719 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
5720
5721 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
5722 {
5723 if (fp_access_check(s)) {
5724 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5725 TCGv_i64 t1 = read_fp_dreg(s, a->rm);
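/* negsetcond yields 0 or -1, i.e. the all-zeros/all-ones SIMD result. */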
5726 tcg_gen_negsetcond_i64(cond, t0, t0, t1);
5727 write_fp_dreg(s, a->rd, t0);
5728 }
5729 return true;
5730 }
5731
5732 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT)
5733 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU)
5734 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE)
5735 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
5736 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
5737 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
5738
5739 static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a,
5740 int data,
5741 gen_helper_gvec_3_ptr * const fns[3],
5742 ARMFPStatusFlavour fpsttype)
5743 {
5744 MemOp esz = a->esz;
5745 int check = fp_access_check_vector_hsd(s, a->q, esz);
5746
5747 if (check <= 0) {
5748 return check == 0;
5749 }
5750
5751 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype,
5752 data, fns[esz - 1]);
5753 return true;
5754 }
5755
5756 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
5757 gen_helper_gvec_3_ptr * const fns[3])
5758 {
5759 return do_fp3_vector_with_fpsttype(s, a, data, fns,
5760 a->esz == MO_16 ?
5761 FPST_A64_F16 : FPST_A64);
5762 }
5763
5764 static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data,
5765 gen_helper_gvec_3_ptr * const fnormal[3],
5766 gen_helper_gvec_3_ptr * const fah[3])
5767 {
5768 return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal);
5769 }
5770
5771 static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data,
5772 gen_helper_gvec_3_ptr * const fnormal[3],
5773 gen_helper_gvec_3_ptr * const fah[3])
5774 {
5775 return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal,
5776 select_ah_fpst(s, a->esz));
5777 }
5778
5779 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
5780 gen_helper_gvec_fadd_h,
5781 gen_helper_gvec_fadd_s,
5782 gen_helper_gvec_fadd_d,
5783 };
5784 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)
5785
5786 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
5787 gen_helper_gvec_fsub_h,
5788 gen_helper_gvec_fsub_s,
5789 gen_helper_gvec_fsub_d,
5790 };
5791 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)
5792
5793 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
5794 gen_helper_gvec_fdiv_h,
5795 gen_helper_gvec_fdiv_s,
5796 gen_helper_gvec_fdiv_d,
5797 };
5798 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)
5799
5800 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
5801 gen_helper_gvec_fmul_h,
5802 gen_helper_gvec_fmul_s,
5803 gen_helper_gvec_fmul_d,
5804 };
5805 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)
5806
5807 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
5808 gen_helper_gvec_fmax_h,
5809 gen_helper_gvec_fmax_s,
5810 gen_helper_gvec_fmax_d,
5811 };
5812 static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = {
5813 gen_helper_gvec_ah_fmax_h,
5814 gen_helper_gvec_ah_fmax_s,
5815 gen_helper_gvec_ah_fmax_d,
5816 };
5817 TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah)
5818
5819 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
5820 gen_helper_gvec_fmin_h,
5821 gen_helper_gvec_fmin_s,
5822 gen_helper_gvec_fmin_d,
5823 };
5824 static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = {
5825 gen_helper_gvec_ah_fmin_h,
5826 gen_helper_gvec_ah_fmin_s,
5827 gen_helper_gvec_ah_fmin_d,
5828 };
5829 TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah)
5830
5831 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
5832 gen_helper_gvec_fmaxnum_h,
5833 gen_helper_gvec_fmaxnum_s,
5834 gen_helper_gvec_fmaxnum_d,
5835 };
5836 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)
5837
5838 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
5839 gen_helper_gvec_fminnum_h,
5840 gen_helper_gvec_fminnum_s,
5841 gen_helper_gvec_fminnum_d,
5842 };
5843 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)
5844
5845 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
5846 gen_helper_gvec_fmulx_h,
5847 gen_helper_gvec_fmulx_s,
5848 gen_helper_gvec_fmulx_d,
5849 };
5850 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)
5851
5852 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
5853 gen_helper_gvec_vfma_h,
5854 gen_helper_gvec_vfma_s,
5855 gen_helper_gvec_vfma_d,
5856 };
5857 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)
5858
5859 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
5860 gen_helper_gvec_vfms_h,
5861 gen_helper_gvec_vfms_s,
5862 gen_helper_gvec_vfms_d,
5863 };
5864 static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = {
5865 gen_helper_gvec_ah_vfms_h,
5866 gen_helper_gvec_ah_vfms_s,
5867 gen_helper_gvec_ah_vfms_d,
5868 };
5869 TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah)
5870
5871 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
5872 gen_helper_gvec_fceq_h,
5873 gen_helper_gvec_fceq_s,
5874 gen_helper_gvec_fceq_d,
5875 };
5876 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)
5877
5878 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
5879 gen_helper_gvec_fcge_h,
5880 gen_helper_gvec_fcge_s,
5881 gen_helper_gvec_fcge_d,
5882 };
5883 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)
5884
5885 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
5886 gen_helper_gvec_fcgt_h,
5887 gen_helper_gvec_fcgt_s,
5888 gen_helper_gvec_fcgt_d,
5889 };
5890 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)
5891
5892 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
5893 gen_helper_gvec_facge_h,
5894 gen_helper_gvec_facge_s,
5895 gen_helper_gvec_facge_d,
5896 };
5897 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)
5898
5899 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
5900 gen_helper_gvec_facgt_h,
5901 gen_helper_gvec_facgt_s,
5902 gen_helper_gvec_facgt_d,
5903 };
5904 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt)
5905
5906 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
5907 gen_helper_gvec_fabd_h,
5908 gen_helper_gvec_fabd_s,
5909 gen_helper_gvec_fabd_d,
5910 };
5911 static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = {
5912 gen_helper_gvec_ah_fabd_h,
5913 gen_helper_gvec_ah_fabd_s,
5914 gen_helper_gvec_ah_fabd_d,
5915 };
5916 TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd)
5917
5918 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
5919 gen_helper_gvec_recps_h,
5920 gen_helper_gvec_recps_s,
5921 gen_helper_gvec_recps_d,
5922 };
5923 static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = {
5924 gen_helper_gvec_ah_recps_h,
5925 gen_helper_gvec_ah_recps_s,
5926 gen_helper_gvec_ah_recps_d,
5927 };
5928 TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps)
5929
5930 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
5931 gen_helper_gvec_rsqrts_h,
5932 gen_helper_gvec_rsqrts_s,
5933 gen_helper_gvec_rsqrts_d,
5934 };
5935 static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = {
5936 gen_helper_gvec_ah_rsqrts_h,
5937 gen_helper_gvec_ah_rsqrts_s,
5938 gen_helper_gvec_ah_rsqrts_d,
5939 };
5940 TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts)
5941
5942 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
5943 gen_helper_gvec_faddp_h,
5944 gen_helper_gvec_faddp_s,
5945 gen_helper_gvec_faddp_d,
5946 };
5947 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp)
5948
5949 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
5950 gen_helper_gvec_fmaxp_h,
5951 gen_helper_gvec_fmaxp_s,
5952 gen_helper_gvec_fmaxp_d,
5953 };
5954 static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = {
5955 gen_helper_gvec_ah_fmaxp_h,
5956 gen_helper_gvec_ah_fmaxp_s,
5957 gen_helper_gvec_ah_fmaxp_d,
5958 };
5959 TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp)
5960
5961 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
5962 gen_helper_gvec_fminp_h,
5963 gen_helper_gvec_fminp_s,
5964 gen_helper_gvec_fminp_d,
5965 };
5966 static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = {
5967 gen_helper_gvec_ah_fminp_h,
5968 gen_helper_gvec_ah_fminp_s,
5969 gen_helper_gvec_ah_fminp_d,
5970 };
5971 TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp)
5972
5973 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
5974 gen_helper_gvec_fmaxnump_h,
5975 gen_helper_gvec_fmaxnump_s,
5976 gen_helper_gvec_fmaxnump_d,
5977 };
5978 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp)
5979
5980 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
5981 gen_helper_gvec_fminnump_h,
5982 gen_helper_gvec_fminnump_s,
5983 gen_helper_gvec_fminnump_d,
5984 };
5985 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
5986
5987 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
5988 {
5989 if (fp_access_check(s)) {
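/* Encode is_s in bit 0 and is_2 in bit 1 of the helper's data argument. */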
5990 int data = (is_2 << 1) | is_s;
5991 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
5992 vec_full_reg_offset(s, a->rn),
5993 vec_full_reg_offset(s, a->rm), tcg_env,
5994 a->q ? 16 : 8, vec_full_reg_size(s),
5995 data, gen_helper_gvec_fmlal_a64);
5996 }
5997 return true;
5998 }
5999
6000 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
6001 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
6002 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
6003 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
6004
6005 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
6006 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
6007 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
6008 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
6009 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)
6010
6011 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
6012 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
6013 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
6014 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
6015 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
6016
6017 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
6018 {
6019 if (fp_access_check(s)) {
6020 gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
6021 }
6022 return true;
6023 }
6024
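/*
 * do_bitsel arguments are: destination, bit selector, value taken where
 * the selector bit is 1, value taken where it is 0.
 */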
6025 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
6026 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
6027 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
6028
6029 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc)
6030 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc)
6031 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc)
6032 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc)
6033 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc)
6034 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc)
6035
6036 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl)
6037 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl)
6038 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl)
6039 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl)
6040 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl)
6041 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl)
6042 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl)
6043 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl)
6044
6045 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add)
6046 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub)
6047 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd)
6048 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd)
6049 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub)
6050 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub)
6051 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd)
6052 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd)
6053 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax)
6054 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax)
6055 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin)
6056 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin)
6057 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba)
6058 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba)
6059 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd)
6060 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd)
6061 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul)
6062 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b)
6063 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla)
6064 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls)
6065
6066 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond)
6067 {
6068 if (a->esz == MO_64 && !a->q) {
6069 return false;
6070 }
6071 if (fp_access_check(s)) {
6072 tcg_gen_gvec_cmp(cond, a->esz,
6073 vec_full_reg_offset(s, a->rd),
6074 vec_full_reg_offset(s, a->rn),
6075 vec_full_reg_offset(s, a->rm),
6076 a->q ? 16 : 8, vec_full_reg_size(s));
6077 }
6078 return true;
6079 }
6080
6081 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT)
6082 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU)
6083 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE)
6084 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
6085 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
6086 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
6087
6088 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
6089 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
6090 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
6091 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
6092
6093 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
6094 gen_helper_gvec_4 *fn)
6095 {
6096 if (fp_access_check(s)) {
6097 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
6098 }
6099 return true;
6100 }
6101
6102 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
6103 gen_helper_gvec_4_ptr *fn)
6104 {
6105 if (fp_access_check(s)) {
6106 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
6107 }
6108 return true;
6109 }
6110
6111 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
6112 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
6113 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
6114 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
6115 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
6116 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
6117 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
6118 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
6119
6120 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
6121 {
6122 if (!dc_isar_feature(aa64_bf16, s)) {
6123 return false;
6124 }
6125 if (fp_access_check(s)) {
6126 /* Q bit selects BFMLALB vs BFMLALT. */
6127 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
6128 s->fpcr_ah ? FPST_AH : FPST_A64, a->q,
6129 gen_helper_gvec_bfmlal);
6130 }
6131 return true;
6132 }
6133
6134 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
6135 gen_helper_gvec_fcaddh,
6136 gen_helper_gvec_fcadds,
6137 gen_helper_gvec_fcaddd,
6138 };
6139 /*
6140 * Encode FPCR.AH into the data so the helper knows whether the
6141 * negations it does should avoid flipping the sign bit on a NaN
6142 */
6143 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1),
6144 f_vector_fcadd)
6145 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1),
6146 f_vector_fcadd)
6147
6148 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
6149 {
6150 static gen_helper_gvec_4_ptr * const fn[] = {
6151 [MO_16] = gen_helper_gvec_fcmlah,
6152 [MO_32] = gen_helper_gvec_fcmlas,
6153 [MO_64] = gen_helper_gvec_fcmlad,
6154 };
6155 int check;
6156
6157 if (!dc_isar_feature(aa64_fcma, s)) {
6158 return false;
6159 }
6160
6161 check = fp_access_check_vector_hsd(s, a->q, a->esz);
6162 if (check <= 0) {
6163 return check == 0;
6164 }
6165
6166 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6167 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6168 a->rot | (s->fpcr_ah << 2), fn[a->esz]);
6169 return true;
6170 }
6171
6172 /*
6173 * Widening vector x vector/indexed.
6174 *
6175 * These read from the top or bottom half of a 128-bit vector.
6176 * After widening, optionally accumulate with a 128-bit vector.
6177 * Implement these inline, as the number of elements is limited
6178 * and the related SVE and SME operations on larger vectors use
6179 * even/odd elements instead of top/bottom half.
6180 *
6181 * If idx >= 0, operand 2 is indexed, otherwise vector.
6182 * If acc, operand 0 is loaded with rd.
6183 */
6184
6185 /* For low half, iterating up. */
6186 static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
6187 int rd, int rn, int rm, int idx,
6188 NeonGenTwo64OpFn *fn, bool acc)
6189 {
6190 TCGv_i64 tcg_op0 = tcg_temp_new_i64();
6191 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6192 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6193 MemOp esz = memop & MO_SIZE;
6194 int half = 8 >> esz;
6195 int top_swap, top_half;
6196
6197 /* There are no 64x64->128 bit operations. */
6198 if (esz >= MO_64) {
6199 return false;
6200 }
6201 if (!fp_access_check(s)) {
6202 return true;
6203 }
6204
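/* An indexed second operand is the same for every element; read it once. */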
6205 if (idx >= 0) {
6206 read_vec_element(s, tcg_op2, rm, idx, memop);
6207 }
6208
6209 /*
6210 * For top half inputs, iterate forward; backward for bottom half.
6211 * This means the store to the destination will not occur until
6212 * overlapping inputs are consumed.
6213 * Use top_swap to conditionally invert the forward iteration index.
6214 */
6215 top_swap = top ? 0 : half - 1;
6216 top_half = top ? half : 0;
6217
6218 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6219 int elt = elt_fwd ^ top_swap;
6220
6221 read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
6222 if (idx < 0) {
6223 read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
6224 }
6225 if (acc) {
6226 read_vec_element(s, tcg_op0, rd, elt, memop + 1);
6227 }
6228 fn(tcg_op0, tcg_op1, tcg_op2);
6229 write_vec_element(s, tcg_op0, rd, elt, esz + 1);
6230 }
6231 clear_vec_high(s, 1, rd);
6232 return true;
6233 }
6234
6235 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6236 {
6237 TCGv_i64 t = tcg_temp_new_i64();
6238 tcg_gen_mul_i64(t, n, m);
6239 tcg_gen_add_i64(d, d, t);
6240 }
6241
6242 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6243 {
6244 TCGv_i64 t = tcg_temp_new_i64();
6245 tcg_gen_mul_i64(t, n, m);
6246 tcg_gen_sub_i64(d, d, t);
6247 }
6248
6249 TRANS(SMULL_v, do_3op_widening,
6250 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6251 tcg_gen_mul_i64, false)
6252 TRANS(UMULL_v, do_3op_widening,
6253 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6254 tcg_gen_mul_i64, false)
6255 TRANS(SMLAL_v, do_3op_widening,
6256 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6257 gen_muladd_i64, true)
6258 TRANS(UMLAL_v, do_3op_widening,
6259 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6260 gen_muladd_i64, true)
6261 TRANS(SMLSL_v, do_3op_widening,
6262 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6263 gen_mulsub_i64, true)
6264 TRANS(UMLSL_v, do_3op_widening,
6265 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6266 gen_mulsub_i64, true)
6267
6268 TRANS(SMULL_vi, do_3op_widening,
6269 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6270 tcg_gen_mul_i64, false)
6271 TRANS(UMULL_vi, do_3op_widening,
6272 a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6273 tcg_gen_mul_i64, false)
6274 TRANS(SMLAL_vi, do_3op_widening,
6275 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6276 gen_muladd_i64, true)
6277 TRANS(UMLAL_vi, do_3op_widening,
6278 a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6279 gen_muladd_i64, true)
6280 TRANS(SMLSL_vi, do_3op_widening,
6281 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6282 gen_mulsub_i64, true)
6283 TRANS(UMLSL_vi, do_3op_widening,
6284 a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6285 gen_mulsub_i64, true)
6286
6287 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6288 {
6289 TCGv_i64 t1 = tcg_temp_new_i64();
6290 TCGv_i64 t2 = tcg_temp_new_i64();
6291
6292 tcg_gen_sub_i64(t1, n, m);
6293 tcg_gen_sub_i64(t2, m, n);
6294 tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
6295 }
6296
6297 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6298 {
6299 TCGv_i64 t1 = tcg_temp_new_i64();
6300 TCGv_i64 t2 = tcg_temp_new_i64();
6301
6302 tcg_gen_sub_i64(t1, n, m);
6303 tcg_gen_sub_i64(t2, m, n);
6304 tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
6305 }
6306
6307 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6308 {
6309 TCGv_i64 t = tcg_temp_new_i64();
6310 gen_sabd_i64(t, n, m);
6311 tcg_gen_add_i64(d, d, t);
6312 }
6313
6314 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6315 {
6316 TCGv_i64 t = tcg_temp_new_i64();
6317 gen_uabd_i64(t, n, m);
6318 tcg_gen_add_i64(d, d, t);
6319 }
6320
6321 TRANS(SADDL_v, do_3op_widening,
6322 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6323 tcg_gen_add_i64, false)
6324 TRANS(UADDL_v, do_3op_widening,
6325 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6326 tcg_gen_add_i64, false)
6327 TRANS(SSUBL_v, do_3op_widening,
6328 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6329 tcg_gen_sub_i64, false)
6330 TRANS(USUBL_v, do_3op_widening,
6331 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6332 tcg_gen_sub_i64, false)
6333 TRANS(SABDL_v, do_3op_widening,
6334 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6335 gen_sabd_i64, false)
6336 TRANS(UABDL_v, do_3op_widening,
6337 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6338 gen_uabd_i64, false)
6339 TRANS(SABAL_v, do_3op_widening,
6340 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6341 gen_saba_i64, true)
6342 TRANS(UABAL_v, do_3op_widening,
6343 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6344 gen_uaba_i64, true)
6345
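/*
 * The doubling in SQDMULL is implemented as a saturating self-addition
 * of the product, so the doubling step itself can saturate.
 */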
6346 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6347 {
6348 tcg_gen_mul_i64(d, n, m);
6349 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
6350 }
6351
6352 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6353 {
6354 tcg_gen_mul_i64(d, n, m);
6355 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
6356 }
6357
6358 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6359 {
6360 TCGv_i64 t = tcg_temp_new_i64();
6361
6362 tcg_gen_mul_i64(t, n, m);
6363 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6364 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6365 }
6366
6367 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6368 {
6369 TCGv_i64 t = tcg_temp_new_i64();
6370
6371 tcg_gen_mul_i64(t, n, m);
6372 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6373 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6374 }
6375
6376 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6377 {
6378 TCGv_i64 t = tcg_temp_new_i64();
6379
6380 tcg_gen_mul_i64(t, n, m);
6381 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6382 tcg_gen_neg_i64(t, t);
6383 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6384 }
6385
6386 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6387 {
6388 TCGv_i64 t = tcg_temp_new_i64();
6389
6390 tcg_gen_mul_i64(t, n, m);
6391 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6392 tcg_gen_neg_i64(t, t);
6393 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6394 }
6395
6396 TRANS(SQDMULL_v, do_3op_widening,
6397 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6398 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6399 TRANS(SQDMLAL_v, do_3op_widening,
6400 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6401 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6402 TRANS(SQDMLSL_v, do_3op_widening,
6403 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6404 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6405
6406 TRANS(SQDMULL_vi, do_3op_widening,
6407 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6408 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6409 TRANS(SQDMLAL_vi, do_3op_widening,
6410 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6411 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6412 TRANS(SQDMLSL_vi, do_3op_widening,
6413 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6414 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6415
6416 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
6417 MemOp sign, bool sub)
6418 {
6419 TCGv_i64 tcg_op0, tcg_op1;
6420 MemOp esz = a->esz;
6421 int half = 8 >> esz;
6422 bool top = a->q;
6423 int top_swap = top ? 0 : half - 1;
6424 int top_half = top ? half : 0;
6425
6426 /* There are no 64x64->128 bit operations. */
6427 if (esz >= MO_64) {
6428 return false;
6429 }
6430 if (!fp_access_check(s)) {
6431 return true;
6432 }
6433 tcg_op0 = tcg_temp_new_i64();
6434 tcg_op1 = tcg_temp_new_i64();
6435
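/* Operand rn is already wide; only rm is widened from the selected half. */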
6436 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6437 int elt = elt_fwd ^ top_swap;
6438
6439 read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
6440 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6441 if (sub) {
6442 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6443 } else {
6444 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6445 }
6446 write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
6447 }
6448 clear_vec_high(s, 1, a->rd);
6449 return true;
6450 }
6451
6452 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
6453 TRANS(UADDW, do_addsub_wide, a, 0, false)
6454 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
6455 TRANS(USUBW, do_addsub_wide, a, 0, true)
6456
6457 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
6458 bool sub, bool round)
6459 {
6460 TCGv_i64 tcg_op0, tcg_op1;
6461 MemOp esz = a->esz;
6462 int half = 8 >> esz;
6463 bool top = a->q;
6464 int ebits = 8 << esz;
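/* Rounding constant: a 1 in the most significant bit to be discarded. */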
6465 uint64_t rbit = 1ull << (ebits - 1);
6466 int top_swap, top_half;
6467
6468 /* There are no 128x128->64 bit operations. */
6469 if (esz >= MO_64) {
6470 return false;
6471 }
6472 if (!fp_access_check(s)) {
6473 return true;
6474 }
6475 tcg_op0 = tcg_temp_new_i64();
6476 tcg_op1 = tcg_temp_new_i64();
6477
6478 /*
6479 * For top half inputs, iterate backward; forward for bottom half.
6480 * This means the store to the destination will not occur until
6481 * overlapping inputs are consumed.
6482 */
6483 top_swap = top ? half - 1 : 0;
6484 top_half = top ? half : 0;
6485
6486 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6487 int elt = elt_fwd ^ top_swap;
6488
6489 read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
6490 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6491 if (sub) {
6492 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6493 } else {
6494 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6495 }
6496 if (round) {
6497 tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
6498 }
6499 tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
6500 write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
6501 }
6502 clear_vec_high(s, top, a->rd);
6503 return true;
6504 }
6505
6506 TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
6507 TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
6508 TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
6509 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
6510
6511 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
6512 {
6513 if (fp_access_check(s)) {
6514 /* The Q field specifies lo/hi half input for these insns. */
6515 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
6516 }
6517 return true;
6518 }
6519
6520 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
6521 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
6522
6523 /*
6524 * Advanced SIMD scalar/vector x indexed element
6525 */
6526
6527 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
6528 {
6529 switch (a->esz) {
6530 case MO_64:
6531 if (fp_access_check(s)) {
6532 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
6533 TCGv_i64 t1 = tcg_temp_new_i64();
6534
6535 read_vec_element(s, t1, a->rm, a->idx, MO_64);
6536 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6537 write_fp_dreg_merging(s, a->rd, a->rn, t0);
6538 }
6539 break;
6540 case MO_32:
6541 if (fp_access_check(s)) {
6542 TCGv_i32 t0 = read_fp_sreg(s, a->rn);
6543 TCGv_i32 t1 = tcg_temp_new_i32();
6544
6545 read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
6546 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6547 write_fp_sreg_merging(s, a->rd, a->rn, t0);
6548 }
6549 break;
6550 case MO_16:
6551 if (!dc_isar_feature(aa64_fp16, s)) {
6552 return false;
6553 }
6554 if (fp_access_check(s)) {
6555 TCGv_i32 t0 = read_fp_hreg(s, a->rn);
6556 TCGv_i32 t1 = tcg_temp_new_i32();
6557
6558 read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
6559 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6560 write_fp_hreg_merging(s, a->rd, a->rn, t0);
6561 }
6562 break;
6563 default:
6564 g_assert_not_reached();
6565 }
6566 return true;
6567 }
6568
6569 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
6570 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
6571
6572 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
6573 {
6574 switch (a->esz) {
6575 case MO_64:
6576 if (fp_access_check(s)) {
6577 TCGv_i64 t0 = read_fp_dreg(s, a->rd);
6578 TCGv_i64 t1 = read_fp_dreg(s, a->rn);
6579 TCGv_i64 t2 = tcg_temp_new_i64();
6580
6581 read_vec_element(s, t2, a->rm, a->idx, MO_64);
6582 if (neg) {
6583 gen_vfp_maybe_ah_negd(s, t1, t1);
6584 }
6585 gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6586 write_fp_dreg_merging(s, a->rd, a->rd, t0);
6587 }
6588 break;
6589 case MO_32:
6590 if (fp_access_check(s)) {
6591 TCGv_i32 t0 = read_fp_sreg(s, a->rd);
6592 TCGv_i32 t1 = read_fp_sreg(s, a->rn);
6593 TCGv_i32 t2 = tcg_temp_new_i32();
6594
6595 read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
6596 if (neg) {
6597 gen_vfp_maybe_ah_negs(s, t1, t1);
6598 }
6599 gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6600 write_fp_sreg_merging(s, a->rd, a->rd, t0);
6601 }
6602 break;
6603 case MO_16:
6604 if (!dc_isar_feature(aa64_fp16, s)) {
6605 return false;
6606 }
6607 if (fp_access_check(s)) {
6608 TCGv_i32 t0 = read_fp_hreg(s, a->rd);
6609 TCGv_i32 t1 = read_fp_hreg(s, a->rn);
6610 TCGv_i32 t2 = tcg_temp_new_i32();
6611
6612 read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
6613 if (neg) {
6614 gen_vfp_maybe_ah_negh(s, t1, t1);
6615 }
6616 gen_helper_advsimd_muladdh(t0, t1, t2, t0,
6617 fpstatus_ptr(FPST_A64_F16));
6618 write_fp_hreg_merging(s, a->rd, a->rd, t0);
6619 }
6620 break;
6621 default:
6622 g_assert_not_reached();
6623 }
6624 return true;
6625 }
6626
6627 TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
6628 TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
6629
6630 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
6631 const ENVScalar2 *f)
6632 {
6633 if (a->esz < MO_16 || a->esz > MO_32) {
6634 return false;
6635 }
6636 if (fp_access_check(s)) {
6637 TCGv_i32 t0 = tcg_temp_new_i32();
6638 TCGv_i32 t1 = tcg_temp_new_i32();
6639
6640 read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6641 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6642 f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
6643 write_fp_sreg(s, a->rd, t0);
6644 }
6645 return true;
6646 }
6647
6648 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
6649 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
6650
6651 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
6652 const ENVScalar3 *f)
6653 {
6654 if (a->esz < MO_16 || a->esz > MO_32) {
6655 return false;
6656 }
6657 if (fp_access_check(s)) {
6658 TCGv_i32 t0 = tcg_temp_new_i32();
6659 TCGv_i32 t1 = tcg_temp_new_i32();
6660 TCGv_i32 t2 = tcg_temp_new_i32();
6661
6662 read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6663 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6664 read_vec_element_i32(s, t2, a->rd, 0, a->esz);
6665 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
6666 write_fp_sreg(s, a->rd, t0);
6667 }
6668 return true;
6669 }
6670
6671 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
6672 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
6673
6674 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
6675 NeonGenTwo64OpFn *fn, bool acc)
6676 {
6677 if (fp_access_check(s)) {
6678 TCGv_i64 t0 = tcg_temp_new_i64();
6679 TCGv_i64 t1 = tcg_temp_new_i64();
6680 TCGv_i64 t2 = tcg_temp_new_i64();
6681
6682 if (acc) {
6683 read_vec_element(s, t0, a->rd, 0, a->esz + 1);
6684 }
6685 read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
6686 read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
6687 fn(t0, t1, t2);
6688
6689 /* Clear the whole register first, then store scalar. */
6690 clear_vec(s, a->rd);
6691 write_vec_element(s, t0, a->rd, 0, a->esz + 1);
6692 }
6693 return true;
6694 }
6695
6696 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
6697 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6698 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
6699 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6700 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
6701 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6702
6703 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6704 gen_helper_gvec_3_ptr * const fns[3])
6705 {
6706 MemOp esz = a->esz;
6707 int check = fp_access_check_vector_hsd(s, a->q, esz);
6708
6709 if (check <= 0) {
6710 return check == 0;
6711 }
6712
6713 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
6714 esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6715 a->idx, fns[esz - 1]);
6716 return true;
6717 }
6718
6719 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
6720 gen_helper_gvec_fmul_idx_h,
6721 gen_helper_gvec_fmul_idx_s,
6722 gen_helper_gvec_fmul_idx_d,
6723 };
6724 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
6725
6726 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
6727 gen_helper_gvec_fmulx_idx_h,
6728 gen_helper_gvec_fmulx_idx_s,
6729 gen_helper_gvec_fmulx_idx_d,
6730 };
6731 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
6732
6733 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
6734 {
6735 static gen_helper_gvec_4_ptr * const fns[3][3] = {
6736 { gen_helper_gvec_fmla_idx_h,
6737 gen_helper_gvec_fmla_idx_s,
6738 gen_helper_gvec_fmla_idx_d },
6739 { gen_helper_gvec_fmls_idx_h,
6740 gen_helper_gvec_fmls_idx_s,
6741 gen_helper_gvec_fmls_idx_d },
6742 { gen_helper_gvec_ah_fmls_idx_h,
6743 gen_helper_gvec_ah_fmls_idx_s,
6744 gen_helper_gvec_ah_fmls_idx_d },
6745 };
6746 MemOp esz = a->esz;
6747 int check = fp_access_check_vector_hsd(s, a->q, esz);
6748
6749 if (check <= 0) {
6750 return check == 0;
6751 }
6752
6753 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6754 esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6755 a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]);
6756 return true;
6757 }
6758
6759 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
6760 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
6761
6762 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
6763 {
6764 if (fp_access_check(s)) {
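/* Pack is_s, is_2 and the element index into the helper's data argument. */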
6765 int data = (a->idx << 2) | (is_2 << 1) | is_s;
6766 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
6767 vec_full_reg_offset(s, a->rn),
6768 vec_full_reg_offset(s, a->rm), tcg_env,
6769 a->q ? 16 : 8, vec_full_reg_size(s),
6770 data, gen_helper_gvec_fmlal_idx_a64);
6771 }
6772 return true;
6773 }
6774
6775 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
6776 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
6777 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
6778 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
6779
6780 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6781 gen_helper_gvec_3 * const fns[2])
6782 {
6783 assert(a->esz == MO_16 || a->esz == MO_32);
6784 if (fp_access_check(s)) {
6785 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]);
6786 }
6787 return true;
6788 }
6789
6790 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = {
6791 gen_helper_gvec_mul_idx_h,
6792 gen_helper_gvec_mul_idx_s,
6793 };
6794 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul)
6795
6796 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub)
6797 {
6798 static gen_helper_gvec_4 * const fns[2][2] = {
6799 { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h },
6800 { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s },
6801 };
6802
6803 assert(a->esz == MO_16 || a->esz == MO_32);
6804 if (fp_access_check(s)) {
6805 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd,
6806 a->idx, fns[a->esz - 1][sub]);
6807 }
6808 return true;
6809 }
6810
6811 TRANS(MLA_vi, do_mla_vector_idx, a, false)
6812 TRANS(MLS_vi, do_mla_vector_idx, a, true)
6813
6814 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a,
6815 gen_helper_gvec_4 * const fns[2])
6816 {
6817 assert(a->esz == MO_16 || a->esz == MO_32);
6818 if (fp_access_check(s)) {
6819 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
6820 vec_full_reg_offset(s, a->rn),
6821 vec_full_reg_offset(s, a->rm),
6822 offsetof(CPUARMState, vfp.qc),
6823 a->q ? 16 : 8, vec_full_reg_size(s),
6824 a->idx, fns[a->esz - 1]);
6825 }
6826 return true;
6827 }
6828
6829 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = {
6830 gen_helper_neon_sqdmulh_idx_h,
6831 gen_helper_neon_sqdmulh_idx_s,
6832 };
6833 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh)
6834
6835 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
6836 gen_helper_neon_sqrdmulh_idx_h,
6837 gen_helper_neon_sqrdmulh_idx_s,
6838 };
6839 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
6840
6841 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = {
6842 gen_helper_neon_sqrdmlah_idx_h,
6843 gen_helper_neon_sqrdmlah_idx_s,
6844 };
6845 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6846 f_vector_idx_sqrdmlah)
6847
6848 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = {
6849 gen_helper_neon_sqrdmlsh_idx_h,
6850 gen_helper_neon_sqrdmlsh_idx_s,
6851 };
6852 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6853 f_vector_idx_sqrdmlsh)
6854
6855 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
6856 gen_helper_gvec_4 *fn)
6857 {
6858 if (fp_access_check(s)) {
6859 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6860 }
6861 return true;
6862 }
6863
6864 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
6865 gen_helper_gvec_4_ptr *fn)
6866 {
6867 if (fp_access_check(s)) {
6868 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6869 }
6870 return true;
6871 }
6872
6873 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
6874 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
6875 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6876 gen_helper_gvec_sudot_idx_b)
6877 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6878 gen_helper_gvec_usdot_idx_b)
6879 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
6880 gen_helper_gvec_bfdot_idx)
6881
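/*
 * BFMLAL[BT] (by element): the Q bit is folded into the low bit of the
 * index passed to the helper, which is how the helper distinguishes the
 * bottom (BFMLALB) and top (BFMLALT) forms.
 */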
6882 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
6883 {
6884 if (!dc_isar_feature(aa64_bf16, s)) {
6885 return false;
6886 }
6887 if (fp_access_check(s)) {
6888 /* Q bit selects BFMLALB vs BFMLALT. */
6889 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
6890 s->fpcr_ah ? FPST_AH : FPST_A64,
6891 (a->idx << 1) | a->q,
6892 gen_helper_gvec_bfmlal_idx);
6893 }
6894 return true;
6895 }
6896
6897 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
6898 {
6899 gen_helper_gvec_4_ptr *fn;
6900
6901 if (!dc_isar_feature(aa64_fcma, s)) {
6902 return false;
6903 }
6904 switch (a->esz) {
6905 case MO_16:
6906 if (!dc_isar_feature(aa64_fp16, s)) {
6907 return false;
6908 }
6909 fn = gen_helper_gvec_fcmlah_idx;
6910 break;
6911 case MO_32:
6912 fn = gen_helper_gvec_fcmlas_idx;
6913 break;
6914 default:
6915 g_assert_not_reached();
6916 }
6917 if (fp_access_check(s)) {
6918 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6919 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6920 (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn);
6921 }
6922 return true;
6923 }
6924
6925 /*
6926 * Advanced SIMD scalar pairwise
6927 */
6928
6929 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
6930 {
6931 switch (a->esz) {
6932 case MO_64:
6933 if (fp_access_check(s)) {
6934 TCGv_i64 t0 = tcg_temp_new_i64();
6935 TCGv_i64 t1 = tcg_temp_new_i64();
6936
6937 read_vec_element(s, t0, a->rn, 0, MO_64);
6938 read_vec_element(s, t1, a->rn, 1, MO_64);
6939 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6940 write_fp_dreg(s, a->rd, t0);
6941 }
6942 break;
6943 case MO_32:
6944 if (fp_access_check(s)) {
6945 TCGv_i32 t0 = tcg_temp_new_i32();
6946 TCGv_i32 t1 = tcg_temp_new_i32();
6947
6948 read_vec_element_i32(s, t0, a->rn, 0, MO_32);
6949 read_vec_element_i32(s, t1, a->rn, 1, MO_32);
6950 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6951 write_fp_sreg(s, a->rd, t0);
6952 }
6953 break;
6954 case MO_16:
6955 if (!dc_isar_feature(aa64_fp16, s)) {
6956 return false;
6957 }
6958 if (fp_access_check(s)) {
6959 TCGv_i32 t0 = tcg_temp_new_i32();
6960 TCGv_i32 t1 = tcg_temp_new_i32();
6961
6962 read_vec_element_i32(s, t0, a->rn, 0, MO_16);
6963 read_vec_element_i32(s, t1, a->rn, 1, MO_16);
6964 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6965 write_fp_sreg(s, a->rd, t0);
6966 }
6967 break;
6968 default:
6969 g_assert_not_reached();
6970 }
6971 return true;
6972 }
6973
6974 static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a,
6975 const FPScalar *fnormal,
6976 const FPScalar *fah)
6977 {
6978 return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal);
6979 }
6980
6981 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
6982 TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah)
6983 TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah)
6984 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
6985 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
6986
6987 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
6988 {
6989 if (fp_access_check(s)) {
6990 TCGv_i64 t0 = tcg_temp_new_i64();
6991 TCGv_i64 t1 = tcg_temp_new_i64();
6992
6993 read_vec_element(s, t0, a->rn, 0, MO_64);
6994 read_vec_element(s, t1, a->rn, 1, MO_64);
6995 tcg_gen_add_i64(t0, t0, t1);
6996 write_fp_dreg(s, a->rd, t0);
6997 }
6998 return true;
6999 }
7000
7001 /*
7002 * Floating-point conditional select
7003 */
7004
7005 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
7006 {
7007 TCGv_i64 t_true, t_false;
7008 DisasCompare64 c;
7009 int check = fp_access_check_scalar_hsd(s, a->esz);
7010
7011 if (check <= 0) {
7012 return check == 0;
7013 }
7014
7015 /* Zero extend sreg & hreg inputs to 64 bits now. */
7016 t_true = tcg_temp_new_i64();
7017 t_false = tcg_temp_new_i64();
7018 read_vec_element(s, t_true, a->rn, 0, a->esz);
7019 read_vec_element(s, t_false, a->rm, 0, a->esz);
7020
7021 a64_test_cc(&c, a->cond);
7022 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
7023 t_true, t_false);
7024
7025 /*
7026 * Note that sregs & hregs write back zeros to the high bits,
7027 * and we've already done the zero-extension.
7028 */
7029 write_fp_dreg(s, a->rd, t_true);
7030 return true;
7031 }
7032
7033 /*
7034 * Advanced SIMD Extract
7035 */
7036
7037 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a)
7038 {
7039 if (fp_access_check(s)) {
7040 TCGv_i64 lo = read_fp_dreg(s, a->rn);
7041 if (a->imm != 0) {
7042 TCGv_i64 hi = read_fp_dreg(s, a->rm);
7043 tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8);
7044 }
7045 write_fp_dreg(s, a->rd, lo);
7046 }
7047 return true;
7048 }
7049
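/*
 * EXT (128-bit): extract 16 bytes starting at byte 'imm' of the
 * concatenation of Vn (low) and Vm (high).  We read the 64-bit chunks
 * that span the result and use extract2 to funnel across the byte
 * boundary when imm is not a multiple of 8.
 */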
7050 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a)
7051 {
7052 TCGv_i64 lo, hi;
7053 int pos = (a->imm & 7) * 8;
7054 int elt = a->imm >> 3;
7055
7056 if (!fp_access_check(s)) {
7057 return true;
7058 }
7059
7060 lo = tcg_temp_new_i64();
7061 hi = tcg_temp_new_i64();
7062
7063 read_vec_element(s, lo, a->rn, elt, MO_64);
7064 elt++;
7065 read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64);
7066 elt++;
7067
7068 if (pos != 0) {
7069 TCGv_i64 hh = tcg_temp_new_i64();
7070 tcg_gen_extract2_i64(lo, lo, hi, pos);
7071 read_vec_element(s, hh, a->rm, elt & 1, MO_64);
7072 tcg_gen_extract2_i64(hi, hi, hh, pos);
7073 }
7074
7075 write_vec_element(s, lo, a->rd, 0, MO_64);
7076 write_vec_element(s, hi, a->rd, 1, MO_64);
7077 clear_vec_high(s, true, a->rd);
7078 return true;
7079 }
7080
7081 /*
7082 * Floating-point data-processing (3 source)
7083 */
7084
7085 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
7086 {
7087 TCGv_ptr fpst;
7088
7089 /*
7090 * These are fused multiply-add. Note that doing the negations here
7091 * as separate steps is correct: an input NaN should come out with
7092 * its sign bit flipped if it is a negated-input.
7093 */
7094 switch (a->esz) {
7095 case MO_64:
7096 if (fp_access_check(s)) {
7097 TCGv_i64 tn = read_fp_dreg(s, a->rn);
7098 TCGv_i64 tm = read_fp_dreg(s, a->rm);
7099 TCGv_i64 ta = read_fp_dreg(s, a->ra);
7100
7101 if (neg_a) {
7102 gen_vfp_maybe_ah_negd(s, ta, ta);
7103 }
7104 if (neg_n) {
7105 gen_vfp_maybe_ah_negd(s, tn, tn);
7106 }
7107 fpst = fpstatus_ptr(FPST_A64);
7108 gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
7109 write_fp_dreg_merging(s, a->rd, a->ra, ta);
7110 }
7111 break;
7112
7113 case MO_32:
7114 if (fp_access_check(s)) {
7115 TCGv_i32 tn = read_fp_sreg(s, a->rn);
7116 TCGv_i32 tm = read_fp_sreg(s, a->rm);
7117 TCGv_i32 ta = read_fp_sreg(s, a->ra);
7118
7119 if (neg_a) {
7120 gen_vfp_maybe_ah_negs(s, ta, ta);
7121 }
7122 if (neg_n) {
7123 gen_vfp_maybe_ah_negs(s, tn, tn);
7124 }
7125 fpst = fpstatus_ptr(FPST_A64);
7126 gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
7127 write_fp_sreg_merging(s, a->rd, a->ra, ta);
7128 }
7129 break;
7130
7131 case MO_16:
7132 if (!dc_isar_feature(aa64_fp16, s)) {
7133 return false;
7134 }
7135 if (fp_access_check(s)) {
7136 TCGv_i32 tn = read_fp_hreg(s, a->rn);
7137 TCGv_i32 tm = read_fp_hreg(s, a->rm);
7138 TCGv_i32 ta = read_fp_hreg(s, a->ra);
7139
7140 if (neg_a) {
7141 gen_vfp_maybe_ah_negh(s, ta, ta);
7142 }
7143 if (neg_n) {
7144 gen_vfp_maybe_ah_negh(s, tn, tn);
7145 }
7146 fpst = fpstatus_ptr(FPST_A64_F16);
7147 gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
7148 write_fp_hreg_merging(s, a->rd, a->ra, ta);
7149 }
7150 break;
7151
7152 default:
7153 return false;
7154 }
7155 return true;
7156 }
7157
7158 TRANS(FMADD, do_fmadd, a, false, false)
7159 TRANS(FNMADD, do_fmadd, a, true, true)
7160 TRANS(FMSUB, do_fmadd, a, false, true)
7161 TRANS(FNMSUB, do_fmadd, a, true, false)
7162
7163 /*
7164 * Advanced SIMD Across Lanes
7165 */
7166
7167 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
7168 MemOp src_sign, NeonGenTwo64OpFn *fn)
7169 {
7170 TCGv_i64 tcg_res, tcg_elt;
7171 MemOp src_mop = a->esz | src_sign;
7172 int elements = (a->q ? 16 : 8) >> a->esz;
7173
7174 /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
7175 if (elements < 4) {
7176 return false;
7177 }
7178 if (!fp_access_check(s)) {
7179 return true;
7180 }
7181
7182 tcg_res = tcg_temp_new_i64();
7183 tcg_elt = tcg_temp_new_i64();
7184
7185 read_vec_element(s, tcg_res, a->rn, 0, src_mop);
7186 for (int i = 1; i < elements; i++) {
7187 read_vec_element(s, tcg_elt, a->rn, i, src_mop);
7188 fn(tcg_res, tcg_res, tcg_elt);
7189 }
7190
7191 tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
7192 write_fp_dreg(s, a->rd, tcg_res);
7193 return true;
7194 }
7195
7196 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
7197 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
7198 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
7199 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
7200 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
7201 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
7202 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
7203
7204 /*
7205 * do_fp_reduction helper
7206 *
7207 * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7208 * important for correct NaN propagation that we do these
7209 * operations in exactly the order specified by the pseudocode.
7210 *
7211 * This is a recursive function.
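 * For example, with four elements the result is op(op(e0, e1), op(e2, e3)).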
7212 */
7213 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
7214 int ebase, int ecount, TCGv_ptr fpst,
7215 NeonGenTwoSingleOpFn *fn)
7216 {
7217 if (ecount == 1) {
7218 TCGv_i32 tcg_elem = tcg_temp_new_i32();
7219 read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
7220 return tcg_elem;
7221 } else {
7222 int half = ecount >> 1;
7223 TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7224
7225 tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
7226 tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
7227 tcg_res = tcg_temp_new_i32();
7228
7229 fn(tcg_res, tcg_lo, tcg_hi, fpst);
7230 return tcg_res;
7231 }
7232 }
7233
7234 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
7235 NeonGenTwoSingleOpFn *fnormal,
7236 NeonGenTwoSingleOpFn *fah)
7237 {
7238 if (fp_access_check(s)) {
7239 MemOp esz = a->esz;
7240 int elts = (a->q ? 16 : 8) >> esz;
7241 TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
7242 TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst,
7243 s->fpcr_ah ? fah : fnormal);
7244 write_fp_sreg(s, a->rd, res);
7245 }
7246 return true;
7247 }
7248
7249 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a,
7250 gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh)
7251 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a,
7252 gen_helper_vfp_minnumh, gen_helper_vfp_minnumh)
7253 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a,
7254 gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh)
7255 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a,
7256 gen_helper_vfp_minh, gen_helper_vfp_ah_minh)
7257
7258 TRANS(FMAXNMV_s, do_fp_reduction, a,
7259 gen_helper_vfp_maxnums, gen_helper_vfp_maxnums)
7260 TRANS(FMINNMV_s, do_fp_reduction, a,
7261 gen_helper_vfp_minnums, gen_helper_vfp_minnums)
7262 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs)
7263 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins)
7264
7265 /*
7266 * Floating-point Immediate
7267 */
7268
7269 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a)
7270 {
7271 int check = fp_access_check_scalar_hsd(s, a->esz);
7272 uint64_t imm;
7273
7274 if (check <= 0) {
7275 return check == 0;
7276 }
7277
7278 imm = vfp_expand_imm(a->esz, a->imm);
7279 write_fp_dreg(s, a->rd, tcg_constant_i64(imm));
7280 return true;
7281 }
7282
7283 /*
7284 * Floating point compare, conditional compare
7285 */
7286
7287 static void handle_fp_compare(DisasContext *s, int size,
7288 unsigned int rn, unsigned int rm,
7289 bool cmp_with_zero, bool signal_all_nans)
7290 {
7291 TCGv_i64 tcg_flags = tcg_temp_new_i64();
7292 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64);
7293
7294 if (size == MO_64) {
7295 TCGv_i64 tcg_vn, tcg_vm;
7296
7297 tcg_vn = read_fp_dreg(s, rn);
7298 if (cmp_with_zero) {
7299 tcg_vm = tcg_constant_i64(0);
7300 } else {
7301 tcg_vm = read_fp_dreg(s, rm);
7302 }
7303 if (signal_all_nans) {
7304 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7305 } else {
7306 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7307 }
7308 } else {
7309 TCGv_i32 tcg_vn = tcg_temp_new_i32();
7310 TCGv_i32 tcg_vm = tcg_temp_new_i32();
7311
7312 read_vec_element_i32(s, tcg_vn, rn, 0, size);
7313 if (cmp_with_zero) {
7314 tcg_gen_movi_i32(tcg_vm, 0);
7315 } else {
7316 read_vec_element_i32(s, tcg_vm, rm, 0, size);
7317 }
7318
7319 switch (size) {
7320 case MO_32:
7321 if (signal_all_nans) {
7322 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7323 } else {
7324 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7325 }
7326 break;
7327 case MO_16:
7328 if (signal_all_nans) {
7329 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7330 } else {
7331 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7332 }
7333 break;
7334 default:
7335 g_assert_not_reached();
7336 }
7337 }
7338
7339 gen_set_nzcv(tcg_flags);
7340 }
7341
7342 /* FCMP, FCMPE */
7343 static bool trans_FCMP(DisasContext *s, arg_FCMP *a)
7344 {
7345 int check = fp_access_check_scalar_hsd(s, a->esz);
7346
7347 if (check <= 0) {
7348 return check == 0;
7349 }
7350
7351 handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e);
7352 return true;
7353 }
7354
7355 /* FCCMP, FCCMPE */
7356 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a)
7357 {
7358 TCGLabel *label_continue = NULL;
7359 int check = fp_access_check_scalar_hsd(s, a->esz);
7360
7361 if (check <= 0) {
7362 return check == 0;
7363 }
7364
7365 if (a->cond < 0x0e) { /* not always */
7366 TCGLabel *label_match = gen_new_label();
7367 label_continue = gen_new_label();
7368 arm_gen_test_cc(a->cond, label_match);
7369 /* nomatch: */
7370 gen_set_nzcv(tcg_constant_i64(a->nzcv << 28));
7371 tcg_gen_br(label_continue);
7372 gen_set_label(label_match);
7373 }
7374
7375 handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e);
7376
7377 if (label_continue) {
7378 gen_set_label(label_continue);
7379 }
7380 return true;
7381 }
7382
7383 /*
7384 * Advanced SIMD Modified Immediate
7385 */
7386
7387 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a)
7388 {
7389 if (!dc_isar_feature(aa64_fp16, s)) {
7390 return false;
7391 }
7392 if (fp_access_check(s)) {
7393 tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd),
7394 a->q ? 16 : 8, vec_full_reg_size(s),
7395 vfp_expand_imm(MO_16, a->abcdefgh));
7396 }
7397 return true;
7398 }
7399
7400 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs,
7401 int64_t c, uint32_t oprsz, uint32_t maxsz)
7402 {
7403 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
7404 }
7405
7406 static bool trans_Vimm(DisasContext *s, arg_Vimm *a)
7407 {
7408 GVecGen2iFn *fn;
7409
7410 /* Handle decode of cmode/op here between ORR/BIC/MOVI */
7411 if ((a->cmode & 1) && a->cmode < 12) {
7412 /* For op=1, the imm will be inverted, so BIC becomes AND. */
7413 fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
7414 } else {
7415 /* There is one unallocated cmode/op combination in this space */
7416 if (a->cmode == 15 && a->op == 1 && a->q == 0) {
7417 return false;
7418 }
7419 fn = gen_movi;
7420 }
7421
7422 if (fp_access_check(s)) {
7423 uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
7424 gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
7425 }
7426 return true;
7427 }
7428
7429 /*
7430 * Advanced SIMD Shift by Immediate
7431 */
7432
7433 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
7434 {
7435 if (fp_access_check(s)) {
7436 gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
7437 }
7438 return true;
7439 }
7440
7441 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
7442 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
7443 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
7444 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
7445 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
7446 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
7447 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
7448 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
7449 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
7450 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
7451 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli)
7452 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
7453 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
7454 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)
7455
7456 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
7457 {
7458 TCGv_i64 tcg_rn, tcg_rd;
7459 int esz = a->esz;
7460 int esize;
7461
7462 if (!fp_access_check(s)) {
7463 return true;
7464 }
7465
7466 /*
7467 * For the LL variants the store is larger than the load,
7468 * so if rd == rn we would overwrite parts of our input.
7469 * So load everything right now and use shifts in the main loop.
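 * The Q bit selects the low (e.g. SSHLL) or high (SSHLL2) 64 bits of Vn.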
7470 */
7471 tcg_rd = tcg_temp_new_i64();
7472 tcg_rn = tcg_temp_new_i64();
7473 read_vec_element(s, tcg_rn, a->rn, a->q, MO_64);
7474
7475 esize = 8 << esz;
7476 for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7477 if (is_u) {
7478 tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize);
7479 } else {
7480 tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize);
7481 }
7482 tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm);
7483 write_vec_element(s, tcg_rd, a->rd, i, esz + 1);
7484 }
7485 clear_vec_high(s, true, a->rd);
7486 return true;
7487 }
7488
7489 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false)
7490 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true)
7491
7492 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7493 {
7494 assert(shift >= 0 && shift <= 64);
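    /*
     * TCG shift counts must be < 64; an arithmetic shift by 64 gives the
     * same result as a shift by 63 (every bit is a copy of the sign bit).
     */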
7495 tcg_gen_sari_i64(dst, src, MIN(shift, 63));
7496 }
7497
7498 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7499 {
7500 assert(shift >= 0 && shift <= 64);
7501 if (shift == 64) {
7502 tcg_gen_movi_i64(dst, 0);
7503 } else {
7504 tcg_gen_shri_i64(dst, src, shift);
7505 }
7506 }
7507
7508 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7509 {
7510 gen_sshr_d(src, src, shift);
7511 tcg_gen_add_i64(dst, dst, src);
7512 }
7513
7514 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7515 {
7516 gen_ushr_d(src, src, shift);
7517 tcg_gen_add_i64(dst, dst, src);
7518 }
7519
7520 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7521 {
7522 assert(shift >= 0 && shift <= 32);
7523 if (shift) {
7524 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7525 tcg_gen_add_i64(dst, src, rnd);
7526 tcg_gen_sari_i64(dst, dst, shift);
7527 } else {
7528 tcg_gen_mov_i64(dst, src);
7529 }
7530 }
7531
7532 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7533 {
7534 assert(shift >= 0 && shift <= 32);
7535 if (shift) {
7536 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7537 tcg_gen_add_i64(dst, src, rnd);
7538 tcg_gen_shri_i64(dst, dst, shift);
7539 } else {
7540 tcg_gen_mov_i64(dst, src);
7541 }
7542 }
7543
7544 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7545 {
7546 assert(shift >= 0 && shift <= 64);
7547 if (shift == 0) {
7548 tcg_gen_mov_i64(dst, src);
7549 } else if (shift == 64) {
7550 /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */
7551 tcg_gen_movi_i64(dst, 0);
7552 } else {
7553 TCGv_i64 rnd = tcg_temp_new_i64();
7554 tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7555 tcg_gen_sari_i64(dst, src, shift);
7556 tcg_gen_add_i64(dst, dst, rnd);
7557 }
7558 }
7559
7560 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7561 {
7562 assert(shift >= 0 && shift <= 64);
7563 if (shift == 0) {
7564 tcg_gen_mov_i64(dst, src);
7565 } else if (shift == 64) {
7566 /* Rounding will propagate bit 63 into bit 64. */
7567 tcg_gen_shri_i64(dst, src, 63);
7568 } else {
7569 TCGv_i64 rnd = tcg_temp_new_i64();
7570 tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7571 tcg_gen_shri_i64(dst, src, shift);
7572 tcg_gen_add_i64(dst, dst, rnd);
7573 }
7574 }
7575
7576 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7577 {
7578 gen_srshr_d(src, src, shift);
7579 tcg_gen_add_i64(dst, dst, src);
7580 }
7581
7582 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7583 {
7584 gen_urshr_d(src, src, shift);
7585 tcg_gen_add_i64(dst, dst, src);
7586 }
7587
7588 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7589 {
7590 /* If shift is 64, dst is unchanged. */
7591 if (shift != 64) {
7592 tcg_gen_shri_i64(src, src, shift);
7593 tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift);
7594 }
7595 }
7596
7597 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7598 {
7599 tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift);
7600 }
7601
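/*
 * Narrowing shifts: each wide source element is shifted, narrowed and
 * deposited into the next narrow element of a 64-bit result, which is
 * then written to the half of Vd selected by Q (the "2" forms write the
 * high half and leave the low half intact).
 */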
7602 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a,
7603 WideShiftImmFn * const fns[3], MemOp sign)
7604 {
7605 TCGv_i64 tcg_rn, tcg_rd;
7606 int esz = a->esz;
7607 int esize;
7608 WideShiftImmFn *fn;
7609
7610 tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7611
7612 if (!fp_access_check(s)) {
7613 return true;
7614 }
7615
7616 tcg_rn = tcg_temp_new_i64();
7617 tcg_rd = tcg_temp_new_i64();
7618 tcg_gen_movi_i64(tcg_rd, 0);
7619
7620 fn = fns[esz];
7621 esize = 8 << esz;
7622 for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7623 read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign);
7624 fn(tcg_rn, tcg_rn, a->imm);
7625 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize);
7626 }
7627
7628 write_vec_element(s, tcg_rd, a->rd, a->q, MO_64);
7629 clear_vec_high(s, a->q, a->rd);
7630 return true;
7631 }
7632
7633 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7634 {
7635 tcg_gen_sari_i64(d, s, i);
7636 tcg_gen_ext16u_i64(d, d);
7637 gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7638 }
7639
7640 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7641 {
7642 tcg_gen_sari_i64(d, s, i);
7643 tcg_gen_ext32u_i64(d, d);
7644 gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7645 }
7646
7647 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7648 {
7649 gen_sshr_d(d, s, i);
7650 gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7651 }
7652
7653 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7654 {
7655 tcg_gen_shri_i64(d, s, i);
7656 gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7657 }
7658
7659 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7660 {
7661 tcg_gen_shri_i64(d, s, i);
7662 gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7663 }
7664
7665 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7666 {
7667 gen_ushr_d(d, s, i);
7668 gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7669 }
7670
7671 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7672 {
7673 tcg_gen_sari_i64(d, s, i);
7674 tcg_gen_ext16u_i64(d, d);
7675 gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7676 }
7677
7678 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7679 {
7680 tcg_gen_sari_i64(d, s, i);
7681 tcg_gen_ext32u_i64(d, d);
7682 gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7683 }
7684
7685 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7686 {
7687 gen_sshr_d(d, s, i);
7688 gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7689 }
7690
7691 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7692 {
7693 gen_srshr_bhs(d, s, i);
7694 tcg_gen_ext16u_i64(d, d);
7695 gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7696 }
7697
7698 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7699 {
7700 gen_srshr_bhs(d, s, i);
7701 tcg_gen_ext32u_i64(d, d);
7702 gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7703 }
7704
7705 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7706 {
7707 gen_srshr_d(d, s, i);
7708 gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7709 }
7710
7711 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7712 {
7713 gen_urshr_bhs(d, s, i);
7714 gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7715 }
7716
7717 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7718 {
7719 gen_urshr_bhs(d, s, i);
7720 gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7721 }
7722
7723 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7724 {
7725 gen_urshr_d(d, s, i);
7726 gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7727 }
7728
7729 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7730 {
7731 gen_srshr_bhs(d, s, i);
7732 tcg_gen_ext16u_i64(d, d);
7733 gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7734 }
7735
7736 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7737 {
7738 gen_srshr_bhs(d, s, i);
7739 tcg_gen_ext32u_i64(d, d);
7740 gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7741 }
7742
7743 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7744 {
7745 gen_srshr_d(d, s, i);
7746 gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7747 }
7748
7749 static WideShiftImmFn * const shrn_fns[] = {
7750 tcg_gen_shri_i64,
7751 tcg_gen_shri_i64,
7752 gen_ushr_d,
7753 };
7754 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0)
7755
7756 static WideShiftImmFn * const rshrn_fns[] = {
7757 gen_urshr_bhs,
7758 gen_urshr_bhs,
7759 gen_urshr_d,
7760 };
7761 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0)
7762
7763 static WideShiftImmFn * const sqshrn_fns[] = {
7764 gen_sqshrn_b,
7765 gen_sqshrn_h,
7766 gen_sqshrn_s,
7767 };
7768 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN)
7769
7770 static WideShiftImmFn * const uqshrn_fns[] = {
7771 gen_uqshrn_b,
7772 gen_uqshrn_h,
7773 gen_uqshrn_s,
7774 };
7775 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0)
7776
7777 static WideShiftImmFn * const sqshrun_fns[] = {
7778 gen_sqshrun_b,
7779 gen_sqshrun_h,
7780 gen_sqshrun_s,
7781 };
7782 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN)
7783
7784 static WideShiftImmFn * const sqrshrn_fns[] = {
7785 gen_sqrshrn_b,
7786 gen_sqrshrn_h,
7787 gen_sqrshrn_s,
7788 };
7789 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN)
7790
7791 static WideShiftImmFn * const uqrshrn_fns[] = {
7792 gen_uqrshrn_b,
7793 gen_uqrshrn_h,
7794 gen_uqrshrn_s,
7795 };
7796 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0)
7797
7798 static WideShiftImmFn * const sqrshrun_fns[] = {
7799 gen_sqrshrun_b,
7800 gen_sqrshrun_h,
7801 gen_sqrshrun_s,
7802 };
7803 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN)
7804
7805 /*
7806 * Advanced SIMD Scalar Shift by Immediate
7807 */
7808
7809 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a,
7810 WideShiftImmFn *fn, bool accumulate,
7811 MemOp sign)
7812 {
7813 if (fp_access_check(s)) {
7814 TCGv_i64 rd = tcg_temp_new_i64();
7815 TCGv_i64 rn = tcg_temp_new_i64();
7816
7817 read_vec_element(s, rn, a->rn, 0, a->esz | sign);
7818 if (accumulate) {
7819 read_vec_element(s, rd, a->rd, 0, a->esz | sign);
7820 }
7821 fn(rd, rn, a->imm);
7822 write_fp_dreg(s, a->rd, rd);
7823 }
7824 return true;
7825 }
7826
7827 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0)
7828 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0)
7829 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0)
7830 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0)
7831 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0)
7832 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0)
7833 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0)
7834 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0)
7835 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0)
7836
7837 TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0)
7838 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0)
7839
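/*
 * Wrap a 32-bit Neon saturating-shift helper for use on a 64-bit value:
 * truncate the input, call the helper (which needs tcg_env so it can
 * update the QC flag), then zero-extend the result back to 64 bits.
 */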
7840 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i,
7841 NeonGenTwoOpEnvFn *fn)
7842 {
7843 TCGv_i32 t = tcg_temp_new_i32();
7844 tcg_gen_extrl_i64_i32(t, s);
7845 fn(t, tcg_env, t, tcg_constant_i32(i));
7846 tcg_gen_extu_i32_i64(d, t);
7847 }
7848
7849 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7850 {
7851 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8);
7852 }
7853
7854 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7855 {
7856 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16);
7857 }
7858
7859 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7860 {
7861 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32);
7862 }
7863
7864 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7865 {
7866 gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i));
7867 }
7868
7869 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7870 {
7871 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8);
7872 }
7873
7874 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7875 {
7876 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16);
7877 }
7878
7879 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7880 {
7881 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32);
7882 }
7883
7884 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7885 {
7886 gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i));
7887 }
7888
7889 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7890 {
7891 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8);
7892 }
7893
7894 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7895 {
7896 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16);
7897 }
7898
7899 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7900 {
7901 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32);
7902 }
7903
7904 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7905 {
7906 gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i));
7907 }
7908
7909 static WideShiftImmFn * const f_scalar_sqshli[] = {
7910 gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d
7911 };
7912
7913 static WideShiftImmFn * const f_scalar_uqshli[] = {
7914 gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d
7915 };
7916
7917 static WideShiftImmFn * const f_scalar_sqshlui[] = {
7918 gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d
7919 };
7920
7921 /* Note that the helpers sign-extend their inputs, so don't do it here. */
7922 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0)
7923 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0)
7924 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0)
7925
7926 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a,
7927 WideShiftImmFn * const fns[3],
7928 MemOp sign, bool zext)
7929 {
7930 MemOp esz = a->esz;
7931
7932 tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7933
7934 if (fp_access_check(s)) {
7935 TCGv_i64 rd = tcg_temp_new_i64();
7936 TCGv_i64 rn = tcg_temp_new_i64();
7937
7938 read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign);
7939 fns[esz](rd, rn, a->imm);
7940 if (zext) {
7941 tcg_gen_ext_i64(rd, rd, esz);
7942 }
7943 write_fp_dreg(s, a->rd, rd);
7944 }
7945 return true;
7946 }
7947
7948 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true)
7949 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true)
7950 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false)
7951 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false)
7952 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false)
7953 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false)
7954
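/*
 * For a 32-bit SDIV the inputs must be sign-extended to 64 bits so that
 * the 64-bit division helper computes the correct signed quotient;
 * UDIV and the 64-bit forms can use the register values directly.
 */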
7955 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed)
7956 {
7957 TCGv_i64 tcg_n, tcg_m, tcg_rd;
7958 tcg_rd = cpu_reg(s, a->rd);
7959
7960 if (!a->sf && is_signed) {
7961 tcg_n = tcg_temp_new_i64();
7962 tcg_m = tcg_temp_new_i64();
7963 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn));
7964 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm));
7965 } else {
7966 tcg_n = read_cpu_reg(s, a->rn, a->sf);
7967 tcg_m = read_cpu_reg(s, a->rm, a->sf);
7968 }
7969
7970 if (is_signed) {
7971 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
7972 } else {
7973 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
7974 }
7975
7976 if (!a->sf) { /* zero extend final result */
7977 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7978 }
7979 return true;
7980 }
7981
7982 TRANS(SDIV, do_div, a, true)
7983 TRANS(UDIV, do_div, a, false)
7984
7985 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
7986 * Note that it is the caller's responsibility to ensure that the
7987 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
7988 * mandated semantics for out of range shifts.
7989 */
7990 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
7991 enum a64_shift_type shift_type, TCGv_i64 shift_amount)
7992 {
7993 switch (shift_type) {
7994 case A64_SHIFT_TYPE_LSL:
7995 tcg_gen_shl_i64(dst, src, shift_amount);
7996 break;
7997 case A64_SHIFT_TYPE_LSR:
7998 tcg_gen_shr_i64(dst, src, shift_amount);
7999 break;
8000 case A64_SHIFT_TYPE_ASR:
8001 if (!sf) {
8002 tcg_gen_ext32s_i64(dst, src);
8003 }
8004 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
8005 break;
8006 case A64_SHIFT_TYPE_ROR:
8007 if (sf) {
8008 tcg_gen_rotr_i64(dst, src, shift_amount);
8009 } else {
8010 TCGv_i32 t0, t1;
8011 t0 = tcg_temp_new_i32();
8012 t1 = tcg_temp_new_i32();
8013 tcg_gen_extrl_i64_i32(t0, src);
8014 tcg_gen_extrl_i64_i32(t1, shift_amount);
8015 tcg_gen_rotr_i32(t0, t0, t1);
8016 tcg_gen_extu_i32_i64(dst, t0);
8017 }
8018 break;
8019 default:
8020 assert(FALSE); /* all shift types should be handled */
8021 break;
8022 }
8023
8024 if (!sf) { /* zero extend final result */
8025 tcg_gen_ext32u_i64(dst, dst);
8026 }
8027 }
8028
8029 /* Shift a TCGv src by immediate, put result in dst.
8030 * The shift amount must be in range (this should always be true as the
8031 * relevant instructions will UNDEF on bad shift immediates).
8032 */
8033 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
8034 enum a64_shift_type shift_type, unsigned int shift_i)
8035 {
8036 assert(shift_i < (sf ? 64 : 32));
8037
8038 if (shift_i == 0) {
8039 tcg_gen_mov_i64(dst, src);
8040 } else {
8041 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
8042 }
8043 }
8044
8045 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a,
8046 enum a64_shift_type shift_type)
8047 {
8048 TCGv_i64 tcg_shift = tcg_temp_new_i64();
8049 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8050 TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8051
8052 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31);
8053 shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift);
8054 return true;
8055 }
8056
8057 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL)
8058 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR)
8059 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR)
8060 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR)
8061
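/*
 * CRC32/CRC32C: the element size selects how many bytes of Rm are
 * consumed (1 << esz); narrower values are zero-extended before being
 * passed to the helper together with the byte count.
 */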
8062 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c)
8063 {
8064 TCGv_i64 tcg_acc, tcg_val, tcg_rd;
8065 TCGv_i32 tcg_bytes;
8066
8067 switch (a->esz) {
8068 case MO_8:
8069 case MO_16:
8070 case MO_32:
8071 tcg_val = tcg_temp_new_i64();
8072 tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz);
8073 break;
8074 case MO_64:
8075 tcg_val = cpu_reg(s, a->rm);
8076 break;
8077 default:
8078 g_assert_not_reached();
8079 }
8080 tcg_acc = cpu_reg(s, a->rn);
8081 tcg_bytes = tcg_constant_i32(1 << a->esz);
8082 tcg_rd = cpu_reg(s, a->rd);
8083
8084 if (crc32c) {
8085 gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
8086 } else {
8087 gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
8088 }
8089 return true;
8090 }
8091
8092 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false)
8093 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true)
8094
8095 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag)
8096 {
8097 TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true);
8098 TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true);
8099 TCGv_i64 tcg_d = cpu_reg(s, a->rd);
8100
8101 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
8102 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
8103
8104 if (setflag) {
8105 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
8106 } else {
8107 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
8108 }
8109 return true;
8110 }
8111
8112 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false)
8113 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true)
8114
8115 static bool trans_IRG(DisasContext *s, arg_rrr *a)
8116 {
8117 if (dc_isar_feature(aa64_mte_insn_reg, s)) {
8118 TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd);
8119 TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn);
8120
8121 if (s->ata[0]) {
8122 gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm));
8123 } else {
8124 gen_address_with_allocation_tag0(tcg_rd, tcg_rn);
8125 }
8126 return true;
8127 }
8128 return false;
8129 }
8130
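/*
 * GMI: extract the allocation tag from bits [59:56] of Xn and OR the
 * corresponding bit (1 << tag) into the exclusion mask from Xm.
 */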
8131 static bool trans_GMI(DisasContext *s, arg_rrr *a)
8132 {
8133 if (dc_isar_feature(aa64_mte_insn_reg, s)) {
8134 TCGv_i64 t = tcg_temp_new_i64();
8135
8136 tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4);
8137 tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
8138 tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t);
8139 return true;
8140 }
8141 return false;
8142 }
8143
8144 static bool trans_PACGA(DisasContext *s, arg_rrr *a)
8145 {
8146 if (dc_isar_feature(aa64_pauth, s)) {
8147 gen_helper_pacga(cpu_reg(s, a->rd), tcg_env,
8148 cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm));
8149 return true;
8150 }
8151 return false;
8152 }
8153
8154 typedef void ArithOneOp(TCGv_i64, TCGv_i64);
8155
8156 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn)
8157 {
8158 fn(cpu_reg(s, rd), cpu_reg(s, rn));
8159 return true;
8160 }
8161
8162 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8163 {
8164 TCGv_i32 t32 = tcg_temp_new_i32();
8165
8166 tcg_gen_extrl_i64_i32(t32, tcg_rn);
8167 gen_helper_rbit(t32, t32);
8168 tcg_gen_extu_i32_i64(tcg_rd, t32);
8169 }
8170
8171 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask)
8172 {
8173 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8174
8175 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
8176 tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
8177 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
8178 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
8179 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
8180 }
8181
8182 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8183 {
8184 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff));
8185 }
8186
8187 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8188 {
8189 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull));
8190 }
8191
8192 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8193 {
8194 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
8195 }
8196
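/*
 * REV32 on a 64-bit value: byte-swap the whole register, then rotate by
 * 32 so that each 32-bit half ends up byte-reversed in place.
 */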
8197 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8198 {
8199 tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
8200 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
8201 }
8202
8203 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32)
8204 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32)
8205 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32)
8206 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64)
8207
8208 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8209 {
8210 TCGv_i32 t32 = tcg_temp_new_i32();
8211
8212 tcg_gen_extrl_i64_i32(t32, tcg_rn);
8213 tcg_gen_clzi_i32(t32, t32, 32);
8214 tcg_gen_extu_i32_i64(tcg_rd, t32);
8215 }
8216
8217 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8218 {
8219 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
8220 }
8221
8222 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8223 {
8224 TCGv_i32 t32 = tcg_temp_new_i32();
8225
8226 tcg_gen_extrl_i64_i32(t32, tcg_rn);
8227 tcg_gen_clrsb_i32(t32, t32);
8228 tcg_gen_extu_i32_i64(tcg_rd, t32);
8229 }
8230
8231 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32)
8232 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32)
8233
8234 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn)
8235 {
8236 TCGv_i64 tcg_rd, tcg_rn;
8237
8238 if (a->z) {
8239 if (a->rn != 31) {
8240 return false;
8241 }
8242 tcg_rn = tcg_constant_i64(0);
8243 } else {
8244 tcg_rn = cpu_reg_sp(s, a->rn);
8245 }
8246 if (s->pauth_active) {
8247 tcg_rd = cpu_reg(s, a->rd);
8248 fn(tcg_rd, tcg_env, tcg_rd, tcg_rn);
8249 }
8250 return true;
8251 }
8252
8253 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia)
8254 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib)
8255 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda)
8256 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb)
8257
8258 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia)
8259 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib)
8260 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda)
8261 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb)
8262
8263 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn)
8264 {
8265 if (s->pauth_active) {
8266 TCGv_i64 tcg_rd = cpu_reg(s, rd);
8267 fn(tcg_rd, tcg_env, tcg_rd);
8268 }
8269 return true;
8270 }
8271
8272 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci)
8273 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd)
8274
8275 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a,
8276 ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags)
8277 {
8278 TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
8279
8280 if (!a->sf && (a->sa & (1 << 5))) {
8281 return false;
8282 }
8283
8284 tcg_rd = cpu_reg(s, a->rd);
8285 tcg_rn = cpu_reg(s, a->rn);
8286
8287 tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8288 if (a->sa) {
8289 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8290 }
8291
8292 (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm);
8293 if (!a->sf) {
8294 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8295 }
8296 if (setflags) {
8297 gen_logic_CC(a->sf, tcg_rd);
8298 }
8299 return true;
8300 }
8301
8302 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a)
8303 {
8304 /*
8305 * Unshifted ORR and ORN with WZR/XZR is the standard encoding for
8306 * register-register MOV and MVN, so it is worth special casing.
8307 */
8308 if (a->sa == 0 && a->st == 0 && a->rn == 31) {
8309 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8310 TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8311
8312 if (a->n) {
8313 tcg_gen_not_i64(tcg_rd, tcg_rm);
8314 if (!a->sf) {
8315 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8316 }
8317 } else {
8318 if (a->sf) {
8319 tcg_gen_mov_i64(tcg_rd, tcg_rm);
8320 } else {
8321 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
8322 }
8323 }
8324 return true;
8325 }
8326
8327 return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false);
8328 }
8329
8330 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false)
8331 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true)
8332 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false)
8333
8334 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a,
8335 bool sub_op, bool setflags)
8336 {
8337 TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result;
8338
8339 if (a->sa > 4) {
8340 return false;
8341 }
8342
8343 /* non-flag setting ops may use SP */
8344 if (!setflags) {
8345 tcg_rd = cpu_reg_sp(s, a->rd);
8346 } else {
8347 tcg_rd = cpu_reg(s, a->rd);
8348 }
8349 tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf);
8350
8351 tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8352 ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa);
8353
8354 tcg_result = tcg_temp_new_i64();
8355 if (!setflags) {
8356 if (sub_op) {
8357 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8358 } else {
8359 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8360 }
8361 } else {
8362 if (sub_op) {
8363 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8364 } else {
8365 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8366 }
8367 }
8368
8369 if (a->sf) {
8370 tcg_gen_mov_i64(tcg_rd, tcg_result);
8371 } else {
8372 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8373 }
8374 return true;
8375 }
8376
8377 TRANS(ADD_ext, do_addsub_ext, a, false, false)
8378 TRANS(SUB_ext, do_addsub_ext, a, true, false)
8379 TRANS(ADDS_ext, do_addsub_ext, a, false, true)
8380 TRANS(SUBS_ext, do_addsub_ext, a, true, true)
8381
8382 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a,
8383 bool sub_op, bool setflags)
8384 {
8385 TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result;
8386
8387 if (a->st == 3 || (!a->sf && (a->sa & 32))) {
8388 return false;
8389 }
8390
8391 tcg_rd = cpu_reg(s, a->rd);
8392 tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8393 tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8394
8395 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8396
8397 tcg_result = tcg_temp_new_i64();
8398 if (!setflags) {
8399 if (sub_op) {
8400 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8401 } else {
8402 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8403 }
8404 } else {
8405 if (sub_op) {
8406 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8407 } else {
8408 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8409 }
8410 }
8411
8412 if (a->sf) {
8413 tcg_gen_mov_i64(tcg_rd, tcg_result);
8414 } else {
8415 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8416 }
8417 return true;
8418 }
8419
8420 TRANS(ADD_r, do_addsub_reg, a, false, false)
8421 TRANS(SUB_r, do_addsub_reg, a, true, false)
8422 TRANS(ADDS_r, do_addsub_reg, a, false, true)
8423 TRANS(SUBS_r, do_addsub_reg, a, true, true)
8424
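/*
 * SMULH/UMULH: muls2/mulu2 produce a 128-bit product in two halves;
 * the low half is discarded and the high half written to Rd.
 */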
8425 static bool do_mulh(DisasContext *s, arg_rrr *a,
8426 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
8427 {
8428 TCGv_i64 discard = tcg_temp_new_i64();
8429 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8430 TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8431 TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8432
8433 fn(discard, tcg_rd, tcg_rn, tcg_rm);
8434 return true;
8435 }
8436
8437 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64)
8438 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64)
8439
8440 static bool do_muladd(DisasContext *s, arg_rrrr *a,
8441 bool sf, bool is_sub, MemOp mop)
8442 {
8443 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8444 TCGv_i64 tcg_op1, tcg_op2;
8445
8446 if (mop == MO_64) {
8447 tcg_op1 = cpu_reg(s, a->rn);
8448 tcg_op2 = cpu_reg(s, a->rm);
8449 } else {
8450 tcg_op1 = tcg_temp_new_i64();
8451 tcg_op2 = tcg_temp_new_i64();
8452 tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop);
8453 tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop);
8454 }
8455
8456 if (a->ra == 31 && !is_sub) {
8457 /* Special-case MADD with rA == XZR; it is the standard MUL alias */
8458 tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2);
8459 } else {
8460 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8461 TCGv_i64 tcg_ra = cpu_reg(s, a->ra);
8462
8463 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
8464 if (is_sub) {
8465 tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp);
8466 } else {
8467 tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp);
8468 }
8469 }
8470
8471 if (!sf) {
8472 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8473 }
8474 return true;
8475 }
8476
8477 TRANS(MADD_w, do_muladd, a, false, false, MO_64)
8478 TRANS(MSUB_w, do_muladd, a, false, true, MO_64)
8479 TRANS(MADD_x, do_muladd, a, true, false, MO_64)
8480 TRANS(MSUB_x, do_muladd, a, true, true, MO_64)
8481
8482 TRANS(SMADDL, do_muladd, a, true, false, MO_SL)
8483 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL)
8484 TRANS(UMADDL, do_muladd, a, true, false, MO_UL)
8485 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL)
8486
8487 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a,
8488 bool is_sub, bool setflags)
8489 {
8490 TCGv_i64 tcg_y, tcg_rn, tcg_rd;
8491
8492 tcg_rd = cpu_reg(s, a->rd);
8493 tcg_rn = cpu_reg(s, a->rn);
8494
8495 if (is_sub) {
8496 tcg_y = tcg_temp_new_i64();
8497 tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm));
8498 } else {
8499 tcg_y = cpu_reg(s, a->rm);
8500 }
8501
8502 if (setflags) {
8503 gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y);
8504 } else {
8505 gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y);
8506 }
8507 return true;
8508 }
8509
8510 TRANS(ADC, do_adc_sbc, a, false, false)
8511 TRANS(SBC, do_adc_sbc, a, true, false)
8512 TRANS(ADCS, do_adc_sbc, a, false, true)
8513 TRANS(SBCS, do_adc_sbc, a, true, true)
8514
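/*
 * RMIF: rotate Xn right by imm and move the bottom four bits of the
 * result into NZCV under control of the mask.
 */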
8515 static bool trans_RMIF(DisasContext *s, arg_RMIF *a)
8516 {
8517 int mask = a->mask;
8518 TCGv_i64 tcg_rn;
8519 TCGv_i32 nzcv;
8520
8521 if (!dc_isar_feature(aa64_condm_4, s)) {
8522 return false;
8523 }
8524
8525 tcg_rn = read_cpu_reg(s, a->rn, 1);
8526 tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm);
8527
8528 nzcv = tcg_temp_new_i32();
8529 tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
8530
8531 if (mask & 8) { /* N */
8532 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
8533 }
8534 if (mask & 4) { /* Z */
8535 tcg_gen_not_i32(cpu_ZF, nzcv);
8536 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
8537 }
8538 if (mask & 2) { /* C */
8539 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
8540 }
8541 if (mask & 1) { /* V */
8542 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
8543 }
8544 return true;
8545 }
8546
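/*
 * SETF8/SETF16: set NZV from the low 8 or 16 bits of Wn: N from the top
 * bit of that field, Z from whether the field is zero, V from the XOR of
 * the top bit and the bit above it; C is left unchanged.
 */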
8547 static bool do_setf(DisasContext *s, int rn, int shift)
8548 {
8549 TCGv_i32 tmp = tcg_temp_new_i32();
8550
8551 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
8552 tcg_gen_shli_i32(cpu_NF, tmp, shift);
8553 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
8554 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
8555 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
8556 return true;
8557 }
8558
8559 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24)
8560 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16)
8561
8562 /* CCMP, CCMN */
8563 static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
8564 {
8565 TCGv_i32 tcg_t0 = tcg_temp_new_i32();
8566 TCGv_i32 tcg_t1 = tcg_temp_new_i32();
8567 TCGv_i32 tcg_t2 = tcg_temp_new_i32();
8568 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8569 TCGv_i64 tcg_rn, tcg_y;
8570 DisasCompare c;
8571 unsigned nzcv;
8572 bool has_andc;
8573
8574 /* Set T0 = !COND. */
8575 arm_test_cc(&c, a->cond);
8576 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
8577
8578 /* Load the arguments for the new comparison. */
8579 if (a->imm) {
8580 tcg_y = tcg_constant_i64(a->y);
8581 } else {
8582 tcg_y = cpu_reg(s, a->y);
8583 }
8584 tcg_rn = cpu_reg(s, a->rn);
8585
8586 /* Set the flags for the new comparison. */
8587 if (a->op) {
8588 gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8589 } else {
8590 gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8591 }
8592
8593 /*
8594 * If COND was false, force the flags to #nzcv. Compute two masks
8595 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
8596 * For tcg hosts that support ANDC, we can make do with just T1.
8597 * In either case, allow the tcg optimizer to delete any unused mask.
8598 */
8599 tcg_gen_neg_i32(tcg_t1, tcg_t0);
8600 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
8601
8602 nzcv = a->nzcv;
8603 has_andc = tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0);
8604 if (nzcv & 8) { /* N */
8605 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
8606 } else {
8607 if (has_andc) {
8608 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
8609 } else {
8610 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
8611 }
8612 }
8613 if (nzcv & 4) { /* Z */
8614 if (has_andc) {
8615 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
8616 } else {
8617 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
8618 }
8619 } else {
8620 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
8621 }
8622 if (nzcv & 2) { /* C */
8623 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
8624 } else {
8625 if (has_andc) {
8626 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
8627 } else {
8628 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
8629 }
8630 }
8631 if (nzcv & 1) { /* V */
8632 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
8633 } else {
8634 if (has_andc) {
8635 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
8636 } else {
8637 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
8638 }
8639 }
8640 return true;
8641 }
8642
8643 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8644 {
8645 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8646 TCGv_i64 zero = tcg_constant_i64(0);
8647 DisasCompare64 c;
8648
8649 a64_test_cc(&c, a->cond);
8650
8651 if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) {
8652 /* CSET & CSETM. */
8653 if (a->else_inv) {
8654 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
8655 tcg_rd, c.value, zero);
8656 } else {
8657 tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
8658 tcg_rd, c.value, zero);
8659 }
8660 } else {
8661 TCGv_i64 t_true = cpu_reg(s, a->rn);
8662 TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1);
8663
8664 if (a->else_inv && a->else_inc) {
8665 tcg_gen_neg_i64(t_false, t_false);
8666 } else if (a->else_inv) {
8667 tcg_gen_not_i64(t_false, t_false);
8668 } else if (a->else_inc) {
8669 tcg_gen_addi_i64(t_false, t_false, 1);
8670 }
8671 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
8672 }
8673
8674 if (!a->sf) {
8675 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8676 }
8677 return true;
8678 }
8679
8680 typedef struct FPScalar1Int {
8681 void (*gen_h)(TCGv_i32, TCGv_i32);
8682 void (*gen_s)(TCGv_i32, TCGv_i32);
8683 void (*gen_d)(TCGv_i64, TCGv_i64);
8684 } FPScalar1Int;
8685
8686 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
8687 const FPScalar1Int *f,
8688 bool merging)
8689 {
8690 switch (a->esz) {
8691 case MO_64:
8692 if (fp_access_check(s)) {
8693 TCGv_i64 t = read_fp_dreg(s, a->rn);
8694 f->gen_d(t, t);
8695 if (merging) {
8696 write_fp_dreg_merging(s, a->rd, a->rd, t);
8697 } else {
8698 write_fp_dreg(s, a->rd, t);
8699 }
8700 }
8701 break;
8702 case MO_32:
8703 if (fp_access_check(s)) {
8704 TCGv_i32 t = read_fp_sreg(s, a->rn);
8705 f->gen_s(t, t);
8706 if (merging) {
8707 write_fp_sreg_merging(s, a->rd, a->rd, t);
8708 } else {
8709 write_fp_sreg(s, a->rd, t);
8710 }
8711 }
8712 break;
8713 case MO_16:
8714 if (!dc_isar_feature(aa64_fp16, s)) {
8715 return false;
8716 }
8717 if (fp_access_check(s)) {
8718 TCGv_i32 t = read_fp_hreg(s, a->rn);
8719 f->gen_h(t, t);
8720 if (merging) {
8721 write_fp_hreg_merging(s, a->rd, a->rd, t);
8722 } else {
8723 write_fp_sreg(s, a->rd, t);
8724 }
8725 }
8726 break;
8727 default:
8728 return false;
8729 }
8730 return true;
8731 }
8732
8733 static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a,
8734 const FPScalar1Int *fnormal,
8735 const FPScalar1Int *fah)
8736 {
8737 return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true);
8738 }
8739
8740 static const FPScalar1Int f_scalar_fmov = {
8741 tcg_gen_mov_i32,
8742 tcg_gen_mov_i32,
8743 tcg_gen_mov_i64,
8744 };
8745 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false)
8746
8747 static const FPScalar1Int f_scalar_fabs = {
8748 gen_vfp_absh,
8749 gen_vfp_abss,
8750 gen_vfp_absd,
8751 };
8752 static const FPScalar1Int f_scalar_ah_fabs = {
8753 gen_vfp_ah_absh,
8754 gen_vfp_ah_abss,
8755 gen_vfp_ah_absd,
8756 };
8757 TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs)
8758
8759 static const FPScalar1Int f_scalar_fneg = {
8760 gen_vfp_negh,
8761 gen_vfp_negs,
8762 gen_vfp_negd,
8763 };
8764 static const FPScalar1Int f_scalar_ah_fneg = {
8765 gen_vfp_ah_negh,
8766 gen_vfp_ah_negs,
8767 gen_vfp_ah_negd,
8768 };
8769 TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg)
8770
8771 typedef struct FPScalar1 {
8772 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);
8773 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr);
8774 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr);
8775 } FPScalar1;
8776
8777 static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a,
8778 const FPScalar1 *f, int rmode,
8779 ARMFPStatusFlavour fpsttype)
8780 {
8781 TCGv_i32 tcg_rmode = NULL;
8782 TCGv_ptr fpst;
8783 TCGv_i64 t64;
8784 TCGv_i32 t32;
8785 int check = fp_access_check_scalar_hsd(s, a->esz);
8786
8787 if (check <= 0) {
8788 return check == 0;
8789 }
8790
8791 fpst = fpstatus_ptr(fpsttype);
8792 if (rmode >= 0) {
8793 tcg_rmode = gen_set_rmode(rmode, fpst);
8794 }
8795
8796 switch (a->esz) {
8797 case MO_64:
8798 t64 = read_fp_dreg(s, a->rn);
8799 f->gen_d(t64, t64, fpst);
8800 write_fp_dreg_merging(s, a->rd, a->rd, t64);
8801 break;
8802 case MO_32:
8803 t32 = read_fp_sreg(s, a->rn);
8804 f->gen_s(t32, t32, fpst);
8805 write_fp_sreg_merging(s, a->rd, a->rd, t32);
8806 break;
8807 case MO_16:
8808 t32 = read_fp_hreg(s, a->rn);
8809 f->gen_h(t32, t32, fpst);
8810 write_fp_hreg_merging(s, a->rd, a->rd, t32);
8811 break;
8812 default:
8813 g_assert_not_reached();
8814 }
8815
8816 if (rmode >= 0) {
8817 gen_restore_rmode(tcg_rmode, fpst);
8818 }
8819 return true;
8820 }
8821
8822 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
8823 const FPScalar1 *f, int rmode)
8824 {
8825 return do_fp1_scalar_with_fpsttype(s, a, f, rmode,
8826 a->esz == MO_16 ?
8827 FPST_A64_F16 : FPST_A64);
8828 }
8829
8830 static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a,
8831 const FPScalar1 *f, int rmode)
8832 {
8833 return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz));
8834 }
8835
8836 static const FPScalar1 f_scalar_fsqrt = {
8837 gen_helper_vfp_sqrth,
8838 gen_helper_vfp_sqrts,
8839 gen_helper_vfp_sqrtd,
8840 };
8841 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1)
8842
8843 static const FPScalar1 f_scalar_frint = {
8844 gen_helper_advsimd_rinth,
8845 gen_helper_rints,
8846 gen_helper_rintd,
8847 };
8848 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
8849 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF)
8850 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF)
8851 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO)
8852 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
8853 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1)
8854
8855 static const FPScalar1 f_scalar_frintx = {
8856 gen_helper_advsimd_rinth_exact,
8857 gen_helper_rints_exact,
8858 gen_helper_rintd_exact,
8859 };
8860 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
8861
8862 static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a)
8863 {
8864 ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64;
8865 TCGv_i32 t32;
8866 int check;
8867
8868 if (!dc_isar_feature(aa64_bf16, s)) {
8869 return false;
8870 }
8871
8872 check = fp_access_check_scalar_hsd(s, a->esz);
8873
8874 if (check <= 0) {
8875 return check == 0;
8876 }
8877
8878 t32 = read_fp_sreg(s, a->rn);
8879 gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype));
8880 write_fp_hreg_merging(s, a->rd, a->rd, t32);
8881 return true;
8882 }
8883
8884 static const FPScalar1 f_scalar_frint32 = {
8885 NULL,
8886 gen_helper_frint32_s,
8887 gen_helper_frint32_d,
8888 };
8889 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a,
8890 &f_scalar_frint32, FPROUNDING_ZERO)
8891 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1)
8892
8893 static const FPScalar1 f_scalar_frint64 = {
8894 NULL,
8895 gen_helper_frint64_s,
8896 gen_helper_frint64_d,
8897 };
8898 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a,
8899 &f_scalar_frint64, FPROUNDING_ZERO)
8900 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1)
8901
8902 static const FPScalar1 f_scalar_frecpe = {
8903 gen_helper_recpe_f16,
8904 gen_helper_recpe_f32,
8905 gen_helper_recpe_f64,
8906 };
8907 static const FPScalar1 f_scalar_frecpe_rpres = {
8908 gen_helper_recpe_f16,
8909 gen_helper_recpe_rpres_f32,
8910 gen_helper_recpe_f64,
8911 };
8912 TRANS(FRECPE_s, do_fp1_scalar_ah, a,
8913 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
8914 &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1)
8915
8916 static const FPScalar1 f_scalar_frecpx = {
8917 gen_helper_frecpx_f16,
8918 gen_helper_frecpx_f32,
8919 gen_helper_frecpx_f64,
8920 };
8921 TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1)
8922
8923 static const FPScalar1 f_scalar_frsqrte = {
8924 gen_helper_rsqrte_f16,
8925 gen_helper_rsqrte_f32,
8926 gen_helper_rsqrte_f64,
8927 };
8928 static const FPScalar1 f_scalar_frsqrte_rpres = {
8929 gen_helper_rsqrte_f16,
8930 gen_helper_rsqrte_rpres_f32,
8931 gen_helper_rsqrte_f64,
8932 };
8933 TRANS(FRSQRTE_s, do_fp1_scalar_ah, a,
8934 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
8935 &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1)
8936
8937 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
8938 {
8939 if (fp_access_check(s)) {
8940 TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn);
8941 TCGv_i64 tcg_rd = tcg_temp_new_i64();
8942 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8943
8944 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst);
8945 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
8946 }
8947 return true;
8948 }
8949
8950 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a)
8951 {
8952 if (fp_access_check(s)) {
8953 TCGv_i32 tmp = read_fp_sreg(s, a->rn);
8954 TCGv_i32 ahp = get_ahp_flag();
8955 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8956
8957 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
8958 /* write_fp_hreg_merging is OK here because top half of result is zero */
8959 write_fp_hreg_merging(s, a->rd, a->rd, tmp);
8960 }
8961 return true;
8962 }
8963
8964 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
8965 {
8966 if (fp_access_check(s)) {
8967 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8968 TCGv_i32 tcg_rd = tcg_temp_new_i32();
8969 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8970
8971 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst);
8972 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
8973 }
8974 return true;
8975 }
8976
8977 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
8978 {
8979 if (fp_access_check(s)) {
8980 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8981 TCGv_i32 tcg_rd = tcg_temp_new_i32();
8982 TCGv_i32 ahp = get_ahp_flag();
8983 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8984
8985 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
8986 /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */
8987 write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd);
8988 }
8989 return true;
8990 }
8991
8992 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
8993 {
8994 if (fp_access_check(s)) {
8995 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
8996 TCGv_i32 tcg_rd = tcg_temp_new_i32();
8997 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
8998 TCGv_i32 tcg_ahp = get_ahp_flag();
8999
9000 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9001 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
9002 }
9003 return true;
9004 }
9005
9006 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
9007 {
9008 if (fp_access_check(s)) {
9009 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
9010 TCGv_i64 tcg_rd = tcg_temp_new_i64();
9011 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
9012 TCGv_i32 tcg_ahp = get_ahp_flag();
9013
9014 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9015 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
9016 }
9017 return true;
9018 }
9019
9020 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
9021 TCGv_i64 tcg_int, bool is_signed)
9022 {
9023 TCGv_ptr tcg_fpstatus;
9024 TCGv_i32 tcg_shift, tcg_single;
9025 TCGv_i64 tcg_double;
9026
9027 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9028 tcg_shift = tcg_constant_i32(shift);
9029
9030 switch (esz) {
9031 case MO_64:
9032 tcg_double = tcg_temp_new_i64();
9033 if (is_signed) {
9034 gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
9035 } else {
9036 gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
9037 }
9038 write_fp_dreg_merging(s, rd, rd, tcg_double);
9039 break;
9040
9041 case MO_32:
9042 tcg_single = tcg_temp_new_i32();
9043 if (is_signed) {
9044 gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9045 } else {
9046 gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9047 }
9048 write_fp_sreg_merging(s, rd, rd, tcg_single);
9049 break;
9050
9051 case MO_16:
9052 tcg_single = tcg_temp_new_i32();
9053 if (is_signed) {
9054 gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9055 } else {
9056 gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9057 }
9058 write_fp_hreg_merging(s, rd, rd, tcg_single);
9059 break;
9060
9061 default:
9062 g_assert_not_reached();
9063 }
9064 return true;
9065 }
9066
9067 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed)
9068 {
9069 TCGv_i64 tcg_int;
9070 int check = fp_access_check_scalar_hsd(s, a->esz);
9071
9072 if (check <= 0) {
9073 return check == 0;
9074 }
9075
9076 if (a->sf) {
9077 tcg_int = cpu_reg(s, a->rn);
9078 } else {
9079 tcg_int = read_cpu_reg(s, a->rn, true);
9080 if (is_signed) {
9081 tcg_gen_ext32s_i64(tcg_int, tcg_int);
9082 } else {
9083 tcg_gen_ext32u_i64(tcg_int, tcg_int);
9084 }
9085 }
9086 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
9087 }
9088
9089 TRANS(SCVTF_g, do_cvtf_g, a, true)
9090 TRANS(UCVTF_g, do_cvtf_g, a, false)
9091
9092 /*
9093 * [US]CVTF (vector), scalar version.
9094 * Which sounds weird, but really just means input from fp register
9095 * instead of input from general register. Input and output element
9096 * size are always equal.
9097 */
9098 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed)
9099 {
9100 TCGv_i64 tcg_int;
9101 int check = fp_access_check_scalar_hsd(s, a->esz);
9102
9103 if (check <= 0) {
9104 return check == 0;
9105 }
9106
9107 tcg_int = tcg_temp_new_i64();
9108 read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0));
9109 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
9110 }
9111
9112 TRANS(SCVTF_f, do_cvtf_f, a, true)
9113 TRANS(UCVTF_f, do_cvtf_f, a, false)
9114
9115 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz,
9116 TCGv_i64 tcg_out, int shift, int rn,
9117 ARMFPRounding rmode)
9118 {
9119 TCGv_ptr tcg_fpstatus;
9120 TCGv_i32 tcg_shift, tcg_rmode, tcg_single;
9121
9122 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9123 tcg_shift = tcg_constant_i32(shift);
9124 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
9125
9126 switch (esz) {
9127 case MO_64:
9128 read_vec_element(s, tcg_out, rn, 0, MO_64);
9129 switch (out) {
9130 case MO_64 | MO_SIGN:
9131 gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9132 break;
9133 case MO_64:
9134 gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9135 break;
9136 case MO_32 | MO_SIGN:
9137 gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9138 break;
9139 case MO_32:
9140 gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9141 break;
9142 default:
9143 g_assert_not_reached();
9144 }
9145 break;
9146
9147 case MO_32:
9148 tcg_single = read_fp_sreg(s, rn);
9149 switch (out) {
9150 case MO_64 | MO_SIGN:
9151 gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9152 break;
9153 case MO_64:
9154 gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9155 break;
9156 case MO_32 | MO_SIGN:
9157 gen_helper_vfp_tosls(tcg_single, tcg_single,
9158 tcg_shift, tcg_fpstatus);
9159 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9160 break;
9161 case MO_32:
9162 gen_helper_vfp_touls(tcg_single, tcg_single,
9163 tcg_shift, tcg_fpstatus);
9164 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9165 break;
9166 default:
9167 g_assert_not_reached();
9168 }
9169 break;
9170
9171 case MO_16:
9172 tcg_single = read_fp_hreg(s, rn);
9173 switch (out) {
9174 case MO_64 | MO_SIGN:
9175 gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9176 break;
9177 case MO_64:
9178 gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9179 break;
9180 case MO_32 | MO_SIGN:
9181 gen_helper_vfp_toslh(tcg_single, tcg_single,
9182 tcg_shift, tcg_fpstatus);
9183 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9184 break;
9185 case MO_32:
9186 gen_helper_vfp_toulh(tcg_single, tcg_single,
9187 tcg_shift, tcg_fpstatus);
9188 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9189 break;
9190 case MO_16 | MO_SIGN:
9191 gen_helper_vfp_toshh(tcg_single, tcg_single,
9192 tcg_shift, tcg_fpstatus);
9193 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9194 break;
9195 case MO_16:
9196 gen_helper_vfp_touhh(tcg_single, tcg_single,
9197 tcg_shift, tcg_fpstatus);
9198 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9199 break;
9200 default:
9201 g_assert_not_reached();
9202 }
9203 break;
9204
9205 default:
9206 g_assert_not_reached();
9207 }
9208
9209 gen_restore_rmode(tcg_rmode, tcg_fpstatus);
9210 }
9211
9212 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a,
9213 ARMFPRounding rmode, bool is_signed)
9214 {
9215 TCGv_i64 tcg_int;
9216 int check = fp_access_check_scalar_hsd(s, a->esz);
9217
9218 if (check <= 0) {
9219 return check == 0;
9220 }
9221
9222 tcg_int = cpu_reg(s, a->rd);
9223 do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? MO_SIGN : 0),
9224 a->esz, tcg_int, a->shift, a->rn, rmode);
9225
9226 if (!a->sf) {
9227 tcg_gen_ext32u_i64(tcg_int, tcg_int);
9228 }
9229 return true;
9230 }
9231
9232 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true)
9233 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false)
9234 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true)
9235 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false)
9236 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true)
9237 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false)
9238 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true)
9239 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false)
9240 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true)
9241 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false)
9242
9243 /*
9244 * FCVT* (vector), scalar version.
9245 * Which sounds weird, but really just means output to fp register
9246 * instead of output to general register. Input and output element
9247 * size are always equal.
9248 */
9249 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a,
9250 ARMFPRounding rmode, bool is_signed)
9251 {
9252 TCGv_i64 tcg_int;
9253 int check = fp_access_check_scalar_hsd(s, a->esz);
9254
9255 if (check <= 0) {
9256 return check == 0;
9257 }
9258
9259 tcg_int = tcg_temp_new_i64();
9260 do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0),
9261 a->esz, tcg_int, a->shift, a->rn, rmode);
9262
9263 if (!s->fpcr_nep) {
9264 clear_vec(s, a->rd);
9265 }
9266 write_vec_element(s, tcg_int, a->rd, 0, a->esz);
9267 return true;
9268 }
9269
9270 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true)
9271 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false)
9272 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true)
9273 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false)
9274 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true)
9275 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false)
9276 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true)
9277 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false)
9278 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true)
9279 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false)
9280
9281 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a)
9282 {
9283 if (!dc_isar_feature(aa64_jscvt, s)) {
9284 return false;
9285 }
9286 if (fp_access_check(s)) {
9287 TCGv_i64 t = read_fp_dreg(s, a->rn);
9288 TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64);
9289
9290 gen_helper_fjcvtzs(t, t, fpstatus);
9291
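/*
 * FJCVTZS sets Z when the conversion was exact and clears N, C and V;
 * the helper returns the converted result in the low half and the
 * value for ZF (zero iff exact) in the high half.
 */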
9292 tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t);
9293 tcg_gen_extrh_i64_i32(cpu_ZF, t);
9294 tcg_gen_movi_i32(cpu_CF, 0);
9295 tcg_gen_movi_i32(cpu_NF, 0);
9296 tcg_gen_movi_i32(cpu_VF, 0);
9297 }
9298 return true;
9299 }
9300
9301 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a)
9302 {
9303 if (!dc_isar_feature(aa64_fp16, s)) {
9304 return false;
9305 }
9306 if (fp_access_check(s)) {
9307 TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9308 TCGv_i64 tmp = tcg_temp_new_i64();
9309 tcg_gen_ext16u_i64(tmp, tcg_rn);
9310 write_fp_dreg(s, a->rd, tmp);
9311 }
9312 return true;
9313 }
9314
9315 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a)
9316 {
9317 if (fp_access_check(s)) {
9318 TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9319 TCGv_i64 tmp = tcg_temp_new_i64();
9320 tcg_gen_ext32u_i64(tmp, tcg_rn);
9321 write_fp_dreg(s, a->rd, tmp);
9322 }
9323 return true;
9324 }
9325
9326 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a)
9327 {
9328 if (fp_access_check(s)) {
9329 TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9330 write_fp_dreg(s, a->rd, tcg_rn);
9331 }
9332 return true;
9333 }
9334
9335 static bool trans_FMOV_ux(DisasContext *s, arg_rr *a)
9336 {
9337 if (fp_access_check(s)) {
9338 TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9339 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd));
9340 clear_vec_high(s, true, a->rd);
9341 }
9342 return true;
9343 }
9344
9345 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a)
9346 {
9347 if (!dc_isar_feature(aa64_fp16, s)) {
9348 return false;
9349 }
9350 if (fp_access_check(s)) {
9351 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9352 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16));
9353 }
9354 return true;
9355 }
9356
9357 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a)
9358 {
9359 if (fp_access_check(s)) {
9360 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9361 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32));
9362 }
9363 return true;
9364 }
9365
9366 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a)
9367 {
9368 if (fp_access_check(s)) {
9369 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9370 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64));
9371 }
9372 return true;
9373 }
9374
9375 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a)
9376 {
9377 if (fp_access_check(s)) {
9378 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9379 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn));
9380 }
9381 return true;
9382 }
9383
9384 typedef struct ENVScalar1 {
9385 NeonGenOneOpEnvFn *gen_bhs[3];
9386 NeonGenOne64OpEnvFn *gen_d;
9387 } ENVScalar1;
9388
9389 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f)
9390 {
9391 if (!fp_access_check(s)) {
9392 return true;
9393 }
9394 if (a->esz == MO_64) {
9395 TCGv_i64 t = read_fp_dreg(s, a->rn);
9396 f->gen_d(t, tcg_env, t);
9397 write_fp_dreg(s, a->rd, t);
9398 } else {
9399 TCGv_i32 t = tcg_temp_new_i32();
9400
9401 read_vec_element_i32(s, t, a->rn, 0, a->esz);
9402 f->gen_bhs[a->esz](t, tcg_env, t);
9403 write_fp_sreg(s, a->rd, t);
9404 }
9405 return true;
9406 }
9407
9408 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f)
9409 {
9410 if (a->esz == MO_64 && !a->q) {
9411 return false;
9412 }
9413 if (!fp_access_check(s)) {
9414 return true;
9415 }
9416 if (a->esz == MO_64) {
9417 TCGv_i64 t = tcg_temp_new_i64();
9418
9419 for (int i = 0; i < 2; ++i) {
9420 read_vec_element(s, t, a->rn, i, MO_64);
9421 f->gen_d(t, tcg_env, t);
9422 write_vec_element(s, t, a->rd, i, MO_64);
9423 }
9424 } else {
9425 TCGv_i32 t = tcg_temp_new_i32();
9426 int n = (a->q ? 16 : 8) >> a->esz;
9427
9428 for (int i = 0; i < n; ++i) {
9429 read_vec_element_i32(s, t, a->rn, i, a->esz);
9430 f->gen_bhs[a->esz](t, tcg_env, t);
9431 write_vec_element_i32(s, t, a->rd, i, a->esz);
9432 }
9433 }
9434 clear_vec_high(s, a->q, a->rd);
9435 return true;
9436 }
9437
9438 static const ENVScalar1 f_scalar_sqabs = {
9439 { gen_helper_neon_qabs_s8,
9440 gen_helper_neon_qabs_s16,
9441 gen_helper_neon_qabs_s32 },
9442 gen_helper_neon_qabs_s64,
9443 };
9444 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs)
9445 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs)
9446
9447 static const ENVScalar1 f_scalar_sqneg = {
9448 { gen_helper_neon_qneg_s8,
9449 gen_helper_neon_qneg_s16,
9450 gen_helper_neon_qneg_s32 },
9451 gen_helper_neon_qneg_s64,
9452 };
9453 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg)
9454 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg)
9455
9456 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f)
9457 {
9458 if (fp_access_check(s)) {
9459 TCGv_i64 t = read_fp_dreg(s, a->rn);
9460 f(t, t);
9461 write_fp_dreg(s, a->rd, t);
9462 }
9463 return true;
9464 }
9465
9466 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64)
9467 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64)
9468
9469 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond)
9470 {
9471 if (fp_access_check(s)) {
9472 TCGv_i64 t = read_fp_dreg(s, a->rn);
9473 tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0));
9474 write_fp_dreg(s, a->rd, t);
9475 }
9476 return true;
9477 }
9478
9479 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT)
9480 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE)
9481 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE)
9482 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT)
9483 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ)
9484
9485 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a,
9486 ArithOneOp * const fn[3])
9487 {
9488 if (a->esz == MO_64) {
9489 return false;
9490 }
9491 if (fp_access_check(s)) {
9492 TCGv_i64 t = tcg_temp_new_i64();
9493
9494 read_vec_element(s, t, a->rn, 0, a->esz + 1);
9495 fn[a->esz](t, t);
9496 clear_vec(s, a->rd);
9497 write_vec_element(s, t, a->rd, 0, a->esz);
9498 }
9499 return true;
9500 }
9501
9502 #define WRAP_ENV(NAME) \
9503 static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \
9504 { gen_helper_##NAME(d, tcg_env, n); }
9505
9506 WRAP_ENV(neon_unarrow_sat8)
9507 WRAP_ENV(neon_unarrow_sat16)
9508 WRAP_ENV(neon_unarrow_sat32)
9509
9510 static ArithOneOp * const f_scalar_sqxtun[] = {
9511 gen_neon_unarrow_sat8,
9512 gen_neon_unarrow_sat16,
9513 gen_neon_unarrow_sat32,
9514 };
9515 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun)
9516
9517 WRAP_ENV(neon_narrow_sat_s8)
9518 WRAP_ENV(neon_narrow_sat_s16)
9519 WRAP_ENV(neon_narrow_sat_s32)
9520
9521 static ArithOneOp * const f_scalar_sqxtn[] = {
9522 gen_neon_narrow_sat_s8,
9523 gen_neon_narrow_sat_s16,
9524 gen_neon_narrow_sat_s32,
9525 };
9526 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn)
9527
9528 WRAP_ENV(neon_narrow_sat_u8)
9529 WRAP_ENV(neon_narrow_sat_u16)
9530 WRAP_ENV(neon_narrow_sat_u32)
9531
9532 static ArithOneOp * const f_scalar_uqxtn[] = {
9533 gen_neon_narrow_sat_u8,
9534 gen_neon_narrow_sat_u16,
9535 gen_neon_narrow_sat_u32,
9536 };
9537 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn)
9538
9539 static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a)
9540 {
9541 if (fp_access_check(s)) {
9542 /*
9543 * 64 bit to 32 bit float conversion
9544 * with von Neumann rounding (round to odd)
9545 */
9546 TCGv_i64 src = read_fp_dreg(s, a->rn);
9547 TCGv_i32 dst = tcg_temp_new_i32();
9548 gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64));
9549 write_fp_sreg_merging(s, a->rd, a->rd, dst);
9550 }
9551 return true;
9552 }
9553
9554 #undef WRAP_ENV
9555
9556 static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9557 {
9558 if (!a->q && a->esz == MO_64) {
9559 return false;
9560 }
9561 if (fp_access_check(s)) {
9562 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9563 }
9564 return true;
9565 }
9566
9567 TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs)
9568 TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg)
9569 TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not)
9570 TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt)
9571 TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit)
9572 TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0)
9573 TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0)
9574 TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0)
9575 TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0)
9576 TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0)
9577 TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16)
9578 TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32)
9579 TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe)
9580 TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte)
9581
9582 static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9583 {
9584 if (a->esz == MO_64) {
9585 return false;
9586 }
9587 if (fp_access_check(s)) {
9588 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9589 }
9590 return true;
9591 }
9592
9593 TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
9594 TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
9595 TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
9596 TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
9597 TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
9598 TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
9599 TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)
9600
9601 static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a,
9602 ArithOneOp * const fn[3])
9603 {
9604 if (a->esz == MO_64) {
9605 return false;
9606 }
9607 if (fp_access_check(s)) {
9608 TCGv_i64 t0 = tcg_temp_new_i64();
9609 TCGv_i64 t1 = tcg_temp_new_i64();
9610
9611 read_vec_element(s, t0, a->rn, 0, MO_64);
9612 read_vec_element(s, t1, a->rn, 1, MO_64);
9613 fn[a->esz](t0, t0);
9614 fn[a->esz](t1, t1);
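/*
 * With Q set this is the narrow-to-high ("2") form: the results land in
 * the upper 64 bits of Vd and the lower half is preserved.  Without Q
 * the lower half is written and the rest of the register is cleared.
 */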
9615 write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32);
9616 write_vec_element(s, t1, a->rd, a->q ? 3 : 1, MO_32);
9617 clear_vec_high(s, a->q, a->rd);
9618 }
9619 return true;
9620 }
9621
9622 static ArithOneOp * const f_scalar_xtn[] = {
9623 gen_helper_neon_narrow_u8,
9624 gen_helper_neon_narrow_u16,
9625 tcg_gen_ext32u_i64,
9626 };
9627 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn)
9628 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun)
9629 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn)
9630 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn)
9631
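/*
 * Narrow the two single-precision values in n to two half-precision
 * values packed into the low 32 bits of d.
 */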
9632 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9633 {
9634 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9635 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9636 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9637 TCGv_i32 ahp = get_ahp_flag();
9638
9639 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n);
9640 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9641 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9642 tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16);
9643 tcg_gen_extu_i32_i64(d, tcg_lo);
9644 }
9645
9646 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n)
9647 {
9648 TCGv_i32 tmp = tcg_temp_new_i32();
9649 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9650
9651 gen_helper_vfp_fcvtsd(tmp, n, fpst);
9652 tcg_gen_extu_i32_i64(d, tmp);
9653 }
9654
9655 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
9656 {
9657 /*
9658 * 64 bit to 32 bit float conversion
9659 * with von Neumann rounding (round to odd)
9660 */
9661 TCGv_i32 tmp = tcg_temp_new_i32();
9662 gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
9663 tcg_gen_extu_i32_i64(d, tmp);
9664 }
9665
9666 static ArithOneOp * const f_vector_fcvtn[] = {
9667 NULL,
9668 gen_fcvtn_hs,
9669 gen_fcvtn_sd,
9670 };
9671 static ArithOneOp * const f_scalar_fcvtxn[] = {
9672 NULL,
9673 NULL,
9674 gen_fcvtxn_sd,
9675 };
9676 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn)
9677 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn)
9678
9679 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9680 {
9681 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9682 TCGv_i32 tmp = tcg_temp_new_i32();
9683 gen_helper_bfcvt_pair(tmp, n, fpst);
9684 tcg_gen_extu_i32_i64(d, tmp);
9685 }
9686
9687 static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n)
9688 {
9689 TCGv_ptr fpst = fpstatus_ptr(FPST_AH);
9690 TCGv_i32 tmp = tcg_temp_new_i32();
9691 gen_helper_bfcvt_pair(tmp, n, fpst);
9692 tcg_gen_extu_i32_i64(d, tmp);
9693 }
9694
9695 static ArithOneOp * const f_vector_bfcvtn[2][3] = {
9696 {
9697 NULL,
9698 gen_bfcvtn_hs,
9699 NULL,
9700 }, {
9701 NULL,
9702 gen_bfcvtn_ah_hs,
9703 NULL,
9704 }
9705 };
9706 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a,
9707 f_vector_bfcvtn[s->fpcr_ah])
9708
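/*
 * SHLL/SHLL2: widen each source element to double width, then shift it
 * left by the original element size (8 << esz bits).  Because the
 * widened upper halves are zero, a single 64-bit shift per pass does
 * not leak bits across lanes.
 */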
9709 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
9710 {
9711 static NeonGenWidenFn * const widenfns[3] = {
9712 gen_helper_neon_widen_u8,
9713 gen_helper_neon_widen_u16,
9714 tcg_gen_extu_i32_i64,
9715 };
9716 NeonGenWidenFn *widenfn;
9717 TCGv_i64 tcg_res[2];
9718 TCGv_i32 tcg_op;
9719 int part, pass;
9720
9721 if (a->esz == MO_64) {
9722 return false;
9723 }
9724 if (!fp_access_check(s)) {
9725 return true;
9726 }
9727
9728 tcg_op = tcg_temp_new_i32();
9729 widenfn = widenfns[a->esz];
9730 part = a->q ? 2 : 0;
9731
9732 for (pass = 0; pass < 2; pass++) {
9733 read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32);
9734 tcg_res[pass] = tcg_temp_new_i64();
9735 widenfn(tcg_res[pass], tcg_op);
9736 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz);
9737 }
9738
9739 for (pass = 0; pass < 2; pass++) {
9740 write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
9741 }
9742 return true;
9743 }
9744
9745 static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9746 {
9747 int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9748
9749 if (check <= 0) {
9750 return check == 0;
9751 }
9752
9753 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9754 return true;
9755 }
9756
9757 TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs)
9758 TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg)
9759
9760 static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
9761 const FPScalar1 *f, int rmode)
9762 {
9763 TCGv_i32 tcg_rmode = NULL;
9764 TCGv_ptr fpst;
9765 int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9766
9767 if (check <= 0) {
9768 return check == 0;
9769 }
9770
9771 fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9772 if (rmode >= 0) {
9773 tcg_rmode = gen_set_rmode(rmode, fpst);
9774 }
9775
9776 if (a->esz == MO_64) {
9777 TCGv_i64 t64 = tcg_temp_new_i64();
9778
9779 for (int pass = 0; pass < 2; ++pass) {
9780 read_vec_element(s, t64, a->rn, pass, MO_64);
9781 f->gen_d(t64, t64, fpst);
9782 write_vec_element(s, t64, a->rd, pass, MO_64);
9783 }
9784 } else {
9785 TCGv_i32 t32 = tcg_temp_new_i32();
9786 void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr)
9787 = (a->esz == MO_16 ? f->gen_h : f->gen_s);
9788
9789 for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) {
9790 read_vec_element_i32(s, t32, a->rn, pass, a->esz);
9791 gen(t32, t32, fpst);
9792 write_vec_element_i32(s, t32, a->rd, pass, a->esz);
9793 }
9794 }
9795 clear_vec_high(s, a->q, a->rd);
9796
9797 if (rmode >= 0) {
9798 gen_restore_rmode(tcg_rmode, fpst);
9799 }
9800 return true;
9801 }
9802
9803 TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1)
9804
9805 TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
9806 TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF)
9807 TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF)
9808 TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO)
9809 TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
9810 TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1)
9811 TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1)
9812
9813 TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a,
9814 &f_scalar_frint32, FPROUNDING_ZERO)
9815 TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1)
9816 TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
9817 &f_scalar_frint64, FPROUNDING_ZERO)
9818 TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)
9819
9820 static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz,
9821 bool is_q, int rd, int rn, int data,
9822 gen_helper_gvec_2_ptr * const fns[3],
9823 ARMFPStatusFlavour fpsttype)
9824 {
9825 int check = fp_access_check_vector_hsd(s, is_q, esz);
9826 TCGv_ptr fpst;
9827
9828 if (check <= 0) {
9829 return check == 0;
9830 }
9831
9832 fpst = fpstatus_ptr(fpsttype);
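/* The fns[] tables hold the H, S and D helpers in that order, hence esz - 1. */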
9833 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
9834 vec_full_reg_offset(s, rn), fpst,
9835 is_q ? 16 : 8, vec_full_reg_size(s),
9836 data, fns[esz - 1]);
9837 return true;
9838 }
9839
9840 static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
9841 int rd, int rn, int data,
9842 gen_helper_gvec_2_ptr * const fns[3])
9843 {
9844 return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
9845 esz == MO_16 ? FPST_A64_F16 :
9846 FPST_A64);
9847 }
9848
9849 static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q,
9850 int rd, int rn, int data,
9851 gen_helper_gvec_2_ptr * const fns[3])
9852 {
9853 return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data,
9854 fns, select_ah_fpst(s, esz));
9855 }
9856
9857 static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
9858 gen_helper_gvec_vcvt_sh,
9859 gen_helper_gvec_vcvt_sf,
9860 gen_helper_gvec_vcvt_sd,
9861 };
9862 TRANS(SCVTF_vi, do_gvec_op2_fpst,
9863 a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v)
9864 TRANS(SCVTF_vf, do_gvec_op2_fpst,
9865 a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v)
9866
9867 static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = {
9868 gen_helper_gvec_vcvt_uh,
9869 gen_helper_gvec_vcvt_uf,
9870 gen_helper_gvec_vcvt_ud,
9871 };
9872 TRANS(UCVTF_vi, do_gvec_op2_fpst,
9873 a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v)
9874 TRANS(UCVTF_vf, do_gvec_op2_fpst,
9875 a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v)
9876
9877 static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = {
9878 gen_helper_gvec_vcvt_rz_hs,
9879 gen_helper_gvec_vcvt_rz_fs,
9880 gen_helper_gvec_vcvt_rz_ds,
9881 };
9882 TRANS(FCVTZS_vf, do_gvec_op2_fpst,
9883 a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf)
9884
9885 static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = {
9886 gen_helper_gvec_vcvt_rz_hu,
9887 gen_helper_gvec_vcvt_rz_fu,
9888 gen_helper_gvec_vcvt_rz_du,
9889 };
9890 TRANS(FCVTZU_vf, do_gvec_op2_fpst,
9891 a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf)
9892
9893 static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = {
9894 gen_helper_gvec_vcvt_rm_sh,
9895 gen_helper_gvec_vcvt_rm_ss,
9896 gen_helper_gvec_vcvt_rm_sd,
9897 };
9898
9899 static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = {
9900 gen_helper_gvec_vcvt_rm_uh,
9901 gen_helper_gvec_vcvt_rm_us,
9902 gen_helper_gvec_vcvt_rm_ud,
9903 };
9904
9905 TRANS(FCVTNS_vi, do_gvec_op2_fpst,
9906 a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi)
9907 TRANS(FCVTNU_vi, do_gvec_op2_fpst,
9908 a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi)
9909 TRANS(FCVTPS_vi, do_gvec_op2_fpst,
9910 a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi)
9911 TRANS(FCVTPU_vi, do_gvec_op2_fpst,
9912 a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi)
9913 TRANS(FCVTMS_vi, do_gvec_op2_fpst,
9914 a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi)
9915 TRANS(FCVTMU_vi, do_gvec_op2_fpst,
9916 a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi)
9917 TRANS(FCVTZS_vi, do_gvec_op2_fpst,
9918 a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi)
9919 TRANS(FCVTZU_vi, do_gvec_op2_fpst,
9920 a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi)
9921 TRANS(FCVTAS_vi, do_gvec_op2_fpst,
9922 a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi)
9923 TRANS(FCVTAU_vi, do_gvec_op2_fpst,
9924 a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi)
9925
9926 static gen_helper_gvec_2_ptr * const f_fceq0[] = {
9927 gen_helper_gvec_fceq0_h,
9928 gen_helper_gvec_fceq0_s,
9929 gen_helper_gvec_fceq0_d,
9930 };
9931 TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0)
9932
9933 static gen_helper_gvec_2_ptr * const f_fcgt0[] = {
9934 gen_helper_gvec_fcgt0_h,
9935 gen_helper_gvec_fcgt0_s,
9936 gen_helper_gvec_fcgt0_d,
9937 };
9938 TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0)
9939
9940 static gen_helper_gvec_2_ptr * const f_fcge0[] = {
9941 gen_helper_gvec_fcge0_h,
9942 gen_helper_gvec_fcge0_s,
9943 gen_helper_gvec_fcge0_d,
9944 };
9945 TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0)
9946
9947 static gen_helper_gvec_2_ptr * const f_fclt0[] = {
9948 gen_helper_gvec_fclt0_h,
9949 gen_helper_gvec_fclt0_s,
9950 gen_helper_gvec_fclt0_d,
9951 };
9952 TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0)
9953
9954 static gen_helper_gvec_2_ptr * const f_fcle0[] = {
9955 gen_helper_gvec_fcle0_h,
9956 gen_helper_gvec_fcle0_s,
9957 gen_helper_gvec_fcle0_d,
9958 };
9959 TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0)
9960
9961 static gen_helper_gvec_2_ptr * const f_frecpe[] = {
9962 gen_helper_gvec_frecpe_h,
9963 gen_helper_gvec_frecpe_s,
9964 gen_helper_gvec_frecpe_d,
9965 };
9966 static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = {
9967 gen_helper_gvec_frecpe_h,
9968 gen_helper_gvec_frecpe_rpres_s,
9969 gen_helper_gvec_frecpe_d,
9970 };
9971 TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
9972 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe)
9973
9974 static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
9975 gen_helper_gvec_frsqrte_h,
9976 gen_helper_gvec_frsqrte_s,
9977 gen_helper_gvec_frsqrte_d,
9978 };
9979 static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = {
9980 gen_helper_gvec_frsqrte_h,
9981 gen_helper_gvec_frsqrte_rpres_s,
9982 gen_helper_gvec_frsqrte_d,
9983 };
9984 TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
9985 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte)
9986
9987 static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
9988 {
9989 /* Handle 2-reg-misc ops which are widening (so each size element
9990 * in the source becomes a 2*size element in the destination).
9991 * The only instruction like this is FCVTL.
9992 */
9993 int pass;
9994 TCGv_ptr fpst;
9995
9996 if (!fp_access_check(s)) {
9997 return true;
9998 }
9999
10000 if (a->esz == MO_64) {
10001 /* 32 -> 64 bit fp conversion */
10002 TCGv_i64 tcg_res[2];
10003 TCGv_i32 tcg_op = tcg_temp_new_i32();
10004 int srcelt = a->q ? 2 : 0;
10005
10006 fpst = fpstatus_ptr(FPST_A64);
10007
10008 for (pass = 0; pass < 2; pass++) {
10009 tcg_res[pass] = tcg_temp_new_i64();
10010 read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
10011 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst);
10012 }
10013 for (pass = 0; pass < 2; pass++) {
10014 write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
10015 }
10016 } else {
10017 /* 16 -> 32 bit fp conversion */
10018 int srcelt = a->q ? 4 : 0;
10019 TCGv_i32 tcg_res[4];
10020 TCGv_i32 ahp = get_ahp_flag();
10021
10022 fpst = fpstatus_ptr(FPST_A64_F16);
10023
10024 for (pass = 0; pass < 4; pass++) {
10025 tcg_res[pass] = tcg_temp_new_i32();
10026 read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
10027 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
10028 fpst, ahp);
10029 }
10030 for (pass = 0; pass < 4; pass++) {
10031 write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32);
10032 }
10033 }
10034 clear_vec_high(s, true, a->rd);
10035 return true;
10036 }
10037
10038 static bool trans_OK(DisasContext *s, arg_OK *a)
10039 {
10040 return true;
10041 }
10042
10043 static bool trans_FAIL(DisasContext *s, arg_OK *a)
10044 {
10045 s->is_nonstreaming = true;
10046 return true;
10047 }
10048
10049 /**
10050 * btype_destination_ok:
10051 * @insn: The instruction at the branch destination
10052 * @bt: SCTLR_ELx.BT
10053 * @btype: PSTATE.BTYPE, and is non-zero
10054 *
10055 * On a guarded page, there are a limited number of insns
10056 * that may be present at the branch target:
10057 * - branch target identifiers,
10058 * - paciasp, pacibsp,
10059 * - BRK insn
10060 * - HLT insn
10061 * Anything else causes a Branch Target Exception.
10062 *
10063 * Return true if the branch is compatible, false to raise BTITRAP.
10064 */
10065 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
10066 {
10067 if ((insn & 0xfffff01fu) == 0xd503201fu) {
10068 /* HINT space */
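/* Bits [11:5] of a HINT instruction form the CRm:op2 hint immediate. */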
10069 switch (extract32(insn, 5, 7)) {
10070 case 0b011001: /* PACIASP */
10071 case 0b011011: /* PACIBSP */
10072 /*
10073 * If SCTLR_ELx.BT, then PACI*SP are not compatible
10074 * with btype == 3. Otherwise all btype are ok.
10075 */
10076 return !bt || btype != 3;
10077 case 0b100000: /* BTI */
10078 /* Not compatible with any btype. */
10079 return false;
10080 case 0b100010: /* BTI c */
10081 /* Not compatible with btype == 3 */
10082 return btype != 3;
10083 case 0b100100: /* BTI j */
10084 /* Not compatible with btype == 2 */
10085 return btype != 2;
10086 case 0b100110: /* BTI jc */
10087 /* Compatible with any btype. */
10088 return true;
10089 }
10090 } else {
10091 switch (insn & 0xffe0001fu) {
10092 case 0xd4200000u: /* BRK */
10093 case 0xd4400000u: /* HLT */
10094 /* Give priority to the breakpoint exception. */
10095 return true;
10096 }
10097 }
10098 return false;
10099 }
10100
10101 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
10102 CPUState *cpu)
10103 {
10104 DisasContext *dc = container_of(dcbase, DisasContext, base);
10105 CPUARMState *env = cpu_env(cpu);
10106 ARMCPU *arm_cpu = env_archcpu(env);
10107 CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
10108 int bound, core_mmu_idx;
10109
10110 dc->isar = &arm_cpu->isar;
10111 dc->condjmp = 0;
10112 dc->pc_save = dc->base.pc_first;
10113 dc->aarch64 = true;
10114 dc->thumb = false;
10115 dc->sctlr_b = 0;
10116 dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
10117 dc->condexec_mask = 0;
10118 dc->condexec_cond = 0;
10119 core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
10120 dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
10121 dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
10122 dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
10123 dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
10124 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10125 #if !defined(CONFIG_USER_ONLY)
10126 dc->user = (dc->current_el == 0);
10127 #endif
10128 dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
10129 dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
10130 dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
10131 dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
10132 dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
10133 dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
10134 dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
10135 dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
10136 dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
10137 dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
10138 dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
10139 dc->bt = EX_TBFLAG_A64(tb_flags, BT);
10140 dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
10141 dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
10142 dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
10143 dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
10144 dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
10145 dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
10146 dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
10147 dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
10148 dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
10149 dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
10150 dc->nv = EX_TBFLAG_A64(tb_flags, NV);
10151 dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
10152 dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
10153 dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
10154 dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
10155 dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
10156 dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
10157 dc->vec_len = 0;
10158 dc->vec_stride = 0;
10159 dc->cp_regs = arm_cpu->cp_regs;
10160 dc->features = env->features;
10161 dc->dcz_blocksize = arm_cpu->dcz_blocksize;
10162 dc->gm_blocksize = arm_cpu->gm_blocksize;
10163
10164 #ifdef CONFIG_USER_ONLY
10165 /* In sve_probe_page, we assume TBI is enabled. */
10166 tcg_debug_assert(dc->tbid & 1);
10167 #endif
10168
10169 dc->lse2 = dc_isar_feature(aa64_lse2, dc);
10170
10171 /* Single step state. The code-generation logic here is:
10172 * SS_ACTIVE == 0:
10173 * generate code with no special handling for single-stepping (except
10174 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10175 * this happens anyway because those changes are all system register or
10176 * PSTATE writes).
10177 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10178 * emit code for one insn
10179 * emit code to clear PSTATE.SS
10180 * emit code to generate software step exception for completed step
10181 * end TB (as usual for having generated an exception)
10182 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10183 * emit code to generate a software step exception
10184 * end the TB
10185 */
10186 dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
10187 dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
10188 dc->is_ldex = false;
10189
10190 /* Bound the number of insns to execute to those left on the page. */
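/* -(pc_first | TARGET_PAGE_MASK) is the number of bytes left on this page. */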
10191 bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
10192
10193 /* If architectural single step active, limit to 1. */
10194 if (dc->ss_active) {
10195 bound = 1;
10196 }
10197 dc->base.max_insns = MIN(dc->base.max_insns, bound);
10198 }
10199
10200 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
10201 {
10202 }
10203
10204 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
10205 {
10206 DisasContext *dc = container_of(dcbase, DisasContext, base);
10207 target_ulong pc_arg = dc->base.pc_next;
10208
10209 if (tb_cflags(dcbase->tb) & CF_PCREL) {
10210 pc_arg &= ~TARGET_PAGE_MASK;
10211 }
10212 tcg_gen_insn_start(pc_arg, 0, 0);
10213 dc->insn_start_updated = false;
10214 }
10215
10216 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10217 {
10218 DisasContext *s = container_of(dcbase, DisasContext, base);
10219 CPUARMState *env = cpu_env(cpu);
10220 uint64_t pc = s->base.pc_next;
10221 uint32_t insn;
10222
10223 /* Singlestep exceptions have the highest priority. */
10224 if (s->ss_active && !s->pstate_ss) {
10225 /* Singlestep state is Active-pending.
10226 * If we're in this state at the start of a TB then either
10227 * a) we just took an exception to an EL which is being debugged
10228 * and this is the first insn in the exception handler
10229 * b) debug exceptions were masked and we just unmasked them
10230 * without changing EL (eg by clearing PSTATE.D)
10231 * In either case we're going to take a swstep exception in the
10232 * "did not step an insn" case, and so the syndrome ISV and EX
10233 * bits should be zero.
10234 */
10235 assert(s->base.num_insns == 1);
10236 gen_swstep_exception(s, 0, 0);
10237 s->base.is_jmp = DISAS_NORETURN;
10238 s->base.pc_next = pc + 4;
10239 return;
10240 }
10241
10242 if (pc & 3) {
10243 /*
10244 * PC alignment fault. This has priority over the instruction abort
10245 * that we would receive from a translation fault via arm_ldl_code.
10246 * This should only be possible after an indirect branch, at the
10247 * start of the TB.
10248 */
10249 assert(s->base.num_insns == 1);
10250 gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
10251 s->base.is_jmp = DISAS_NORETURN;
10252 s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
10253 return;
10254 }
10255
10256 s->pc_curr = pc;
10257 insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
10258 s->insn = insn;
10259 s->base.pc_next = pc + 4;
10260
10261 s->fp_access_checked = 0;
10262 s->sve_access_checked = 0;
10263
10264 if (s->pstate_il) {
10265 /*
10266 * Illegal execution state. This has priority over BTI
10267 * exceptions, but comes after instruction abort exceptions.
10268 */
10269 gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
10270 return;
10271 }
10272
10273 if (dc_isar_feature(aa64_bti, s)) {
10274 if (s->base.num_insns == 1) {
10275 /* First insn can have btype set to non-zero. */
10276 tcg_debug_assert(s->btype >= 0);
10277
10278 /*
10279 * Note that the Branch Target Exception has fairly high
10280 * priority -- below debugging exceptions but above most
10281 * everything else. This allows us to handle this now
10282 * instead of waiting until the insn is otherwise decoded.
10283 *
10284 * We can check all but the guarded page check here;
10285 * defer the latter to a helper.
10286 */
10287 if (s->btype != 0
10288 && !btype_destination_ok(insn, s->bt, s->btype)) {
10289 gen_helper_guarded_page_check(tcg_env);
10290 }
10291 } else {
10292 /* Not the first insn: btype must be 0. */
10293 tcg_debug_assert(s->btype == 0);
10294 }
10295 }
10296
10297 s->is_nonstreaming = false;
10298 if (s->sme_trap_nonstreaming) {
10299 disas_sme_fa64(s, insn);
10300 }
10301
10302 if (!disas_a64(s, insn) &&
10303 !disas_sme(s, insn) &&
10304 !disas_sve(s, insn)) {
10305 unallocated_encoding(s);
10306 }
10307
10308 /*
10309 * After execution of most insns, btype is reset to 0.
10310 * Note that we set btype == -1 when the insn sets btype.
10311 */
10312 if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
10313 reset_btype(s);
10314 }
10315 }
10316
10317 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
10318 {
10319 DisasContext *dc = container_of(dcbase, DisasContext, base);
10320
10321 if (unlikely(dc->ss_active)) {
10322 /* Note that this means single stepping WFI doesn't halt the CPU.
10323 * For conditional branch insns this is harmless unreachable code as
10324 * gen_goto_tb() has already handled emitting the debug exception
10325 * (and thus a tb-jump is not possible when singlestepping).
10326 */
10327 switch (dc->base.is_jmp) {
10328 default:
10329 gen_a64_update_pc(dc, 4);
10330 /* fall through */
10331 case DISAS_EXIT:
10332 case DISAS_JUMP:
10333 gen_step_complete_exception(dc);
10334 break;
10335 case DISAS_NORETURN:
10336 break;
10337 }
10338 } else {
10339 switch (dc->base.is_jmp) {
10340 case DISAS_NEXT:
10341 case DISAS_TOO_MANY:
10342 gen_goto_tb(dc, 1, 4);
10343 break;
10344 default:
10345 case DISAS_UPDATE_EXIT:
10346 gen_a64_update_pc(dc, 4);
10347 /* fall through */
10348 case DISAS_EXIT:
10349 tcg_gen_exit_tb(NULL, 0);
10350 break;
10351 case DISAS_UPDATE_NOCHAIN:
10352 gen_a64_update_pc(dc, 4);
10353 /* fall through */
10354 case DISAS_JUMP:
10355 tcg_gen_lookup_and_goto_ptr();
10356 break;
10357 case DISAS_NORETURN:
10358 case DISAS_SWI:
10359 break;
10360 case DISAS_WFE:
10361 gen_a64_update_pc(dc, 4);
10362 gen_helper_wfe(tcg_env);
10363 break;
10364 case DISAS_YIELD:
10365 gen_a64_update_pc(dc, 4);
10366 gen_helper_yield(tcg_env);
10367 break;
10368 case DISAS_WFI:
10369 /*
10370 * This is a special case because we don't want to just halt
10371 * the CPU if trying to debug across a WFI.
10372 */
10373 gen_a64_update_pc(dc, 4);
10374 gen_helper_wfi(tcg_env, tcg_constant_i32(4));
10375 /*
10376 * The helper doesn't necessarily throw an exception, but we
10377 * must go back to the main loop to check for interrupts anyway.
10378 */
10379 tcg_gen_exit_tb(NULL, 0);
10380 break;
10381 }
10382 }
10383 }
10384
10385 const TranslatorOps aarch64_translator_ops = {
10386 .init_disas_context = aarch64_tr_init_disas_context,
10387 .tb_start = aarch64_tr_tb_start,
10388 .insn_start = aarch64_tr_insn_start,
10389 .translate_insn = aarch64_tr_translate_insn,
10390 .tb_stop = aarch64_tr_tb_stop,
10391 };
10392