1 /*
2 * AArch64 translation
3 *
4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20
21 #include "exec/exec-all.h"
22 #include "translate.h"
23 #include "translate-a64.h"
24 #include "qemu/log.h"
25 #include "arm_ldst.h"
26 #include "semihosting/semihost.h"
27 #include "cpregs.h"
28
29 static TCGv_i64 cpu_X[32];
30 static TCGv_i64 cpu_pc;
31
32 /* Load/store exclusive handling */
33 static TCGv_i64 cpu_exclusive_high;
34
35 static const char *regnames[] = {
36 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
37 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
38 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
39 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
40 };
41
42 enum a64_shift_type {
43 A64_SHIFT_TYPE_LSL = 0,
44 A64_SHIFT_TYPE_LSR = 1,
45 A64_SHIFT_TYPE_ASR = 2,
46 A64_SHIFT_TYPE_ROR = 3
47 };
48
49 /*
50 * Helpers for extracting complex instruction fields
51 */
52
53 /*
54 * For load/store with an unsigned 12 bit immediate scaled by the element
55 * size. The input has the immediate field in bits [14:3] and the element
56 * size in [2:0].
57 */
58 static int uimm_scaled(DisasContext *s, int x)
59 {
60 unsigned imm = x >> 3;
61 unsigned scale = extract32(x, 0, 3);
62 return imm << scale;
63 }
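
/*
 * Worked example of the decode above: x == 0x2a encodes imm == (x >> 3) == 5
 * and scale == (x & 7) == 2, so the returned byte offset is 5 << 2 == 20.
 */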
64
65 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
66 static int scale_by_log2_tag_granule(DisasContext *s, int x)
67 {
68 return x << LOG2_TAG_GRANULE;
69 }
70
71 /*
72 * Include the generated decoders.
73 */
74
75 #include "decode-sme-fa64.c.inc"
76 #include "decode-a64.c.inc"
77
78 /* initialize TCG globals. */
79 void a64_translate_init(void)
80 {
81 int i;
82
83 cpu_pc = tcg_global_mem_new_i64(tcg_env,
84 offsetof(CPUARMState, pc),
85 "pc");
86 for (i = 0; i < 32; i++) {
87 cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
88 offsetof(CPUARMState, xregs[i]),
89 regnames[i]);
90 }
91
92 cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
93 offsetof(CPUARMState, exclusive_high), "exclusive_high");
94 }
95
96 /*
97 * Return the core mmu_idx to use for A64 load/store insns which
98 * have a "unprivileged load/store" variant. Those insns access
99 * EL0 if executed from an EL which has control over EL0 (usually
100 * EL1) but behave like normal loads and stores if executed from
101 * elsewhere (eg EL3).
102 *
103 * @unpriv : true for the unprivileged encoding; false for the
104 * normal encoding (in which case we will return the same
105 * thing as get_mem_index()).
106 */
107 static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
108 {
109 /*
110 * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
111 * which is the usual mmu_idx for this cpu state.
112 */
113 ARMMMUIdx useridx = s->mmu_idx;
114
115 if (unpriv && s->unpriv) {
116 /*
117 * We have pre-computed the condition for AccType_UNPRIV.
118 * Therefore we should never get here with a mmu_idx for
119 * which we do not know the corresponding user mmu_idx.
120 */
121 switch (useridx) {
122 case ARMMMUIdx_E10_1:
123 case ARMMMUIdx_E10_1_PAN:
124 useridx = ARMMMUIdx_E10_0;
125 break;
126 case ARMMMUIdx_E20_2:
127 case ARMMMUIdx_E20_2_PAN:
128 useridx = ARMMMUIdx_E20_0;
129 break;
130 default:
131 g_assert_not_reached();
132 }
133 }
134 return arm_to_core_mmu_idx(useridx);
135 }
136
137 static void set_btype_raw(int val)
138 {
139 tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
140 offsetof(CPUARMState, btype));
141 }
142
143 static void set_btype(DisasContext *s, int val)
144 {
145 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
146 tcg_debug_assert(val >= 1 && val <= 3);
147 set_btype_raw(val);
148 s->btype = -1;
149 }
150
151 static void reset_btype(DisasContext *s)
152 {
153 if (s->btype != 0) {
154 set_btype_raw(0);
155 s->btype = 0;
156 }
157 }
158
159 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
160 {
161 assert(s->pc_save != -1);
162 if (tb_cflags(s->base.tb) & CF_PCREL) {
163 tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
164 } else {
165 tcg_gen_movi_i64(dest, s->pc_curr + diff);
166 }
167 }
168
169 void gen_a64_update_pc(DisasContext *s, target_long diff)
170 {
171 gen_pc_plus_diff(s, cpu_pc, diff);
172 s->pc_save = s->pc_curr + diff;
173 }
174
175 /*
176 * Handle Top Byte Ignore (TBI) bits.
177 *
178 * If address tagging is enabled via the TCR TBI bits:
179 * + for EL2 and EL3 there is only one TBI bit, and if it is set
180 * then the address is zero-extended, clearing bits [63:56]
181 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
182 * and TBI1 controls addresses with bit 55 == 1.
183 * If the appropriate TBI bit is set for the address then
184 * the address is sign-extended from bit 55 into bits [63:56]
185 *
186 * Here we have concatenated TBI{1,0} into tbi.
187 */
188 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
189 TCGv_i64 src, int tbi)
190 {
191 if (tbi == 0) {
192 /* Load unmodified address */
193 tcg_gen_mov_i64(dst, src);
194 } else if (!regime_has_2_ranges(s->mmu_idx)) {
195 /* Force tag byte to all zero */
196 tcg_gen_extract_i64(dst, src, 0, 56);
197 } else {
198 /* Sign-extend from bit 55. */
199 tcg_gen_sextract_i64(dst, src, 0, 56);
200
201 switch (tbi) {
202 case 1:
203 /* tbi0 but !tbi1: only use the extension if positive */
204 tcg_gen_and_i64(dst, dst, src);
205 break;
206 case 2:
207 /* !tbi0 but tbi1: only use the extension if negative */
208 tcg_gen_or_i64(dst, dst, src);
209 break;
210 case 3:
211 /* tbi0 and tbi1: always use the extension */
212 break;
213 default:
214 g_assert_not_reached();
215 }
216 }
217 }
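
/*
 * Note on the two mixed cases above: after the sextract, bits [63:56] of
 * dst are copies of bit 55.  AND-ing with src keeps src's original top byte
 * only when bit 55 is set, so the extension takes effect only for "positive"
 * addresses; OR-ing is the mirror image, applying it only when bit 55 is set.
 */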
218
219 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
220 {
221 /*
222 * If address tagging is enabled for instructions via the TCR TBI bits,
223 * then loading an address into the PC will clear out any tag.
224 */
225 gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
226 s->pc_save = -1;
227 }
228
229 /*
230 * Handle MTE and/or TBI.
231 *
232 * For TBI, ideally, we would do nothing. Proper behaviour on fault is
233 * for the tag to be present in the FAR_ELx register. But for user-only
234 * mode we do not have a TLB with which to implement this, so we must
235 * remove the top byte now.
236 *
237 * Always return a fresh temporary that we can increment independently
238 * of the write-back address.
239 */
240
241 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
242 {
243 TCGv_i64 clean = tcg_temp_new_i64();
244 #ifdef CONFIG_USER_ONLY
245 gen_top_byte_ignore(s, clean, addr, s->tbid);
246 #else
247 tcg_gen_mov_i64(clean, addr);
248 #endif
249 return clean;
250 }
251
252 /* Insert a zero tag into src, with the result at dst. */
253 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
254 {
255 tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
256 }
257
258 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
259 MMUAccessType acc, int log2_size)
260 {
261 gen_helper_probe_access(tcg_env, ptr,
262 tcg_constant_i32(acc),
263 tcg_constant_i32(get_mem_index(s)),
264 tcg_constant_i32(1 << log2_size));
265 }
266
267 /*
268 * For MTE, check a single logical or atomic access. This probes a single
269 * address, the exact one specified. The size and alignment of the access
270 * is not relevant to MTE, per se, but watchpoints do require the size,
271 * and we want to recognize those before making any other changes to state.
272 */
273 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
274 bool is_write, bool tag_checked,
275 MemOp memop, bool is_unpriv,
276 int core_idx)
277 {
278 if (tag_checked && s->mte_active[is_unpriv]) {
279 TCGv_i64 ret;
280 int desc = 0;
281
282 desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
283 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
284 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
285 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
286 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
287 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);
288
289 ret = tcg_temp_new_i64();
290 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
291
292 return ret;
293 }
294 return clean_data_tbi(s, addr);
295 }
296
297 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
298 bool tag_checked, MemOp memop)
299 {
300 return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
301 false, get_mem_index(s));
302 }
303
304 /*
305 * For MTE, check multiple logical sequential accesses.
306 */
307 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
308 bool tag_checked, int total_size, MemOp single_mop)
309 {
310 if (tag_checked && s->mte_active[0]) {
311 TCGv_i64 ret;
312 int desc = 0;
313
314 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
315 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
316 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
317 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
318 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
319 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
320
321 ret = tcg_temp_new_i64();
322 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);
323
324 return ret;
325 }
326 return clean_data_tbi(s, addr);
327 }
328
329 /*
330 * Generate the special alignment check that applies to AccType_ATOMIC
331 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
332 * naturally aligned, but it must not cross a 16-byte boundary.
333 * See AArch64.CheckAlignment().
334 */
335 static void check_lse2_align(DisasContext *s, int rn, int imm,
336 bool is_write, MemOp mop)
337 {
338 TCGv_i32 tmp;
339 TCGv_i64 addr;
340 TCGLabel *over_label;
341 MMUAccessType type;
342 int mmu_idx;
343
344 tmp = tcg_temp_new_i32();
345 tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
346 tcg_gen_addi_i32(tmp, tmp, imm & 15);
347 tcg_gen_andi_i32(tmp, tmp, 15);
348 tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
349
350 over_label = gen_new_label();
351 tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
352
353 addr = tcg_temp_new_i64();
354 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
355
356 type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
357 mmu_idx = get_mem_index(s);
358 gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
359 tcg_constant_i32(mmu_idx));
360
361 gen_set_label(over_label);
362
363 }
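
/*
 * Illustrative note: at the brcondi above, tmp holds
 * ((Xn + imm) % 16) + access_size, so tmp <= 16 means the access lies
 * entirely within one 16-byte granule; e.g. an 8-byte access at offset 12
 * within the granule gives 20 > 16 and takes the unaligned-access helper.
 */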
364
365 /* Handle the alignment check for AccType_ATOMIC instructions. */
366 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
367 {
368 MemOp size = mop & MO_SIZE;
369
370 if (size == MO_8) {
371 return mop;
372 }
373
374 /*
375 * If size == MO_128, this is a LDXP, and the operation is single-copy
376 * atomic for each doubleword, not the entire quadword; it still must
377 * be quadword aligned.
378 */
379 if (size == MO_128) {
380 return finalize_memop_atom(s, MO_128 | MO_ALIGN,
381 MO_ATOM_IFALIGN_PAIR);
382 }
383 if (dc_isar_feature(aa64_lse2, s)) {
384 check_lse2_align(s, rn, 0, true, mop);
385 } else {
386 mop |= MO_ALIGN;
387 }
388 return finalize_memop(s, mop);
389 }
390
391 /* Handle the alignment check for AccType_ORDERED instructions. */
392 static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
393 bool is_write, MemOp mop)
394 {
395 MemOp size = mop & MO_SIZE;
396
397 if (size == MO_8) {
398 return mop;
399 }
400 if (size == MO_128) {
401 return finalize_memop_atom(s, MO_128 | MO_ALIGN,
402 MO_ATOM_IFALIGN_PAIR);
403 }
404 if (!dc_isar_feature(aa64_lse2, s)) {
405 mop |= MO_ALIGN;
406 } else if (!s->naa) {
407 check_lse2_align(s, rn, imm, is_write, mop);
408 }
409 return finalize_memop(s, mop);
410 }
411
412 typedef struct DisasCompare64 {
413 TCGCond cond;
414 TCGv_i64 value;
415 } DisasCompare64;
416
417 static void a64_test_cc(DisasCompare64 *c64, int cc)
418 {
419 DisasCompare c32;
420
421 arm_test_cc(&c32, cc);
422
423 /*
424 * Sign-extend the 32-bit value so that the GE/LT comparisons work
425 * properly. The NE/EQ comparisons are also fine with this choice.
426 */
427 c64->cond = c32.cond;
428 c64->value = tcg_temp_new_i64();
429 tcg_gen_ext_i32_i64(c64->value, c32.value);
430 }
431
432 static void gen_rebuild_hflags(DisasContext *s)
433 {
434 gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
435 }
436
437 static void gen_exception_internal(int excp)
438 {
439 assert(excp_is_internal(excp));
440 gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
441 }
442
443 static void gen_exception_internal_insn(DisasContext *s, int excp)
444 {
445 gen_a64_update_pc(s, 0);
446 gen_exception_internal(excp);
447 s->base.is_jmp = DISAS_NORETURN;
448 }
449
450 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
451 {
452 gen_a64_update_pc(s, 0);
453 gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
454 s->base.is_jmp = DISAS_NORETURN;
455 }
456
457 static void gen_step_complete_exception(DisasContext *s)
458 {
459 /* We just completed a step of an insn. Move from Active-not-pending
460 * to Active-pending, and then also take the swstep exception.
461 * This corresponds to making the (IMPDEF) choice to prioritize
462 * swstep exceptions over asynchronous exceptions taken to an exception
463 * level where debug is disabled. This choice has the advantage that
464 * we do not need to maintain internal state corresponding to the
465 * ISV/EX syndrome bits between completion of the step and generation
466 * of the exception, and our syndrome information is always correct.
467 */
468 gen_ss_advance(s);
469 gen_swstep_exception(s, 1, s->is_ldex);
470 s->base.is_jmp = DISAS_NORETURN;
471 }
472
473 static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
474 {
475 if (s->ss_active) {
476 return false;
477 }
478 return translator_use_goto_tb(&s->base, dest);
479 }
480
481 static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
482 {
483 if (use_goto_tb(s, s->pc_curr + diff)) {
484 /*
485 * For pcrel, the pc must always be up-to-date on entry to
486 * the linked TB, so that it can use simple additions for all
487 * further adjustments. For !pcrel, the linked TB is compiled
488 * to know its full virtual address, so we can delay the
489 * update to pc to the unlinked path. A long chain of links
490 * can thus avoid many updates to the PC.
491 */
492 if (tb_cflags(s->base.tb) & CF_PCREL) {
493 gen_a64_update_pc(s, diff);
494 tcg_gen_goto_tb(n);
495 } else {
496 tcg_gen_goto_tb(n);
497 gen_a64_update_pc(s, diff);
498 }
499 tcg_gen_exit_tb(s->base.tb, n);
500 s->base.is_jmp = DISAS_NORETURN;
501 } else {
502 gen_a64_update_pc(s, diff);
503 if (s->ss_active) {
504 gen_step_complete_exception(s);
505 } else {
506 tcg_gen_lookup_and_goto_ptr();
507 s->base.is_jmp = DISAS_NORETURN;
508 }
509 }
510 }
511
512 /*
513 * Register access functions
514 *
515 * These functions are used for directly accessing a register in cases where
516 * changes to the final register value are likely to be made. If you
517 * need to use a register for temporary calculation (e.g. index type
518 * operations) use the read_* form.
519 *
520 * B1.2.1 Register mappings
521 *
522 * In instruction register encoding 31 can refer to ZR (zero register) or
523 * the SP (stack pointer) depending on context. In QEMU's case we map SP
524 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
525 * This is the point of the _sp forms.
526 */
527 TCGv_i64 cpu_reg(DisasContext *s, int reg)
528 {
529 if (reg == 31) {
530 TCGv_i64 t = tcg_temp_new_i64();
531 tcg_gen_movi_i64(t, 0);
532 return t;
533 } else {
534 return cpu_X[reg];
535 }
536 }
537
538 /* register access for when 31 == SP */
539 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
540 {
541 return cpu_X[reg];
542 }
543
544 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
545 * representing the register contents. This TCGv is an auto-freed
546 * temporary so it need not be explicitly freed, and may be modified.
547 */
548 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
549 {
550 TCGv_i64 v = tcg_temp_new_i64();
551 if (reg != 31) {
552 if (sf) {
553 tcg_gen_mov_i64(v, cpu_X[reg]);
554 } else {
555 tcg_gen_ext32u_i64(v, cpu_X[reg]);
556 }
557 } else {
558 tcg_gen_movi_i64(v, 0);
559 }
560 return v;
561 }
562
563 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
564 {
565 TCGv_i64 v = tcg_temp_new_i64();
566 if (sf) {
567 tcg_gen_mov_i64(v, cpu_X[reg]);
568 } else {
569 tcg_gen_ext32u_i64(v, cpu_X[reg]);
570 }
571 return v;
572 }
573
574 /* Return the offset into CPUARMState of a slice (from
575 * the least significant end) of FP register Qn (ie
576 * Dn, Sn, Hn or Bn).
577 * (Note that this is not the same mapping as for A32; see cpu.h)
578 */
579 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
580 {
581 return vec_reg_offset(s, regno, 0, size);
582 }
583
584 /* Offset of the high half of the 128 bit vector Qn */
585 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
586 {
587 return vec_reg_offset(s, regno, 1, MO_64);
588 }
589
590 /* Convenience accessors for reading and writing single and double
591 * FP registers. Writing clears the upper parts of the associated
592 * 128 bit vector register, as required by the architecture.
593 * Note that unlike the GP register accessors, the values returned
594 * by the read functions must be manually freed.
595 */
596 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
597 {
598 TCGv_i64 v = tcg_temp_new_i64();
599
600 tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
601 return v;
602 }
603
604 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
605 {
606 TCGv_i32 v = tcg_temp_new_i32();
607
608 tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
609 return v;
610 }
611
612 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
613 {
614 TCGv_i32 v = tcg_temp_new_i32();
615
616 tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
617 return v;
618 }
619
620 static void clear_vec(DisasContext *s, int rd)
621 {
622 unsigned ofs = fp_reg_offset(s, rd, MO_64);
623 unsigned vsz = vec_full_reg_size(s);
624
625 tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0);
626 }
627
628 /*
629 * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
630 * If SVE is not enabled, then there are only 128 bits in the vector.
631 */
632 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
633 {
634 unsigned ofs = fp_reg_offset(s, rd, MO_64);
635 unsigned vsz = vec_full_reg_size(s);
636
637 /* Nop move, with side effect of clearing the tail. */
638 tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
639 }
640
641 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
642 {
643 unsigned ofs = fp_reg_offset(s, reg, MO_64);
644
645 tcg_gen_st_i64(v, tcg_env, ofs);
646 clear_vec_high(s, false, reg);
647 }
648
649 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
650 {
651 TCGv_i64 tmp = tcg_temp_new_i64();
652
653 tcg_gen_extu_i32_i64(tmp, v);
654 write_fp_dreg(s, reg, tmp);
655 }
656
657 /*
658 * Write a double result to 128 bit vector register reg, honouring FPCR.NEP:
659 * - if FPCR.NEP == 0, clear the high elements of reg
660 * - if FPCR.NEP == 1, set the high elements of reg from mergereg
661 * (i.e. merge the result with those high elements)
662 * In either case, SVE register bits above 128 are zeroed (per R_WKYLB).
663 */
664 static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg,
665 TCGv_i64 v)
666 {
667 if (!s->fpcr_nep) {
668 write_fp_dreg(s, reg, v);
669 return;
670 }
671
672 /*
673 * Move from mergereg to reg; this sets the high elements and
674 * clears the bits above 128 as a side effect.
675 */
676 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
677 vec_full_reg_offset(s, mergereg),
678 16, vec_full_reg_size(s));
679 tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg));
680 }
681
682 /*
683 * Write a single-prec result, but only clear the higher elements
684 * of the destination register if FPCR.NEP is 0; otherwise preserve them.
685 */
686 static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg,
687 TCGv_i32 v)
688 {
689 if (!s->fpcr_nep) {
690 write_fp_sreg(s, reg, v);
691 return;
692 }
693
694 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
695 vec_full_reg_offset(s, mergereg),
696 16, vec_full_reg_size(s));
697 tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
698 }
699
700 /*
701 * Write a half-prec result, but only clear the higher elements
702 * of the destination register if FPCR.NEP is 0; otherwise preserve them.
703 * The caller must ensure that the top 16 bits of v are zero.
704 */
705 static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg,
706 TCGv_i32 v)
707 {
708 if (!s->fpcr_nep) {
709 write_fp_sreg(s, reg, v);
710 return;
711 }
712
713 tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
714 vec_full_reg_offset(s, mergereg),
715 16, vec_full_reg_size(s));
716 tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
717 }
718
719 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */
720 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
721 GVecGen2Fn *gvec_fn, int vece)
722 {
723 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
724 is_q ? 16 : 8, vec_full_reg_size(s));
725 }
726
727 /* Expand a 2-operand + immediate AdvSIMD vector operation using
728 * an expander function.
729 */
730 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
731 int64_t imm, GVecGen2iFn *gvec_fn, int vece)
732 {
733 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
734 imm, is_q ? 16 : 8, vec_full_reg_size(s));
735 }
736
737 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */
738 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
739 GVecGen3Fn *gvec_fn, int vece)
740 {
741 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
742 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
743 }
744
745 /* Expand a 4-operand AdvSIMD vector operation using an expander function. */
746 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
747 int rx, GVecGen4Fn *gvec_fn, int vece)
748 {
749 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
750 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
751 is_q ? 16 : 8, vec_full_reg_size(s));
752 }
753
754 /* Expand a 2-operand operation using an out-of-line helper. */
755 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
756 int rn, int data, gen_helper_gvec_2 *fn)
757 {
758 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
759 vec_full_reg_offset(s, rn),
760 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
761 }
762
763 /* Expand a 3-operand operation using an out-of-line helper. */
764 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
765 int rn, int rm, int data, gen_helper_gvec_3 *fn)
766 {
767 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
768 vec_full_reg_offset(s, rn),
769 vec_full_reg_offset(s, rm),
770 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
771 }
772
773 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
774 * an out-of-line helper.
775 */
776 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
777 int rm, ARMFPStatusFlavour fpsttype, int data,
778 gen_helper_gvec_3_ptr *fn)
779 {
780 TCGv_ptr fpst = fpstatus_ptr(fpsttype);
781 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
782 vec_full_reg_offset(s, rn),
783 vec_full_reg_offset(s, rm), fpst,
784 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
785 }
786
787 /* Expand a 4-operand operation using an out-of-line helper. */
788 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
789 int rm, int ra, int data, gen_helper_gvec_4 *fn)
790 {
791 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
792 vec_full_reg_offset(s, rn),
793 vec_full_reg_offset(s, rm),
794 vec_full_reg_offset(s, ra),
795 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
796 }
797
798 /*
799 * Expand a 4-operand operation using an out-of-line helper that takes
800 * a pointer to the CPU env.
801 */
802 static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
803 int rm, int ra, int data,
804 gen_helper_gvec_4_ptr *fn)
805 {
806 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
807 vec_full_reg_offset(s, rn),
808 vec_full_reg_offset(s, rm),
809 vec_full_reg_offset(s, ra),
810 tcg_env,
811 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
812 }
813
814 /*
815 * Expand a 4-operand + fpstatus pointer + simd data value operation using
816 * an out-of-line helper.
817 */
818 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
819 int rm, int ra, ARMFPStatusFlavour fpsttype,
820 int data,
821 gen_helper_gvec_4_ptr *fn)
822 {
823 TCGv_ptr fpst = fpstatus_ptr(fpsttype);
824 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
825 vec_full_reg_offset(s, rn),
826 vec_full_reg_offset(s, rm),
827 vec_full_reg_offset(s, ra), fpst,
828 is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
829 }
830
831 /*
832 * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN.
833 * These functions implement
834 * d = floatN_is_any_nan(s) ? s : floatN_chs(s)
835 * which for float32 is
836 * d = (s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
837 * and similarly for the other float sizes.
838 */
839 static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s)
840 {
841 TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
842
843 gen_vfp_negh(chs_s, s);
844 gen_vfp_absh(abs_s, s);
845 tcg_gen_movcond_i32(TCG_COND_GTU, d,
846 abs_s, tcg_constant_i32(0x7c00),
847 s, chs_s);
848 }
849
850 static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s)
851 {
852 TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();
853
854 gen_vfp_negs(chs_s, s);
855 gen_vfp_abss(abs_s, s);
856 tcg_gen_movcond_i32(TCG_COND_GTU, d,
857 abs_s, tcg_constant_i32(0x7f800000UL),
858 s, chs_s);
859 }
860
861 static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
862 {
863 TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64();
864
865 gen_vfp_negd(chs_s, s);
866 gen_vfp_absd(abs_s, s);
867 tcg_gen_movcond_i64(TCG_COND_GTU, d,
868 abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
869 s, chs_s);
870 }
871
872 /*
873 * These functions implement
874 * d = floatN_is_any_nan(s) ? s : floatN_abs(s)
875 * which for float32 is
876 * d = (s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31))
877 * and similarly for the other float sizes.
878 */
879 static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s)
880 {
881 TCGv_i32 abs_s = tcg_temp_new_i32();
882
883 gen_vfp_absh(abs_s, s);
884 tcg_gen_movcond_i32(TCG_COND_GTU, d,
885 abs_s, tcg_constant_i32(0x7c00),
886 s, abs_s);
887 }
888
889 static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s)
890 {
891 TCGv_i32 abs_s = tcg_temp_new_i32();
892
893 gen_vfp_abss(abs_s, s);
894 tcg_gen_movcond_i32(TCG_COND_GTU, d,
895 abs_s, tcg_constant_i32(0x7f800000UL),
896 s, abs_s);
897 }
898
899 static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s)
900 {
901 TCGv_i64 abs_s = tcg_temp_new_i64();
902
903 gen_vfp_absd(abs_s, s);
904 tcg_gen_movcond_i64(TCG_COND_GTU, d,
905 abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
906 s, abs_s);
907 }
908
909 static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
910 {
911 if (dc->fpcr_ah) {
912 gen_vfp_ah_negh(d, s);
913 } else {
914 gen_vfp_negh(d, s);
915 }
916 }
917
918 static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
919 {
920 if (dc->fpcr_ah) {
921 gen_vfp_ah_negs(d, s);
922 } else {
923 gen_vfp_negs(d, s);
924 }
925 }
926
927 static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s)
928 {
929 if (dc->fpcr_ah) {
930 gen_vfp_ah_negd(d, s);
931 } else {
932 gen_vfp_negd(d, s);
933 }
934 }
935
936 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
937 * than the 32 bit equivalent.
938 */
939 static inline void gen_set_NZ64(TCGv_i64 result)
940 {
941 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
942 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
943 }
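
/*
 * Note on the flag representation used above: cpu_ZF holds a value that
 * is zero iff the Z flag is set, and bit 31 of cpu_NF is the N flag.
 * Splitting the 64-bit result into halves and OR-ing them into ZF gives
 * ZF == 0 exactly when the whole result is zero, while the high half
 * placed in NF carries the 64-bit sign bit in bit 31.
 */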
944
945 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
946 static inline void gen_logic_CC(int sf, TCGv_i64 result)
947 {
948 if (sf) {
949 gen_set_NZ64(result);
950 } else {
951 tcg_gen_extrl_i64_i32(cpu_ZF, result);
952 tcg_gen_mov_i32(cpu_NF, cpu_ZF);
953 }
954 tcg_gen_movi_i32(cpu_CF, 0);
955 tcg_gen_movi_i32(cpu_VF, 0);
956 }
957
958 /* dest = T0 + T1; compute C, N, V and Z flags */
959 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
960 {
961 TCGv_i64 result, flag, tmp;
962 result = tcg_temp_new_i64();
963 flag = tcg_temp_new_i64();
964 tmp = tcg_temp_new_i64();
965
966 tcg_gen_movi_i64(tmp, 0);
967 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
968
969 tcg_gen_extrl_i64_i32(cpu_CF, flag);
970
971 gen_set_NZ64(result);
972
973 tcg_gen_xor_i64(flag, result, t0);
974 tcg_gen_xor_i64(tmp, t0, t1);
975 tcg_gen_andc_i64(flag, flag, tmp);
976 tcg_gen_extrh_i64_i32(cpu_VF, flag);
977
978 tcg_gen_mov_i64(dest, result);
979 }
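
/*
 * Overflow note for the sequence above: flag ends up as
 * (result ^ t0) & ~(t0 ^ t1), i.e. signed overflow occurred when the
 * operands had the same sign but the result's sign differs; extrh then
 * moves bit 63 of that value into bit 31 of cpu_VF.
 */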
980
981 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
982 {
983 TCGv_i32 t0_32 = tcg_temp_new_i32();
984 TCGv_i32 t1_32 = tcg_temp_new_i32();
985 TCGv_i32 tmp = tcg_temp_new_i32();
986
987 tcg_gen_movi_i32(tmp, 0);
988 tcg_gen_extrl_i64_i32(t0_32, t0);
989 tcg_gen_extrl_i64_i32(t1_32, t1);
990 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
991 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
992 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
993 tcg_gen_xor_i32(tmp, t0_32, t1_32);
994 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
995 tcg_gen_extu_i32_i64(dest, cpu_NF);
996 }
997
998 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
999 {
1000 if (sf) {
1001 gen_add64_CC(dest, t0, t1);
1002 } else {
1003 gen_add32_CC(dest, t0, t1);
1004 }
1005 }
1006
1007 /* dest = T0 - T1; compute C, N, V and Z flags */
1008 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1009 {
1010 /* 64 bit arithmetic */
1011 TCGv_i64 result, flag, tmp;
1012
1013 result = tcg_temp_new_i64();
1014 flag = tcg_temp_new_i64();
1015 tcg_gen_sub_i64(result, t0, t1);
1016
1017 gen_set_NZ64(result);
1018
1019 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
1020 tcg_gen_extrl_i64_i32(cpu_CF, flag);
1021
1022 tcg_gen_xor_i64(flag, result, t0);
1023 tmp = tcg_temp_new_i64();
1024 tcg_gen_xor_i64(tmp, t0, t1);
1025 tcg_gen_and_i64(flag, flag, tmp);
1026 tcg_gen_extrh_i64_i32(cpu_VF, flag);
1027 tcg_gen_mov_i64(dest, result);
1028 }
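
/*
 * For subtraction above, C follows the AArch64 "no borrow" convention
 * (set when t0 >= t1 unsigned, hence TCG_COND_GEU), and V is
 * (result ^ t0) & (t0 ^ t1): overflow when the operands had different
 * signs and the result's sign differs from t0's.
 */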
1029
1030 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1031 {
1032 /* 32 bit arithmetic */
1033 TCGv_i32 t0_32 = tcg_temp_new_i32();
1034 TCGv_i32 t1_32 = tcg_temp_new_i32();
1035 TCGv_i32 tmp;
1036
1037 tcg_gen_extrl_i64_i32(t0_32, t0);
1038 tcg_gen_extrl_i64_i32(t1_32, t1);
1039 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
1040 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1041 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
1042 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
1043 tmp = tcg_temp_new_i32();
1044 tcg_gen_xor_i32(tmp, t0_32, t1_32);
1045 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
1046 tcg_gen_extu_i32_i64(dest, cpu_NF);
1047 }
1048
1049 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1050 {
1051 if (sf) {
1052 gen_sub64_CC(dest, t0, t1);
1053 } else {
1054 gen_sub32_CC(dest, t0, t1);
1055 }
1056 }
1057
1058 /* dest = T0 + T1 + CF; do not compute flags. */
1059 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1060 {
1061 TCGv_i64 flag = tcg_temp_new_i64();
1062 tcg_gen_extu_i32_i64(flag, cpu_CF);
1063 tcg_gen_add_i64(dest, t0, t1);
1064 tcg_gen_add_i64(dest, dest, flag);
1065
1066 if (!sf) {
1067 tcg_gen_ext32u_i64(dest, dest);
1068 }
1069 }
1070
1071 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
1072 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
1073 {
1074 if (sf) {
1075 TCGv_i64 result = tcg_temp_new_i64();
1076 TCGv_i64 cf_64 = tcg_temp_new_i64();
1077 TCGv_i64 vf_64 = tcg_temp_new_i64();
1078 TCGv_i64 tmp = tcg_temp_new_i64();
1079 TCGv_i64 zero = tcg_constant_i64(0);
1080
1081 tcg_gen_extu_i32_i64(cf_64, cpu_CF);
1082 tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
1083 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
1084 tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
1085 gen_set_NZ64(result);
1086
1087 tcg_gen_xor_i64(vf_64, result, t0);
1088 tcg_gen_xor_i64(tmp, t0, t1);
1089 tcg_gen_andc_i64(vf_64, vf_64, tmp);
1090 tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
1091
1092 tcg_gen_mov_i64(dest, result);
1093 } else {
1094 TCGv_i32 t0_32 = tcg_temp_new_i32();
1095 TCGv_i32 t1_32 = tcg_temp_new_i32();
1096 TCGv_i32 tmp = tcg_temp_new_i32();
1097 TCGv_i32 zero = tcg_constant_i32(0);
1098
1099 tcg_gen_extrl_i64_i32(t0_32, t0);
1100 tcg_gen_extrl_i64_i32(t1_32, t1);
1101 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
1102 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);
1103
1104 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1105 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
1106 tcg_gen_xor_i32(tmp, t0_32, t1_32);
1107 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
1108 tcg_gen_extu_i32_i64(dest, cpu_NF);
1109 }
1110 }
1111
1112 /*
1113 * Load/Store generators
1114 */
1115
1116 /*
1117 * Store from GPR register to memory.
1118 */
1119 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
1120 TCGv_i64 tcg_addr, MemOp memop, int memidx,
1121 bool iss_valid,
1122 unsigned int iss_srt,
1123 bool iss_sf, bool iss_ar)
1124 {
1125 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
1126
1127 if (iss_valid) {
1128 uint32_t syn;
1129
1130 syn = syn_data_abort_with_iss(0,
1131 (memop & MO_SIZE),
1132 false,
1133 iss_srt,
1134 iss_sf,
1135 iss_ar,
1136 0, 0, 0, 0, 0, false);
1137 disas_set_insn_syndrome(s, syn);
1138 }
1139 }
1140
1141 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
1142 TCGv_i64 tcg_addr, MemOp memop,
1143 bool iss_valid,
1144 unsigned int iss_srt,
1145 bool iss_sf, bool iss_ar)
1146 {
1147 do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
1148 iss_valid, iss_srt, iss_sf, iss_ar);
1149 }
1150
1151 /*
1152 * Load from memory to GPR register
1153 */
1154 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1155 MemOp memop, bool extend, int memidx,
1156 bool iss_valid, unsigned int iss_srt,
1157 bool iss_sf, bool iss_ar)
1158 {
1159 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
1160
1161 if (extend && (memop & MO_SIGN)) {
1162 g_assert((memop & MO_SIZE) <= MO_32);
1163 tcg_gen_ext32u_i64(dest, dest);
1164 }
1165
1166 if (iss_valid) {
1167 uint32_t syn;
1168
1169 syn = syn_data_abort_with_iss(0,
1170 (memop & MO_SIZE),
1171 (memop & MO_SIGN) != 0,
1172 iss_srt,
1173 iss_sf,
1174 iss_ar,
1175 0, 0, 0, 0, 0, false);
1176 disas_set_insn_syndrome(s, syn);
1177 }
1178 }
1179
1180 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
1181 MemOp memop, bool extend,
1182 bool iss_valid, unsigned int iss_srt,
1183 bool iss_sf, bool iss_ar)
1184 {
1185 do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
1186 iss_valid, iss_srt, iss_sf, iss_ar);
1187 }
1188
1189 /*
1190 * Store from FP register to memory
1191 */
1192 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
1193 {
1194 /* This writes the bottom N bits of a 128 bit wide vector to memory */
1195 TCGv_i64 tmplo = tcg_temp_new_i64();
1196
1197 tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));
1198
1199 if ((mop & MO_SIZE) < MO_128) {
1200 tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1201 } else {
1202 TCGv_i64 tmphi = tcg_temp_new_i64();
1203 TCGv_i128 t16 = tcg_temp_new_i128();
1204
1205 tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
1206 tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
1207
1208 tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
1209 }
1210 }
1211
1212 /*
1213 * Load from memory to FP register
1214 */
1215 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
1216 {
1217 /* This always zero-extends and writes to a full 128 bit wide vector */
1218 TCGv_i64 tmplo = tcg_temp_new_i64();
1219 TCGv_i64 tmphi = NULL;
1220
1221 if ((mop & MO_SIZE) < MO_128) {
1222 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1223 } else {
1224 TCGv_i128 t16 = tcg_temp_new_i128();
1225
1226 tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
1227
1228 tmphi = tcg_temp_new_i64();
1229 tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
1230 }
1231
1232 tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));
1233
1234 if (tmphi) {
1235 tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
1236 }
1237 clear_vec_high(s, tmphi != NULL, destidx);
1238 }
1239
1240 /*
1241 * Vector load/store helpers.
1242 *
1243 * The principal difference between this and a FP load is that we don't
1244 * zero extend as we are filling a partial chunk of the vector register.
1245 * These functions don't support 128 bit loads/stores, which would be
1246 * normal load/store operations.
1247 *
1248 * The _i32 versions are useful when operating on 32 bit quantities
1249 * (eg for floating point single or using Neon helper functions).
1250 */
1251
1252 /* Get value of an element within a vector register */
1253 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1254 int element, MemOp memop)
1255 {
1256 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1257 switch ((unsigned)memop) {
1258 case MO_8:
1259 tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
1260 break;
1261 case MO_16:
1262 tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
1263 break;
1264 case MO_32:
1265 tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
1266 break;
1267 case MO_8|MO_SIGN:
1268 tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
1269 break;
1270 case MO_16|MO_SIGN:
1271 tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
1272 break;
1273 case MO_32|MO_SIGN:
1274 tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
1275 break;
1276 case MO_64:
1277 case MO_64|MO_SIGN:
1278 tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
1279 break;
1280 default:
1281 g_assert_not_reached();
1282 }
1283 }
1284
1285 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1286 int element, MemOp memop)
1287 {
1288 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1289 switch (memop) {
1290 case MO_8:
1291 tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
1292 break;
1293 case MO_16:
1294 tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
1295 break;
1296 case MO_8|MO_SIGN:
1297 tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
1298 break;
1299 case MO_16|MO_SIGN:
1300 tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
1301 break;
1302 case MO_32:
1303 case MO_32|MO_SIGN:
1304 tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
1305 break;
1306 default:
1307 g_assert_not_reached();
1308 }
1309 }
1310
1311 /* Set value of an element within a vector register */
1312 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1313 int element, MemOp memop)
1314 {
1315 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1316 switch (memop) {
1317 case MO_8:
1318 tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
1319 break;
1320 case MO_16:
1321 tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
1322 break;
1323 case MO_32:
1324 tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
1325 break;
1326 case MO_64:
1327 tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
1328 break;
1329 default:
1330 g_assert_not_reached();
1331 }
1332 }
1333
1334 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1335 int destidx, int element, MemOp memop)
1336 {
1337 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1338 switch (memop) {
1339 case MO_8:
1340 tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
1341 break;
1342 case MO_16:
1343 tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
1344 break;
1345 case MO_32:
1346 tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
1347 break;
1348 default:
1349 g_assert_not_reached();
1350 }
1351 }
1352
1353 /* Store from vector register to memory */
1354 static void do_vec_st(DisasContext *s, int srcidx, int element,
1355 TCGv_i64 tcg_addr, MemOp mop)
1356 {
1357 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1358
1359 read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
1360 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1361 }
1362
1363 /* Load from memory to vector register */
1364 static void do_vec_ld(DisasContext *s, int destidx, int element,
1365 TCGv_i64 tcg_addr, MemOp mop)
1366 {
1367 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1368
1369 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1370 write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
1371 }
1372
1373 /* Check that FP/Neon access is enabled. If it is, return
1374 * true. If not, emit code to generate an appropriate exception,
1375 * and return false; the caller should not emit any code for
1376 * the instruction. Note that this check must happen after all
1377 * unallocated-encoding checks (otherwise the syndrome information
1378 * for the resulting exception will be incorrect).
1379 */
1380 static bool fp_access_check_only(DisasContext *s)
1381 {
1382 if (s->fp_excp_el) {
1383 assert(!s->fp_access_checked);
1384 s->fp_access_checked = -1;
1385
1386 gen_exception_insn_el(s, 0, EXCP_UDEF,
1387 syn_fp_access_trap(1, 0xe, false, 0),
1388 s->fp_excp_el);
1389 return false;
1390 }
1391 s->fp_access_checked = 1;
1392 return true;
1393 }
1394
1395 static bool nonstreaming_check(DisasContext *s)
1396 {
1397 if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
1398 gen_exception_insn(s, 0, EXCP_UDEF,
1399 syn_smetrap(SME_ET_Streaming, false));
1400 return false;
1401 }
1402 return true;
1403 }
1404
1405 static bool fp_access_check(DisasContext *s)
1406 {
1407 return fp_access_check_only(s) && nonstreaming_check(s);
1408 }
1409
1410 /*
1411 * Return <0 for non-supported element sizes, with MO_16 controlled by
1412 * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
1413 */
1414 static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz)
1415 {
1416 switch (esz) {
1417 case MO_64:
1418 case MO_32:
1419 break;
1420 case MO_16:
1421 if (!dc_isar_feature(aa64_fp16, s)) {
1422 return -1;
1423 }
1424 break;
1425 default:
1426 return -1;
1427 }
1428 return fp_access_check(s);
1429 }
1430
1431 /* Likewise, but vector MO_64 must have two elements. */
1432 static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
1433 {
1434 switch (esz) {
1435 case MO_64:
1436 if (!is_q) {
1437 return -1;
1438 }
1439 break;
1440 case MO_32:
1441 break;
1442 case MO_16:
1443 if (!dc_isar_feature(aa64_fp16, s)) {
1444 return -1;
1445 }
1446 break;
1447 default:
1448 return -1;
1449 }
1450 return fp_access_check(s);
1451 }
1452
1453 /*
1454 * Check that SVE access is enabled. If it is, return true.
1455 * If not, emit code to generate an appropriate exception and return false.
1456 * This function corresponds to CheckSVEEnabled().
1457 */
1458 bool sve_access_check(DisasContext *s)
1459 {
1460 if (dc_isar_feature(aa64_sme, s)) {
1461 bool ret;
1462
1463 if (s->pstate_sm) {
1464 ret = sme_enabled_check(s);
1465 } else if (dc_isar_feature(aa64_sve, s)) {
1466 goto continue_sve;
1467 } else {
1468 ret = sme_sm_enabled_check(s);
1469 }
1470 if (ret) {
1471 ret = nonstreaming_check(s);
1472 }
1473 s->sve_access_checked = (ret ? 1 : -1);
1474 return ret;
1475 }
1476
1477 continue_sve:
1478 if (s->sve_excp_el) {
1479 /* Assert that we only raise one exception per instruction. */
1480 assert(!s->sve_access_checked);
1481 gen_exception_insn_el(s, 0, EXCP_UDEF,
1482 syn_sve_access_trap(), s->sve_excp_el);
1483 s->sve_access_checked = -1;
1484 return false;
1485 }
1486 s->sve_access_checked = 1;
1487 return fp_access_check(s);
1488 }
1489
1490 /*
1491 * Check that SME access is enabled, raise an exception if not.
1492 * Note that this function corresponds to CheckSMEAccess and is
1493 * only used directly for cpregs.
1494 */
1495 static bool sme_access_check(DisasContext *s)
1496 {
1497 if (s->sme_excp_el) {
1498 gen_exception_insn_el(s, 0, EXCP_UDEF,
1499 syn_smetrap(SME_ET_AccessTrap, false),
1500 s->sme_excp_el);
1501 return false;
1502 }
1503 return true;
1504 }
1505
1506 /* This function corresponds to CheckSMEEnabled. */
1507 bool sme_enabled_check(DisasContext *s)
1508 {
1509 /*
1510 * Note that unlike sve_excp_el, we have not constrained sme_excp_el
1511 * to be zero when fp_excp_el has priority. This is because we need
1512 * sme_excp_el by itself for cpregs access checks.
1513 */
1514 if (s->sme_excp_el
1515 && (!s->fp_excp_el || s->sme_excp_el <= s->fp_excp_el)) {
1516 bool ret = sme_access_check(s);
1517 s->fp_access_checked = (ret ? 1 : -1);
1518 return ret;
1519 }
1520 return fp_access_check_only(s);
1521 }
1522
1523 /* Common subroutine for CheckSMEAnd*Enabled. */
1524 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
1525 {
1526 if (!sme_enabled_check(s)) {
1527 return false;
1528 }
1529 if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
1530 gen_exception_insn(s, 0, EXCP_UDEF,
1531 syn_smetrap(SME_ET_NotStreaming, false));
1532 return false;
1533 }
1534 if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
1535 gen_exception_insn(s, 0, EXCP_UDEF,
1536 syn_smetrap(SME_ET_InactiveZA, false));
1537 return false;
1538 }
1539 return true;
1540 }
1541
1542 /*
1543 * Expanders for AdvSIMD translation functions.
1544 */
1545
1546 static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
1547 gen_helper_gvec_2 *fn)
1548 {
1549 if (!a->q && a->esz == MO_64) {
1550 return false;
1551 }
1552 if (fp_access_check(s)) {
1553 gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
1554 }
1555 return true;
1556 }
1557
1558 static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
1559 gen_helper_gvec_3 *fn)
1560 {
1561 if (!a->q && a->esz == MO_64) {
1562 return false;
1563 }
1564 if (fp_access_check(s)) {
1565 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
1566 }
1567 return true;
1568 }
1569
1570 static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1571 {
1572 if (!a->q && a->esz == MO_64) {
1573 return false;
1574 }
1575 if (fp_access_check(s)) {
1576 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1577 }
1578 return true;
1579 }
1580
1581 static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1582 {
1583 if (a->esz == MO_64) {
1584 return false;
1585 }
1586 if (fp_access_check(s)) {
1587 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
1588 }
1589 return true;
1590 }
1591
1592 static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
1593 {
1594 if (a->esz == MO_8) {
1595 return false;
1596 }
1597 return do_gvec_fn3_no64(s, a, fn);
1598 }
1599
1600 static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
1601 {
1602 if (!a->q && a->esz == MO_64) {
1603 return false;
1604 }
1605 if (fp_access_check(s)) {
1606 gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
1607 }
1608 return true;
1609 }
1610
1611 /*
1612 * This utility function is for doing register extension with an
1613 * optional shift. You will likely want to pass a temporary for the
1614 * destination register. See DecodeRegExtend() in the ARM ARM.
1615 */
1616 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1617 int option, unsigned int shift)
1618 {
1619 int extsize = extract32(option, 0, 2);
1620 bool is_signed = extract32(option, 2, 1);
1621
1622 tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
1623 tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1624 }
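
/*
 * Example of the option encoding above (DecodeRegExtend): the low two bits
 * select the extract width (0=B, 1=H, 2=W, 3=X) and bit 2 selects
 * signedness, so option == 6 (SXTW) sign-extends bits [31:0] before the
 * left shift, while option == 2 (UXTW) zero-extends them.
 */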
1625
1626 static inline void gen_check_sp_alignment(DisasContext *s)
1627 {
1628 /* The AArch64 architecture mandates that (if enabled via PSTATE
1629 * or SCTLR bits) there is a check that SP is 16-aligned on every
1630 * SP-relative load or store (with an exception generated if it is not).
1631 * In line with general QEMU practice regarding misaligned accesses,
1632 * we omit these checks for the sake of guest program performance.
1633 * This function is provided as a hook so we can more easily add these
1634 * checks in future (possibly as a "favour catching guest program bugs
1635 * over speed" user selectable option).
1636 */
1637 }
1638
1639 /*
1640 * The instruction disassembly implemented here matches
1641 * the instruction encoding classifications in chapter C4
1642 * of the ARM Architecture Reference Manual (DDI0487B_a);
1643 * classification names and decode diagrams here should generally
1644 * match up with those in the manual.
1645 */
1646
1647 static bool trans_B(DisasContext *s, arg_i *a)
1648 {
1649 reset_btype(s);
1650 gen_goto_tb(s, 0, a->imm);
1651 return true;
1652 }
1653
1654 static bool trans_BL(DisasContext *s, arg_i *a)
1655 {
1656 gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
1657 reset_btype(s);
1658 gen_goto_tb(s, 0, a->imm);
1659 return true;
1660 }
1661
1662
1663 static bool trans_CBZ(DisasContext *s, arg_cbz *a)
1664 {
1665 DisasLabel match;
1666 TCGv_i64 tcg_cmp;
1667
1668 tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
1669 reset_btype(s);
1670
1671 match = gen_disas_label(s);
1672 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1673 tcg_cmp, 0, match.label);
1674 gen_goto_tb(s, 0, 4);
1675 set_disas_label(s, match);
1676 gen_goto_tb(s, 1, a->imm);
1677 return true;
1678 }
1679
1680 static bool trans_TBZ(DisasContext *s, arg_tbz *a)
1681 {
1682 DisasLabel match;
1683 TCGv_i64 tcg_cmp;
1684
1685 tcg_cmp = tcg_temp_new_i64();
1686 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);
1687
1688 reset_btype(s);
1689
1690 match = gen_disas_label(s);
1691 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
1692 tcg_cmp, 0, match.label);
1693 gen_goto_tb(s, 0, 4);
1694 set_disas_label(s, match);
1695 gen_goto_tb(s, 1, a->imm);
1696 return true;
1697 }
1698
1699 static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
1700 {
1701 /* BC.cond is only present with FEAT_HBC */
1702 if (a->c && !dc_isar_feature(aa64_hbc, s)) {
1703 return false;
1704 }
1705 reset_btype(s);
1706 if (a->cond < 0x0e) {
1707 /* genuinely conditional branches */
1708 DisasLabel match = gen_disas_label(s);
1709 arm_gen_test_cc(a->cond, match.label);
1710 gen_goto_tb(s, 0, 4);
1711 set_disas_label(s, match);
1712 gen_goto_tb(s, 1, a->imm);
1713 } else {
1714 /* 0xe and 0xf are both "always" conditions */
1715 gen_goto_tb(s, 0, a->imm);
1716 }
1717 return true;
1718 }
1719
1720 static void set_btype_for_br(DisasContext *s, int rn)
1721 {
1722 if (dc_isar_feature(aa64_bti, s)) {
1723 /* BR to {x16,x17} or !guard -> 1, else 3. */
1724 if (rn == 16 || rn == 17) {
1725 set_btype(s, 1);
1726 } else {
1727 TCGv_i64 pc = tcg_temp_new_i64();
1728 gen_pc_plus_diff(s, pc, 0);
1729 gen_helper_guarded_page_br(tcg_env, pc);
1730 s->btype = -1;
1731 }
1732 }
1733 }
1734
1735 static void set_btype_for_blr(DisasContext *s)
1736 {
1737 if (dc_isar_feature(aa64_bti, s)) {
1738 /* BLR sets BTYPE to 2, regardless of source guarded page. */
1739 set_btype(s, 2);
1740 }
1741 }
1742
1743 static bool trans_BR(DisasContext *s, arg_r *a)
1744 {
1745 set_btype_for_br(s, a->rn);
1746 gen_a64_set_pc(s, cpu_reg(s, a->rn));
1747 s->base.is_jmp = DISAS_JUMP;
1748 return true;
1749 }
1750
1751 static bool trans_BLR(DisasContext *s, arg_r *a)
1752 {
1753 TCGv_i64 dst = cpu_reg(s, a->rn);
1754 TCGv_i64 lr = cpu_reg(s, 30);
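/*
 * If the branch target is the link register itself, copy it to a
 * temporary first so that writing the return address into LR below
 * does not clobber the destination.
 */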
1755 if (dst == lr) {
1756 TCGv_i64 tmp = tcg_temp_new_i64();
1757 tcg_gen_mov_i64(tmp, dst);
1758 dst = tmp;
1759 }
1760 gen_pc_plus_diff(s, lr, curr_insn_len(s));
1761 gen_a64_set_pc(s, dst);
1762 set_btype_for_blr(s);
1763 s->base.is_jmp = DISAS_JUMP;
1764 return true;
1765 }
1766
1767 static bool trans_RET(DisasContext *s, arg_r *a)
1768 {
1769 gen_a64_set_pc(s, cpu_reg(s, a->rn));
1770 s->base.is_jmp = DISAS_JUMP;
1771 return true;
1772 }
1773
1774 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
1775 TCGv_i64 modifier, bool use_key_a)
1776 {
1777 TCGv_i64 truedst;
1778 /*
1779 * Return the branch target for a BRAA/RETA/etc, which is either
1780 * just the destination dst, or that value with the pauth check
1781 * done and the code removed from the high bits.
1782 */
1783 if (!s->pauth_active) {
1784 return dst;
1785 }
1786
1787 truedst = tcg_temp_new_i64();
1788 if (use_key_a) {
1789 gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
1790 } else {
1791 gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
1792 }
1793 return truedst;
1794 }
1795
1796 static bool trans_BRAZ(DisasContext *s, arg_braz *a)
1797 {
1798 TCGv_i64 dst;
1799
1800 if (!dc_isar_feature(aa64_pauth, s)) {
1801 return false;
1802 }
1803
1804 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1805 set_btype_for_br(s, a->rn);
1806 gen_a64_set_pc(s, dst);
1807 s->base.is_jmp = DISAS_JUMP;
1808 return true;
1809 }
1810
1811 static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
1812 {
1813 TCGv_i64 dst, lr;
1814
1815 if (!dc_isar_feature(aa64_pauth, s)) {
1816 return false;
1817 }
1818
1819 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
1820 lr = cpu_reg(s, 30);
1821 if (dst == lr) {
1822 TCGv_i64 tmp = tcg_temp_new_i64();
1823 tcg_gen_mov_i64(tmp, dst);
1824 dst = tmp;
1825 }
1826 gen_pc_plus_diff(s, lr, curr_insn_len(s));
1827 gen_a64_set_pc(s, dst);
1828 set_btype_for_blr(s);
1829 s->base.is_jmp = DISAS_JUMP;
1830 return true;
1831 }
1832
1833 static bool trans_RETA(DisasContext *s, arg_reta *a)
1834 {
1835 TCGv_i64 dst;
1836
1837 if (!dc_isar_feature(aa64_pauth, s)) {
1838 return false;
1839 }
1840
1841 dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
1842 gen_a64_set_pc(s, dst);
1843 s->base.is_jmp = DISAS_JUMP;
1844 return true;
1845 }
1846
1847 static bool trans_BRA(DisasContext *s, arg_bra *a)
1848 {
1849 TCGv_i64 dst;
1850
1851 if (!dc_isar_feature(aa64_pauth, s)) {
1852 return false;
1853 }
1854 dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1855 gen_a64_set_pc(s, dst);
1856 set_btype_for_br(s, a->rn);
1857 s->base.is_jmp = DISAS_JUMP;
1858 return true;
1859 }
1860
1861 static bool trans_BLRA(DisasContext *s, arg_bra *a)
1862 {
1863 TCGv_i64 dst, lr;
1864
1865 if (!dc_isar_feature(aa64_pauth, s)) {
1866 return false;
1867 }
1868 dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
1869 lr = cpu_reg(s, 30);
1870 if (dst == lr) {
1871 TCGv_i64 tmp = tcg_temp_new_i64();
1872 tcg_gen_mov_i64(tmp, dst);
1873 dst = tmp;
1874 }
1875 gen_pc_plus_diff(s, lr, curr_insn_len(s));
1876 gen_a64_set_pc(s, dst);
1877 set_btype_for_blr(s);
1878 s->base.is_jmp = DISAS_JUMP;
1879 return true;
1880 }
1881
1882 static bool trans_ERET(DisasContext *s, arg_ERET *a)
1883 {
1884 TCGv_i64 dst;
1885
1886 if (s->current_el == 0) {
1887 return false;
1888 }
1889 if (s->trap_eret) {
1890 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
1891 return true;
1892 }
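/* The return address is taken from ELR_ELx of the current EL. */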
1893 dst = tcg_temp_new_i64();
1894 tcg_gen_ld_i64(dst, tcg_env,
1895 offsetof(CPUARMState, elr_el[s->current_el]));
1896
1897 translator_io_start(&s->base);
1898
1899 gen_helper_exception_return(tcg_env, dst);
1900 /* Must exit loop to check un-masked IRQs */
1901 s->base.is_jmp = DISAS_EXIT;
1902 return true;
1903 }
1904
1905 static bool trans_ERETA(DisasContext *s, arg_reta *a)
1906 {
1907 TCGv_i64 dst;
1908
1909 if (!dc_isar_feature(aa64_pauth, s)) {
1910 return false;
1911 }
1912 if (s->current_el == 0) {
1913 return false;
1914 }
1915 /* The FGT trap takes precedence over an auth trap. */
1916 if (s->trap_eret) {
1917 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
1918 return true;
1919 }
1920 dst = tcg_temp_new_i64();
1921 tcg_gen_ld_i64(dst, tcg_env,
1922 offsetof(CPUARMState, elr_el[s->current_el]));
1923
1924 dst = auth_branch_target(s, dst, cpu_X[31], !a->m);
1925
1926 translator_io_start(&s->base);
1927
1928 gen_helper_exception_return(tcg_env, dst);
1929 /* Must exit loop to check un-masked IRQs */
1930 s->base.is_jmp = DISAS_EXIT;
1931 return true;
1932 }
1933
1934 static bool trans_NOP(DisasContext *s, arg_NOP *a)
1935 {
1936 return true;
1937 }
1938
1939 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
1940 {
1941 /*
1942 * When running in MTTCG we don't generate jumps to the yield and
1943 * WFE helpers as it won't affect the scheduling of other vCPUs.
1944 * If we wanted to more completely model WFE/SEV so we don't busy
1945 * spin unnecessarily we would need to do something more involved.
1946 */
1947 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1948 s->base.is_jmp = DISAS_YIELD;
1949 }
1950 return true;
1951 }
1952
1953 static bool trans_WFI(DisasContext *s, arg_WFI *a)
1954 {
1955 s->base.is_jmp = DISAS_WFI;
1956 return true;
1957 }
1958
1959 static bool trans_WFE(DisasContext *s, arg_WFI *a)
1960 {
1961 /*
1962 * When running in MTTCG we don't generate jumps to the yield and
1963 * WFE helpers as it won't affect the scheduling of other vCPUs.
1964 * If we wanted to more completely model WFE/SEV so we don't busy
1965 * spin unnecessarily we would need to do something more involved.
1966 */
1967 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1968 s->base.is_jmp = DISAS_WFE;
1969 }
1970 return true;
1971 }
1972
1973 static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
1974 {
1975 if (!dc_isar_feature(aa64_wfxt, s)) {
1976 return false;
1977 }
1978
1979 /*
1980 * Because we need to pass the register value to the helper,
1981 * it's easier to emit the code now, unlike trans_WFI which
1982 * defers it to aarch64_tr_tb_stop(). That means we need to
1983 * check ss_active so that single-stepping a WFIT doesn't halt.
1984 */
1985 if (s->ss_active) {
1986 /* Act like a NOP under architectural singlestep */
1987 return true;
1988 }
1989
1990 gen_a64_update_pc(s, 4);
1991 gen_helper_wfit(tcg_env, cpu_reg(s, a->rd));
1992 /* Go back to the main loop to check for interrupts */
1993 s->base.is_jmp = DISAS_EXIT;
1994 return true;
1995 }
1996
1997 static bool trans_WFET(DisasContext *s, arg_WFET *a)
1998 {
1999 if (!dc_isar_feature(aa64_wfxt, s)) {
2000 return false;
2001 }
2002
2003 /*
2004 * We rely here on our WFE implementation being a NOP, so we
2005 * don't need to do anything different to handle the WFET timeout
2006 * from what trans_WFE does.
2007 */
2008 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
2009 s->base.is_jmp = DISAS_WFE;
2010 }
2011 return true;
2012 }
2013
2014 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
2015 {
2016 if (s->pauth_active) {
2017 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
2018 }
2019 return true;
2020 }
2021
2022 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
2023 {
2024 if (s->pauth_active) {
2025 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2026 }
2027 return true;
2028 }
2029
2030 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
2031 {
2032 if (s->pauth_active) {
2033 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2034 }
2035 return true;
2036 }
2037
2038 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
2039 {
2040 if (s->pauth_active) {
2041 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2042 }
2043 return true;
2044 }
2045
2046 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
2047 {
2048 if (s->pauth_active) {
2049 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
2050 }
2051 return true;
2052 }
2053
2054 static bool trans_ESB(DisasContext *s, arg_ESB *a)
2055 {
2056 /* Without RAS, we must implement this as NOP. */
2057 if (dc_isar_feature(aa64_ras, s)) {
2058 /*
2059 * QEMU does not have a source of physical SErrors,
2060 * so we are only concerned with virtual SErrors.
2061 * The pseudocode in the ARM for this case is
2062 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
2063 * AArch64.vESBOperation();
2064 * Most of the condition can be evaluated at translation time.
2065 * Test for EL2 present, and defer test for SEL2 to runtime.
2066 */
2067 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
2068 gen_helper_vesb(tcg_env);
2069 }
2070 }
2071 return true;
2072 }
2073
2074 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
2075 {
2076 if (s->pauth_active) {
2077 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2078 }
2079 return true;
2080 }
2081
2082 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
2083 {
2084 if (s->pauth_active) {
2085 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2086 }
2087 return true;
2088 }
2089
2090 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
2091 {
2092 if (s->pauth_active) {
2093 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2094 }
2095 return true;
2096 }
2097
2098 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
2099 {
2100 if (s->pauth_active) {
2101 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2102 }
2103 return true;
2104 }
2105
2106 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
2107 {
2108 if (s->pauth_active) {
2109 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2110 }
2111 return true;
2112 }
2113
2114 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
2115 {
2116 if (s->pauth_active) {
2117 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2118 }
2119 return true;
2120 }
2121
2122 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
2123 {
2124 if (s->pauth_active) {
2125 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
2126 }
2127 return true;
2128 }
2129
2130 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
2131 {
2132 if (s->pauth_active) {
2133 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
2134 }
2135 return true;
2136 }
2137
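/* CLREX: clear the local exclusive monitor by invalidating the recorded address. */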
2138 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
2139 {
2140 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2141 return true;
2142 }
2143
2144 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
2145 {
2146 /* We handle DSB and DMB the same way */
2147 TCGBar bar;
2148
2149 switch (a->types) {
2150 case 1: /* MBReqTypes_Reads */
2151 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
2152 break;
2153 case 2: /* MBReqTypes_Writes */
2154 bar = TCG_BAR_SC | TCG_MO_ST_ST;
2155 break;
2156 default: /* MBReqTypes_All */
2157 bar = TCG_BAR_SC | TCG_MO_ALL;
2158 break;
2159 }
2160 tcg_gen_mb(bar);
2161 return true;
2162 }
2163
2164 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a)
2165 {
2166 if (!dc_isar_feature(aa64_xs, s)) {
2167 return false;
2168 }
2169 tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);
2170 return true;
2171 }
2172
2173 static bool trans_ISB(DisasContext *s, arg_ISB *a)
2174 {
2175 /*
2176 * We need to break the TB after this insn to execute
2177 * self-modifying code correctly and also to take
2178 * any pending interrupts immediately.
2179 */
2180 reset_btype(s);
2181 gen_goto_tb(s, 0, 4);
2182 return true;
2183 }
2184
2185 static bool trans_SB(DisasContext *s, arg_SB *a)
2186 {
2187 if (!dc_isar_feature(aa64_sb, s)) {
2188 return false;
2189 }
2190 /*
2191 * TODO: There is no speculation barrier opcode for TCG;
2192 * MB and end the TB instead.
2193 */
2194 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
2195 gen_goto_tb(s, 0, 4);
2196 return true;
2197 }
2198
2199 static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
2200 {
2201 if (!dc_isar_feature(aa64_condm_4, s)) {
2202 return false;
2203 }
2204 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
2205 return true;
2206 }
2207
2208 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
2209 {
2210 TCGv_i32 z;
2211
2212 if (!dc_isar_feature(aa64_condm_5, s)) {
2213 return false;
2214 }
2215
2216 z = tcg_temp_new_i32();
2217
2218 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
2219
2220 /*
2221 * (!C & !Z) << 31
2222 * (!(C | Z)) << 31
2223 * ~((C | Z) << 31)
2224 * ~-(C | Z)
2225 * (C | Z) - 1
2226 */
2227 tcg_gen_or_i32(cpu_NF, cpu_CF, z);
2228 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
2229
2230 /* !(Z & C) */
2231 tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
2232 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
2233
2234 /* (!C & Z) << 31 -> -(Z & ~C) */
2235 tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
2236 tcg_gen_neg_i32(cpu_VF, cpu_VF);
2237
2238 /* C | Z */
2239 tcg_gen_or_i32(cpu_CF, cpu_CF, z);
2240
2241 return true;
2242 }
2243
2244 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
2245 {
2246 if (!dc_isar_feature(aa64_condm_5, s)) {
2247 return false;
2248 }
2249
2250 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */
2251 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */
2252
2253 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
2254 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
2255
2256 tcg_gen_movi_i32(cpu_NF, 0);
2257 tcg_gen_movi_i32(cpu_VF, 0);
2258
2259 return true;
2260 }
2261
2262 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
2263 {
2264 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
2265 return false;
2266 }
2267 if (a->imm & 1) {
2268 set_pstate_bits(PSTATE_UAO);
2269 } else {
2270 clear_pstate_bits(PSTATE_UAO);
2271 }
2272 gen_rebuild_hflags(s);
2273 s->base.is_jmp = DISAS_TOO_MANY;
2274 return true;
2275 }
2276
2277 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
2278 {
2279 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
2280 return false;
2281 }
2282 if (a->imm & 1) {
2283 set_pstate_bits(PSTATE_PAN);
2284 } else {
2285 clear_pstate_bits(PSTATE_PAN);
2286 }
2287 gen_rebuild_hflags(s);
2288 s->base.is_jmp = DISAS_TOO_MANY;
2289 return true;
2290 }
2291
2292 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
2293 {
2294 if (s->current_el == 0) {
2295 return false;
2296 }
2297 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
2298 s->base.is_jmp = DISAS_TOO_MANY;
2299 return true;
2300 }
2301
2302 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
2303 {
2304 if (!dc_isar_feature(aa64_ssbs, s)) {
2305 return false;
2306 }
2307 if (a->imm & 1) {
2308 set_pstate_bits(PSTATE_SSBS);
2309 } else {
2310 clear_pstate_bits(PSTATE_SSBS);
2311 }
2312 /* Don't need to rebuild hflags since SSBS is a nop */
2313 s->base.is_jmp = DISAS_TOO_MANY;
2314 return true;
2315 }
2316
2317 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
2318 {
2319 if (!dc_isar_feature(aa64_dit, s)) {
2320 return false;
2321 }
2322 if (a->imm & 1) {
2323 set_pstate_bits(PSTATE_DIT);
2324 } else {
2325 clear_pstate_bits(PSTATE_DIT);
2326 }
2327 /* There's no need to rebuild hflags because DIT is a nop */
2328 s->base.is_jmp = DISAS_TOO_MANY;
2329 return true;
2330 }
2331
2332 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
2333 {
2334 if (dc_isar_feature(aa64_mte, s)) {
2335 /* Full MTE is enabled -- set the TCO bit as directed. */
2336 if (a->imm & 1) {
2337 set_pstate_bits(PSTATE_TCO);
2338 } else {
2339 clear_pstate_bits(PSTATE_TCO);
2340 }
2341 gen_rebuild_hflags(s);
2342 /* Many factors, including TCO, go into MTE_ACTIVE. */
2343 s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
2344 return true;
2345 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
2346 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */
2347 return true;
2348 } else {
2349 /* Insn not present */
2350 return false;
2351 }
2352 }
2353
2354 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a)
2355 {
2356 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm));
2357 s->base.is_jmp = DISAS_TOO_MANY;
2358 return true;
2359 }
2360
2361 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a)
2362 {
2363 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm));
2364 /* Exit the cpu loop to re-evaluate pending IRQs. */
2365 s->base.is_jmp = DISAS_UPDATE_EXIT;
2366 return true;
2367 }
2368
2369 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a)
2370 {
2371 if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) {
2372 return false;
2373 }
2374
2375 if (a->imm == 0) {
2376 clear_pstate_bits(PSTATE_ALLINT);
2377 } else if (s->current_el > 1) {
2378 set_pstate_bits(PSTATE_ALLINT);
2379 } else {
2380 gen_helper_msr_set_allint_el1(tcg_env);
2381 }
2382
2383 /* Exit the cpu loop to re-evaluate pending IRQs. */
2384 s->base.is_jmp = DISAS_UPDATE_EXIT;
2385 return true;
2386 }
2387
2388 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a)
2389 {
2390 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) {
2391 return false;
2392 }
2393 if (sme_access_check(s)) {
2394 int old = s->pstate_sm | (s->pstate_za << 1);
2395 int new = a->imm * 3;
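/*
 * SVCR.SM is bit 0 and SVCR.ZA is bit 1; a->imm (0 or 1) is
 * replicated into both bit positions, and a->mask selects which
 * of them this MSR actually changes.
 */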
2396
2397 if ((old ^ new) & a->mask) {
2398 /* At least one bit changes. */
2399 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new),
2400 tcg_constant_i32(a->mask));
2401 s->base.is_jmp = DISAS_TOO_MANY;
2402 }
2403 }
2404 return true;
2405 }
2406
2407 static void gen_get_nzcv(TCGv_i64 tcg_rt)
2408 {
2409 TCGv_i32 tmp = tcg_temp_new_i32();
2410 TCGv_i32 nzcv = tcg_temp_new_i32();
2411
2412 /* build bit 31, N */
2413 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
2414 /* build bit 30, Z */
2415 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
2416 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
2417 /* build bit 29, C */
2418 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
2419 /* build bit 28, V */
2420 tcg_gen_shri_i32(tmp, cpu_VF, 31);
2421 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
2422 /* generate result */
2423 tcg_gen_extu_i32_i64(tcg_rt, nzcv);
2424 }
2425
2426 static void gen_set_nzcv(TCGv_i64 tcg_rt)
2427 {
2428 TCGv_i32 nzcv = tcg_temp_new_i32();
2429
2430 /* take NZCV from R[t] */
2431 tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
2432
2433 /* bit 31, N */
2434 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
2435 /* bit 30, Z */
2436 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
2437 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
2438 /* bit 29, C */
2439 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
2440 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
2441 /* bit 28, V */
2442 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
2443 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
2444 }
2445
2446 static void gen_sysreg_undef(DisasContext *s, bool isread,
2447 uint8_t op0, uint8_t op1, uint8_t op2,
2448 uint8_t crn, uint8_t crm, uint8_t rt)
2449 {
2450 /*
2451 * Generate code to emit an UNDEF with correct syndrome
2452 * information for a failed system register access.
2453 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
2454 * but if FEAT_IDST is implemented then read accesses to registers
2455 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
2456 * syndrome.
2457 */
2458 uint32_t syndrome;
2459
2460 if (isread && dc_isar_feature(aa64_ids, s) &&
2461 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
2462 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2463 } else {
2464 syndrome = syn_uncategorized();
2465 }
2466 gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
2467 }
2468
2469 /* MRS - move from system register
2470 * MSR (register) - move to system register
2471 * SYS
2472 * SYSL
2473 * These are all essentially the same insn in 'read' and 'write'
2474 * versions, with varying op0 fields.
2475 */
2476 static void handle_sys(DisasContext *s, bool isread,
2477 unsigned int op0, unsigned int op1, unsigned int op2,
2478 unsigned int crn, unsigned int crm, unsigned int rt)
2479 {
2480 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2481 crn, crm, op0, op1, op2);
2482 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
2483 bool need_exit_tb = false;
2484 bool nv_trap_to_el2 = false;
2485 bool nv_redirect_reg = false;
2486 bool skip_fp_access_checks = false;
2487 bool nv2_mem_redirect = false;
2488 TCGv_ptr tcg_ri = NULL;
2489 TCGv_i64 tcg_rt;
2490 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
2491
2492 if (crn == 11 || crn == 15) {
2493 /*
2494 * Check for TIDCP trap, which must take precedence over
2495 * the UNDEF for "no such register" etc.
2496 */
2497 switch (s->current_el) {
2498 case 0:
2499 if (dc_isar_feature(aa64_tidcp1, s)) {
2500 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome));
2501 }
2502 break;
2503 case 1:
2504 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome));
2505 break;
2506 }
2507 }
2508
2509 if (!ri) {
2510 /* Unknown register; this might be a guest error or a QEMU
2511 * unimplemented feature.
2512 */
2513 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
2514 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
2515 isread ? "read" : "write", op0, op1, crn, crm, op2);
2516 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2517 return;
2518 }
2519
2520 if (s->nv2 && ri->nv2_redirect_offset) {
2521 /*
2522 * Some registers always redirect to memory; some only do so if
2523 * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in
2524 * pairs which share an offset; see the table in R_CSRPQ).
2525 */
2526 if (ri->nv2_redirect_offset & NV2_REDIR_NV1) {
2527 nv2_mem_redirect = s->nv1;
2528 } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) {
2529 nv2_mem_redirect = !s->nv1;
2530 } else {
2531 nv2_mem_redirect = true;
2532 }
2533 }
2534
2535 /* Check access permissions */
2536 if (!cp_access_ok(s->current_el, ri, isread)) {
2537 /*
2538 * FEAT_NV/NV2 handling does not do the usual FP access checks
2539 * for registers only accessible at EL2 (though it *does* do them
2540 * for registers accessible at EL1).
2541 */
2542 skip_fp_access_checks = true;
2543 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) {
2544 /*
2545 * This is one of the few EL2 registers which should redirect
2546 * to the equivalent EL1 register. We do that after running
2547 * the EL2 register's accessfn.
2548 */
2549 nv_redirect_reg = true;
2550 assert(!nv2_mem_redirect);
2551 } else if (nv2_mem_redirect) {
2552 /*
2553 * NV2 redirect-to-memory takes precedence over trap to EL2 or
2554 * UNDEF to EL1.
2555 */
2556 } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
2557 /*
2558 * This register / instruction exists and is an EL2 register, so
2559 * we must trap to EL2 if accessed in nested virtualization EL1
2560 * instead of UNDEFing. We'll do that after the usual access checks.
2561 * (This makes a difference only for a couple of registers like
2562 * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
2563 * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
2564 * an accessfn which does nothing when called from EL1, because
2565 * the trap-to-EL3 controls which would apply to that register
2566 * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
2567 */
2568 nv_trap_to_el2 = true;
2569 } else {
2570 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
2571 return;
2572 }
2573 }
2574
2575 if (ri->accessfn || (ri->fgt && s->fgt_active)) {
2576 /* Emit code to perform further access permissions checks at
2577 * runtime; this may result in an exception.
2578 */
2579 gen_a64_update_pc(s, 0);
2580 tcg_ri = tcg_temp_new_ptr();
2581 gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
2582 tcg_constant_i32(key),
2583 tcg_constant_i32(syndrome),
2584 tcg_constant_i32(isread));
2585 } else if (ri->type & ARM_CP_RAISES_EXC) {
2586 /*
2587 * The readfn or writefn might raise an exception;
2588 * synchronize the CPU state in case it does.
2589 */
2590 gen_a64_update_pc(s, 0);
2591 }
2592
2593 if (!skip_fp_access_checks) {
2594 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
2595 return;
2596 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
2597 return;
2598 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
2599 return;
2600 }
2601 }
2602
2603 if (nv_trap_to_el2) {
2604 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2605 return;
2606 }
2607
2608 if (nv_redirect_reg) {
2609 /*
2610 * FEAT_NV2 redirection of an EL2 register to an EL1 register.
2611 * Conveniently in all cases the encoding of the EL1 register is
2612 * identical to the EL2 register except that opc1 is 0.
2613 * Get the reginfo for the EL1 register to use for the actual access.
2614 * We don't use the EL1 register's access function, and
2615 * fine-grained-traps on EL1 also do not apply here.
2616 */
2617 key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
2618 crn, crm, op0, 0, op2);
2619 ri = get_arm_cp_reginfo(s->cp_regs, key);
2620 assert(ri);
2621 assert(cp_access_ok(s->current_el, ri, isread));
2622 /*
2623 * We might not have done an update_pc earlier, so check we don't
2624 * need it. We could support this in future if necessary.
2625 */
2626 assert(!(ri->type & ARM_CP_RAISES_EXC));
2627 }
2628
2629 if (nv2_mem_redirect) {
2630 /*
2631 * This system register is being redirected into an EL2 memory access.
2632 * This means it is not an IO operation, doesn't change hflags,
2633 * and need not end the TB, because it has no side effects.
2634 *
2635 * The access is 64-bit single copy atomic, guaranteed aligned because
2636 * of the definition of VCNR_EL2. Its endianness depends on
2637 * SCTLR_EL2.EE, not on the data endianness of EL1.
2638 * It is done under either the EL2 translation regime or the EL2&0
2639 * translation regime, depending on HCR_EL2.E2H. It behaves as if
2640 * PSTATE.PAN is 0.
2641 */
2642 TCGv_i64 ptr = tcg_temp_new_i64();
2643 MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN;
2644 ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2;
2645 int memidx = arm_to_core_mmu_idx(armmemidx);
2646 uint32_t syn;
2647
2648 mop |= (s->nv2_mem_be ? MO_BE : MO_LE);
2649
2650 tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2));
2651 tcg_gen_addi_i64(ptr, ptr,
2652 (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK));
2653 tcg_rt = cpu_reg(s, rt);
2654
2655 syn = syn_data_abort_vncr(0, !isread, 0);
2656 disas_set_insn_syndrome(s, syn);
2657 if (isread) {
2658 tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop);
2659 } else {
2660 tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop);
2661 }
2662 return;
2663 }
2664
2665 /* Handle special cases first */
2666 switch (ri->type & ARM_CP_SPECIAL_MASK) {
2667 case 0:
2668 break;
2669 case ARM_CP_NOP:
2670 return;
2671 case ARM_CP_NZCV:
2672 tcg_rt = cpu_reg(s, rt);
2673 if (isread) {
2674 gen_get_nzcv(tcg_rt);
2675 } else {
2676 gen_set_nzcv(tcg_rt);
2677 }
2678 return;
2679 case ARM_CP_CURRENTEL:
2680 {
2681 /*
2682 * Reads as current EL value from pstate, which is
2683 * guaranteed to be constant by the tb flags.
2684 * For nested virt we should report EL2.
2685 */
2686 int el = s->nv ? 2 : s->current_el;
2687 tcg_rt = cpu_reg(s, rt);
2688 tcg_gen_movi_i64(tcg_rt, el << 2);
2689 return;
2690 }
2691 case ARM_CP_DC_ZVA:
2692 /* Writes clear the aligned block of memory which rt points into. */
2693 if (s->mte_active[0]) {
2694 int desc = 0;
2695
2696 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
2697 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
2698 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
2699
2700 tcg_rt = tcg_temp_new_i64();
2701 gen_helper_mte_check_zva(tcg_rt, tcg_env,
2702 tcg_constant_i32(desc), cpu_reg(s, rt));
2703 } else {
2704 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
2705 }
2706 gen_helper_dc_zva(tcg_env, tcg_rt);
2707 return;
2708 case ARM_CP_DC_GVA:
2709 {
2710 TCGv_i64 clean_addr, tag;
2711
2712 /*
2713 * DC_GVA, like DC_ZVA, requires that we supply the original
2714 * pointer for an invalid page. Probe that address first.
2715 */
2716 tcg_rt = cpu_reg(s, rt);
2717 clean_addr = clean_data_tbi(s, tcg_rt);
2718 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
2719
2720 if (s->ata[0]) {
2721 /* Extract the tag from the register to match STZGM. */
2722 tag = tcg_temp_new_i64();
2723 tcg_gen_shri_i64(tag, tcg_rt, 56);
2724 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2725 }
2726 }
2727 return;
2728 case ARM_CP_DC_GZVA:
2729 {
2730 TCGv_i64 clean_addr, tag;
2731
2732 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
2733 tcg_rt = cpu_reg(s, rt);
2734 clean_addr = clean_data_tbi(s, tcg_rt);
2735 gen_helper_dc_zva(tcg_env, clean_addr);
2736
2737 if (s->ata[0]) {
2738 /* Extract the tag from the register to match STZGM. */
2739 tag = tcg_temp_new_i64();
2740 tcg_gen_shri_i64(tag, tcg_rt, 56);
2741 gen_helper_stzgm_tags(tcg_env, clean_addr, tag);
2742 }
2743 }
2744 return;
2745 default:
2746 g_assert_not_reached();
2747 }
2748
2749 if (ri->type & ARM_CP_IO) {
2750 /* I/O operations must end the TB here (whether read or write) */
2751 need_exit_tb = translator_io_start(&s->base);
2752 }
2753
2754 tcg_rt = cpu_reg(s, rt);
2755
2756 if (isread) {
2757 if (ri->type & ARM_CP_CONST) {
2758 tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
2759 } else if (ri->readfn) {
2760 if (!tcg_ri) {
2761 tcg_ri = gen_lookup_cp_reg(key);
2762 }
2763 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri);
2764 } else {
2765 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset);
2766 }
2767 } else {
2768 if (ri->type & ARM_CP_CONST) {
2769 /* If not forbidden by access permissions, treat as WI */
2770 return;
2771 } else if (ri->writefn) {
2772 if (!tcg_ri) {
2773 tcg_ri = gen_lookup_cp_reg(key);
2774 }
2775 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt);
2776 } else {
2777 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset);
2778 }
2779 }
2780
2781 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
2782 /*
2783 * A write to any coprocessor register that ends a TB
2784 * must rebuild the hflags for the next TB.
2785 */
2786 gen_rebuild_hflags(s);
2787 /*
2788 * We default to ending the TB on a coprocessor register write,
2789 * but allow this to be suppressed by the register definition
2790 * (usually only necessary to work around guest bugs).
2791 */
2792 need_exit_tb = true;
2793 }
2794 if (need_exit_tb) {
2795 s->base.is_jmp = DISAS_UPDATE_EXIT;
2796 }
2797 }
2798
2799 static bool trans_SYS(DisasContext *s, arg_SYS *a)
2800 {
2801 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt);
2802 return true;
2803 }
2804
2805 static bool trans_SVC(DisasContext *s, arg_i *a)
2806 {
2807 /*
2808 * For SVC, HVC and SMC we advance the single-step state
2809 * machine before taking the exception. This is architecturally
2810 * mandated, to ensure that single-stepping a system call
2811 * instruction works properly.
2812 */
2813 uint32_t syndrome = syn_aa64_svc(a->imm);
2814 if (s->fgt_svc) {
2815 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
2816 return true;
2817 }
2818 gen_ss_advance(s);
2819 gen_exception_insn(s, 4, EXCP_SWI, syndrome);
2820 return true;
2821 }
2822
2823 static bool trans_HVC(DisasContext *s, arg_i *a)
2824 {
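/* An HVC executed at EL3 targets EL3; otherwise it targets EL2. */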
2825 int target_el = s->current_el == 3 ? 3 : 2;
2826
2827 if (s->current_el == 0) {
2828 unallocated_encoding(s);
2829 return true;
2830 }
2831 /*
2832 * The pre HVC helper handles cases when HVC gets trapped
2833 * as an undefined insn by runtime configuration.
2834 */
2835 gen_a64_update_pc(s, 0);
2836 gen_helper_pre_hvc(tcg_env);
2837 /* Architecture requires ss advance before we do the actual work */
2838 gen_ss_advance(s);
2839 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el);
2840 return true;
2841 }
2842
2843 static bool trans_SMC(DisasContext *s, arg_i *a)
2844 {
2845 if (s->current_el == 0) {
2846 unallocated_encoding(s);
2847 return true;
2848 }
2849 gen_a64_update_pc(s, 0);
2850 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm)));
2851 /* Architecture requires ss advance before we do the actual work */
2852 gen_ss_advance(s);
2853 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3);
2854 return true;
2855 }
2856
2857 static bool trans_BRK(DisasContext *s, arg_i *a)
2858 {
2859 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm));
2860 return true;
2861 }
2862
2863 static bool trans_HLT(DisasContext *s, arg_i *a)
2864 {
2865 /*
2866 * HLT. This has two purposes.
2867 * Architecturally, it is an external halting debug instruction.
2868 * Since QEMU doesn't implement external debug, we treat this as
2869 * it is required to behave when halting debug is disabled: it will UNDEF.
2870 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
2871 */
2872 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) {
2873 gen_exception_internal_insn(s, EXCP_SEMIHOST);
2874 } else {
2875 unallocated_encoding(s);
2876 }
2877 return true;
2878 }
2879
2880 /*
2881 * Load/Store exclusive instructions are implemented by remembering
2882 * the value/address loaded, and seeing if these are the same
2883 * when the store is performed. This is not actually the architecturally
2884 * mandated semantics, but it works for typical guest code sequences
2885 * and avoids having to monitor regular stores.
2886 *
2887 * The store exclusive uses the atomic cmpxchg primitives to avoid
2888 * races in multi-threaded linux-user and when MTTCG softmmu is
2889 * enabled.
2890 */
2891 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
2892 int size, bool is_pair)
2893 {
2894 int idx = get_mem_index(s);
2895 TCGv_i64 dirty_addr, clean_addr;
2896 MemOp memop = check_atomic_align(s, rn, size + is_pair);
2897
2898 s->is_ldex = true;
2899 dirty_addr = cpu_reg_sp(s, rn);
2900 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
2901
2902 g_assert(size <= 3);
2903 if (is_pair) {
2904 g_assert(size >= 2);
2905 if (size == 2) {
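/* Load the pair as one 64-bit access and split it into Rt/Rt2 by endianness. */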
2906 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2907 if (s->be_data == MO_LE) {
2908 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2909 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2910 } else {
2911 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2912 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2913 }
2914 } else {
2915 TCGv_i128 t16 = tcg_temp_new_i128();
2916
2917 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
2918
2919 if (s->be_data == MO_LE) {
2920 tcg_gen_extr_i128_i64(cpu_exclusive_val,
2921 cpu_exclusive_high, t16);
2922 } else {
2923 tcg_gen_extr_i128_i64(cpu_exclusive_high,
2924 cpu_exclusive_val, t16);
2925 }
2926 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2927 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2928 }
2929 } else {
2930 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
2931 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2932 }
2933 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
2934 }
2935
2936 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2937 int rn, int size, int is_pair)
2938 {
2939 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2940 * && (!is_pair || env->exclusive_high == [addr + datasize])) {
2941 * [addr] = {Rt};
2942 * if (is_pair) {
2943 * [addr + datasize] = {Rt2};
2944 * }
2945 * {Rd} = 0;
2946 * } else {
2947 * {Rd} = 1;
2948 * }
2949 * env->exclusive_addr = -1;
2950 */
2951 TCGLabel *fail_label = gen_new_label();
2952 TCGLabel *done_label = gen_new_label();
2953 TCGv_i64 tmp, clean_addr;
2954 MemOp memop;
2955
2956 /*
2957 * FIXME: We are out of spec here. We have recorded only the address
2958 * from load_exclusive, not the entire range, and we assume that the
2959 * size of the access on both sides match. The architecture allows the
2960 * store to be smaller than the load, so long as the stored bytes are
2961 * within the range recorded by the load.
2962 */
2963
2964 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
2965 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2966 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
2967
2968 /*
2969 * The write, and any associated faults, only happen if the virtual
2970 * and physical addresses pass the exclusive monitor check. These
2971 * faults are exceedingly unlikely, because normally the guest uses
2972 * the exact same address register for the load_exclusive, and we
2973 * would have recognized these faults there.
2974 *
2975 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
2976 * unaligned 4-byte write within the range of an aligned 8-byte load.
2977 * With LSE2, the store would need to cross a 16-byte boundary when the
2978 * load did not, which would mean the store is outside the range
2979 * recorded for the monitor, which would have failed a corrected monitor
2980 * check above. For now, we assume no size change and retain the
2981 * MO_ALIGN to let tcg know what we checked in the load_exclusive.
2982 *
2983 * It is possible to trigger an MTE fault, by performing the load with
2984 * a virtual address with a valid tag and performing the store with the
2985 * same virtual address and a different invalid tag.
2986 */
2987 memop = size + is_pair;
2988 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
2989 memop |= MO_ALIGN;
2990 }
2991 memop = finalize_memop(s, memop);
2992 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
2993
2994 tmp = tcg_temp_new_i64();
2995 if (is_pair) {
2996 if (size == 2) {
2997 if (s->be_data == MO_LE) {
2998 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2999 } else {
3000 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
3001 }
3002 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
3003 cpu_exclusive_val, tmp,
3004 get_mem_index(s), memop);
3005 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
3006 } else {
3007 TCGv_i128 t16 = tcg_temp_new_i128();
3008 TCGv_i128 c16 = tcg_temp_new_i128();
3009 TCGv_i64 a, b;
3010
3011 if (s->be_data == MO_LE) {
3012 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2));
3013 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val,
3014 cpu_exclusive_high);
3015 } else {
3016 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt));
3017 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high,
3018 cpu_exclusive_val);
3019 }
3020
3021 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
3022 get_mem_index(s), memop);
3023
3024 a = tcg_temp_new_i64();
3025 b = tcg_temp_new_i64();
3026 if (s->be_data == MO_LE) {
3027 tcg_gen_extr_i128_i64(a, b, t16);
3028 } else {
3029 tcg_gen_extr_i128_i64(b, a, t16);
3030 }
3031
3032 tcg_gen_xor_i64(a, a, cpu_exclusive_val);
3033 tcg_gen_xor_i64(b, b, cpu_exclusive_high);
3034 tcg_gen_or_i64(tmp, a, b);
3035
3036 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
3037 }
3038 } else {
3039 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
3040 cpu_reg(s, rt), get_mem_index(s), memop);
3041 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
3042 }
3043 tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
3044 tcg_gen_br(done_label);
3045
3046 gen_set_label(fail_label);
3047 tcg_gen_movi_i64(cpu_reg(s, rd), 1);
3048 gen_set_label(done_label);
3049 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
3050 }
3051
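/*
 * CAS: atomically compare the memory at [Rn] with Rs and, if equal,
 * store Rt; Rs receives the old memory value either way.
 */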
3052 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
3053 int rn, int size)
3054 {
3055 TCGv_i64 tcg_rs = cpu_reg(s, rs);
3056 TCGv_i64 tcg_rt = cpu_reg(s, rt);
3057 int memidx = get_mem_index(s);
3058 TCGv_i64 clean_addr;
3059 MemOp memop;
3060
3061 if (rn == 31) {
3062 gen_check_sp_alignment(s);
3063 }
3064 memop = check_atomic_align(s, rn, size);
3065 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3066 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
3067 memidx, memop);
3068 }
3069
3070 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
3071 int rn, int size)
3072 {
3073 TCGv_i64 s1 = cpu_reg(s, rs);
3074 TCGv_i64 s2 = cpu_reg(s, rs + 1);
3075 TCGv_i64 t1 = cpu_reg(s, rt);
3076 TCGv_i64 t2 = cpu_reg(s, rt + 1);
3077 TCGv_i64 clean_addr;
3078 int memidx = get_mem_index(s);
3079 MemOp memop;
3080
3081 if (rn == 31) {
3082 gen_check_sp_alignment(s);
3083 }
3084
3085 /* This is a single atomic access, despite the "pair". */
3086 memop = check_atomic_align(s, rn, size + 1);
3087 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
3088
3089 if (size == 2) {
3090 TCGv_i64 cmp = tcg_temp_new_i64();
3091 TCGv_i64 val = tcg_temp_new_i64();
3092
3093 if (s->be_data == MO_LE) {
3094 tcg_gen_concat32_i64(val, t1, t2);
3095 tcg_gen_concat32_i64(cmp, s1, s2);
3096 } else {
3097 tcg_gen_concat32_i64(val, t2, t1);
3098 tcg_gen_concat32_i64(cmp, s2, s1);
3099 }
3100
3101 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
3102
3103 if (s->be_data == MO_LE) {
3104 tcg_gen_extr32_i64(s1, s2, cmp);
3105 } else {
3106 tcg_gen_extr32_i64(s2, s1, cmp);
3107 }
3108 } else {
3109 TCGv_i128 cmp = tcg_temp_new_i128();
3110 TCGv_i128 val = tcg_temp_new_i128();
3111
3112 if (s->be_data == MO_LE) {
3113 tcg_gen_concat_i64_i128(val, t1, t2);
3114 tcg_gen_concat_i64_i128(cmp, s1, s2);
3115 } else {
3116 tcg_gen_concat_i64_i128(val, t2, t1);
3117 tcg_gen_concat_i64_i128(cmp, s2, s1);
3118 }
3119
3120 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
3121
3122 if (s->be_data == MO_LE) {
3123 tcg_gen_extr_i128_i64(s1, s2, cmp);
3124 } else {
3125 tcg_gen_extr_i128_i64(s2, s1, cmp);
3126 }
3127 }
3128 }
3129
3130 /*
3131 * Compute the ISS.SF bit for syndrome information if an exception
3132 * is taken on a load or store. This indicates whether the instruction
3133 * is accessing a 32-bit or 64-bit register. This logic is derived
3134 * from the ARMv8 specs for LDR (Shared decode for all encodings).
3135 */
3136 static bool ldst_iss_sf(int size, bool sign, bool ext)
3137 {
3138
3139 if (sign) {
3140 /*
3141 * Signed loads are 64 bit results if we are not going to
3142 * do a zero-extend from 32 to 64 after the load.
3143 * (For a store, sign and ext are always false.)
3144 */
3145 return !ext;
3146 } else {
3147 /* Unsigned loads/stores work at the specified size */
3148 return size == MO_64;
3149 }
3150 }
3151
3152 static bool trans_STXR(DisasContext *s, arg_stxr *a)
3153 {
3154 if (a->rn == 31) {
3155 gen_check_sp_alignment(s);
3156 }
3157 if (a->lasr) {
3158 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3159 }
3160 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false);
3161 return true;
3162 }
3163
3164 static bool trans_LDXR(DisasContext *s, arg_stxr *a)
3165 {
3166 if (a->rn == 31) {
3167 gen_check_sp_alignment(s);
3168 }
3169 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false);
3170 if (a->lasr) {
3171 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3172 }
3173 return true;
3174 }
3175
3176 static bool trans_STLR(DisasContext *s, arg_stlr *a)
3177 {
3178 TCGv_i64 clean_addr;
3179 MemOp memop;
3180 bool iss_sf = ldst_iss_sf(a->sz, false, false);
3181
3182 /*
3183 * StoreLORelease is the same as Store-Release for QEMU, but
3184 * needs the feature-test.
3185 */
3186 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3187 return false;
3188 }
3189 /* Generate ISS for non-exclusive accesses including LASR. */
3190 if (a->rn == 31) {
3191 gen_check_sp_alignment(s);
3192 }
3193 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3194 memop = check_ordered_align(s, a->rn, 0, true, a->sz);
3195 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3196 true, a->rn != 31, memop);
3197 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt,
3198 iss_sf, a->lasr);
3199 return true;
3200 }
3201
3202 static bool trans_LDAR(DisasContext *s, arg_stlr *a)
3203 {
3204 TCGv_i64 clean_addr;
3205 MemOp memop;
3206 bool iss_sf = ldst_iss_sf(a->sz, false, false);
3207
3208 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */
3209 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) {
3210 return false;
3211 }
3212 /* Generate ISS for non-exclusive accesses including LASR. */
3213 if (a->rn == 31) {
3214 gen_check_sp_alignment(s);
3215 }
3216 memop = check_ordered_align(s, a->rn, 0, false, a->sz);
3217 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn),
3218 false, a->rn != 31, memop);
3219 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true,
3220 a->rt, iss_sf, a->lasr);
3221 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3222 return true;
3223 }
3224
3225 static bool trans_STXP(DisasContext *s, arg_stxr *a)
3226 {
3227 if (a->rn == 31) {
3228 gen_check_sp_alignment(s);
3229 }
3230 if (a->lasr) {
3231 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3232 }
3233 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true);
3234 return true;
3235 }
3236
3237 static bool trans_LDXP(DisasContext *s, arg_stxr *a)
3238 {
3239 if (a->rn == 31) {
3240 gen_check_sp_alignment(s);
3241 }
3242 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true);
3243 if (a->lasr) {
3244 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3245 }
3246 return true;
3247 }
3248
3249 static bool trans_CASP(DisasContext *s, arg_CASP *a)
3250 {
3251 if (!dc_isar_feature(aa64_atomics, s)) {
3252 return false;
3253 }
3254 if (((a->rt | a->rs) & 1) != 0) {
3255 return false;
3256 }
3257
3258 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz);
3259 return true;
3260 }
3261
3262 static bool trans_CAS(DisasContext *s, arg_CAS *a)
3263 {
3264 if (!dc_isar_feature(aa64_atomics, s)) {
3265 return false;
3266 }
3267 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz);
3268 return true;
3269 }
3270
3271 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a)
3272 {
3273 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false);
3274 TCGv_i64 tcg_rt = cpu_reg(s, a->rt);
3275 TCGv_i64 clean_addr = tcg_temp_new_i64();
3276 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3277
3278 gen_pc_plus_diff(s, clean_addr, a->imm);
3279 do_gpr_ld(s, tcg_rt, clean_addr, memop,
3280 false, true, a->rt, iss_sf, false);
3281 return true;
3282 }
3283
3284 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a)
3285 {
3286 /* Load register (literal), vector version */
3287 TCGv_i64 clean_addr;
3288 MemOp memop;
3289
3290 if (!fp_access_check(s)) {
3291 return true;
3292 }
3293 memop = finalize_memop_asimd(s, a->sz);
3294 clean_addr = tcg_temp_new_i64();
3295 gen_pc_plus_diff(s, clean_addr, a->imm);
3296 do_fp_ld(s, a->rt, clean_addr, memop);
3297 return true;
3298 }
3299
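/*
 * Compute the (possibly pre-indexed) address for a load/store pair
 * and perform the MTE check over the full 2 << sz bytes of the access.
 */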
3300 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a,
3301 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3302 uint64_t offset, bool is_store, MemOp mop)
3303 {
3304 if (a->rn == 31) {
3305 gen_check_sp_alignment(s);
3306 }
3307
3308 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3309 if (!a->p) {
3310 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3311 }
3312
3313 *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store,
3314 (a->w || a->rn != 31), 2 << a->sz, mop);
3315 }
3316
3317 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a,
3318 TCGv_i64 dirty_addr, uint64_t offset)
3319 {
3320 if (a->w) {
3321 if (a->p) {
3322 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3323 }
3324 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3325 }
3326 }
3327
3328 static bool trans_STP(DisasContext *s, arg_ldstpair *a)
3329 {
3330 uint64_t offset = a->imm << a->sz;
3331 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3332 MemOp mop = finalize_memop(s, a->sz);
3333
3334 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3335 tcg_rt = cpu_reg(s, a->rt);
3336 tcg_rt2 = cpu_reg(s, a->rt2);
3337 /*
3338 * We built mop above for the single logical access -- rebuild it
3339 * now for the paired operation.
3340 *
3341 * With LSE2, non-sign-extending pairs are treated atomically if
3342 * aligned, and if unaligned one of the pair will be completely
3343 * within a 16-byte block and that element will be atomic.
3344 * Otherwise each element is separately atomic.
3345 * In all cases, issue one operation with the correct atomicity.
3346 */
3347 mop = a->sz + 1;
3348 if (s->align_mem) {
3349 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3350 }
3351 mop = finalize_memop_pair(s, mop);
3352 if (a->sz == 2) {
3353 TCGv_i64 tmp = tcg_temp_new_i64();
3354
3355 if (s->be_data == MO_LE) {
3356 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
3357 } else {
3358 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
3359 }
3360 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
3361 } else {
3362 TCGv_i128 tmp = tcg_temp_new_i128();
3363
3364 if (s->be_data == MO_LE) {
3365 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3366 } else {
3367 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3368 }
3369 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3370 }
3371 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3372 return true;
3373 }
3374
3375 static bool trans_LDP(DisasContext *s, arg_ldstpair *a)
3376 {
3377 uint64_t offset = a->imm << a->sz;
3378 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3379 MemOp mop = finalize_memop(s, a->sz);
3380
3381 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3382 tcg_rt = cpu_reg(s, a->rt);
3383 tcg_rt2 = cpu_reg(s, a->rt2);
3384
3385 /*
3386 * We built mop above for the single logical access -- rebuild it
3387 * now for the paired operation.
3388 *
3389 * With LSE2, non-sign-extending pairs are treated atomically if
3390 * aligned, and if unaligned one of the pair will be completely
3391 * within a 16-byte block and that element will be atomic.
3392 * Otherwise each element is separately atomic.
3393 * In all cases, issue one operation with the correct atomicity.
3394 *
3395 * This treats sign-extending loads like zero-extending loads,
3396 * since that reuses the most code below.
3397 */
3398 mop = a->sz + 1;
3399 if (s->align_mem) {
3400 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
3401 }
3402 mop = finalize_memop_pair(s, mop);
3403 if (a->sz == 2) {
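/* o1/o2 select which 32-bit half of the loaded doubleword goes to Rt and Rt2, by endianness. */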
3404 int o2 = s->be_data == MO_LE ? 32 : 0;
3405 int o1 = o2 ^ 32;
3406
3407 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
3408 if (a->sign) {
3409 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
3410 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
3411 } else {
3412 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
3413 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
3414 }
3415 } else {
3416 TCGv_i128 tmp = tcg_temp_new_i128();
3417
3418 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
3419 if (s->be_data == MO_LE) {
3420 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
3421 } else {
3422 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
3423 }
3424 }
3425 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3426 return true;
3427 }
3428
3429 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a)
3430 {
3431 uint64_t offset = a->imm << a->sz;
3432 TCGv_i64 clean_addr, dirty_addr;
3433 MemOp mop;
3434
3435 if (!fp_access_check(s)) {
3436 return true;
3437 }
3438
3439 /* LSE2 does not merge FP pairs; leave these as separate operations. */
3440 mop = finalize_memop_asimd(s, a->sz);
3441 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop);
3442 do_fp_st(s, a->rt, clean_addr, mop);
3443 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3444 do_fp_st(s, a->rt2, clean_addr, mop);
3445 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3446 return true;
3447 }
3448
3449 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a)
3450 {
3451 uint64_t offset = a->imm << a->sz;
3452 TCGv_i64 clean_addr, dirty_addr;
3453 MemOp mop;
3454
3455 if (!fp_access_check(s)) {
3456 return true;
3457 }
3458
3459 /* LSE2 does not merge FP pairs; leave these as separate operations. */
3460 mop = finalize_memop_asimd(s, a->sz);
3461 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop);
3462 do_fp_ld(s, a->rt, clean_addr, mop);
3463 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz);
3464 do_fp_ld(s, a->rt2, clean_addr, mop);
3465 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3466 return true;
3467 }
3468
3469 static bool trans_STGP(DisasContext *s, arg_ldstpair *a)
3470 {
3471 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2;
3472 uint64_t offset = a->imm << LOG2_TAG_GRANULE;
3473 MemOp mop;
3474 TCGv_i128 tmp;
3475
3476 /* STGP only comes in one size. */
3477 tcg_debug_assert(a->sz == MO_64);
3478
3479 if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
3480 return false;
3481 }
3482
3483 if (a->rn == 31) {
3484 gen_check_sp_alignment(s);
3485 }
3486
3487 dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3488 if (!a->p) {
3489 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3490 }
3491
3492 clean_addr = clean_data_tbi(s, dirty_addr);
3493 tcg_rt = cpu_reg(s, a->rt);
3494 tcg_rt2 = cpu_reg(s, a->rt2);
3495
3496 /*
3497 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE,
3498 * and one tag operation. We implement it as one single aligned 16-byte
3499 * memory operation for convenience. Note that the alignment ensures
3500 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store.
3501 */
3502 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR);
3503
3504 tmp = tcg_temp_new_i128();
3505 if (s->be_data == MO_LE) {
3506 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
3507 } else {
3508 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
3509 }
3510 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
3511
3512 /* Perform the tag store, if tag access enabled. */
3513 if (s->ata[0]) {
3514 if (tb_cflags(s->base.tb) & CF_PARALLEL) {
3515 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr);
3516 } else {
3517 gen_helper_stg(tcg_env, dirty_addr, dirty_addr);
3518 }
3519 }
3520
3521 op_addr_ldstpair_post(s, a, dirty_addr, offset);
3522 return true;
3523 }
3524
3525 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a,
3526 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3527 uint64_t offset, bool is_store, MemOp mop)
3528 {
3529 int memidx;
3530
3531 if (a->rn == 31) {
3532 gen_check_sp_alignment(s);
3533 }
3534
3535 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3536 if (!a->p) {
3537 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset);
3538 }
3539 memidx = get_a64_user_mem_index(s, a->unpriv);
3540 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store,
3541 a->w || a->rn != 31,
3542 mop, a->unpriv, memidx);
3543 }
3544
3545 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a,
3546 TCGv_i64 dirty_addr, uint64_t offset)
3547 {
3548 if (a->w) {
3549 if (a->p) {
3550 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3551 }
3552 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3553 }
3554 }
3555
3556 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a)
3557 {
3558 bool iss_sf, iss_valid = !a->w;
3559 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3560 int memidx = get_a64_user_mem_index(s, a->unpriv);
3561 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3562
3563 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3564
3565 tcg_rt = cpu_reg(s, a->rt);
3566 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3567
3568 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx,
3569 iss_valid, a->rt, iss_sf, false);
3570 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3571 return true;
3572 }
3573
3574 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a)
3575 {
3576 bool iss_sf, iss_valid = !a->w;
3577 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3578 int memidx = get_a64_user_mem_index(s, a->unpriv);
3579 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3580
3581 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3582
3583 tcg_rt = cpu_reg(s, a->rt);
3584 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3585
3586 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop,
3587 a->ext, memidx, iss_valid, a->rt, iss_sf, false);
3588 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3589 return true;
3590 }
3591
3592 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a)
3593 {
3594 TCGv_i64 clean_addr, dirty_addr;
3595 MemOp mop;
3596
3597 if (!fp_access_check(s)) {
3598 return true;
3599 }
3600 mop = finalize_memop_asimd(s, a->sz);
3601 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop);
3602 do_fp_st(s, a->rt, clean_addr, mop);
3603 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3604 return true;
3605 }
3606
3607 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a)
3608 {
3609 TCGv_i64 clean_addr, dirty_addr;
3610 MemOp mop;
3611
3612 if (!fp_access_check(s)) {
3613 return true;
3614 }
3615 mop = finalize_memop_asimd(s, a->sz);
3616 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop);
3617 do_fp_ld(s, a->rt, clean_addr, mop);
3618 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm);
3619 return true;
3620 }
3621
3622 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a,
3623 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr,
3624 bool is_store, MemOp memop)
3625 {
3626 TCGv_i64 tcg_rm;
3627
3628 if (a->rn == 31) {
3629 gen_check_sp_alignment(s);
3630 }
3631 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3632
3633 tcg_rm = read_cpu_reg(s, a->rm, 1);
3634 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0);
3635
3636 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm);
3637 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop);
3638 }
3639
3640 static bool trans_LDR(DisasContext *s, arg_ldst *a)
3641 {
3642 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3643 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3644 MemOp memop;
3645
3646 if (extract32(a->opt, 1, 1) == 0) {
3647 return false;
3648 }
3649
3650 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN);
3651 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3652 tcg_rt = cpu_reg(s, a->rt);
3653 do_gpr_ld(s, tcg_rt, clean_addr, memop,
3654 a->ext, true, a->rt, iss_sf, false);
3655 return true;
3656 }
3657
3658 static bool trans_STR(DisasContext *s, arg_ldst *a)
3659 {
3660 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3661 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3662 MemOp memop;
3663
3664 if (extract32(a->opt, 1, 1) == 0) {
3665 return false;
3666 }
3667
3668 memop = finalize_memop(s, a->sz);
3669 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3670 tcg_rt = cpu_reg(s, a->rt);
3671 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false);
3672 return true;
3673 }
3674
3675 static bool trans_LDR_v(DisasContext *s, arg_ldst *a)
3676 {
3677 TCGv_i64 clean_addr, dirty_addr;
3678 MemOp memop;
3679
3680 if (extract32(a->opt, 1, 1) == 0) {
3681 return false;
3682 }
3683
3684 if (!fp_access_check(s)) {
3685 return true;
3686 }
3687
3688 memop = finalize_memop_asimd(s, a->sz);
3689 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop);
3690 do_fp_ld(s, a->rt, clean_addr, memop);
3691 return true;
3692 }
3693
3694 static bool trans_STR_v(DisasContext *s, arg_ldst *a)
3695 {
3696 TCGv_i64 clean_addr, dirty_addr;
3697 MemOp memop;
3698
3699 if (extract32(a->opt, 1, 1) == 0) {
3700 return false;
3701 }
3702
3703 if (!fp_access_check(s)) {
3704 return true;
3705 }
3706
3707 memop = finalize_memop_asimd(s, a->sz);
3708 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop);
3709 do_fp_st(s, a->rt, clean_addr, memop);
3710 return true;
3711 }
3712
3713
3714 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn,
3715 int sign, bool invert)
3716 {
3717 MemOp mop = a->sz | sign;
3718 TCGv_i64 clean_addr, tcg_rs, tcg_rt;
3719
3720 if (a->rn == 31) {
3721 gen_check_sp_alignment(s);
3722 }
3723 mop = check_atomic_align(s, a->rn, mop);
3724 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3725 a->rn != 31, mop);
3726 tcg_rs = read_cpu_reg(s, a->rs, true);
3727 tcg_rt = cpu_reg(s, a->rt);
3728 if (invert) {
3729 tcg_gen_not_i64(tcg_rs, tcg_rs);
3730 }
3731 /*
3732 * The tcg atomic primitives are all full barriers. Therefore we
3733 * can ignore the Acquire and Release bits of this instruction.
3734 */
3735 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);
3736
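    /*
     * For the signed min/max ops, MO_SIGN makes the loaded memory value
     * sign-extended for the comparison, but the value written back to Rt
     * is architecturally zero-extended, so clear the high bits here.
     */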
3737 if (mop & MO_SIGN) {
3738 switch (a->sz) {
3739 case MO_8:
3740 tcg_gen_ext8u_i64(tcg_rt, tcg_rt);
3741 break;
3742 case MO_16:
3743 tcg_gen_ext16u_i64(tcg_rt, tcg_rt);
3744 break;
3745 case MO_32:
3746 tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
3747 break;
3748 case MO_64:
3749 break;
3750 default:
3751 g_assert_not_reached();
3752 }
3753 }
3754 return true;
3755 }
3756
3757 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false)
3758 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true)
3759 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false)
3760 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false)
3761 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false)
3762 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false)
3763 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false)
3764 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false)
3765 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false)
3766
3767 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a)
3768 {
3769 bool iss_sf = ldst_iss_sf(a->sz, false, false);
3770 TCGv_i64 clean_addr;
3771 MemOp mop;
3772
3773 if (!dc_isar_feature(aa64_atomics, s) ||
3774 !dc_isar_feature(aa64_rcpc_8_3, s)) {
3775 return false;
3776 }
3777 if (a->rn == 31) {
3778 gen_check_sp_alignment(s);
3779 }
3780 mop = check_ordered_align(s, a->rn, 0, false, a->sz);
3781 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false,
3782 a->rn != 31, mop);
3783 /*
3784 * LDAPR* are a special case because they are a simple load, not a
3785 * fetch-and-do-something op.
3786 * The architectural consistency requirements here are weaker than
3787 * full load-acquire (we only need "load-acquire processor consistent"),
3788 * but we choose to implement them as full LDAQ.
3789 */
3790 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false,
3791 true, a->rt, iss_sf, true);
3792 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3793 return true;
3794 }
3795
3796 static bool trans_LDRA(DisasContext *s, arg_LDRA *a)
3797 {
3798 TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3799 MemOp memop;
3800
3801 /* Load with pointer authentication */
3802 if (!dc_isar_feature(aa64_pauth, s)) {
3803 return false;
3804 }
3805
3806 if (a->rn == 31) {
3807 gen_check_sp_alignment(s);
3808 }
3809 dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3810
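    /*
     * The M bit selects the key: LDRAA authenticates with the A data key,
     * LDRAB with the B data key; with pointer authentication disabled the
     * address is used as-is.
     */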
3811 if (s->pauth_active) {
3812 if (!a->m) {
3813 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr,
3814 tcg_constant_i64(0));
3815 } else {
3816 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr,
3817 tcg_constant_i64(0));
3818 }
3819 }
3820
3821 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3822
3823 memop = finalize_memop(s, MO_64);
3824
3825 /* Note that "clean" and "dirty" here refer to TBI not PAC. */
3826 clean_addr = gen_mte_check1(s, dirty_addr, false,
3827 a->w || a->rn != 31, memop);
3828
3829 tcg_rt = cpu_reg(s, a->rt);
3830 do_gpr_ld(s, tcg_rt, clean_addr, memop,
3831 /* extend */ false, /* iss_valid */ !a->w,
3832 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false);
3833
3834 if (a->w) {
3835 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr);
3836 }
3837 return true;
3838 }
3839
3840 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3841 {
3842 TCGv_i64 clean_addr, dirty_addr;
3843 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0);
3844 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3845
3846 if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3847 return false;
3848 }
3849
3850 if (a->rn == 31) {
3851 gen_check_sp_alignment(s);
3852 }
3853
3854 mop = check_ordered_align(s, a->rn, a->imm, false, mop);
3855 dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3856 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3857 clean_addr = clean_data_tbi(s, dirty_addr);
3858
3859 /*
3860 * Load-AcquirePC semantics; we implement as the slightly more
3861 * restrictive Load-Acquire.
3862 */
3863 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true,
3864 a->rt, iss_sf, true);
3865 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
3866 return true;
3867 }
3868
3869 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a)
3870 {
3871 TCGv_i64 clean_addr, dirty_addr;
3872 MemOp mop = a->sz;
3873 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext);
3874
3875 if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
3876 return false;
3877 }
3878
3879 /* TODO: ARMv8.4-LSE SCTLR.nAA */
3880
3881 if (a->rn == 31) {
3882 gen_check_sp_alignment(s);
3883 }
3884
3885 mop = check_ordered_align(s, a->rn, a->imm, true, mop);
3886 dirty_addr = read_cpu_reg_sp(s, a->rn, 1);
3887 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm);
3888 clean_addr = clean_data_tbi(s, dirty_addr);
3889
3890 /* Store-Release semantics */
3891 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
3892 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true);
3893 return true;
3894 }
3895
3896 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a)
3897 {
3898 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3899 MemOp endian, align, mop;
3900
3901 int total; /* total bytes */
3902 int elements; /* elements per vector */
3903 int r;
3904 int size = a->sz;
3905
3906 if (!a->p && a->rm != 0) {
3907 /* For non-postindexed accesses the Rm field must be 0 */
3908 return false;
3909 }
3910 if (size == 3 && !a->q && a->selem != 1) {
3911 return false;
3912 }
3913 if (!fp_access_check(s)) {
3914 return true;
3915 }
3916
3917 if (a->rn == 31) {
3918 gen_check_sp_alignment(s);
3919 }
3920
3921 /* For our purposes, bytes are always little-endian. */
3922 endian = s->be_data;
3923 if (size == 0) {
3924 endian = MO_LE;
3925 }
3926
3927 total = a->rpt * a->selem * (a->q ? 16 : 8);
3928 tcg_rn = cpu_reg_sp(s, a->rn);
3929
3930 /*
3931 * Issue the MTE check vs the logical repeat count, before we
3932 * promote consecutive little-endian elements below.
3933 */
3934 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total,
3935 finalize_memop_asimd(s, size));
3936
3937 /*
3938 * Consecutive little-endian elements from a single register
3939 * can be promoted to a larger little-endian operation.
3940 */
3941 align = MO_ALIGN;
3942 if (a->selem == 1 && endian == MO_LE) {
3943 align = pow2_align(size);
3944 size = 3;
3945 }
3946 if (!s->align_mem) {
3947 align = 0;
3948 }
3949 mop = endian | size | align;
3950
3951 elements = (a->q ? 16 : 8) >> size;
3952 tcg_ebytes = tcg_constant_i64(1 << size);
3953 for (r = 0; r < a->rpt; r++) {
3954 int e;
3955 for (e = 0; e < elements; e++) {
3956 int xs;
3957 for (xs = 0; xs < a->selem; xs++) {
3958 int tt = (a->rt + r + xs) % 32;
3959 do_vec_ld(s, tt, e, clean_addr, mop);
3960 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3961 }
3962 }
3963 }
3964
3965 /*
3966 * For non-quad operations, setting a slice of the low 64 bits of
3967 * the register clears the high 64 bits (in the ARM ARM pseudocode
3968 * this is implicit in the fact that 'rval' is a 64 bit wide
3969 * variable). For quad operations, we might still need to zero
3970 * the high bits of the SVE register.
3971 */
3972 for (r = 0; r < a->rpt * a->selem; r++) {
3973 int tt = (a->rt + r) % 32;
3974 clear_vec_high(s, a->q, tt);
3975 }
3976
3977 if (a->p) {
3978 if (a->rm == 31) {
3979 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
3980 } else {
3981 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
3982 }
3983 }
3984 return true;
3985 }
3986
3987 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a)
3988 {
3989 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3990 MemOp endian, align, mop;
3991
3992 int total; /* total bytes */
3993 int elements; /* elements per vector */
3994 int r;
3995 int size = a->sz;
3996
3997 if (!a->p && a->rm != 0) {
3998 /* For non-postindexed accesses the Rm field must be 0 */
3999 return false;
4000 }
4001 if (size == 3 && !a->q && a->selem != 1) {
4002 return false;
4003 }
4004 if (!fp_access_check(s)) {
4005 return true;
4006 }
4007
4008 if (a->rn == 31) {
4009 gen_check_sp_alignment(s);
4010 }
4011
4012 /* For our purposes, bytes are always little-endian. */
4013 endian = s->be_data;
4014 if (size == 0) {
4015 endian = MO_LE;
4016 }
4017
4018 total = a->rpt * a->selem * (a->q ? 16 : 8);
4019 tcg_rn = cpu_reg_sp(s, a->rn);
4020
4021 /*
4022 * Issue the MTE check vs the logical repeat count, before we
4023 * promote consecutive little-endian elements below.
4024 */
4025 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total,
4026 finalize_memop_asimd(s, size));
4027
4028 /*
4029 * Consecutive little-endian elements from a single register
4030 * can be promoted to a larger little-endian operation.
4031 */
4032 align = MO_ALIGN;
4033 if (a->selem == 1 && endian == MO_LE) {
4034 align = pow2_align(size);
4035 size = 3;
4036 }
4037 if (!s->align_mem) {
4038 align = 0;
4039 }
4040 mop = endian | size | align;
4041
4042 elements = (a->q ? 16 : 8) >> size;
4043 tcg_ebytes = tcg_constant_i64(1 << size);
4044 for (r = 0; r < a->rpt; r++) {
4045 int e;
4046 for (e = 0; e < elements; e++) {
4047 int xs;
4048 for (xs = 0; xs < a->selem; xs++) {
4049 int tt = (a->rt + r + xs) % 32;
4050 do_vec_st(s, tt, e, clean_addr, mop);
4051 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4052 }
4053 }
4054 }
4055
4056 if (a->p) {
4057 if (a->rm == 31) {
4058 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4059 } else {
4060 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4061 }
4062 }
4063 return true;
4064 }
4065
4066 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a)
4067 {
4068 int xs, total, rt;
4069 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4070 MemOp mop;
4071
4072 if (!a->p && a->rm != 0) {
4073 return false;
4074 }
4075 if (!fp_access_check(s)) {
4076 return true;
4077 }
4078
4079 if (a->rn == 31) {
4080 gen_check_sp_alignment(s);
4081 }
4082
4083 total = a->selem << a->scale;
4084 tcg_rn = cpu_reg_sp(s, a->rn);
4085
4086 mop = finalize_memop_asimd(s, a->scale);
4087 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31,
4088 total, mop);
4089
4090 tcg_ebytes = tcg_constant_i64(1 << a->scale);
4091 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4092 do_vec_st(s, rt, a->index, clean_addr, mop);
4093 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4094 }
4095
4096 if (a->p) {
4097 if (a->rm == 31) {
4098 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4099 } else {
4100 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4101 }
4102 }
4103 return true;
4104 }
4105
4106 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a)
4107 {
4108 int xs, total, rt;
4109 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4110 MemOp mop;
4111
4112 if (!a->p && a->rm != 0) {
4113 return false;
4114 }
4115 if (!fp_access_check(s)) {
4116 return true;
4117 }
4118
4119 if (a->rn == 31) {
4120 gen_check_sp_alignment(s);
4121 }
4122
4123 total = a->selem << a->scale;
4124 tcg_rn = cpu_reg_sp(s, a->rn);
4125
4126 mop = finalize_memop_asimd(s, a->scale);
4127 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
4128 total, mop);
4129
4130 tcg_ebytes = tcg_constant_i64(1 << a->scale);
4131 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4132 do_vec_ld(s, rt, a->index, clean_addr, mop);
4133 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4134 }
4135
4136 if (a->p) {
4137 if (a->rm == 31) {
4138 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4139 } else {
4140 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4141 }
4142 }
4143 return true;
4144 }
4145
4146 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a)
4147 {
4148 int xs, total, rt;
4149 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
4150 MemOp mop;
4151
4152 if (!a->p && a->rm != 0) {
4153 return false;
4154 }
4155 if (!fp_access_check(s)) {
4156 return true;
4157 }
4158
4159 if (a->rn == 31) {
4160 gen_check_sp_alignment(s);
4161 }
4162
4163 total = a->selem << a->scale;
4164 tcg_rn = cpu_reg_sp(s, a->rn);
4165
4166 mop = finalize_memop_asimd(s, a->scale);
4167 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31,
4168 total, mop);
4169
4170 tcg_ebytes = tcg_constant_i64(1 << a->scale);
4171 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) {
4172 /* Load and replicate to all elements */
4173 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4174
4175 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
4176 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt),
4177 (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp);
4178 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
4179 }
4180
4181 if (a->p) {
4182 if (a->rm == 31) {
4183 tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
4184 } else {
4185 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
4186 }
4187 }
4188 return true;
4189 }
4190
4191 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
4192 {
4193 TCGv_i64 addr, clean_addr, tcg_rt;
4194 int size = 4 << s->dcz_blocksize;
4195
4196 if (!dc_isar_feature(aa64_mte, s)) {
4197 return false;
4198 }
4199 if (s->current_el == 0) {
4200 return false;
4201 }
4202
4203 if (a->rn == 31) {
4204 gen_check_sp_alignment(s);
4205 }
4206
4207 addr = read_cpu_reg_sp(s, a->rn, true);
4208 tcg_gen_addi_i64(addr, addr, a->imm);
4209 tcg_rt = cpu_reg(s, a->rt);
4210
4211 if (s->ata[0]) {
4212 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
4213 }
4214 /*
4215 * The non-tags portion of STZGM is mostly like DC_ZVA,
4216 * except the alignment happens before the access.
4217 */
4218 clean_addr = clean_data_tbi(s, addr);
4219 tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4220 gen_helper_dc_zva(tcg_env, clean_addr);
4221 return true;
4222 }
4223
4224 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
4225 {
4226 TCGv_i64 addr, clean_addr, tcg_rt;
4227
4228 if (!dc_isar_feature(aa64_mte, s)) {
4229 return false;
4230 }
4231 if (s->current_el == 0) {
4232 return false;
4233 }
4234
4235 if (a->rn == 31) {
4236 gen_check_sp_alignment(s);
4237 }
4238
4239 addr = read_cpu_reg_sp(s, a->rn, true);
4240 tcg_gen_addi_i64(addr, addr, a->imm);
4241 tcg_rt = cpu_reg(s, a->rt);
4242
4243 if (s->ata[0]) {
4244 gen_helper_stgm(tcg_env, addr, tcg_rt);
4245 } else {
4246 MMUAccessType acc = MMU_DATA_STORE;
4247 int size = 4 << s->gm_blocksize;
4248
4249 clean_addr = clean_data_tbi(s, addr);
4250 tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4251 gen_probe_access(s, clean_addr, acc, size);
4252 }
4253 return true;
4254 }
4255
4256 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
4257 {
4258 TCGv_i64 addr, clean_addr, tcg_rt;
4259
4260 if (!dc_isar_feature(aa64_mte, s)) {
4261 return false;
4262 }
4263 if (s->current_el == 0) {
4264 return false;
4265 }
4266
4267 if (a->rn == 31) {
4268 gen_check_sp_alignment(s);
4269 }
4270
4271 addr = read_cpu_reg_sp(s, a->rn, true);
4272 tcg_gen_addi_i64(addr, addr, a->imm);
4273 tcg_rt = cpu_reg(s, a->rt);
4274
4275 if (s->ata[0]) {
4276 gen_helper_ldgm(tcg_rt, tcg_env, addr);
4277 } else {
4278 MMUAccessType acc = MMU_DATA_LOAD;
4279 int size = 4 << s->gm_blocksize;
4280
4281 clean_addr = clean_data_tbi(s, addr);
4282 tcg_gen_andi_i64(clean_addr, clean_addr, -size);
4283 gen_probe_access(s, clean_addr, acc, size);
4284 /* The result tags are zeros. */
4285 tcg_gen_movi_i64(tcg_rt, 0);
4286 }
4287 return true;
4288 }
4289
4290 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
4291 {
4292 TCGv_i64 addr, clean_addr, tcg_rt;
4293
4294 if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
4295 return false;
4296 }
4297
4298 if (a->rn == 31) {
4299 gen_check_sp_alignment(s);
4300 }
4301
4302 addr = read_cpu_reg_sp(s, a->rn, true);
4303 if (!a->p) {
4304 /* pre-index or signed offset */
4305 tcg_gen_addi_i64(addr, addr, a->imm);
4306 }
4307
4308 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
4309 tcg_rt = cpu_reg(s, a->rt);
4310 if (s->ata[0]) {
4311 gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
4312 } else {
4313 /*
4314 * Tag access disabled: we must check for aborts on the load
4315 * from [rn+offset], and then insert a 0 tag into rt.
4316 */
4317 clean_addr = clean_data_tbi(s, addr);
4318 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
4319 gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
4320 }
4321
4322 if (a->w) {
4323 /* pre-index or post-index */
4324 if (a->p) {
4325 /* post-index */
4326 tcg_gen_addi_i64(addr, addr, a->imm);
4327 }
4328 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4329 }
4330 return true;
4331 }
4332
4333 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
4334 {
4335 TCGv_i64 addr, tcg_rt;
4336
4337 if (a->rn == 31) {
4338 gen_check_sp_alignment(s);
4339 }
4340
4341 addr = read_cpu_reg_sp(s, a->rn, true);
4342 if (!a->p) {
4343 /* pre-index or signed offset */
4344 tcg_gen_addi_i64(addr, addr, a->imm);
4345 }
4346 tcg_rt = cpu_reg_sp(s, a->rt);
4347 if (!s->ata[0]) {
4348 /*
4349 * For STG and ST2G, we need to check alignment and probe memory.
4350 * TODO: For STZG and STZ2G, we could rely on the stores below,
4351 * at least for system mode; user-only won't enforce alignment.
4352 */
4353 if (is_pair) {
4354 gen_helper_st2g_stub(tcg_env, addr);
4355 } else {
4356 gen_helper_stg_stub(tcg_env, addr);
4357 }
4358 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
4359 if (is_pair) {
4360 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
4361 } else {
4362 gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
4363 }
4364 } else {
4365 if (is_pair) {
4366 gen_helper_st2g(tcg_env, addr, tcg_rt);
4367 } else {
4368 gen_helper_stg(tcg_env, addr, tcg_rt);
4369 }
4370 }
4371
4372 if (is_zero) {
4373 TCGv_i64 clean_addr = clean_data_tbi(s, addr);
4374 TCGv_i64 zero64 = tcg_constant_i64(0);
4375 TCGv_i128 zero128 = tcg_temp_new_i128();
4376 int mem_index = get_mem_index(s);
4377 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
4378
4379 tcg_gen_concat_i64_i128(zero128, zero64, zero64);
4380
4381 /* This is 1 or 2 atomic 16-byte operations. */
4382 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4383 if (is_pair) {
4384 tcg_gen_addi_i64(clean_addr, clean_addr, 16);
4385 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
4386 }
4387 }
4388
4389 if (a->w) {
4390 /* pre-index or post-index */
4391 if (a->p) {
4392 /* post-index */
4393 tcg_gen_addi_i64(addr, addr, a->imm);
4394 }
4395 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
4396 }
4397 return true;
4398 }
4399
4400 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false)
4401 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false)
4402 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true)
4403 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true)
4404
4405 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32);
4406
4407 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue,
4408 bool is_setg, SetFn fn)
4409 {
4410 int memidx;
4411 uint32_t syndrome, desc = 0;
4412
4413 if (is_setg && !dc_isar_feature(aa64_mte, s)) {
4414 return false;
4415 }
4416
4417 /*
4418 * UNPREDICTABLE cases: we choose to UNDEF, which allows
4419 * us to pull this check before the CheckMOPSEnabled() test
4420 * (which we do in the helper function)
4421 */
4422 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4423 a->rd == 31 || a->rn == 31) {
4424 return false;
4425 }
4426
4427 memidx = get_a64_user_mem_index(s, a->unpriv);
4428
4429 /*
4430 * We pass option_a == true, matching our implementation;
4431 * we pass wrong_option == false: helper function may set that bit.
4432 */
4433 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv,
4434 is_epilogue, false, true, a->rd, a->rs, a->rn);
4435
4436 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) {
4437 /* We may need to do MTE tag checking, so assemble the descriptor */
4438 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
4439 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
4440 desc = FIELD_DP32(desc, MTEDESC, WRITE, true);
4441 /* SIZEM1 and ALIGN we leave 0 (byte write) */
4442 }
4443 /* The helper function always needs the memidx even with MTE disabled */
4444 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx);
4445
4446 /*
4447 * The helper needs the register numbers, but since they're in
4448 * the syndrome anyway, we let it extract them from there rather
4449 * than passing in an extra three integer arguments.
4450 */
4451 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc));
4452 return true;
4453 }
4454
4455 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp)
4456 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm)
4457 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete)
4458 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp)
4459 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm)
4460 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge)
4461
4462 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32);
4463
4464 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn)
4465 {
4466 int rmemidx, wmemidx;
4467 uint32_t syndrome, rdesc = 0, wdesc = 0;
4468 bool wunpriv = extract32(a->options, 0, 1);
4469 bool runpriv = extract32(a->options, 1, 1);
4470
4471 /*
4472 * UNPREDICTABLE cases: we choose to UNDEF, which allows
4473 * us to pull this check before the CheckMOPSEnabled() test
4474 * (which we do in the helper function)
4475 */
4476 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd ||
4477 a->rd == 31 || a->rs == 31 || a->rn == 31) {
4478 return false;
4479 }
4480
4481 rmemidx = get_a64_user_mem_index(s, runpriv);
4482 wmemidx = get_a64_user_mem_index(s, wunpriv);
4483
4484 /*
4485 * We pass option_a == true, matching our implementation;
4486 * we pass wrong_option == false: helper function may set that bit.
4487 */
4488 syndrome = syn_mop(false, false, a->options, is_epilogue,
4489 false, true, a->rd, a->rs, a->rn);
4490
4491 /* If we need to do MTE tag checking, assemble the descriptors */
4492 if (s->mte_active[runpriv]) {
4493 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid);
4494 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma);
4495 }
4496 if (s->mte_active[wunpriv]) {
4497 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid);
4498 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma);
4499 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true);
4500 }
4501 /* The helper function needs these parts of the descriptor regardless */
4502 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx);
4503 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx);
4504
4505 /*
4506 * The helper needs the register numbers, but since they're in
4507 * the syndrome anyway, we let it extract them from there rather
4508 * than passing in an extra three integer arguments.
4509 */
4510 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc),
4511 tcg_constant_i32(rdesc));
4512 return true;
4513 }
4514
4515 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp)
4516 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym)
4517 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye)
4518 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp)
4519 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm)
4520 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe)
4521
4522 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64);
4523
4524 static bool gen_rri(DisasContext *s, arg_rri_sf *a,
4525 bool rd_sp, bool rn_sp, ArithTwoOp *fn)
4526 {
4527 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn);
4528 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd);
4529 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm);
4530
4531 fn(tcg_rd, tcg_rn, tcg_imm);
4532 if (!a->sf) {
4533 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4534 }
4535 return true;
4536 }
4537
4538 /*
4539 * PC-rel. addressing
4540 */
4541
4542 static bool trans_ADR(DisasContext *s, arg_ri *a)
4543 {
4544 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm);
4545 return true;
4546 }
4547
4548 static bool trans_ADRP(DisasContext *s, arg_ri *a)
4549 {
4550 int64_t offset = (int64_t)a->imm << 12;
4551
4552 /* The page offset is ok for CF_PCREL. */
4553 offset -= s->pc_curr & 0xfff;
4554 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset);
4555 return true;
4556 }
4557
4558 /*
4559 * Add/subtract (immediate)
4560 */
4561 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64)
4562 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64)
4563 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC)
4564 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC)
4565
4566 /*
4567 * Add/subtract (immediate, with tags)
4568 */
4569
4570 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a,
4571 bool sub_op)
4572 {
4573 TCGv_i64 tcg_rn, tcg_rd;
4574 int imm;
4575
4576 imm = a->uimm6 << LOG2_TAG_GRANULE;
4577 if (sub_op) {
4578 imm = -imm;
4579 }
4580
4581 tcg_rn = cpu_reg_sp(s, a->rn);
4582 tcg_rd = cpu_reg_sp(s, a->rd);
4583
4584 if (s->ata[0]) {
4585 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn,
4586 tcg_constant_i32(imm),
4587 tcg_constant_i32(a->uimm4));
4588 } else {
4589 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4590 gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4591 }
4592 return true;
4593 }
4594
4595 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false)
4596 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true)
4597
4598 /* The input should be a value in the bottom e bits (with higher
4599 * bits zero); returns that value replicated into every element
4600 * of size e in a 64 bit integer.
4601 */
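/* For example, bitfield_replicate(0x3, 4) yields 0x3333333333333333. */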
4602 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4603 {
4604 assert(e != 0);
4605 while (e < 64) {
4606 mask |= mask << e;
4607 e *= 2;
4608 }
4609 return mask;
4610 }
4611
4612 /*
4613 * Logical (immediate)
4614 */
4615
4616 /*
4617 * Simplified variant of pseudocode DecodeBitMasks() for the case where we
4618 * only require the wmask. Returns false if the imms/immr/immn are a reserved
4619 * value (ie should cause a guest UNDEF exception), and true if they are
4620 * valid, in which case the decoded bit pattern is written to result.
4621 */
4622 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4623 unsigned int imms, unsigned int immr)
4624 {
4625 uint64_t mask;
4626 unsigned e, levels, s, r;
4627 int len;
4628
4629 assert(immn < 2 && imms < 64 && immr < 64);
4630
4631 /* The bit patterns we create here are 64 bit patterns which
4632 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4633 * 64 bits each. Each element contains the same value: a run
4634 * of between 1 and e-1 non-zero bits, rotated within the
4635 * element by between 0 and e-1 bits.
4636 *
4637 * The element size and run length are encoded into immn (1 bit)
4638 * and imms (6 bits) as follows:
4639 * 64 bit elements: immn = 1, imms = <length of run - 1>
4640 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4641 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4642 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4643 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4644 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4645 * Notice that immn = 0, imms = 11111x is the only combination
4646 * not covered by one of the above options; this is reserved.
4647 * Further, <length of run - 1> all-ones is a reserved pattern.
4648 *
4649 * In all cases the rotation is by immr % e (and immr is 6 bits).
4650 */
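    /*
     * Worked example (illustrative): immn = 0, imms = 0b111100, immr = 0
     * selects 2-bit elements (e = 2) with a run of s + 1 = 1 set bit and
     * no rotation, so the decoded wmask is 0x5555555555555555.
     */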
4651
4652 /* First determine the element size */
4653 len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4654 if (len < 1) {
4655 /* This is the immn == 0, imms == 11111x case */
4656 return false;
4657 }
4658 e = 1 << len;
4659
4660 levels = e - 1;
4661 s = imms & levels;
4662 r = immr & levels;
4663
4664 if (s == levels) {
4665 /* <length of run - 1> mustn't be all-ones. */
4666 return false;
4667 }
4668
4669 /* Create the value of one element: s+1 set bits rotated
4670 * by r within the element (which is e bits wide)...
4671 */
4672 mask = MAKE_64BIT_MASK(0, s + 1);
4673 if (r) {
4674 mask = (mask >> r) | (mask << (e - r));
4675 mask &= MAKE_64BIT_MASK(0, e);
4676 }
4677 /* ...then replicate the element over the whole 64 bit value */
4678 mask = bitfield_replicate(mask, e);
4679 *result = mask;
4680 return true;
4681 }
4682
4683 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc,
4684 void (*fn)(TCGv_i64, TCGv_i64, int64_t))
4685 {
4686 TCGv_i64 tcg_rd, tcg_rn;
4687 uint64_t imm;
4688
4689 /* Some immediate field values are reserved. */
4690 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
4691 extract32(a->dbm, 0, 6),
4692 extract32(a->dbm, 6, 6))) {
4693 return false;
4694 }
4695 if (!a->sf) {
4696 imm &= 0xffffffffull;
4697 }
4698
4699 tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
4700 tcg_rn = cpu_reg(s, a->rn);
4701
4702 fn(tcg_rd, tcg_rn, imm);
4703 if (set_cc) {
4704 gen_logic_CC(a->sf, tcg_rd);
4705 }
4706 if (!a->sf) {
4707 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4708 }
4709 return true;
4710 }
4711
4712 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
4713 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
4714 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
4715 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)
4716
4717 /*
4718 * Move wide (immediate)
4719 */
4720
4721 static bool trans_MOVZ(DisasContext *s, arg_movw *a)
4722 {
4723 int pos = a->hw << 4;
4724 tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
4725 return true;
4726 }
4727
4728 static bool trans_MOVN(DisasContext *s, arg_movw *a)
4729 {
4730 int pos = a->hw << 4;
4731 uint64_t imm = a->imm;
4732
4733 imm = ~(imm << pos);
4734 if (!a->sf) {
4735 imm = (uint32_t)imm;
4736 }
4737 tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
4738 return true;
4739 }
4740
4741 static bool trans_MOVK(DisasContext *s, arg_movw *a)
4742 {
4743 int pos = a->hw << 4;
4744 TCGv_i64 tcg_rd, tcg_im;
4745
4746 tcg_rd = cpu_reg(s, a->rd);
4747 tcg_im = tcg_constant_i64(a->imm);
4748 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
4749 if (!a->sf) {
4750 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4751 }
4752 return true;
4753 }
4754
4755 /*
4756 * Bitfield
4757 */
4758
4759 static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
4760 {
4761 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4762 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4763 unsigned int bitsize = a->sf ? 64 : 32;
4764 unsigned int ri = a->immr;
4765 unsigned int si = a->imms;
4766 unsigned int pos, len;
4767
4768 if (si >= ri) {
4769 /* Wd<s-r:0> = Wn<s:r> */
4770 len = (si - ri) + 1;
4771 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4772 if (!a->sf) {
4773 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4774 }
4775 } else {
4776 /* Wd<32+s-r,32-r> = Wn<s:0> */
4777 len = si + 1;
4778 pos = (bitsize - ri) & (bitsize - 1);
4779
4780 if (len < ri) {
4781 /*
4782 * Sign extend the destination field from len to fill the
4783 * balance of the word. Let the deposit below insert all
4784 * of those sign bits.
4785 */
4786 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
4787 len = ri;
4788 }
4789
4790 /*
4791 * We start with zero, and we haven't modified any bits outside
4792 * bitsize, therefore no final zero-extension is needed for !sf.
4793 */
4794 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4795 }
4796 return true;
4797 }
4798
4799 static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
4800 {
4801 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4802 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4803 unsigned int bitsize = a->sf ? 64 : 32;
4804 unsigned int ri = a->immr;
4805 unsigned int si = a->imms;
4806 unsigned int pos, len;
4807
4811 if (si >= ri) {
4812 /* Wd<s-r:0> = Wn<s:r> */
4813 len = (si - ri) + 1;
4814 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4815 } else {
4816 /* Wd<32+s-r,32-r> = Wn<s:0> */
4817 len = si + 1;
4818 pos = (bitsize - ri) & (bitsize - 1);
4819 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
4820 }
4821 return true;
4822 }
4823
4824 static bool trans_BFM(DisasContext *s, arg_BFM *a)
4825 {
4826 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
4827 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
4828 unsigned int bitsize = a->sf ? 64 : 32;
4829 unsigned int ri = a->immr;
4830 unsigned int si = a->imms;
4831 unsigned int pos, len;
4832
4836 if (si >= ri) {
4837 /* Wd<s-r:0> = Wn<s:r> */
4838 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4839 len = (si - ri) + 1;
4840 pos = 0;
4841 } else {
4842 /* Wd<32+s-r,32-r> = Wn<s:0> */
4843 len = si + 1;
4844 pos = (bitsize - ri) & (bitsize - 1);
4845 }
4846
4847 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
4848 if (!a->sf) {
4849 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4850 }
4851 return true;
4852 }
4853
4854 static bool trans_EXTR(DisasContext *s, arg_extract *a)
4855 {
4856 TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
4857
4858 tcg_rd = cpu_reg(s, a->rd);
4859
4860 if (unlikely(a->imm == 0)) {
4861 /*
4862 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
4863 * so an extract from bit 0 is a special case.
4864 */
4865 if (a->sf) {
4866 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm));
4867 } else {
4868 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm));
4869 }
4870 } else {
4871 tcg_rm = cpu_reg(s, a->rm);
4872 tcg_rn = cpu_reg(s, a->rn);
4873
4874 if (a->sf) {
4875 /* Specialization to ROR happens in EXTRACT2. */
4876 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm);
4877 } else {
4878 TCGv_i32 t0 = tcg_temp_new_i32();
4879
4880 tcg_gen_extrl_i64_i32(t0, tcg_rm);
4881 if (a->rm == a->rn) {
4882 tcg_gen_rotri_i32(t0, t0, a->imm);
4883 } else {
4884 TCGv_i32 t1 = tcg_temp_new_i32();
4885 tcg_gen_extrl_i64_i32(t1, tcg_rn);
4886 tcg_gen_extract2_i32(t0, t0, t1, a->imm);
4887 }
4888 tcg_gen_extu_i32_i64(tcg_rd, t0);
4889 }
4890 }
4891 return true;
4892 }
4893
4894 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a)
4895 {
4896 if (fp_access_check(s)) {
4897 int len = (a->len + 1) * 16;
4898
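        /*
         * The helper's simd_data immediate packs the table length in
         * bytes, the TBX flag and the table base register number.
         */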
4899 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4900 vec_full_reg_offset(s, a->rm), tcg_env,
4901 a->q ? 16 : 8, vec_full_reg_size(s),
4902 (len << 6) | (a->tbx << 5) | a->rn,
4903 gen_helper_simd_tblx);
4904 }
4905 return true;
4906 }
4907
4908 typedef int simd_permute_idx_fn(int i, int part, int elements);
4909
4910 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
4911 simd_permute_idx_fn *fn, int part)
4912 {
4913 MemOp esz = a->esz;
4914 int datasize = a->q ? 16 : 8;
4915 int elements = datasize >> esz;
4916 TCGv_i64 tcg_res[2], tcg_ele;
4917
4918 if (esz == MO_64 && !a->q) {
4919 return false;
4920 }
4921 if (!fp_access_check(s)) {
4922 return true;
4923 }
4924
4925 tcg_res[0] = tcg_temp_new_i64();
4926 tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
4927 tcg_ele = tcg_temp_new_i64();
4928
4929 for (int i = 0; i < elements; i++) {
4930 int o, w, idx;
4931
4932 idx = fn(i, part, elements);
4933 read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn),
4934 idx & (elements - 1), esz);
4935
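        /*
         * w selects which 64-bit result word element i lands in;
         * o is its bit offset within that word.
         */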
4936 w = (i << (esz + 3)) / 64;
4937 o = (i << (esz + 3)) % 64;
4938 if (o == 0) {
4939 tcg_gen_mov_i64(tcg_res[w], tcg_ele);
4940 } else {
4941 tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz);
4942 }
4943 }
4944
4945 for (int i = a->q; i >= 0; --i) {
4946 write_vec_element(s, tcg_res[i], a->rd, i, MO_64);
4947 }
4948 clear_vec_high(s, a->q, a->rd);
4949 return true;
4950 }
4951
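/*
 * UZP{1,2} read the even (part = 0) or odd (part = 1) numbered elements
 * of the concatenation Rm:Rn, e.g. UZP1 with four elements per register
 * picks Rn[0], Rn[2], Rm[0], Rm[2].
 */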
4952 static int permute_load_uzp(int i, int part, int elements)
4953 {
4954 return 2 * i + part;
4955 }
4956
4957 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0)
4958 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1)
4959
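/*
 * TRN{1,2} interleave the even (part = 0) or odd (part = 1) numbered
 * element pairs from Rn and Rm, e.g. TRN1 with four elements per register
 * produces Rn[0], Rm[0], Rn[2], Rm[2].
 */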
4960 static int permute_load_trn(int i, int part, int elements)
4961 {
4962 return (i & 1) * elements + (i & ~1) + part;
4963 }
4964
4965 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0)
4966 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1)
4967
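/*
 * ZIP{1,2} interleave corresponding elements from the low (part = 0) or
 * high (part = 1) halves of Rn and Rm, e.g. ZIP1 with four elements per
 * register produces Rn[0], Rm[0], Rn[1], Rm[1].
 */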
4968 static int permute_load_zip(int i, int part, int elements)
4969 {
4970 return (i & 1) * elements + ((part * elements + i) >> 1);
4971 }
4972
4973 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0)
4974 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1)
4975
4976 /*
4977 * Cryptographic AES, SHA, SHA512
4978 */
4979
4980 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese)
4981 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd)
4982 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc)
4983 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc)
4984
4985 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c)
4986 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p)
4987 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m)
4988 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0)
4989
4990 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h)
4991 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2)
4992 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1)
4993
4994 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h)
4995 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1)
4996 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0)
4997
4998 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h)
4999 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2)
5000 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1)
5001 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1)
5002 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1)
5003 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2)
5004 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey)
5005
5006 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0)
5007 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e)
5008
5009 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3)
5010 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax)
5011
5012 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a)
5013 {
5014 if (!dc_isar_feature(aa64_sm3, s)) {
5015 return false;
5016 }
5017 if (fp_access_check(s)) {
5018 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
5019 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
5020 TCGv_i32 tcg_op3 = tcg_temp_new_i32();
5021 TCGv_i32 tcg_res = tcg_temp_new_i32();
5022
5023 read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32);
5024 read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32);
5025 read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32);
5026
5027 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
5028 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
5029 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
5030 tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
5031
5032 /* Clear the whole register first, then store bits [127:96]. */
5033 clear_vec(s, a->rd);
5034 write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32);
5035 }
5036 return true;
5037 }
5038
5039 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn)
5040 {
5041 if (fp_access_check(s)) {
5042 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn);
5043 }
5044 return true;
5045 }
5046 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a)
5047 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b)
5048 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a)
5049 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b)
5050
5051 static bool trans_XAR(DisasContext *s, arg_XAR *a)
5052 {
5053 if (!dc_isar_feature(aa64_sha3, s)) {
5054 return false;
5055 }
5056 if (fp_access_check(s)) {
5057 gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd),
5058 vec_full_reg_offset(s, a->rn),
5059 vec_full_reg_offset(s, a->rm), a->imm, 16,
5060 vec_full_reg_size(s));
5061 }
5062 return true;
5063 }
5064
5065 /*
5066 * Advanced SIMD copy
5067 */
5068
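/*
 * The imm field encodes the element size in the position of its lowest
 * set bit and the element index in the bits above it:
 * xxxx1 -> bytes, xxx10 -> halfwords, xx100 -> words, x1000 -> doublewords.
 */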
5069 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx)
5070 {
5071 unsigned esz = ctz32(imm);
5072 if (esz <= MO_64) {
5073 *pesz = esz;
5074 *pidx = imm >> (esz + 1);
5075 return true;
5076 }
5077 return false;
5078 }
5079
5080 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a)
5081 {
5082 MemOp esz;
5083 unsigned idx;
5084
5085 if (!decode_esz_idx(a->imm, &esz, &idx)) {
5086 return false;
5087 }
5088 if (fp_access_check(s)) {
5089 /*
5090 * This instruction just extracts the specified element and
5091 * zero-extends it into the bottom of the destination register.
5092 */
5093 TCGv_i64 tmp = tcg_temp_new_i64();
5094 read_vec_element(s, tmp, a->rn, idx, esz);
5095 write_fp_dreg(s, a->rd, tmp);
5096 }
5097 return true;
5098 }
5099
5100 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a)
5101 {
5102 MemOp esz;
5103 unsigned idx;
5104
5105 if (!decode_esz_idx(a->imm, &esz, &idx)) {
5106 return false;
5107 }
5108 if (esz == MO_64 && !a->q) {
5109 return false;
5110 }
5111 if (fp_access_check(s)) {
5112 tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd),
5113 vec_reg_offset(s, a->rn, idx, esz),
5114 a->q ? 16 : 8, vec_full_reg_size(s));
5115 }
5116 return true;
5117 }
5118
5119 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a)
5120 {
5121 MemOp esz;
5122 unsigned idx;
5123
5124 if (!decode_esz_idx(a->imm, &esz, &idx)) {
5125 return false;
5126 }
5127 if (esz == MO_64 && !a->q) {
5128 return false;
5129 }
5130 if (fp_access_check(s)) {
5131 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
5132 a->q ? 16 : 8, vec_full_reg_size(s),
5133 cpu_reg(s, a->rn));
5134 }
5135 return true;
5136 }
5137
5138 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed)
5139 {
5140 MemOp esz;
5141 unsigned idx;
5142
5143 if (!decode_esz_idx(a->imm, &esz, &idx)) {
5144 return false;
5145 }
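    /*
     * SMOV moves a byte, halfword or word element to a W or X destination
     * (a word element only to X); UMOV moves a byte, halfword or word to W,
     * or a doubleword to X.  Reject encodings outside those forms.
     */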
5146 if (is_signed) {
5147 if (esz == MO_64 || (esz == MO_32 && !a->q)) {
5148 return false;
5149 }
5150 } else {
5151 if (esz == MO_64 ? !a->q : a->q) {
5152 return false;
5153 }
5154 }
5155 if (fp_access_check(s)) {
5156 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
5157 read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed);
5158 if (is_signed && !a->q) {
5159 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5160 }
5161 }
5162 return true;
5163 }
5164
5165 TRANS(SMOV, do_smov_umov, a, MO_SIGN)
5166 TRANS(UMOV, do_smov_umov, a, 0)
5167
5168 static bool trans_INS_general(DisasContext *s, arg_INS_general *a)
5169 {
5170 MemOp esz;
5171 unsigned idx;
5172
5173 if (!decode_esz_idx(a->imm, &esz, &idx)) {
5174 return false;
5175 }
5176 if (fp_access_check(s)) {
5177 write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz);
5178 clear_vec_high(s, true, a->rd);
5179 }
5180 return true;
5181 }
5182
5183 static bool trans_INS_element(DisasContext *s, arg_INS_element *a)
5184 {
5185 MemOp esz;
5186 unsigned didx, sidx;
5187
5188 if (!decode_esz_idx(a->di, &esz, &didx)) {
5189 return false;
5190 }
5191 sidx = a->si >> esz;
5192 if (fp_access_check(s)) {
5193 TCGv_i64 tmp = tcg_temp_new_i64();
5194
5195 read_vec_element(s, tmp, a->rn, sidx, esz);
5196 write_vec_element(s, tmp, a->rd, didx, esz);
5197
5198 /* INS is considered a 128-bit write for SVE. */
5199 clear_vec_high(s, true, a->rd);
5200 }
5201 return true;
5202 }
5203
5204 /*
5205 * Advanced SIMD three same
5206 */
5207
5208 typedef struct FPScalar {
5209 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5210 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
5211 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
5212 } FPScalar;
5213
5214 static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a,
5215 const FPScalar *f, int mergereg,
5216 ARMFPStatusFlavour fpsttype)
5217 {
5218 switch (a->esz) {
5219 case MO_64:
5220 if (fp_access_check(s)) {
5221 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5222 TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5223 f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype));
5224 write_fp_dreg_merging(s, a->rd, mergereg, t0);
5225 }
5226 break;
5227 case MO_32:
5228 if (fp_access_check(s)) {
5229 TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5230 TCGv_i32 t1 = read_fp_sreg(s, a->rm);
5231 f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype));
5232 write_fp_sreg_merging(s, a->rd, mergereg, t0);
5233 }
5234 break;
5235 case MO_16:
5236 if (!dc_isar_feature(aa64_fp16, s)) {
5237 return false;
5238 }
5239 if (fp_access_check(s)) {
5240 TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5241 TCGv_i32 t1 = read_fp_hreg(s, a->rm);
5242 f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype));
5243 write_fp_hreg_merging(s, a->rd, mergereg, t0);
5244 }
5245 break;
5246 default:
5247 return false;
5248 }
5249 return true;
5250 }
5251
5252 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
5253 int mergereg)
5254 {
5255 return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
5256 a->esz == MO_16 ?
5257 FPST_A64_F16 : FPST_A64);
5258 }
5259
5260 static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a,
5261 const FPScalar *fnormal, const FPScalar *fah,
5262 int mergereg)
5263 {
5264 return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal,
5265 mergereg, select_ah_fpst(s, a->esz));
5266 }
5267
5268 /* Some insns need to call different helpers when FPCR.AH == 1 */
5269 static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a,
5270 const FPScalar *fnormal,
5271 const FPScalar *fah,
5272 int mergereg)
5273 {
5274 return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg);
5275 }
5276
5277 static const FPScalar f_scalar_fadd = {
5278 gen_helper_vfp_addh,
5279 gen_helper_vfp_adds,
5280 gen_helper_vfp_addd,
5281 };
5282 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn)
5283
5284 static const FPScalar f_scalar_fsub = {
5285 gen_helper_vfp_subh,
5286 gen_helper_vfp_subs,
5287 gen_helper_vfp_subd,
5288 };
5289 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn)
5290
5291 static const FPScalar f_scalar_fdiv = {
5292 gen_helper_vfp_divh,
5293 gen_helper_vfp_divs,
5294 gen_helper_vfp_divd,
5295 };
5296 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn)
5297
5298 static const FPScalar f_scalar_fmul = {
5299 gen_helper_vfp_mulh,
5300 gen_helper_vfp_muls,
5301 gen_helper_vfp_muld,
5302 };
5303 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn)
5304
5305 static const FPScalar f_scalar_fmax = {
5306 gen_helper_vfp_maxh,
5307 gen_helper_vfp_maxs,
5308 gen_helper_vfp_maxd,
5309 };
5310 static const FPScalar f_scalar_fmax_ah = {
5311 gen_helper_vfp_ah_maxh,
5312 gen_helper_vfp_ah_maxs,
5313 gen_helper_vfp_ah_maxd,
5314 };
5315 TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn)
5316
5317 static const FPScalar f_scalar_fmin = {
5318 gen_helper_vfp_minh,
5319 gen_helper_vfp_mins,
5320 gen_helper_vfp_mind,
5321 };
5322 static const FPScalar f_scalar_fmin_ah = {
5323 gen_helper_vfp_ah_minh,
5324 gen_helper_vfp_ah_mins,
5325 gen_helper_vfp_ah_mind,
5326 };
5327 TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn)
5328
5329 static const FPScalar f_scalar_fmaxnm = {
5330 gen_helper_vfp_maxnumh,
5331 gen_helper_vfp_maxnums,
5332 gen_helper_vfp_maxnumd,
5333 };
5334 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn)
5335
5336 static const FPScalar f_scalar_fminnm = {
5337 gen_helper_vfp_minnumh,
5338 gen_helper_vfp_minnums,
5339 gen_helper_vfp_minnumd,
5340 };
5341 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn)
5342
5343 static const FPScalar f_scalar_fmulx = {
5344 gen_helper_advsimd_mulxh,
5345 gen_helper_vfp_mulxs,
5346 gen_helper_vfp_mulxd,
5347 };
5348 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn)
5349
5350 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5351 {
5352 gen_helper_vfp_mulh(d, n, m, s);
5353 gen_vfp_negh(d, d);
5354 }
5355
5356 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5357 {
5358 gen_helper_vfp_muls(d, n, m, s);
5359 gen_vfp_negs(d, d);
5360 }
5361
5362 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5363 {
5364 gen_helper_vfp_muld(d, n, m, s);
5365 gen_vfp_negd(d, d);
5366 }
5367
5368 static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5369 {
5370 gen_helper_vfp_mulh(d, n, m, s);
5371 gen_vfp_ah_negh(d, d);
5372 }
5373
5374 static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5375 {
5376 gen_helper_vfp_muls(d, n, m, s);
5377 gen_vfp_ah_negs(d, d);
5378 }
5379
5380 static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5381 {
5382 gen_helper_vfp_muld(d, n, m, s);
5383 gen_vfp_ah_negd(d, d);
5384 }
5385
5386 static const FPScalar f_scalar_fnmul = {
5387 gen_fnmul_h,
5388 gen_fnmul_s,
5389 gen_fnmul_d,
5390 };
5391 static const FPScalar f_scalar_ah_fnmul = {
5392 gen_fnmul_ah_h,
5393 gen_fnmul_ah_s,
5394 gen_fnmul_ah_d,
5395 };
5396 TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn)
5397
5398 static const FPScalar f_scalar_fcmeq = {
5399 gen_helper_advsimd_ceq_f16,
5400 gen_helper_neon_ceq_f32,
5401 gen_helper_neon_ceq_f64,
5402 };
5403 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm)
5404
5405 static const FPScalar f_scalar_fcmge = {
5406 gen_helper_advsimd_cge_f16,
5407 gen_helper_neon_cge_f32,
5408 gen_helper_neon_cge_f64,
5409 };
5410 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm)
5411
5412 static const FPScalar f_scalar_fcmgt = {
5413 gen_helper_advsimd_cgt_f16,
5414 gen_helper_neon_cgt_f32,
5415 gen_helper_neon_cgt_f64,
5416 };
5417 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm)
5418
5419 static const FPScalar f_scalar_facge = {
5420 gen_helper_advsimd_acge_f16,
5421 gen_helper_neon_acge_f32,
5422 gen_helper_neon_acge_f64,
5423 };
5424 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm)
5425
5426 static const FPScalar f_scalar_facgt = {
5427 gen_helper_advsimd_acgt_f16,
5428 gen_helper_neon_acgt_f32,
5429 gen_helper_neon_acgt_f64,
5430 };
5431 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm)
5432
5433 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5434 {
5435 gen_helper_vfp_subh(d, n, m, s);
5436 gen_vfp_absh(d, d);
5437 }
5438
5439 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5440 {
5441 gen_helper_vfp_subs(d, n, m, s);
5442 gen_vfp_abss(d, d);
5443 }
5444
5445 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5446 {
5447 gen_helper_vfp_subd(d, n, m, s);
5448 gen_vfp_absd(d, d);
5449 }
5450
5451 static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5452 {
5453 gen_helper_vfp_subh(d, n, m, s);
5454 gen_vfp_ah_absh(d, d);
5455 }
5456
5457 static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
5458 {
5459 gen_helper_vfp_subs(d, n, m, s);
5460 gen_vfp_ah_abss(d, d);
5461 }
5462
5463 static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
5464 {
5465 gen_helper_vfp_subd(d, n, m, s);
5466 gen_vfp_ah_absd(d, d);
5467 }
5468
5469 static const FPScalar f_scalar_fabd = {
5470 gen_fabd_h,
5471 gen_fabd_s,
5472 gen_fabd_d,
5473 };
5474 static const FPScalar f_scalar_ah_fabd = {
5475 gen_fabd_ah_h,
5476 gen_fabd_ah_s,
5477 gen_fabd_ah_d,
5478 };
5479 TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn)
5480
5481 static const FPScalar f_scalar_frecps = {
5482 gen_helper_recpsf_f16,
5483 gen_helper_recpsf_f32,
5484 gen_helper_recpsf_f64,
5485 };
5486 static const FPScalar f_scalar_ah_frecps = {
5487 gen_helper_recpsf_ah_f16,
5488 gen_helper_recpsf_ah_f32,
5489 gen_helper_recpsf_ah_f64,
5490 };
5491 TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a,
5492 &f_scalar_frecps, &f_scalar_ah_frecps, a->rn)
5493
5494 static const FPScalar f_scalar_frsqrts = {
5495 gen_helper_rsqrtsf_f16,
5496 gen_helper_rsqrtsf_f32,
5497 gen_helper_rsqrtsf_f64,
5498 };
5499 static const FPScalar f_scalar_ah_frsqrts = {
5500 gen_helper_rsqrtsf_ah_f16,
5501 gen_helper_rsqrtsf_ah_f32,
5502 gen_helper_rsqrtsf_ah_f64,
5503 };
5504 TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a,
5505 &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn)
5506
5507 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
5508 const FPScalar *f, bool swap)
5509 {
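    /*
     * Compare against zero.  When @swap is true the operands are reversed,
     * so that FCMLT0/FCMLE0 can reuse the GT/GE helpers.
     */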
5510 switch (a->esz) {
5511 case MO_64:
5512 if (fp_access_check(s)) {
5513 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5514 TCGv_i64 t1 = tcg_constant_i64(0);
5515 if (swap) {
5516 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64));
5517 } else {
5518 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
5519 }
5520 write_fp_dreg(s, a->rd, t0);
5521 }
5522 break;
5523 case MO_32:
5524 if (fp_access_check(s)) {
5525 TCGv_i32 t0 = read_fp_sreg(s, a->rn);
5526 TCGv_i32 t1 = tcg_constant_i32(0);
5527 if (swap) {
5528 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64));
5529 } else {
5530 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
5531 }
5532 write_fp_sreg(s, a->rd, t0);
5533 }
5534 break;
5535 case MO_16:
5536 if (!dc_isar_feature(aa64_fp16, s)) {
5537 return false;
5538 }
5539 if (fp_access_check(s)) {
5540 TCGv_i32 t0 = read_fp_hreg(s, a->rn);
5541 TCGv_i32 t1 = tcg_constant_i32(0);
5542 if (swap) {
5543 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16));
5544 } else {
5545 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
5546 }
5547 write_fp_sreg(s, a->rd, t0);
5548 }
5549 break;
5550 default:
5551 return false;
5552 }
5553 return true;
5554 }
5555
5556 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false)
5557 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false)
5558 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false)
5559 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true)
5560 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true)
5561
5562 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a,
5563 MemOp sgn_n, MemOp sgn_m,
5564 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp),
5565 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
5566 {
5567 TCGv_i64 t0, t1, t2, qc;
5568 MemOp esz = a->esz;
5569
5570 if (!fp_access_check(s)) {
5571 return true;
5572 }
5573
5574 t0 = tcg_temp_new_i64();
5575 t1 = tcg_temp_new_i64();
5576 t2 = tcg_temp_new_i64();
5577 qc = tcg_temp_new_i64();
5578 read_vec_element(s, t1, a->rn, 0, esz | sgn_n);
5579 read_vec_element(s, t2, a->rm, 0, esz | sgn_m);
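    /*
     * QC is loaded, updated in place by the saturating op, and written
     * back so the sticky saturation flag accumulates across insns.
     */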
5580 tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5581
5582 if (esz == MO_64) {
5583 gen_d(t0, qc, t1, t2);
5584 } else {
5585 gen_bhs(t0, qc, t1, t2, esz);
5586 tcg_gen_ext_i64(t0, t0, esz);
5587 }
5588
5589 write_fp_dreg(s, a->rd, t0);
5590 tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
5591 return true;
5592 }
5593
5594 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d)
5595 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d)
5596 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d)
5597 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d)
5598 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d)
5599 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d)
5600
5601 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a,
5602 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64))
5603 {
5604 if (fp_access_check(s)) {
5605 TCGv_i64 t0 = tcg_temp_new_i64();
5606 TCGv_i64 t1 = tcg_temp_new_i64();
5607
5608 read_vec_element(s, t0, a->rn, 0, MO_64);
5609 read_vec_element(s, t1, a->rm, 0, MO_64);
5610 fn(t0, t0, t1);
5611 write_fp_dreg(s, a->rd, t0);
5612 }
5613 return true;
5614 }
5615
5616 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64)
5617 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64)
5618 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64)
5619 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64)
5620 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64)
5621 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64)
5622
5623 typedef struct ENVScalar2 {
5624 NeonGenTwoOpEnvFn *gen_bhs[3];
5625 NeonGenTwo64OpEnvFn *gen_d;
5626 } ENVScalar2;
5627
5628 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f)
5629 {
5630 if (!fp_access_check(s)) {
5631 return true;
5632 }
5633 if (a->esz == MO_64) {
5634 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5635 TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5636 f->gen_d(t0, tcg_env, t0, t1);
5637 write_fp_dreg(s, a->rd, t0);
5638 } else {
5639 TCGv_i32 t0 = tcg_temp_new_i32();
5640 TCGv_i32 t1 = tcg_temp_new_i32();
5641
5642 read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5643 read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5644 f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
5645 write_fp_sreg(s, a->rd, t0);
5646 }
5647 return true;
5648 }
5649
5650 static const ENVScalar2 f_scalar_sqshl = {
5651 { gen_helper_neon_qshl_s8,
5652 gen_helper_neon_qshl_s16,
5653 gen_helper_neon_qshl_s32 },
5654 gen_helper_neon_qshl_s64,
5655 };
5656 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl)
5657
5658 static const ENVScalar2 f_scalar_uqshl = {
5659 { gen_helper_neon_qshl_u8,
5660 gen_helper_neon_qshl_u16,
5661 gen_helper_neon_qshl_u32 },
5662 gen_helper_neon_qshl_u64,
5663 };
5664 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl)
5665
5666 static const ENVScalar2 f_scalar_sqrshl = {
5667 { gen_helper_neon_qrshl_s8,
5668 gen_helper_neon_qrshl_s16,
5669 gen_helper_neon_qrshl_s32 },
5670 gen_helper_neon_qrshl_s64,
5671 };
5672 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl)
5673
5674 static const ENVScalar2 f_scalar_uqrshl = {
5675 { gen_helper_neon_qrshl_u8,
5676 gen_helper_neon_qrshl_u16,
5677 gen_helper_neon_qrshl_u32 },
5678 gen_helper_neon_qrshl_u64,
5679 };
5680 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl)
5681
5682 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a,
5683 const ENVScalar2 *f)
5684 {
5685 if (a->esz == MO_16 || a->esz == MO_32) {
5686 return do_env_scalar2(s, a, f);
5687 }
5688 return false;
5689 }
5690
5691 static const ENVScalar2 f_scalar_sqdmulh = {
5692 { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 }
5693 };
5694 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh)
5695
5696 static const ENVScalar2 f_scalar_sqrdmulh = {
5697 { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 }
5698 };
5699 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
5700
5701 typedef struct ENVScalar3 {
5702 NeonGenThreeOpEnvFn *gen_hs[2];
5703 } ENVScalar3;
5704
5705 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
5706 const ENVScalar3 *f)
5707 {
5708 TCGv_i32 t0, t1, t2;
5709
5710 if (a->esz != MO_16 && a->esz != MO_32) {
5711 return false;
5712 }
5713 if (!fp_access_check(s)) {
5714 return true;
5715 }
5716
5717 t0 = tcg_temp_new_i32();
5718 t1 = tcg_temp_new_i32();
5719 t2 = tcg_temp_new_i32();
5720 read_vec_element_i32(s, t0, a->rn, 0, a->esz);
5721 read_vec_element_i32(s, t1, a->rm, 0, a->esz);
5722 read_vec_element_i32(s, t2, a->rd, 0, a->esz);
5723 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
5724 write_fp_sreg(s, a->rd, t0);
5725 return true;
5726 }
5727
5728 static const ENVScalar3 f_scalar_sqrdmlah = {
5729 { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
5730 };
5731 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
5732
5733 static const ENVScalar3 f_scalar_sqrdmlsh = {
5734 { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
5735 };
5736 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
5737
5738 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
5739 {
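    /* negsetcond writes -1 (all ones) when the condition holds, else 0. */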
5740 if (fp_access_check(s)) {
5741 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
5742 TCGv_i64 t1 = read_fp_dreg(s, a->rm);
5743 tcg_gen_negsetcond_i64(cond, t0, t0, t1);
5744 write_fp_dreg(s, a->rd, t0);
5745 }
5746 return true;
5747 }
5748
5749 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT)
5750 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU)
5751 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE)
5752 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
5753 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
5754 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
5755
5756 static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a,
5757 int data,
5758 gen_helper_gvec_3_ptr * const fns[3],
5759 ARMFPStatusFlavour fpsttype)
5760 {
5761 MemOp esz = a->esz;
5762 int check = fp_access_check_vector_hsd(s, a->q, esz);
5763
5764 if (check <= 0) {
5765 return check == 0;
5766 }
5767
5768 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype,
5769 data, fns[esz - 1]);
5770 return true;
5771 }
5772
5773 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
5774 gen_helper_gvec_3_ptr * const fns[3])
5775 {
5776 return do_fp3_vector_with_fpsttype(s, a, data, fns,
5777 a->esz == MO_16 ?
5778 FPST_A64_F16 : FPST_A64);
5779 }
5780
5781 static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data,
5782 gen_helper_gvec_3_ptr * const fnormal[3],
5783 gen_helper_gvec_3_ptr * const fah[3])
5784 {
5785 return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal);
5786 }
5787
5788 static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data,
5789 gen_helper_gvec_3_ptr * const fnormal[3],
5790 gen_helper_gvec_3_ptr * const fah[3])
5791 {
5792 return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? fah : fnormal,
5793 select_ah_fpst(s, a->esz));
5794 }
5795
5796 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = {
5797 gen_helper_gvec_fadd_h,
5798 gen_helper_gvec_fadd_s,
5799 gen_helper_gvec_fadd_d,
5800 };
5801 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)
5802
5803 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
5804 gen_helper_gvec_fsub_h,
5805 gen_helper_gvec_fsub_s,
5806 gen_helper_gvec_fsub_d,
5807 };
5808 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)
5809
5810 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
5811 gen_helper_gvec_fdiv_h,
5812 gen_helper_gvec_fdiv_s,
5813 gen_helper_gvec_fdiv_d,
5814 };
5815 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)
5816
5817 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
5818 gen_helper_gvec_fmul_h,
5819 gen_helper_gvec_fmul_s,
5820 gen_helper_gvec_fmul_d,
5821 };
5822 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)
5823
5824 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
5825 gen_helper_gvec_fmax_h,
5826 gen_helper_gvec_fmax_s,
5827 gen_helper_gvec_fmax_d,
5828 };
5829 static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = {
5830 gen_helper_gvec_ah_fmax_h,
5831 gen_helper_gvec_ah_fmax_s,
5832 gen_helper_gvec_ah_fmax_d,
5833 };
5834 TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah)
5835
5836 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
5837 gen_helper_gvec_fmin_h,
5838 gen_helper_gvec_fmin_s,
5839 gen_helper_gvec_fmin_d,
5840 };
5841 static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = {
5842 gen_helper_gvec_ah_fmin_h,
5843 gen_helper_gvec_ah_fmin_s,
5844 gen_helper_gvec_ah_fmin_d,
5845 };
5846 TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah)
5847
5848 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
5849 gen_helper_gvec_fmaxnum_h,
5850 gen_helper_gvec_fmaxnum_s,
5851 gen_helper_gvec_fmaxnum_d,
5852 };
5853 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)
5854
5855 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
5856 gen_helper_gvec_fminnum_h,
5857 gen_helper_gvec_fminnum_s,
5858 gen_helper_gvec_fminnum_d,
5859 };
5860 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)
5861
5862 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
5863 gen_helper_gvec_fmulx_h,
5864 gen_helper_gvec_fmulx_s,
5865 gen_helper_gvec_fmulx_d,
5866 };
5867 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)
5868
5869 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
5870 gen_helper_gvec_vfma_h,
5871 gen_helper_gvec_vfma_s,
5872 gen_helper_gvec_vfma_d,
5873 };
5874 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)
5875
5876 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
5877 gen_helper_gvec_vfms_h,
5878 gen_helper_gvec_vfms_s,
5879 gen_helper_gvec_vfms_d,
5880 };
5881 static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = {
5882 gen_helper_gvec_ah_vfms_h,
5883 gen_helper_gvec_ah_vfms_s,
5884 gen_helper_gvec_ah_vfms_d,
5885 };
5886 TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah)
5887
5888 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
5889 gen_helper_gvec_fceq_h,
5890 gen_helper_gvec_fceq_s,
5891 gen_helper_gvec_fceq_d,
5892 };
5893 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)
5894
5895 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
5896 gen_helper_gvec_fcge_h,
5897 gen_helper_gvec_fcge_s,
5898 gen_helper_gvec_fcge_d,
5899 };
5900 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)
5901
5902 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
5903 gen_helper_gvec_fcgt_h,
5904 gen_helper_gvec_fcgt_s,
5905 gen_helper_gvec_fcgt_d,
5906 };
5907 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)
5908
5909 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
5910 gen_helper_gvec_facge_h,
5911 gen_helper_gvec_facge_s,
5912 gen_helper_gvec_facge_d,
5913 };
5914 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)
5915
5916 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
5917 gen_helper_gvec_facgt_h,
5918 gen_helper_gvec_facgt_s,
5919 gen_helper_gvec_facgt_d,
5920 };
5921 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt)
5922
5923 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = {
5924 gen_helper_gvec_fabd_h,
5925 gen_helper_gvec_fabd_s,
5926 gen_helper_gvec_fabd_d,
5927 };
5928 static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = {
5929 gen_helper_gvec_ah_fabd_h,
5930 gen_helper_gvec_ah_fabd_s,
5931 gen_helper_gvec_ah_fabd_d,
5932 };
5933 TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd)
5934
5935 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = {
5936 gen_helper_gvec_recps_h,
5937 gen_helper_gvec_recps_s,
5938 gen_helper_gvec_recps_d,
5939 };
5940 static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = {
5941 gen_helper_gvec_ah_recps_h,
5942 gen_helper_gvec_ah_recps_s,
5943 gen_helper_gvec_ah_recps_d,
5944 };
5945 TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps)
5946
5947 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = {
5948 gen_helper_gvec_rsqrts_h,
5949 gen_helper_gvec_rsqrts_s,
5950 gen_helper_gvec_rsqrts_d,
5951 };
5952 static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = {
5953 gen_helper_gvec_ah_rsqrts_h,
5954 gen_helper_gvec_ah_rsqrts_s,
5955 gen_helper_gvec_ah_rsqrts_d,
5956 };
5957 TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts)
5958
5959 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = {
5960 gen_helper_gvec_faddp_h,
5961 gen_helper_gvec_faddp_s,
5962 gen_helper_gvec_faddp_d,
5963 };
5964 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp)
5965
5966 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = {
5967 gen_helper_gvec_fmaxp_h,
5968 gen_helper_gvec_fmaxp_s,
5969 gen_helper_gvec_fmaxp_d,
5970 };
5971 static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = {
5972 gen_helper_gvec_ah_fmaxp_h,
5973 gen_helper_gvec_ah_fmaxp_s,
5974 gen_helper_gvec_ah_fmaxp_d,
5975 };
5976 TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp)
5977
5978 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = {
5979 gen_helper_gvec_fminp_h,
5980 gen_helper_gvec_fminp_s,
5981 gen_helper_gvec_fminp_d,
5982 };
5983 static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = {
5984 gen_helper_gvec_ah_fminp_h,
5985 gen_helper_gvec_ah_fminp_s,
5986 gen_helper_gvec_ah_fminp_d,
5987 };
5988 TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp)
5989
5990 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = {
5991 gen_helper_gvec_fmaxnump_h,
5992 gen_helper_gvec_fmaxnump_s,
5993 gen_helper_gvec_fmaxnump_d,
5994 };
5995 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp)
5996
5997 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = {
5998 gen_helper_gvec_fminnump_h,
5999 gen_helper_gvec_fminnump_s,
6000 gen_helper_gvec_fminnump_d,
6001 };
6002 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
6003
6004 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
6005 {
6006 if (fp_access_check(s)) {
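        /* Helper data: bit 0 selects FMLSL (subtract), bit 1 the '2' (top half) form. */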
6007 int data = (is_2 << 1) | is_s;
6008 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
6009 vec_full_reg_offset(s, a->rn),
6010 vec_full_reg_offset(s, a->rm), tcg_env,
6011 a->q ? 16 : 8, vec_full_reg_size(s),
6012 data, gen_helper_gvec_fmlal_a64);
6013 }
6014 return true;
6015 }
6016
6017 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false)
6018 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false)
6019 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true)
6020 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true)
6021
6022 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp)
6023 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp)
6024 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp)
6025 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp)
6026 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp)
6027
6028 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and)
6029 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc)
6030 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or)
6031 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc)
6032 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor)
6033
6034 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c)
6035 {
6036 if (fp_access_check(s)) {
6037 gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0);
6038 }
6039 return true;
6040 }
6041
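/*
 * BSL, BIT and BIF share the same bitsel operation and differ only in
 * operand routing: BSL selects with Vd, BIT and BIF select with Vm.
 */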
6042 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm)
6043 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd)
6044 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn)
6045
6046 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc)
6047 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc)
6048 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc)
6049 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc)
6050 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc)
6051 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc)
6052
6053 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl)
6054 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl)
6055 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl)
6056 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl)
6057 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl)
6058 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl)
6059 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl)
6060 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl)
6061
6062 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add)
6063 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub)
6064 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd)
6065 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd)
6066 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub)
6067 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub)
6068 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd)
6069 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd)
6070 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax)
6071 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax)
6072 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin)
6073 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin)
6074 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba)
6075 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba)
6076 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd)
6077 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd)
6078 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul)
6079 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b)
6080 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla)
6081 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls)
6082
6083 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond)
6084 {
6085 if (a->esz == MO_64 && !a->q) {
6086 return false;
6087 }
6088 if (fp_access_check(s)) {
6089 tcg_gen_gvec_cmp(cond, a->esz,
6090 vec_full_reg_offset(s, a->rd),
6091 vec_full_reg_offset(s, a->rn),
6092 vec_full_reg_offset(s, a->rm),
6093 a->q ? 16 : 8, vec_full_reg_size(s));
6094 }
6095 return true;
6096 }
6097
6098 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT)
6099 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU)
6100 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE)
6101 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
6102 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
6103 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
6104
6105 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
6106 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
6107 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
6108 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
6109
6110 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
6111 gen_helper_gvec_4 *fn)
6112 {
6113 if (fp_access_check(s)) {
6114 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
6115 }
6116 return true;
6117 }
6118
6119 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a,
6120 gen_helper_gvec_4_ptr *fn)
6121 {
6122 if (fp_access_check(s)) {
6123 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
6124 }
6125 return true;
6126 }
6127
6128 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
6129 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
6130 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
6131 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot)
6132 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla)
6133 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
6134 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
6135 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
6136
6137 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
6138 {
6139 if (!dc_isar_feature(aa64_bf16, s)) {
6140 return false;
6141 }
6142 if (fp_access_check(s)) {
6143 /* Q bit selects BFMLALB vs BFMLALT. */
6144 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
6145 s->fpcr_ah ? FPST_AH : FPST_A64, a->q,
6146 gen_helper_gvec_bfmlal);
6147 }
6148 return true;
6149 }
6150
6151 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
6152 gen_helper_gvec_fcaddh,
6153 gen_helper_gvec_fcadds,
6154 gen_helper_gvec_fcaddd,
6155 };
6156 /*
6157 * Encode FPCR.AH into the data so the helper knows whether the
6158 * negations it does should avoid flipping the sign bit on a NaN
6159 */
6160 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1),
6161 f_vector_fcadd)
6162 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1),
6163 f_vector_fcadd)
6164
6165 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
6166 {
6167 static gen_helper_gvec_4_ptr * const fn[] = {
6168 [MO_16] = gen_helper_gvec_fcmlah,
6169 [MO_32] = gen_helper_gvec_fcmlas,
6170 [MO_64] = gen_helper_gvec_fcmlad,
6171 };
6172 int check;
6173
6174 if (!dc_isar_feature(aa64_fcma, s)) {
6175 return false;
6176 }
6177
6178 check = fp_access_check_vector_hsd(s, a->q, a->esz);
6179 if (check <= 0) {
6180 return check == 0;
6181 }
6182
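    /* Helper data: bits [1:0] hold the rotation, bit 2 holds FPCR.AH. */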
6183 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6184 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6185 a->rot | (s->fpcr_ah << 2), fn[a->esz]);
6186 return true;
6187 }
6188
6189 /*
6190 * Widening vector x vector/indexed.
6191 *
6192 * These read from the top or bottom half of a 128-bit vector.
6193 * After widening, optionally accumulate with a 128-bit vector.
6194 * Implement these inline, as the number of elements is limited
6195 * and the related SVE and SME operations on larger vectors use
6196 * even/odd elements instead of top/bottom half.
6197 *
6198 * If idx >= 0, operand 2 is indexed, otherwise vector.
6199 * If acc, operand 0 is loaded with rd.
6200 */
6201
6202 /* For low half, iterating up. */
6203 static bool do_3op_widening(DisasContext *s, MemOp memop, int top,
6204 int rd, int rn, int rm, int idx,
6205 NeonGenTwo64OpFn *fn, bool acc)
6206 {
6207 TCGv_i64 tcg_op0 = tcg_temp_new_i64();
6208 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6209 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6210 MemOp esz = memop & MO_SIZE;
6211 int half = 8 >> esz;
6212 int top_swap, top_half;
6213
6214 /* There are no 64x64->128 bit operations. */
6215 if (esz >= MO_64) {
6216 return false;
6217 }
6218 if (!fp_access_check(s)) {
6219 return true;
6220 }
6221
6222 if (idx >= 0) {
6223 read_vec_element(s, tcg_op2, rm, idx, memop);
6224 }
6225
6226 /*
6227 * For top half inputs, iterate forward; backward for bottom half.
6228 * This means the store to the destination will not occur until
6229 * the overlapping inputs are consumed.
6230 * Use top_swap to conditionally invert the forward iteration index.
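     * E.g. with half == 4, the bottom half uses top_swap == 3, so elt_fwd
     * 0, 1, 2, 3 visits elt 3, 2, 1, 0.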
6231 */
6232 top_swap = top ? 0 : half - 1;
6233 top_half = top ? half : 0;
6234
6235 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6236 int elt = elt_fwd ^ top_swap;
6237
6238 read_vec_element(s, tcg_op1, rn, elt + top_half, memop);
6239 if (idx < 0) {
6240 read_vec_element(s, tcg_op2, rm, elt + top_half, memop);
6241 }
6242 if (acc) {
6243 read_vec_element(s, tcg_op0, rd, elt, memop + 1);
6244 }
6245 fn(tcg_op0, tcg_op1, tcg_op2);
6246 write_vec_element(s, tcg_op0, rd, elt, esz + 1);
6247 }
6248 clear_vec_high(s, 1, rd);
6249 return true;
6250 }
6251
6252 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6253 {
6254 TCGv_i64 t = tcg_temp_new_i64();
6255 tcg_gen_mul_i64(t, n, m);
6256 tcg_gen_add_i64(d, d, t);
6257 }
6258
6259 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6260 {
6261 TCGv_i64 t = tcg_temp_new_i64();
6262 tcg_gen_mul_i64(t, n, m);
6263 tcg_gen_sub_i64(d, d, t);
6264 }
6265
6266 TRANS(SMULL_v, do_3op_widening,
6267 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6268 tcg_gen_mul_i64, false)
6269 TRANS(UMULL_v, do_3op_widening,
6270 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6271 tcg_gen_mul_i64, false)
6272 TRANS(SMLAL_v, do_3op_widening,
6273 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6274 gen_muladd_i64, true)
6275 TRANS(UMLAL_v, do_3op_widening,
6276 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6277 gen_muladd_i64, true)
6278 TRANS(SMLSL_v, do_3op_widening,
6279 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6280 gen_mulsub_i64, true)
6281 TRANS(UMLSL_v, do_3op_widening,
6282 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6283 gen_mulsub_i64, true)
6284
6285 TRANS(SMULL_vi, do_3op_widening,
6286 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6287 tcg_gen_mul_i64, false)
6288 TRANS(UMULL_vi, do_3op_widening,
6289 a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6290 tcg_gen_mul_i64, false)
6291 TRANS(SMLAL_vi, do_3op_widening,
6292 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6293 gen_muladd_i64, true)
6294 TRANS(UMLAL_vi, do_3op_widening,
6295 a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6296 gen_muladd_i64, true)
6297 TRANS(SMLSL_vi, do_3op_widening,
6298 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6299 gen_mulsub_i64, true)
6300 TRANS(UMLSL_vi, do_3op_widening,
6301 a->esz, a->q, a->rd, a->rn, a->rm, a->idx,
6302 gen_mulsub_i64, true)
6303
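/*
 * Absolute difference: compute both n - m and m - n and select
 * whichever is non-negative.
 */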
6304 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6305 {
6306 TCGv_i64 t1 = tcg_temp_new_i64();
6307 TCGv_i64 t2 = tcg_temp_new_i64();
6308
6309 tcg_gen_sub_i64(t1, n, m);
6310 tcg_gen_sub_i64(t2, m, n);
6311 tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2);
6312 }
6313
6314 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6315 {
6316 TCGv_i64 t1 = tcg_temp_new_i64();
6317 TCGv_i64 t2 = tcg_temp_new_i64();
6318
6319 tcg_gen_sub_i64(t1, n, m);
6320 tcg_gen_sub_i64(t2, m, n);
6321 tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2);
6322 }
6323
6324 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6325 {
6326 TCGv_i64 t = tcg_temp_new_i64();
6327 gen_sabd_i64(t, n, m);
6328 tcg_gen_add_i64(d, d, t);
6329 }
6330
6331 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6332 {
6333 TCGv_i64 t = tcg_temp_new_i64();
6334 gen_uabd_i64(t, n, m);
6335 tcg_gen_add_i64(d, d, t);
6336 }
6337
6338 TRANS(SADDL_v, do_3op_widening,
6339 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6340 tcg_gen_add_i64, false)
6341 TRANS(UADDL_v, do_3op_widening,
6342 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6343 tcg_gen_add_i64, false)
6344 TRANS(SSUBL_v, do_3op_widening,
6345 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6346 tcg_gen_sub_i64, false)
6347 TRANS(USUBL_v, do_3op_widening,
6348 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6349 tcg_gen_sub_i64, false)
6350 TRANS(SABDL_v, do_3op_widening,
6351 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6352 gen_sabd_i64, false)
6353 TRANS(UABDL_v, do_3op_widening,
6354 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6355 gen_uabd_i64, false)
6356 TRANS(SABAL_v, do_3op_widening,
6357 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6358 gen_saba_i64, true)
6359 TRANS(UABAL_v, do_3op_widening,
6360 a->esz, a->q, a->rd, a->rn, a->rm, -1,
6361 gen_uaba_i64, true)
6362
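/* Doubling is implemented as a saturating add of the product to itself. */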
6363 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6364 {
6365 tcg_gen_mul_i64(d, n, m);
6366 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d);
6367 }
6368
6369 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6370 {
6371 tcg_gen_mul_i64(d, n, m);
6372 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d);
6373 }
6374
6375 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6376 {
6377 TCGv_i64 t = tcg_temp_new_i64();
6378
6379 tcg_gen_mul_i64(t, n, m);
6380 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6381 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6382 }
6383
6384 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6385 {
6386 TCGv_i64 t = tcg_temp_new_i64();
6387
6388 tcg_gen_mul_i64(t, n, m);
6389 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6390 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6391 }
6392
6393 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6394 {
6395 TCGv_i64 t = tcg_temp_new_i64();
6396
6397 tcg_gen_mul_i64(t, n, m);
6398 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t);
6399 tcg_gen_neg_i64(t, t);
6400 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t);
6401 }
6402
6403 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
6404 {
6405 TCGv_i64 t = tcg_temp_new_i64();
6406
6407 tcg_gen_mul_i64(t, n, m);
6408 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t);
6409 tcg_gen_neg_i64(t, t);
6410 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t);
6411 }
6412
6413 TRANS(SQDMULL_v, do_3op_widening,
6414 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6415 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6416 TRANS(SQDMLAL_v, do_3op_widening,
6417 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6418 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6419 TRANS(SQDMLSL_v, do_3op_widening,
6420 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1,
6421 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6422
6423 TRANS(SQDMULL_vi, do_3op_widening,
6424 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6425 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6426 TRANS(SQDMLAL_vi, do_3op_widening,
6427 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6428 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6429 TRANS(SQDMLSL_vi, do_3op_widening,
6430 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx,
6431 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6432
6433 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a,
6434 MemOp sign, bool sub)
6435 {
6436 TCGv_i64 tcg_op0, tcg_op1;
6437 MemOp esz = a->esz;
6438 int half = 8 >> esz;
6439 bool top = a->q;
6440 int top_swap = top ? 0 : half - 1;
6441 int top_half = top ? half : 0;
6442
6443 /* There are no 64x64->128 bit operations. */
6444 if (esz >= MO_64) {
6445 return false;
6446 }
6447 if (!fp_access_check(s)) {
6448 return true;
6449 }
6450 tcg_op0 = tcg_temp_new_i64();
6451 tcg_op1 = tcg_temp_new_i64();
6452
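    /*
     * Operand rn is already wide (esz + 1); the rm elements are read
     * from the low or high half and widened with the requested sign.
     */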
6453 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6454 int elt = elt_fwd ^ top_swap;
6455
6456 read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
6457 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6458 if (sub) {
6459 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6460 } else {
6461 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6462 }
6463 write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
6464 }
6465 clear_vec_high(s, 1, a->rd);
6466 return true;
6467 }
6468
6469 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
6470 TRANS(UADDW, do_addsub_wide, a, 0, false)
6471 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
6472 TRANS(USUBW, do_addsub_wide, a, 0, true)
6473
6474 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
6475 bool sub, bool round)
6476 {
6477 TCGv_i64 tcg_op0, tcg_op1;
6478 MemOp esz = a->esz;
6479 int half = 8 >> esz;
6480 bool top = a->q;
6481 int ebits = 8 << esz;
6482 uint64_t rbit = 1ull << (ebits - 1);
6483 int top_swap, top_half;
6484
6485 /* There are no 128x128->64 bit operations. */
6486 if (esz >= MO_64) {
6487 return false;
6488 }
6489 if (!fp_access_check(s)) {
6490 return true;
6491 }
6492 tcg_op0 = tcg_temp_new_i64();
6493 tcg_op1 = tcg_temp_new_i64();
6494
6495 /*
6496 * For top half inputs, iterate backward; forward for bottom half.
6497 * This means the store to the destination will not occur until
6498 * the overlapping inputs are consumed.
6499 */
6500 top_swap = top ? half - 1 : 0;
6501 top_half = top ? half : 0;
6502
6503 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
6504 int elt = elt_fwd ^ top_swap;
6505
6506 read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
6507 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
6508 if (sub) {
6509 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
6510 } else {
6511 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
6512 }
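        /* Rounding adds half of the part that is about to be shifted out. */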
6513 if (round) {
6514 tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
6515 }
6516 tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
6517 write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
6518 }
6519 clear_vec_high(s, top, a->rd);
6520 return true;
6521 }
6522
6523 TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
6524 TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
6525 TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
6526 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)
6527
6528 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
6529 {
6530 if (fp_access_check(s)) {
6531 /* The Q field specifies lo/hi half input for these insns. */
6532 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn);
6533 }
6534 return true;
6535 }
6536
6537 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h)
6538 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q)
6539
6540 /*
6541 * Advanced SIMD scalar/vector x indexed element
6542 */
6543
6544 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
6545 {
6546 switch (a->esz) {
6547 case MO_64:
6548 if (fp_access_check(s)) {
6549 TCGv_i64 t0 = read_fp_dreg(s, a->rn);
6550 TCGv_i64 t1 = tcg_temp_new_i64();
6551
6552 read_vec_element(s, t1, a->rm, a->idx, MO_64);
6553 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6554 write_fp_dreg_merging(s, a->rd, a->rn, t0);
6555 }
6556 break;
6557 case MO_32:
6558 if (fp_access_check(s)) {
6559 TCGv_i32 t0 = read_fp_sreg(s, a->rn);
6560 TCGv_i32 t1 = tcg_temp_new_i32();
6561
6562 read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
6563 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6564 write_fp_sreg_merging(s, a->rd, a->rn, t0);
6565 }
6566 break;
6567 case MO_16:
6568 if (!dc_isar_feature(aa64_fp16, s)) {
6569 return false;
6570 }
6571 if (fp_access_check(s)) {
6572 TCGv_i32 t0 = read_fp_hreg(s, a->rn);
6573 TCGv_i32 t1 = tcg_temp_new_i32();
6574
6575 read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
6576 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6577 write_fp_hreg_merging(s, a->rd, a->rn, t0);
6578 }
6579 break;
6580 default:
6581 g_assert_not_reached();
6582 }
6583 return true;
6584 }
6585
6586 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul)
6587 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx)
6588
6589 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
6590 {
6591 switch (a->esz) {
6592 case MO_64:
6593 if (fp_access_check(s)) {
6594 TCGv_i64 t0 = read_fp_dreg(s, a->rd);
6595 TCGv_i64 t1 = read_fp_dreg(s, a->rn);
6596 TCGv_i64 t2 = tcg_temp_new_i64();
6597
6598 read_vec_element(s, t2, a->rm, a->idx, MO_64);
6599 if (neg) {
6600 gen_vfp_maybe_ah_negd(s, t1, t1);
6601 }
6602 gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6603 write_fp_dreg_merging(s, a->rd, a->rd, t0);
6604 }
6605 break;
6606 case MO_32:
6607 if (fp_access_check(s)) {
6608 TCGv_i32 t0 = read_fp_sreg(s, a->rd);
6609 TCGv_i32 t1 = read_fp_sreg(s, a->rn);
6610 TCGv_i32 t2 = tcg_temp_new_i32();
6611
6612 read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
6613 if (neg) {
6614 gen_vfp_maybe_ah_negs(s, t1, t1);
6615 }
6616 gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
6617 write_fp_sreg_merging(s, a->rd, a->rd, t0);
6618 }
6619 break;
6620 case MO_16:
6621 if (!dc_isar_feature(aa64_fp16, s)) {
6622 return false;
6623 }
6624 if (fp_access_check(s)) {
6625 TCGv_i32 t0 = read_fp_hreg(s, a->rd);
6626 TCGv_i32 t1 = read_fp_hreg(s, a->rn);
6627 TCGv_i32 t2 = tcg_temp_new_i32();
6628
6629 read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
6630 if (neg) {
6631 gen_vfp_maybe_ah_negh(s, t1, t1);
6632 }
6633 gen_helper_advsimd_muladdh(t0, t1, t2, t0,
6634 fpstatus_ptr(FPST_A64_F16));
6635 write_fp_hreg_merging(s, a->rd, a->rd, t0);
6636 }
6637 break;
6638 default:
6639 g_assert_not_reached();
6640 }
6641 return true;
6642 }
6643
6644 TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
6645 TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
6646
6647 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
6648 const ENVScalar2 *f)
6649 {
6650 if (a->esz < MO_16 || a->esz > MO_32) {
6651 return false;
6652 }
6653 if (fp_access_check(s)) {
6654 TCGv_i32 t0 = tcg_temp_new_i32();
6655 TCGv_i32 t1 = tcg_temp_new_i32();
6656
6657 read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6658 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6659 f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
6660 write_fp_sreg(s, a->rd, t0);
6661 }
6662 return true;
6663 }
6664
6665 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
6666 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
6667
6668 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a,
6669 const ENVScalar3 *f)
6670 {
6671 if (a->esz < MO_16 || a->esz > MO_32) {
6672 return false;
6673 }
6674 if (fp_access_check(s)) {
6675 TCGv_i32 t0 = tcg_temp_new_i32();
6676 TCGv_i32 t1 = tcg_temp_new_i32();
6677 TCGv_i32 t2 = tcg_temp_new_i32();
6678
6679 read_vec_element_i32(s, t0, a->rn, 0, a->esz);
6680 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
6681 read_vec_element_i32(s, t2, a->rd, 0, a->esz);
6682 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
6683 write_fp_sreg(s, a->rd, t0);
6684 }
6685 return true;
6686 }
6687
6688 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah)
6689 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh)
6690
6691 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a,
6692 NeonGenTwo64OpFn *fn, bool acc)
6693 {
6694 if (fp_access_check(s)) {
6695 TCGv_i64 t0 = tcg_temp_new_i64();
6696 TCGv_i64 t1 = tcg_temp_new_i64();
6697 TCGv_i64 t2 = tcg_temp_new_i64();
6698
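        /* The inputs are esz wide; the accumulator and result are esz + 1. */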
6699 if (acc) {
6700 read_vec_element(s, t0, a->rd, 0, a->esz + 1);
6701 }
6702 read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN);
6703 read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN);
6704 fn(t0, t1, t2);
6705
6706 /* Clear the whole register first, then store scalar. */
6707 clear_vec(s, a->rd);
6708 write_vec_element(s, t0, a->rd, 0, a->esz + 1);
6709 }
6710 return true;
6711 }
6712
6713 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a,
6714 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false)
6715 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a,
6716 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true)
6717 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a,
6718 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true)
6719
6720 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6721 gen_helper_gvec_3_ptr * const fns[3])
6722 {
6723 MemOp esz = a->esz;
6724 int check = fp_access_check_vector_hsd(s, a->q, esz);
6725
6726 if (check <= 0) {
6727 return check == 0;
6728 }
6729
6730 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
6731 esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6732 a->idx, fns[esz - 1]);
6733 return true;
6734 }
6735
6736 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = {
6737 gen_helper_gvec_fmul_idx_h,
6738 gen_helper_gvec_fmul_idx_s,
6739 gen_helper_gvec_fmul_idx_d,
6740 };
6741 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul)
6742
6743 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = {
6744 gen_helper_gvec_fmulx_idx_h,
6745 gen_helper_gvec_fmulx_idx_s,
6746 gen_helper_gvec_fmulx_idx_d,
6747 };
6748 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
6749
6750 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
6751 {
6752 static gen_helper_gvec_4_ptr * const fns[3][3] = {
6753 { gen_helper_gvec_fmla_idx_h,
6754 gen_helper_gvec_fmla_idx_s,
6755 gen_helper_gvec_fmla_idx_d },
6756 { gen_helper_gvec_fmls_idx_h,
6757 gen_helper_gvec_fmls_idx_s,
6758 gen_helper_gvec_fmls_idx_d },
6759 { gen_helper_gvec_ah_fmls_idx_h,
6760 gen_helper_gvec_ah_fmls_idx_s,
6761 gen_helper_gvec_ah_fmls_idx_d },
6762 };
6763 MemOp esz = a->esz;
6764 int check = fp_access_check_vector_hsd(s, a->q, esz);
6765
6766 if (check <= 0) {
6767 return check == 0;
6768 }
6769
6770 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6771 esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6772 a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]);
6773 return true;
6774 }
6775
6776 TRANS(FMLA_vi, do_fmla_vector_idx, a, false)
6777 TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
6778
6779 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
6780 {
6781 if (fp_access_check(s)) {
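        /* Helper data: bit 0 selects FMLSL, bit 1 the '2' (top half) form, bits above hold the index. */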
6782 int data = (a->idx << 2) | (is_2 << 1) | is_s;
6783 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
6784 vec_full_reg_offset(s, a->rn),
6785 vec_full_reg_offset(s, a->rm), tcg_env,
6786 a->q ? 16 : 8, vec_full_reg_size(s),
6787 data, gen_helper_gvec_fmlal_idx_a64);
6788 }
6789 return true;
6790 }
6791
6792 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false)
6793 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false)
6794 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true)
6795 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true)
6796
6797 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a,
6798 gen_helper_gvec_3 * const fns[2])
6799 {
6800 assert(a->esz == MO_16 || a->esz == MO_32);
6801 if (fp_access_check(s)) {
6802 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]);
6803 }
6804 return true;
6805 }
6806
6807 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = {
6808 gen_helper_gvec_mul_idx_h,
6809 gen_helper_gvec_mul_idx_s,
6810 };
6811 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul)
6812
6813 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub)
6814 {
6815 static gen_helper_gvec_4 * const fns[2][2] = {
6816 { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h },
6817 { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s },
6818 };
6819
6820 assert(a->esz == MO_16 || a->esz == MO_32);
6821 if (fp_access_check(s)) {
6822 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd,
6823 a->idx, fns[a->esz - 1][sub]);
6824 }
6825 return true;
6826 }
6827
6828 TRANS(MLA_vi, do_mla_vector_idx, a, false)
6829 TRANS(MLS_vi, do_mla_vector_idx, a, true)
6830
6831 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a,
6832 gen_helper_gvec_4 * const fns[2])
6833 {
6834 assert(a->esz == MO_16 || a->esz == MO_32);
6835 if (fp_access_check(s)) {
6836 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
6837 vec_full_reg_offset(s, a->rn),
6838 vec_full_reg_offset(s, a->rm),
6839 offsetof(CPUARMState, vfp.qc),
6840 a->q ? 16 : 8, vec_full_reg_size(s),
6841 a->idx, fns[a->esz - 1]);
6842 }
6843 return true;
6844 }
6845
6846 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = {
6847 gen_helper_neon_sqdmulh_idx_h,
6848 gen_helper_neon_sqdmulh_idx_s,
6849 };
6850 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh)
6851
6852 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
6853 gen_helper_neon_sqrdmulh_idx_h,
6854 gen_helper_neon_sqrdmulh_idx_s,
6855 };
6856 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
6857
6858 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = {
6859 gen_helper_neon_sqrdmlah_idx_h,
6860 gen_helper_neon_sqrdmlah_idx_s,
6861 };
6862 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6863 f_vector_idx_sqrdmlah)
6864
6865 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = {
6866 gen_helper_neon_sqrdmlsh_idx_h,
6867 gen_helper_neon_sqrdmlsh_idx_s,
6868 };
6869 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
6870 f_vector_idx_sqrdmlsh)
6871
6872 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
6873 gen_helper_gvec_4 *fn)
6874 {
6875 if (fp_access_check(s)) {
6876 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6877 }
6878 return true;
6879 }
6880
6881 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a,
6882 gen_helper_gvec_4_ptr *fn)
6883 {
6884 if (fp_access_check(s)) {
6885 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
6886 }
6887 return true;
6888 }
6889
6890 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
6891 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
6892 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6893 gen_helper_gvec_sudot_idx_b)
6894 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
6895 gen_helper_gvec_usdot_idx_b)
6896 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a,
6897 gen_helper_gvec_bfdot_idx)
6898
6899 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
6900 {
6901 if (!dc_isar_feature(aa64_bf16, s)) {
6902 return false;
6903 }
6904 if (fp_access_check(s)) {
6905 /* Q bit selects BFMLALB vs BFMLALT. */
6906 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
6907 s->fpcr_ah ? FPST_AH : FPST_A64,
6908 (a->idx << 1) | a->q,
6909 gen_helper_gvec_bfmlal_idx);
6910 }
6911 return true;
6912 }
6913
6914 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
6915 {
6916 gen_helper_gvec_4_ptr *fn;
6917
6918 if (!dc_isar_feature(aa64_fcma, s)) {
6919 return false;
6920 }
6921 switch (a->esz) {
6922 case MO_16:
6923 if (!dc_isar_feature(aa64_fp16, s)) {
6924 return false;
6925 }
6926 fn = gen_helper_gvec_fcmlah_idx;
6927 break;
6928 case MO_32:
6929 fn = gen_helper_gvec_fcmlas_idx;
6930 break;
6931 default:
6932 g_assert_not_reached();
6933 }
6934 if (fp_access_check(s)) {
6935 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
6936 a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
6937 (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn);
6938 }
6939 return true;
6940 }
6941
6942 /*
6943 * Advanced SIMD scalar pairwise
6944 */
6945
6946 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
6947 {
6948 switch (a->esz) {
6949 case MO_64:
6950 if (fp_access_check(s)) {
6951 TCGv_i64 t0 = tcg_temp_new_i64();
6952 TCGv_i64 t1 = tcg_temp_new_i64();
6953
6954 read_vec_element(s, t0, a->rn, 0, MO_64);
6955 read_vec_element(s, t1, a->rn, 1, MO_64);
6956 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
6957 write_fp_dreg(s, a->rd, t0);
6958 }
6959 break;
6960 case MO_32:
6961 if (fp_access_check(s)) {
6962 TCGv_i32 t0 = tcg_temp_new_i32();
6963 TCGv_i32 t1 = tcg_temp_new_i32();
6964
6965 read_vec_element_i32(s, t0, a->rn, 0, MO_32);
6966 read_vec_element_i32(s, t1, a->rn, 1, MO_32);
6967 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
6968 write_fp_sreg(s, a->rd, t0);
6969 }
6970 break;
6971 case MO_16:
6972 if (!dc_isar_feature(aa64_fp16, s)) {
6973 return false;
6974 }
6975 if (fp_access_check(s)) {
6976 TCGv_i32 t0 = tcg_temp_new_i32();
6977 TCGv_i32 t1 = tcg_temp_new_i32();
6978
6979 read_vec_element_i32(s, t0, a->rn, 0, MO_16);
6980 read_vec_element_i32(s, t1, a->rn, 1, MO_16);
6981 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
6982 write_fp_sreg(s, a->rd, t0);
6983 }
6984 break;
6985 default:
6986 g_assert_not_reached();
6987 }
6988 return true;
6989 }
6990
6991 static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a,
6992 const FPScalar *fnormal,
6993 const FPScalar *fah)
6994 {
6995 return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal);
6996 }
6997
6998 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd)
6999 TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah)
7000 TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah)
7001 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm)
7002 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm)
7003
7004 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a)
7005 {
7006 if (fp_access_check(s)) {
7007 TCGv_i64 t0 = tcg_temp_new_i64();
7008 TCGv_i64 t1 = tcg_temp_new_i64();
7009
7010 read_vec_element(s, t0, a->rn, 0, MO_64);
7011 read_vec_element(s, t1, a->rn, 1, MO_64);
7012 tcg_gen_add_i64(t0, t0, t1);
7013 write_fp_dreg(s, a->rd, t0);
7014 }
7015 return true;
7016 }
7017
7018 /*
7019 * Floating-point conditional select
7020 */
7021
7022 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a)
7023 {
7024 TCGv_i64 t_true, t_false;
7025 DisasCompare64 c;
7026 int check = fp_access_check_scalar_hsd(s, a->esz);
7027
7028 if (check <= 0) {
7029 return check == 0;
7030 }
7031
7032 /* Zero extend sreg & hreg inputs to 64 bits now. */
7033 t_true = tcg_temp_new_i64();
7034 t_false = tcg_temp_new_i64();
7035 read_vec_element(s, t_true, a->rn, 0, a->esz);
7036 read_vec_element(s, t_false, a->rm, 0, a->esz);
7037
7038 a64_test_cc(&c, a->cond);
7039 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
7040 t_true, t_false);
7041
7042 /*
7043 * Note that sregs & hregs write back zeros to the high bits,
7044 * and we've already done the zero-extension.
7045 */
7046 write_fp_dreg(s, a->rd, t_true);
7047 return true;
7048 }
7049
7050 /*
7051 * Advanced SIMD Extract
7052 */
7053
7054 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a)
7055 {
7056 if (fp_access_check(s)) {
7057 TCGv_i64 lo = read_fp_dreg(s, a->rn);
7058 if (a->imm != 0) {
7059 TCGv_i64 hi = read_fp_dreg(s, a->rm);
7060 tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8);
7061 }
7062 write_fp_dreg(s, a->rd, lo);
7063 }
7064 return true;
7065 }
7066
7067 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a)
7068 {
7069 TCGv_i64 lo, hi;
7070 int pos = (a->imm & 7) * 8;
7071 int elt = a->imm >> 3;
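/*
 * EXT extracts 16 bytes starting at byte a->imm from the concatenation
 * Vm:Vn (Vn supplies the low bytes). 'elt' walks 64-bit chunks of that
 * pair and 'pos' is the bit offset within a chunk.
 */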
7072
7073 if (!fp_access_check(s)) {
7074 return true;
7075 }
7076
7077 lo = tcg_temp_new_i64();
7078 hi = tcg_temp_new_i64();
7079
7080 read_vec_element(s, lo, a->rn, elt, MO_64);
7081 elt++;
7082 read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64);
7083 elt++;
7084
7085 if (pos != 0) {
7086 TCGv_i64 hh = tcg_temp_new_i64();
7087 tcg_gen_extract2_i64(lo, lo, hi, pos);
7088 read_vec_element(s, hh, a->rm, elt & 1, MO_64);
7089 tcg_gen_extract2_i64(hi, hi, hh, pos);
7090 }
7091
7092 write_vec_element(s, lo, a->rd, 0, MO_64);
7093 write_vec_element(s, hi, a->rd, 1, MO_64);
7094 clear_vec_high(s, true, a->rd);
7095 return true;
7096 }
7097
7098 /*
7099 * Floating-point data-processing (3 source)
7100 */
7101
7102 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
7103 {
7104 TCGv_ptr fpst;
7105
7106 /*
7107 * These are fused multiply-add. Note that doing the negations here
7108 * as separate steps is correct: an input NaN should come out with
7109 * its sign bit flipped if it is a negated-input.
7110 */
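/*
 * For reference, the (neg_a, neg_n) pairs used by the TRANS lines below
 * give: FMADD Rd = Ra + Rn*Rm, FMSUB Rd = Ra - Rn*Rm,
 * FNMSUB Rd = -Ra + Rn*Rm, FNMADD Rd = -Ra - Rn*Rm.
 */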
7111 switch (a->esz) {
7112 case MO_64:
7113 if (fp_access_check(s)) {
7114 TCGv_i64 tn = read_fp_dreg(s, a->rn);
7115 TCGv_i64 tm = read_fp_dreg(s, a->rm);
7116 TCGv_i64 ta = read_fp_dreg(s, a->ra);
7117
7118 if (neg_a) {
7119 gen_vfp_maybe_ah_negd(s, ta, ta);
7120 }
7121 if (neg_n) {
7122 gen_vfp_maybe_ah_negd(s, tn, tn);
7123 }
7124 fpst = fpstatus_ptr(FPST_A64);
7125 gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
7126 write_fp_dreg_merging(s, a->rd, a->ra, ta);
7127 }
7128 break;
7129
7130 case MO_32:
7131 if (fp_access_check(s)) {
7132 TCGv_i32 tn = read_fp_sreg(s, a->rn);
7133 TCGv_i32 tm = read_fp_sreg(s, a->rm);
7134 TCGv_i32 ta = read_fp_sreg(s, a->ra);
7135
7136 if (neg_a) {
7137 gen_vfp_maybe_ah_negs(s, ta, ta);
7138 }
7139 if (neg_n) {
7140 gen_vfp_maybe_ah_negs(s, tn, tn);
7141 }
7142 fpst = fpstatus_ptr(FPST_A64);
7143 gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
7144 write_fp_sreg_merging(s, a->rd, a->ra, ta);
7145 }
7146 break;
7147
7148 case MO_16:
7149 if (!dc_isar_feature(aa64_fp16, s)) {
7150 return false;
7151 }
7152 if (fp_access_check(s)) {
7153 TCGv_i32 tn = read_fp_hreg(s, a->rn);
7154 TCGv_i32 tm = read_fp_hreg(s, a->rm);
7155 TCGv_i32 ta = read_fp_hreg(s, a->ra);
7156
7157 if (neg_a) {
7158 gen_vfp_maybe_ah_negh(s, ta, ta);
7159 }
7160 if (neg_n) {
7161 gen_vfp_maybe_ah_negh(s, tn, tn);
7162 }
7163 fpst = fpstatus_ptr(FPST_A64_F16);
7164 gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
7165 write_fp_hreg_merging(s, a->rd, a->ra, ta);
7166 }
7167 break;
7168
7169 default:
7170 return false;
7171 }
7172 return true;
7173 }
7174
7175 TRANS(FMADD, do_fmadd, a, false, false)
7176 TRANS(FNMADD, do_fmadd, a, true, true)
7177 TRANS(FMSUB, do_fmadd, a, false, true)
7178 TRANS(FNMSUB, do_fmadd, a, true, false)
7179
7180 /*
7181 * Advanced SIMD Across Lanes
7182 */
7183
7184 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen,
7185 MemOp src_sign, NeonGenTwo64OpFn *fn)
7186 {
7187 TCGv_i64 tcg_res, tcg_elt;
7188 MemOp src_mop = a->esz | src_sign;
7189 int elements = (a->q ? 16 : 8) >> a->esz;
7190
7191 /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */
7192 if (elements < 4) {
7193 return false;
7194 }
7195 if (!fp_access_check(s)) {
7196 return true;
7197 }
7198
7199 tcg_res = tcg_temp_new_i64();
7200 tcg_elt = tcg_temp_new_i64();
7201
7202 read_vec_element(s, tcg_res, a->rn, 0, src_mop);
7203 for (int i = 1; i < elements; i++) {
7204 read_vec_element(s, tcg_elt, a->rn, i, src_mop);
7205 fn(tcg_res, tcg_res, tcg_elt);
7206 }
7207
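/*
 * Truncate/zero-extend the accumulated value to the destination element
 * size; the widening forms (SADDLV/UADDLV) produce a result one element
 * size larger than the source elements.
 */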
7208 tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen);
7209 write_fp_dreg(s, a->rd, tcg_res);
7210 return true;
7211 }
7212
7213 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64)
7214 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64)
7215 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64)
7216 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64)
7217 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64)
7218 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64)
7219 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64)
7220
7221 /*
7222 * do_fp_reduction helper
7223 *
7224 * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7225 * important for correct NaN propagation that we do these
7226 * operations in exactly the order specified by the pseudocode.
7227 *
7228 * This is a recursive function.
7229 */
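/*
 * For example, with ecount == 4 and ebase == 0 the generated result is
 * fn(fn(e[0], e[1]), fn(e[2], e[3])), i.e. a pairwise reduction tree.
 */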
7230 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz,
7231 int ebase, int ecount, TCGv_ptr fpst,
7232 NeonGenTwoSingleOpFn *fn)
7233 {
7234 if (ecount == 1) {
7235 TCGv_i32 tcg_elem = tcg_temp_new_i32();
7236 read_vec_element_i32(s, tcg_elem, rn, ebase, esz);
7237 return tcg_elem;
7238 } else {
7239 int half = ecount >> 1;
7240 TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7241
7242 tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn);
7243 tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn);
7244 tcg_res = tcg_temp_new_i32();
7245
7246 fn(tcg_res, tcg_lo, tcg_hi, fpst);
7247 return tcg_res;
7248 }
7249 }
7250
7251 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
7252 NeonGenTwoSingleOpFn *fnormal,
7253 NeonGenTwoSingleOpFn *fah)
7254 {
7255 if (fp_access_check(s)) {
7256 MemOp esz = a->esz;
7257 int elts = (a->q ? 16 : 8) >> esz;
7258 TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
7259 TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst,
7260 s->fpcr_ah ? fah : fnormal);
7261 write_fp_sreg(s, a->rd, res);
7262 }
7263 return true;
7264 }
7265
7266 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a,
7267 gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh)
7268 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a,
7269 gen_helper_vfp_minnumh, gen_helper_vfp_minnumh)
7270 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a,
7271 gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh)
7272 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a,
7273 gen_helper_vfp_minh, gen_helper_vfp_ah_minh)
7274
7275 TRANS(FMAXNMV_s, do_fp_reduction, a,
7276 gen_helper_vfp_maxnums, gen_helper_vfp_maxnums)
7277 TRANS(FMINNMV_s, do_fp_reduction, a,
7278 gen_helper_vfp_minnums, gen_helper_vfp_minnums)
7279 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs)
7280 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins)
7281
7282 /*
7283 * Floating-point Immediate
7284 */
7285
7286 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a)
7287 {
7288 int check = fp_access_check_scalar_hsd(s, a->esz);
7289 uint64_t imm;
7290
7291 if (check <= 0) {
7292 return check == 0;
7293 }
7294
7295 imm = vfp_expand_imm(a->esz, a->imm);
7296 write_fp_dreg(s, a->rd, tcg_constant_i64(imm));
7297 return true;
7298 }
7299
7300 /*
7301 * Floating point compare, conditional compare
7302 */
7303
7304 static void handle_fp_compare(DisasContext *s, int size,
7305 unsigned int rn, unsigned int rm,
7306 bool cmp_with_zero, bool signal_all_nans)
7307 {
7308 TCGv_i64 tcg_flags = tcg_temp_new_i64();
7309 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64);
7310
7311 if (size == MO_64) {
7312 TCGv_i64 tcg_vn, tcg_vm;
7313
7314 tcg_vn = read_fp_dreg(s, rn);
7315 if (cmp_with_zero) {
7316 tcg_vm = tcg_constant_i64(0);
7317 } else {
7318 tcg_vm = read_fp_dreg(s, rm);
7319 }
7320 if (signal_all_nans) {
7321 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7322 } else {
7323 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7324 }
7325 } else {
7326 TCGv_i32 tcg_vn = tcg_temp_new_i32();
7327 TCGv_i32 tcg_vm = tcg_temp_new_i32();
7328
7329 read_vec_element_i32(s, tcg_vn, rn, 0, size);
7330 if (cmp_with_zero) {
7331 tcg_gen_movi_i32(tcg_vm, 0);
7332 } else {
7333 read_vec_element_i32(s, tcg_vm, rm, 0, size);
7334 }
7335
7336 switch (size) {
7337 case MO_32:
7338 if (signal_all_nans) {
7339 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7340 } else {
7341 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7342 }
7343 break;
7344 case MO_16:
7345 if (signal_all_nans) {
7346 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7347 } else {
7348 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
7349 }
7350 break;
7351 default:
7352 g_assert_not_reached();
7353 }
7354 }
7355
7356 gen_set_nzcv(tcg_flags);
7357 }
7358
7359 /* FCMP, FCMPE */
7360 static bool trans_FCMP(DisasContext *s, arg_FCMP *a)
7361 {
7362 int check = fp_access_check_scalar_hsd(s, a->esz);
7363
7364 if (check <= 0) {
7365 return check == 0;
7366 }
7367
7368 handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e);
7369 return true;
7370 }
7371
7372 /* FCCMP, FCCMPE */
7373 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a)
7374 {
7375 TCGLabel *label_continue = NULL;
7376 int check = fp_access_check_scalar_hsd(s, a->esz);
7377
7378 if (check <= 0) {
7379 return check == 0;
7380 }
7381
7382 if (a->cond < 0x0e) { /* not always */
7383 TCGLabel *label_match = gen_new_label();
7384 label_continue = gen_new_label();
7385 arm_gen_test_cc(a->cond, label_match);
7386 /* nomatch: */
7387 gen_set_nzcv(tcg_constant_i64(a->nzcv << 28));
7388 tcg_gen_br(label_continue);
7389 gen_set_label(label_match);
7390 }
7391
7392 handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e);
7393
7394 if (label_continue) {
7395 gen_set_label(label_continue);
7396 }
7397 return true;
7398 }
7399
7400 /*
7401 * Advanced SIMD Modified Immediate
7402 */
7403
7404 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a)
7405 {
7406 if (!dc_isar_feature(aa64_fp16, s)) {
7407 return false;
7408 }
7409 if (fp_access_check(s)) {
7410 tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd),
7411 a->q ? 16 : 8, vec_full_reg_size(s),
7412 vfp_expand_imm(MO_16, a->abcdefgh));
7413 }
7414 return true;
7415 }
7416
7417 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs,
7418 int64_t c, uint32_t oprsz, uint32_t maxsz)
7419 {
7420 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c);
7421 }
7422
7423 static bool trans_Vimm(DisasContext *s, arg_Vimm *a)
7424 {
7425 GVecGen2iFn *fn;
7426
7427 /* Handle decode of cmode/op here between ORR/BIC/MOVI */
7428 if ((a->cmode & 1) && a->cmode < 12) {
7429 /* For op=1, the imm will be inverted, so BIC becomes AND. */
7430 fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori;
7431 } else {
7432 /* There is one unallocated cmode/op combination in this space */
7433 if (a->cmode == 15 && a->op == 1 && a->q == 0) {
7434 return false;
7435 }
7436 fn = gen_movi;
7437 }
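/*
 * Example (per the AdvSIMDExpandImm rules implemented by asimd_imm_const):
 * cmode=0b1110, op=0 is MOVI with abcdefgh replicated into every byte,
 * while cmode=0b0001, op=1 is BIC of an 8-bit immediate against each
 * 32-bit element.
 */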
7438
7439 if (fp_access_check(s)) {
7440 uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op);
7441 gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64);
7442 }
7443 return true;
7444 }
7445
7446 /*
7447 * Advanced SIMD Shift by Immediate
7448 */
7449
7450 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn)
7451 {
7452 if (fp_access_check(s)) {
7453 gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz);
7454 }
7455 return true;
7456 }
7457
7458 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr)
7459 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr)
7460 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra)
7461 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra)
7462 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr)
7463 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr)
7464 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra)
7465 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra)
7466 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri)
7467 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli)
7468 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli);
7469 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli)
7470 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli)
7471 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui)
7472
7473 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u)
7474 {
7475 TCGv_i64 tcg_rn, tcg_rd;
7476 int esz = a->esz;
7477 int esize;
7478
7479 if (!fp_access_check(s)) {
7480 return true;
7481 }
7482
7483 /*
7484 * For the LL variants the store is larger than the load,
7485 * so if rd == rn we would overwrite parts of our input.
7486 * So load everything right now and use shifts in the main loop.
7487 */
7488 tcg_rd = tcg_temp_new_i64();
7489 tcg_rn = tcg_temp_new_i64();
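/* a->q selects the low (SSHLL/USHLL) or high (SSHLL2/USHLL2) 64-bit half of Vn. */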
7490 read_vec_element(s, tcg_rn, a->rn, a->q, MO_64);
7491
7492 esize = 8 << esz;
7493 for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7494 if (is_u) {
7495 tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize);
7496 } else {
7497 tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize);
7498 }
7499 tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm);
7500 write_vec_element(s, tcg_rd, a->rd, i, esz + 1);
7501 }
7502 clear_vec_high(s, true, a->rd);
7503 return true;
7504 }
7505
7506 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false)
7507 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true)
7508
7509 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7510 {
7511 assert(shift >= 0 && shift <= 64);
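/*
 * A shift count of 64 is not a valid TCG immediate shift; clamping to 63
 * yields the same all-copies-of-the-sign-bit result the architecture
 * specifies for SSHR #64.
 */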
7512 tcg_gen_sari_i64(dst, src, MIN(shift, 63));
7513 }
7514
7515 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7516 {
7517 assert(shift >= 0 && shift <= 64);
7518 if (shift == 64) {
7519 tcg_gen_movi_i64(dst, 0);
7520 } else {
7521 tcg_gen_shri_i64(dst, src, shift);
7522 }
7523 }
7524
7525 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7526 {
7527 gen_sshr_d(src, src, shift);
7528 tcg_gen_add_i64(dst, dst, src);
7529 }
7530
7531 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7532 {
7533 gen_ushr_d(src, src, shift);
7534 tcg_gen_add_i64(dst, dst, src);
7535 }
7536
7537 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7538 {
7539 assert(shift >= 0 && shift <= 32);
7540 if (shift) {
7541 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7542 tcg_gen_add_i64(dst, src, rnd);
7543 tcg_gen_sari_i64(dst, dst, shift);
7544 } else {
7545 tcg_gen_mov_i64(dst, src);
7546 }
7547 }
7548
7549 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7550 {
7551 assert(shift >= 0 && shift <= 32);
7552 if (shift) {
7553 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1));
7554 tcg_gen_add_i64(dst, src, rnd);
7555 tcg_gen_shri_i64(dst, dst, shift);
7556 } else {
7557 tcg_gen_mov_i64(dst, src);
7558 }
7559 }
7560
7561 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7562 {
7563 assert(shift >= 0 && shift <= 64);
7564 if (shift == 0) {
7565 tcg_gen_mov_i64(dst, src);
7566 } else if (shift == 64) {
7567 /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */
7568 tcg_gen_movi_i64(dst, 0);
7569 } else {
7570 TCGv_i64 rnd = tcg_temp_new_i64();
7571 tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7572 tcg_gen_sari_i64(dst, src, shift);
7573 tcg_gen_add_i64(dst, dst, rnd);
7574 }
7575 }
7576
7577 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7578 {
7579 assert(shift >= 0 && shift <= 64);
7580 if (shift == 0) {
7581 tcg_gen_mov_i64(dst, src);
7582 } else if (shift == 64) {
7583 /* Rounding will propagate bit 63 into bit 64. */
7584 tcg_gen_shri_i64(dst, src, 63);
7585 } else {
7586 TCGv_i64 rnd = tcg_temp_new_i64();
7587 tcg_gen_extract_i64(rnd, src, shift - 1, 1);
7588 tcg_gen_shri_i64(dst, src, shift);
7589 tcg_gen_add_i64(dst, dst, rnd);
7590 }
7591 }
7592
7593 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7594 {
7595 gen_srshr_d(src, src, shift);
7596 tcg_gen_add_i64(dst, dst, src);
7597 }
7598
7599 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7600 {
7601 gen_urshr_d(src, src, shift);
7602 tcg_gen_add_i64(dst, dst, src);
7603 }
7604
7605 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7606 {
7607 /* If shift is 64, dst is unchanged. */
7608 if (shift != 64) {
7609 tcg_gen_shri_i64(src, src, shift);
7610 tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift);
7611 }
7612 }
7613
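/*
 * SLI: insert bits [63-shift:0] of the source at bit position 'shift' of
 * the destination, leaving the low 'shift' bits of dst unchanged.
 */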
7614 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
7615 {
7616 tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift);
7617 }
7618
7619 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a,
7620 WideShiftImmFn * const fns[3], MemOp sign)
7621 {
7622 TCGv_i64 tcg_rn, tcg_rd;
7623 int esz = a->esz;
7624 int esize;
7625 WideShiftImmFn *fn;
7626
7627 tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7628
7629 if (!fp_access_check(s)) {
7630 return true;
7631 }
7632
7633 tcg_rn = tcg_temp_new_i64();
7634 tcg_rd = tcg_temp_new_i64();
7635 tcg_gen_movi_i64(tcg_rd, 0);
7636
7637 fn = fns[esz];
7638 esize = 8 << esz;
7639 for (int i = 0, elements = 8 >> esz; i < elements; i++) {
7640 read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign);
7641 fn(tcg_rn, tcg_rn, a->imm);
7642 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize);
7643 }
7644
7645 write_vec_element(s, tcg_rd, a->rd, a->q, MO_64);
7646 clear_vec_high(s, a->q, a->rd);
7647 return true;
7648 }
7649
7650 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7651 {
7652 tcg_gen_sari_i64(d, s, i);
7653 tcg_gen_ext16u_i64(d, d);
7654 gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7655 }
7656
7657 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7658 {
7659 tcg_gen_sari_i64(d, s, i);
7660 tcg_gen_ext32u_i64(d, d);
7661 gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7662 }
7663
7664 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7665 {
7666 gen_sshr_d(d, s, i);
7667 gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7668 }
7669
7670 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7671 {
7672 tcg_gen_shri_i64(d, s, i);
7673 gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7674 }
7675
7676 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7677 {
7678 tcg_gen_shri_i64(d, s, i);
7679 gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7680 }
7681
7682 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7683 {
7684 gen_ushr_d(d, s, i);
7685 gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7686 }
7687
7688 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7689 {
7690 tcg_gen_sari_i64(d, s, i);
7691 tcg_gen_ext16u_i64(d, d);
7692 gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7693 }
7694
7695 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7696 {
7697 tcg_gen_sari_i64(d, s, i);
7698 tcg_gen_ext32u_i64(d, d);
7699 gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7700 }
7701
7702 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7703 {
7704 gen_sshr_d(d, s, i);
7705 gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7706 }
7707
7708 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7709 {
7710 gen_srshr_bhs(d, s, i);
7711 tcg_gen_ext16u_i64(d, d);
7712 gen_helper_neon_narrow_sat_s8(d, tcg_env, d);
7713 }
7714
7715 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7716 {
7717 gen_srshr_bhs(d, s, i);
7718 tcg_gen_ext32u_i64(d, d);
7719 gen_helper_neon_narrow_sat_s16(d, tcg_env, d);
7720 }
7721
7722 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7723 {
7724 gen_srshr_d(d, s, i);
7725 gen_helper_neon_narrow_sat_s32(d, tcg_env, d);
7726 }
7727
7728 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7729 {
7730 gen_urshr_bhs(d, s, i);
7731 gen_helper_neon_narrow_sat_u8(d, tcg_env, d);
7732 }
7733
7734 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7735 {
7736 gen_urshr_bhs(d, s, i);
7737 gen_helper_neon_narrow_sat_u16(d, tcg_env, d);
7738 }
7739
7740 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7741 {
7742 gen_urshr_d(d, s, i);
7743 gen_helper_neon_narrow_sat_u32(d, tcg_env, d);
7744 }
7745
7746 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7747 {
7748 gen_srshr_bhs(d, s, i);
7749 tcg_gen_ext16u_i64(d, d);
7750 gen_helper_neon_unarrow_sat8(d, tcg_env, d);
7751 }
7752
7753 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7754 {
7755 gen_srshr_bhs(d, s, i);
7756 tcg_gen_ext32u_i64(d, d);
7757 gen_helper_neon_unarrow_sat16(d, tcg_env, d);
7758 }
7759
7760 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7761 {
7762 gen_srshr_d(d, s, i);
7763 gen_helper_neon_unarrow_sat32(d, tcg_env, d);
7764 }
7765
7766 static WideShiftImmFn * const shrn_fns[] = {
7767 tcg_gen_shri_i64,
7768 tcg_gen_shri_i64,
7769 gen_ushr_d,
7770 };
7771 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0)
7772
7773 static WideShiftImmFn * const rshrn_fns[] = {
7774 gen_urshr_bhs,
7775 gen_urshr_bhs,
7776 gen_urshr_d,
7777 };
7778 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0)
7779
7780 static WideShiftImmFn * const sqshrn_fns[] = {
7781 gen_sqshrn_b,
7782 gen_sqshrn_h,
7783 gen_sqshrn_s,
7784 };
7785 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN)
7786
7787 static WideShiftImmFn * const uqshrn_fns[] = {
7788 gen_uqshrn_b,
7789 gen_uqshrn_h,
7790 gen_uqshrn_s,
7791 };
7792 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0)
7793
7794 static WideShiftImmFn * const sqshrun_fns[] = {
7795 gen_sqshrun_b,
7796 gen_sqshrun_h,
7797 gen_sqshrun_s,
7798 };
7799 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN)
7800
7801 static WideShiftImmFn * const sqrshrn_fns[] = {
7802 gen_sqrshrn_b,
7803 gen_sqrshrn_h,
7804 gen_sqrshrn_s,
7805 };
7806 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN)
7807
7808 static WideShiftImmFn * const uqrshrn_fns[] = {
7809 gen_uqrshrn_b,
7810 gen_uqrshrn_h,
7811 gen_uqrshrn_s,
7812 };
7813 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0)
7814
7815 static WideShiftImmFn * const sqrshrun_fns[] = {
7816 gen_sqrshrun_b,
7817 gen_sqrshrun_h,
7818 gen_sqrshrun_s,
7819 };
7820 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN)
7821
7822 /*
7823 * Advanced SIMD Scalar Shift by Immediate
7824 */
7825
7826 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a,
7827 WideShiftImmFn *fn, bool accumulate,
7828 MemOp sign)
7829 {
7830 if (fp_access_check(s)) {
7831 TCGv_i64 rd = tcg_temp_new_i64();
7832 TCGv_i64 rn = tcg_temp_new_i64();
7833
7834 read_vec_element(s, rn, a->rn, 0, a->esz | sign);
7835 if (accumulate) {
7836 read_vec_element(s, rd, a->rd, 0, a->esz | sign);
7837 }
7838 fn(rd, rn, a->imm);
7839 write_fp_dreg(s, a->rd, rd);
7840 }
7841 return true;
7842 }
7843
7844 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0)
7845 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0)
7846 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0)
7847 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0)
7848 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0)
7849 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0)
7850 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0)
7851 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0)
7852 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0)
7853
7854 TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0)
7855 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0)
7856
7857 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i,
7858 NeonGenTwoOpEnvFn *fn)
7859 {
7860 TCGv_i32 t = tcg_temp_new_i32();
7861 tcg_gen_extrl_i64_i32(t, s);
7862 fn(t, tcg_env, t, tcg_constant_i32(i));
7863 tcg_gen_extu_i32_i64(d, t);
7864 }
7865
7866 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7867 {
7868 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8);
7869 }
7870
7871 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7872 {
7873 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16);
7874 }
7875
7876 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7877 {
7878 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32);
7879 }
7880
7881 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7882 {
7883 gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i));
7884 }
7885
7886 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7887 {
7888 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8);
7889 }
7890
7891 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7892 {
7893 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16);
7894 }
7895
7896 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7897 {
7898 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32);
7899 }
7900
7901 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7902 {
7903 gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i));
7904 }
7905
7906 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i)
7907 {
7908 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8);
7909 }
7910
7911 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i)
7912 {
7913 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16);
7914 }
7915
7916 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i)
7917 {
7918 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32);
7919 }
7920
7921 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i)
7922 {
7923 gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i));
7924 }
7925
7926 static WideShiftImmFn * const f_scalar_sqshli[] = {
7927 gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d
7928 };
7929
7930 static WideShiftImmFn * const f_scalar_uqshli[] = {
7931 gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d
7932 };
7933
7934 static WideShiftImmFn * const f_scalar_sqshlui[] = {
7935 gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d
7936 };
7937
7938 /* Note that the helpers sign-extend their inputs, so don't do it here. */
7939 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0)
7940 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0)
7941 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0)
7942
7943 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a,
7944 WideShiftImmFn * const fns[3],
7945 MemOp sign, bool zext)
7946 {
7947 MemOp esz = a->esz;
7948
7949 tcg_debug_assert(esz >= MO_8 && esz <= MO_32);
7950
7951 if (fp_access_check(s)) {
7952 TCGv_i64 rd = tcg_temp_new_i64();
7953 TCGv_i64 rn = tcg_temp_new_i64();
7954
7955 read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign);
7956 fns[esz](rd, rn, a->imm);
7957 if (zext) {
7958 tcg_gen_ext_i64(rd, rd, esz);
7959 }
7960 write_fp_dreg(s, a->rd, rd);
7961 }
7962 return true;
7963 }
7964
7965 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true)
7966 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true)
7967 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false)
7968 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false)
7969 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false)
7970 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false)
7971
7972 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed)
7973 {
7974 TCGv_i64 tcg_n, tcg_m, tcg_rd;
7975 tcg_rd = cpu_reg(s, a->rd);
7976
7977 if (!a->sf && is_signed) {
7978 tcg_n = tcg_temp_new_i64();
7979 tcg_m = tcg_temp_new_i64();
7980 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn));
7981 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm));
7982 } else {
7983 tcg_n = read_cpu_reg(s, a->rn, a->sf);
7984 tcg_m = read_cpu_reg(s, a->rm, a->sf);
7985 }
7986
7987 if (is_signed) {
7988 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
7989 } else {
7990 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
7991 }
7992
7993 if (!a->sf) { /* zero extend final result */
7994 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7995 }
7996 return true;
7997 }
7998
7999 TRANS(SDIV, do_div, a, true)
8000 TRANS(UDIV, do_div, a, false)
8001
8002 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
8003 * Note that it is the caller's responsibility to ensure that the
8004 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
8005 * mandated semantics for out of range shifts.
8006 */
8007 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
8008 enum a64_shift_type shift_type, TCGv_i64 shift_amount)
8009 {
8010 switch (shift_type) {
8011 case A64_SHIFT_TYPE_LSL:
8012 tcg_gen_shl_i64(dst, src, shift_amount);
8013 break;
8014 case A64_SHIFT_TYPE_LSR:
8015 tcg_gen_shr_i64(dst, src, shift_amount);
8016 break;
8017 case A64_SHIFT_TYPE_ASR:
8018 if (!sf) {
8019 tcg_gen_ext32s_i64(dst, src);
8020 }
8021 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
8022 break;
8023 case A64_SHIFT_TYPE_ROR:
8024 if (sf) {
8025 tcg_gen_rotr_i64(dst, src, shift_amount);
8026 } else {
8027 TCGv_i32 t0, t1;
8028 t0 = tcg_temp_new_i32();
8029 t1 = tcg_temp_new_i32();
8030 tcg_gen_extrl_i64_i32(t0, src);
8031 tcg_gen_extrl_i64_i32(t1, shift_amount);
8032 tcg_gen_rotr_i32(t0, t0, t1);
8033 tcg_gen_extu_i32_i64(dst, t0);
8034 }
8035 break;
8036 default:
8037 assert(FALSE); /* all shift types should be handled */
8038 break;
8039 }
8040
8041 if (!sf) { /* zero extend final result */
8042 tcg_gen_ext32u_i64(dst, dst);
8043 }
8044 }
8045
8046 /* Shift a TCGv src by immediate, put result in dst.
8047 * The shift amount must be in range (this should always be true as the
8048 * relevant instructions will UNDEF on bad shift immediates).
8049 */
8050 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
8051 enum a64_shift_type shift_type, unsigned int shift_i)
8052 {
8053 assert(shift_i < (sf ? 64 : 32));
8054
8055 if (shift_i == 0) {
8056 tcg_gen_mov_i64(dst, src);
8057 } else {
8058 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
8059 }
8060 }
8061
8062 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a,
8063 enum a64_shift_type shift_type)
8064 {
8065 TCGv_i64 tcg_shift = tcg_temp_new_i64();
8066 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8067 TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8068
8069 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31);
8070 shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift);
8071 return true;
8072 }
8073
8074 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL)
8075 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR)
8076 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR)
8077 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR)
8078
8079 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c)
8080 {
8081 TCGv_i64 tcg_acc, tcg_val, tcg_rd;
8082 TCGv_i32 tcg_bytes;
8083
8084 switch (a->esz) {
8085 case MO_8:
8086 case MO_16:
8087 case MO_32:
8088 tcg_val = tcg_temp_new_i64();
8089 tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz);
8090 break;
8091 case MO_64:
8092 tcg_val = cpu_reg(s, a->rm);
8093 break;
8094 default:
8095 g_assert_not_reached();
8096 }
8097 tcg_acc = cpu_reg(s, a->rn);
8098 tcg_bytes = tcg_constant_i32(1 << a->esz);
8099 tcg_rd = cpu_reg(s, a->rd);
8100
8101 if (crc32c) {
8102 gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
8103 } else {
8104 gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes);
8105 }
8106 return true;
8107 }
8108
8109 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false)
8110 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true)
8111
8112 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag)
8113 {
8114 TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true);
8115 TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true);
8116 TCGv_i64 tcg_d = cpu_reg(s, a->rd);
8117
8118 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
8119 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
8120
8121 if (setflag) {
8122 gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
8123 } else {
8124 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
8125 }
8126 return true;
8127 }
8128
8129 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false)
8130 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true)
8131
8132 static bool trans_IRG(DisasContext *s, arg_rrr *a)
8133 {
8134 if (dc_isar_feature(aa64_mte_insn_reg, s)) {
8135 TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd);
8136 TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn);
8137
8138 if (s->ata[0]) {
8139 gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm));
8140 } else {
8141 gen_address_with_allocation_tag0(tcg_rd, tcg_rn);
8142 }
8143 return true;
8144 }
8145 return false;
8146 }
8147
8148 static bool trans_GMI(DisasContext *s, arg_rrr *a)
8149 {
8150 if (dc_isar_feature(aa64_mte_insn_reg, s)) {
8151 TCGv_i64 t = tcg_temp_new_i64();
8152
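/*
 * GMI: set the bit corresponding to Xn's allocation tag (bits [59:56])
 * in the exclude mask taken from Xm.
 */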
8153 tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4);
8154 tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
8155 tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t);
8156 return true;
8157 }
8158 return false;
8159 }
8160
8161 static bool trans_PACGA(DisasContext *s, arg_rrr *a)
8162 {
8163 if (dc_isar_feature(aa64_pauth, s)) {
8164 gen_helper_pacga(cpu_reg(s, a->rd), tcg_env,
8165 cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm));
8166 return true;
8167 }
8168 return false;
8169 }
8170
8171 typedef void ArithOneOp(TCGv_i64, TCGv_i64);
8172
8173 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn)
8174 {
8175 fn(cpu_reg(s, rd), cpu_reg(s, rn));
8176 return true;
8177 }
8178
8179 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8180 {
8181 TCGv_i32 t32 = tcg_temp_new_i32();
8182
8183 tcg_gen_extrl_i64_i32(t32, tcg_rn);
8184 gen_helper_rbit(t32, t32);
8185 tcg_gen_extu_i32_i64(tcg_rd, t32);
8186 }
8187
8188 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask)
8189 {
8190 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8191
8192 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
8193 tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
8194 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
8195 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
8196 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
8197 }
8198
8199 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8200 {
8201 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff));
8202 }
8203
8204 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8205 {
8206 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull));
8207 }
8208
8209 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8210 {
8211 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
8212 }
8213
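/*
 * REV32 (64-bit): byte-reverse each 32-bit word. A full bswap64 also
 * swaps the two words, so rotate by 32 to put them back.
 */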
8214 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8215 {
8216 tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
8217 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
8218 }
8219
8220 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32)
8221 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32)
8222 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32)
8223 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64)
8224
8225 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8226 {
8227 TCGv_i32 t32 = tcg_temp_new_i32();
8228
8229 tcg_gen_extrl_i64_i32(t32, tcg_rn);
8230 tcg_gen_clzi_i32(t32, t32, 32);
8231 tcg_gen_extu_i32_i64(tcg_rd, t32);
8232 }
8233
8234 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8235 {
8236 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
8237 }
8238
8239 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
8240 {
8241 TCGv_i32 t32 = tcg_temp_new_i32();
8242
8243 tcg_gen_extrl_i64_i32(t32, tcg_rn);
8244 tcg_gen_clrsb_i32(t32, t32);
8245 tcg_gen_extu_i32_i64(tcg_rd, t32);
8246 }
8247
8248 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32)
8249 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? tcg_gen_clrsb_i64 : gen_cls32)
8250
8251 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn)
8252 {
8253 TCGv_i64 tcg_rd, tcg_rn;
8254
8255 if (a->z) {
8256 if (a->rn != 31) {
8257 return false;
8258 }
8259 tcg_rn = tcg_constant_i64(0);
8260 } else {
8261 tcg_rn = cpu_reg_sp(s, a->rn);
8262 }
8263 if (s->pauth_active) {
8264 tcg_rd = cpu_reg(s, a->rd);
8265 fn(tcg_rd, tcg_env, tcg_rd, tcg_rn);
8266 }
8267 return true;
8268 }
8269
8270 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia)
8271 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib)
8272 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda)
8273 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb)
8274
8275 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia)
8276 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib)
8277 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda)
8278 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb)
8279
8280 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn)
8281 {
8282 if (s->pauth_active) {
8283 TCGv_i64 tcg_rd = cpu_reg(s, rd);
8284 fn(tcg_rd, tcg_env, tcg_rd);
8285 }
8286 return true;
8287 }
8288
8289 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci)
8290 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd)
8291
8292 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a,
8293 ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags)
8294 {
8295 TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
8296
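/* For the 32-bit forms, a shift amount of 32 or more is an unallocated encoding. */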
8297 if (!a->sf && (a->sa & (1 << 5))) {
8298 return false;
8299 }
8300
8301 tcg_rd = cpu_reg(s, a->rd);
8302 tcg_rn = cpu_reg(s, a->rn);
8303
8304 tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8305 if (a->sa) {
8306 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8307 }
8308
8309 (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm);
8310 if (!a->sf) {
8311 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8312 }
8313 if (setflags) {
8314 gen_logic_CC(a->sf, tcg_rd);
8315 }
8316 return true;
8317 }
8318
8319 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a)
8320 {
8321 /*
8322 * Unshifted ORR and ORN with WZR/XZR are the standard encodings for
8323 * register-register MOV and MVN, so they are worth special casing.
8324 */
8325 if (a->sa == 0 && a->st == 0 && a->rn == 31) {
8326 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8327 TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8328
8329 if (a->n) {
8330 tcg_gen_not_i64(tcg_rd, tcg_rm);
8331 if (!a->sf) {
8332 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8333 }
8334 } else {
8335 if (a->sf) {
8336 tcg_gen_mov_i64(tcg_rd, tcg_rm);
8337 } else {
8338 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
8339 }
8340 }
8341 return true;
8342 }
8343
8344 return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false);
8345 }
8346
8347 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false)
8348 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true)
8349 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false)
8350
8351 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a,
8352 bool sub_op, bool setflags)
8353 {
8354 TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result;
8355
8356 if (a->sa > 4) {
8357 return false;
8358 }
8359
8360 /* non-flag setting ops may use SP */
8361 if (!setflags) {
8362 tcg_rd = cpu_reg_sp(s, a->rd);
8363 } else {
8364 tcg_rd = cpu_reg(s, a->rd);
8365 }
8366 tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf);
8367
8368 tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8369 ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa);
8370
8371 tcg_result = tcg_temp_new_i64();
8372 if (!setflags) {
8373 if (sub_op) {
8374 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8375 } else {
8376 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8377 }
8378 } else {
8379 if (sub_op) {
8380 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8381 } else {
8382 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8383 }
8384 }
8385
8386 if (a->sf) {
8387 tcg_gen_mov_i64(tcg_rd, tcg_result);
8388 } else {
8389 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8390 }
8391 return true;
8392 }
8393
8394 TRANS(ADD_ext, do_addsub_ext, a, false, false)
8395 TRANS(SUB_ext, do_addsub_ext, a, true, false)
8396 TRANS(ADDS_ext, do_addsub_ext, a, false, true)
8397 TRANS(SUBS_ext, do_addsub_ext, a, true, true)
8398
8399 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a,
8400 bool sub_op, bool setflags)
8401 {
8402 TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result;
8403
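/* ROR (st == 3) and, for the 32-bit forms, shift amounts >= 32 are unallocated. */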
8404 if (a->st == 3 || (!a->sf && (a->sa & 32))) {
8405 return false;
8406 }
8407
8408 tcg_rd = cpu_reg(s, a->rd);
8409 tcg_rn = read_cpu_reg(s, a->rn, a->sf);
8410 tcg_rm = read_cpu_reg(s, a->rm, a->sf);
8411
8412 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa);
8413
8414 tcg_result = tcg_temp_new_i64();
8415 if (!setflags) {
8416 if (sub_op) {
8417 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
8418 } else {
8419 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
8420 }
8421 } else {
8422 if (sub_op) {
8423 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8424 } else {
8425 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm);
8426 }
8427 }
8428
8429 if (a->sf) {
8430 tcg_gen_mov_i64(tcg_rd, tcg_result);
8431 } else {
8432 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
8433 }
8434 return true;
8435 }
8436
8437 TRANS(ADD_r, do_addsub_reg, a, false, false)
8438 TRANS(SUB_r, do_addsub_reg, a, true, false)
8439 TRANS(ADDS_r, do_addsub_reg, a, false, true)
8440 TRANS(SUBS_r, do_addsub_reg, a, true, true)
8441
8442 static bool do_mulh(DisasContext *s, arg_rrr *a,
8443 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
8444 {
8445 TCGv_i64 discard = tcg_temp_new_i64();
8446 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8447 TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
8448 TCGv_i64 tcg_rm = cpu_reg(s, a->rm);
8449
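/* muls2/mulu2 produce (low, high); SMULH/UMULH keep only the high 64 bits. */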
8450 fn(discard, tcg_rd, tcg_rn, tcg_rm);
8451 return true;
8452 }
8453
8454 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64)
8455 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64)
8456
8457 static bool do_muladd(DisasContext *s, arg_rrrr *a,
8458 bool sf, bool is_sub, MemOp mop)
8459 {
8460 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8461 TCGv_i64 tcg_op1, tcg_op2;
8462
8463 if (mop == MO_64) {
8464 tcg_op1 = cpu_reg(s, a->rn);
8465 tcg_op2 = cpu_reg(s, a->rm);
8466 } else {
8467 tcg_op1 = tcg_temp_new_i64();
8468 tcg_op2 = tcg_temp_new_i64();
8469 tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop);
8470 tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop);
8471 }
8472
8473 if (a->ra == 31 && !is_sub) {
8474 /* Special-case MADD with rA == XZR; it is the standard MUL alias */
8475 tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2);
8476 } else {
8477 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8478 TCGv_i64 tcg_ra = cpu_reg(s, a->ra);
8479
8480 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
8481 if (is_sub) {
8482 tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp);
8483 } else {
8484 tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp);
8485 }
8486 }
8487
8488 if (!sf) {
8489 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8490 }
8491 return true;
8492 }
8493
8494 TRANS(MADD_w, do_muladd, a, false, false, MO_64)
8495 TRANS(MSUB_w, do_muladd, a, false, true, MO_64)
8496 TRANS(MADD_x, do_muladd, a, true, false, MO_64)
8497 TRANS(MSUB_x, do_muladd, a, true, true, MO_64)
8498
8499 TRANS(SMADDL, do_muladd, a, true, false, MO_SL)
8500 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL)
8501 TRANS(UMADDL, do_muladd, a, true, false, MO_UL)
8502 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL)
8503
8504 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a,
8505 bool is_sub, bool setflags)
8506 {
8507 TCGv_i64 tcg_y, tcg_rn, tcg_rd;
8508
8509 tcg_rd = cpu_reg(s, a->rd);
8510 tcg_rn = cpu_reg(s, a->rn);
8511
8512 if (is_sub) {
8513 tcg_y = tcg_temp_new_i64();
8514 tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm));
8515 } else {
8516 tcg_y = cpu_reg(s, a->rm);
8517 }
8518
8519 if (setflags) {
8520 gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y);
8521 } else {
8522 gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y);
8523 }
8524 return true;
8525 }
8526
8527 TRANS(ADC, do_adc_sbc, a, false, false)
8528 TRANS(SBC, do_adc_sbc, a, true, false)
8529 TRANS(ADCS, do_adc_sbc, a, false, true)
8530 TRANS(SBCS, do_adc_sbc, a, true, true)
8531
8532 static bool trans_RMIF(DisasContext *s, arg_RMIF *a)
8533 {
8534 int mask = a->mask;
8535 TCGv_i64 tcg_rn;
8536 TCGv_i32 nzcv;
8537
8538 if (!dc_isar_feature(aa64_condm_4, s)) {
8539 return false;
8540 }
8541
8542 tcg_rn = read_cpu_reg(s, a->rn, 1);
8543 tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm);
8544
8545 nzcv = tcg_temp_new_i32();
8546 tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
8547
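/*
 * QEMU keeps N and V in bit 31 of cpu_NF/cpu_VF, C in bit 0 of cpu_CF,
 * and Z encoded as "cpu_ZF == 0 means Z is set", hence the differing
 * shifts and the inversion below.
 */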
8548 if (mask & 8) { /* N */
8549 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
8550 }
8551 if (mask & 4) { /* Z */
8552 tcg_gen_not_i32(cpu_ZF, nzcv);
8553 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
8554 }
8555 if (mask & 2) { /* C */
8556 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
8557 }
8558 if (mask & 1) { /* V */
8559 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
8560 }
8561 return true;
8562 }
8563
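/*
 * SETF8/SETF16 (shift == 24/16): N is taken from bit 7/15 of Wn, Z from
 * whether the low byte/halfword is zero, V from bit 8/16 XOR bit 7/15;
 * C is left unchanged.
 */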
8564 static bool do_setf(DisasContext *s, int rn, int shift)
8565 {
8566 TCGv_i32 tmp = tcg_temp_new_i32();
8567
8568 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
8569 tcg_gen_shli_i32(cpu_NF, tmp, shift);
8570 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
8571 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
8572 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
8573 return true;
8574 }
8575
8576 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24)
8577 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16)
8578
8579 /* CCMP, CCMN */
8580 static bool trans_CCMP(DisasContext *s, arg_CCMP *a)
8581 {
8582 TCGv_i32 tcg_t0 = tcg_temp_new_i32();
8583 TCGv_i32 tcg_t1 = tcg_temp_new_i32();
8584 TCGv_i32 tcg_t2 = tcg_temp_new_i32();
8585 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8586 TCGv_i64 tcg_rn, tcg_y;
8587 DisasCompare c;
8588 unsigned nzcv;
8589 bool has_andc;
8590
8591 /* Set T0 = !COND. */
8592 arm_test_cc(&c, a->cond);
8593 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
8594
8595 /* Load the arguments for the new comparison. */
8596 if (a->imm) {
8597 tcg_y = tcg_constant_i64(a->y);
8598 } else {
8599 tcg_y = cpu_reg(s, a->y);
8600 }
8601 tcg_rn = cpu_reg(s, a->rn);
8602
8603 /* Set the flags for the new comparison. */
8604 if (a->op) {
8605 gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8606 } else {
8607 gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y);
8608 }
8609
8610 /*
8611 * If COND was false, force the flags to #nzcv. Compute two masks
8612 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
8613 * For tcg hosts that support ANDC, we can make do with just T1.
8614 * In either case, allow the tcg optimizer to delete any unused mask.
8615 */
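/*
 * E.g. for the N flag: when the nzcv immediate has N set we OR in T1
 * (all-ones when COND fails, zero when it passes), forcing bit 31 to 1;
 * otherwise we AND with ~T1 (or with T2) so N is cleared when COND fails.
 */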
8616 tcg_gen_neg_i32(tcg_t1, tcg_t0);
8617 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
8618
8619 nzcv = a->nzcv;
8620 has_andc = tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0);
8621 if (nzcv & 8) { /* N */
8622 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
8623 } else {
8624 if (has_andc) {
8625 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
8626 } else {
8627 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
8628 }
8629 }
8630 if (nzcv & 4) { /* Z */
8631 if (has_andc) {
8632 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
8633 } else {
8634 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
8635 }
8636 } else {
8637 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
8638 }
8639 if (nzcv & 2) { /* C */
8640 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
8641 } else {
8642 if (has_andc) {
8643 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
8644 } else {
8645 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
8646 }
8647 }
8648 if (nzcv & 1) { /* V */
8649 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
8650 } else {
8651 if (has_andc) {
8652 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
8653 } else {
8654 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
8655 }
8656 }
8657 return true;
8658 }
8659
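/*
 * CSEL, CSINC, CSINV, CSNEG: else_inv/else_inc select the transform
 * applied to Rm when the condition is false.  With Rn == Rm == XZR
 * these decode as the CSET/CSETM aliases and reduce to a setcond.
 */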
8660 static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
8661 {
8662 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
8663 TCGv_i64 zero = tcg_constant_i64(0);
8664 DisasCompare64 c;
8665
8666 a64_test_cc(&c, a->cond);
8667
8668 if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) {
8669 /* CSET & CSETM. */
8670 if (a->else_inv) {
8671 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond),
8672 tcg_rd, c.value, zero);
8673 } else {
8674 tcg_gen_setcond_i64(tcg_invert_cond(c.cond),
8675 tcg_rd, c.value, zero);
8676 }
8677 } else {
8678 TCGv_i64 t_true = cpu_reg(s, a->rn);
8679 TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1);
8680
8681 if (a->else_inv && a->else_inc) {
8682 tcg_gen_neg_i64(t_false, t_false);
8683 } else if (a->else_inv) {
8684 tcg_gen_not_i64(t_false, t_false);
8685 } else if (a->else_inc) {
8686 tcg_gen_addi_i64(t_false, t_false, 1);
8687 }
8688 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
8689 }
8690
8691 if (!a->sf) {
8692 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8693 }
8694 return true;
8695 }
8696
8697 typedef struct FPScalar1Int {
8698 void (*gen_h)(TCGv_i32, TCGv_i32);
8699 void (*gen_s)(TCGv_i32, TCGv_i32);
8700 void (*gen_d)(TCGv_i64, TCGv_i64);
8701 } FPScalar1Int;
8702
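/*
 * Single-source FP ops that need no fp status pointer (FMOV, FABS,
 * FNEG).  'merging' selects the merging form of the register
 * writeback (write_fp_*reg_merging) rather than the zeroing form.
 */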
8703 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
8704 const FPScalar1Int *f,
8705 bool merging)
8706 {
8707 switch (a->esz) {
8708 case MO_64:
8709 if (fp_access_check(s)) {
8710 TCGv_i64 t = read_fp_dreg(s, a->rn);
8711 f->gen_d(t, t);
8712 if (merging) {
8713 write_fp_dreg_merging(s, a->rd, a->rd, t);
8714 } else {
8715 write_fp_dreg(s, a->rd, t);
8716 }
8717 }
8718 break;
8719 case MO_32:
8720 if (fp_access_check(s)) {
8721 TCGv_i32 t = read_fp_sreg(s, a->rn);
8722 f->gen_s(t, t);
8723 if (merging) {
8724 write_fp_sreg_merging(s, a->rd, a->rd, t);
8725 } else {
8726 write_fp_sreg(s, a->rd, t);
8727 }
8728 }
8729 break;
8730 case MO_16:
8731 if (!dc_isar_feature(aa64_fp16, s)) {
8732 return false;
8733 }
8734 if (fp_access_check(s)) {
8735 TCGv_i32 t = read_fp_hreg(s, a->rn);
8736 f->gen_h(t, t);
8737 if (merging) {
8738 write_fp_hreg_merging(s, a->rd, a->rd, t);
8739 } else {
8740 write_fp_sreg(s, a->rd, t);
8741 }
8742 }
8743 break;
8744 default:
8745 return false;
8746 }
8747 return true;
8748 }
8749
8750 static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a,
8751 const FPScalar1Int *fnormal,
8752 const FPScalar1Int *fah)
8753 {
8754 return do_fp1_scalar_int(s, a, s->fpcr_ah ? fah : fnormal, true);
8755 }
8756
8757 static const FPScalar1Int f_scalar_fmov = {
8758 tcg_gen_mov_i32,
8759 tcg_gen_mov_i32,
8760 tcg_gen_mov_i64,
8761 };
8762 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false)
8763
8764 static const FPScalar1Int f_scalar_fabs = {
8765 gen_vfp_absh,
8766 gen_vfp_abss,
8767 gen_vfp_absd,
8768 };
8769 static const FPScalar1Int f_scalar_ah_fabs = {
8770 gen_vfp_ah_absh,
8771 gen_vfp_ah_abss,
8772 gen_vfp_ah_absd,
8773 };
8774 TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs)
8775
8776 static const FPScalar1Int f_scalar_fneg = {
8777 gen_vfp_negh,
8778 gen_vfp_negs,
8779 gen_vfp_negd,
8780 };
8781 static const FPScalar1Int f_scalar_ah_fneg = {
8782 gen_vfp_ah_negh,
8783 gen_vfp_ah_negs,
8784 gen_vfp_ah_negd,
8785 };
8786 TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg)
8787
8788 typedef struct FPScalar1 {
8789 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);
8790 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr);
8791 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr);
8792 } FPScalar1;
8793
8794 static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a,
8795 const FPScalar1 *f, int rmode,
8796 ARMFPStatusFlavour fpsttype)
8797 {
8798 TCGv_i32 tcg_rmode = NULL;
8799 TCGv_ptr fpst;
8800 TCGv_i64 t64;
8801 TCGv_i32 t32;
8802 int check = fp_access_check_scalar_hsd(s, a->esz);
8803
8804 if (check <= 0) {
8805 return check == 0;
8806 }
8807
8808 fpst = fpstatus_ptr(fpsttype);
8809 if (rmode >= 0) {
8810 tcg_rmode = gen_set_rmode(rmode, fpst);
8811 }
8812
8813 switch (a->esz) {
8814 case MO_64:
8815 t64 = read_fp_dreg(s, a->rn);
8816 f->gen_d(t64, t64, fpst);
8817 write_fp_dreg_merging(s, a->rd, a->rd, t64);
8818 break;
8819 case MO_32:
8820 t32 = read_fp_sreg(s, a->rn);
8821 f->gen_s(t32, t32, fpst);
8822 write_fp_sreg_merging(s, a->rd, a->rd, t32);
8823 break;
8824 case MO_16:
8825 t32 = read_fp_hreg(s, a->rn);
8826 f->gen_h(t32, t32, fpst);
8827 write_fp_hreg_merging(s, a->rd, a->rd, t32);
8828 break;
8829 default:
8830 g_assert_not_reached();
8831 }
8832
8833 if (rmode >= 0) {
8834 gen_restore_rmode(tcg_rmode, fpst);
8835 }
8836 return true;
8837 }
8838
8839 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
8840 const FPScalar1 *f, int rmode)
8841 {
8842 return do_fp1_scalar_with_fpsttype(s, a, f, rmode,
8843 a->esz == MO_16 ?
8844 FPST_A64_F16 : FPST_A64);
8845 }
8846
8847 static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a,
8848 const FPScalar1 *f, int rmode)
8849 {
8850 return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz));
8851 }
8852
8853 static const FPScalar1 f_scalar_fsqrt = {
8854 gen_helper_vfp_sqrth,
8855 gen_helper_vfp_sqrts,
8856 gen_helper_vfp_sqrtd,
8857 };
8858 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1)
8859
8860 static const FPScalar1 f_scalar_frint = {
8861 gen_helper_advsimd_rinth,
8862 gen_helper_rints,
8863 gen_helper_rintd,
8864 };
8865 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
8866 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF)
8867 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF)
8868 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO)
8869 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
8870 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1)
8871
8872 static const FPScalar1 f_scalar_frintx = {
8873 gen_helper_advsimd_rinth_exact,
8874 gen_helper_rints_exact,
8875 gen_helper_rintd_exact,
8876 };
8877 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
8878
8879 static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a)
8880 {
8881 ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64;
8882 TCGv_i32 t32;
8883 int check;
8884
8885 if (!dc_isar_feature(aa64_bf16, s)) {
8886 return false;
8887 }
8888
8889 check = fp_access_check_scalar_hsd(s, a->esz);
8890
8891 if (check <= 0) {
8892 return check == 0;
8893 }
8894
8895 t32 = read_fp_sreg(s, a->rn);
8896 gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype));
8897 write_fp_hreg_merging(s, a->rd, a->rd, t32);
8898 return true;
8899 }
8900
8901 static const FPScalar1 f_scalar_frint32 = {
8902 NULL,
8903 gen_helper_frint32_s,
8904 gen_helper_frint32_d,
8905 };
8906 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a,
8907 &f_scalar_frint32, FPROUNDING_ZERO)
8908 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1)
8909
8910 static const FPScalar1 f_scalar_frint64 = {
8911 NULL,
8912 gen_helper_frint64_s,
8913 gen_helper_frint64_d,
8914 };
8915 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a,
8916 &f_scalar_frint64, FPROUNDING_ZERO)
8917 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1)
8918
8919 static const FPScalar1 f_scalar_frecpe = {
8920 gen_helper_recpe_f16,
8921 gen_helper_recpe_f32,
8922 gen_helper_recpe_f64,
8923 };
8924 static const FPScalar1 f_scalar_frecpe_rpres = {
8925 gen_helper_recpe_f16,
8926 gen_helper_recpe_rpres_f32,
8927 gen_helper_recpe_f64,
8928 };
8929 TRANS(FRECPE_s, do_fp1_scalar_ah, a,
8930 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
8931 &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1)
8932
8933 static const FPScalar1 f_scalar_frecpx = {
8934 gen_helper_frecpx_f16,
8935 gen_helper_frecpx_f32,
8936 gen_helper_frecpx_f64,
8937 };
8938 TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1)
8939
8940 static const FPScalar1 f_scalar_frsqrte = {
8941 gen_helper_rsqrte_f16,
8942 gen_helper_rsqrte_f32,
8943 gen_helper_rsqrte_f64,
8944 };
8945 static const FPScalar1 f_scalar_frsqrte_rpres = {
8946 gen_helper_rsqrte_f16,
8947 gen_helper_rsqrte_rpres_f32,
8948 gen_helper_rsqrte_f64,
8949 };
8950 TRANS(FRSQRTE_s, do_fp1_scalar_ah, a,
8951 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
8952 &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1)
8953
8954 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
8955 {
8956 if (fp_access_check(s)) {
8957 TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn);
8958 TCGv_i64 tcg_rd = tcg_temp_new_i64();
8959 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8960
8961 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst);
8962 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
8963 }
8964 return true;
8965 }
8966
8967 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a)
8968 {
8969 if (fp_access_check(s)) {
8970 TCGv_i32 tmp = read_fp_sreg(s, a->rn);
8971 TCGv_i32 ahp = get_ahp_flag();
8972 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8973
8974 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
8975 /* write_fp_hreg_merging is OK here because top half of result is zero */
8976 write_fp_hreg_merging(s, a->rd, a->rd, tmp);
8977 }
8978 return true;
8979 }
8980
8981 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
8982 {
8983 if (fp_access_check(s)) {
8984 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8985 TCGv_i32 tcg_rd = tcg_temp_new_i32();
8986 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
8987
8988 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst);
8989 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
8990 }
8991 return true;
8992 }
8993
8994 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
8995 {
8996 if (fp_access_check(s)) {
8997 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
8998 TCGv_i32 tcg_rd = tcg_temp_new_i32();
8999 TCGv_i32 ahp = get_ahp_flag();
9000 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9001
9002 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
9003 /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */
9004 write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd);
9005 }
9006 return true;
9007 }
9008
9009 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
9010 {
9011 if (fp_access_check(s)) {
9012 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
9013 TCGv_i32 tcg_rd = tcg_temp_new_i32();
9014 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
9015 TCGv_i32 tcg_ahp = get_ahp_flag();
9016
9017 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9018 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
9019 }
9020 return true;
9021 }
9022
9023 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
9024 {
9025 if (fp_access_check(s)) {
9026 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
9027 TCGv_i64 tcg_rd = tcg_temp_new_i64();
9028 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
9029 TCGv_i32 tcg_ahp = get_ahp_flag();
9030
9031 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
9032 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
9033 }
9034 return true;
9035 }
9036
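/*
 * Convert the integer in tcg_int to a float of size esz in Vd,
 * treating it as a fixed-point value with 'shift' fractional bits.
 * Shared by the general-register and fp-register SCVTF/UCVTF forms.
 */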
9037 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
9038 TCGv_i64 tcg_int, bool is_signed)
9039 {
9040 TCGv_ptr tcg_fpstatus;
9041 TCGv_i32 tcg_shift, tcg_single;
9042 TCGv_i64 tcg_double;
9043
9044 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9045 tcg_shift = tcg_constant_i32(shift);
9046
9047 switch (esz) {
9048 case MO_64:
9049 tcg_double = tcg_temp_new_i64();
9050 if (is_signed) {
9051 gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
9052 } else {
9053 gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus);
9054 }
9055 write_fp_dreg_merging(s, rd, rd, tcg_double);
9056 break;
9057
9058 case MO_32:
9059 tcg_single = tcg_temp_new_i32();
9060 if (is_signed) {
9061 gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9062 } else {
9063 gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9064 }
9065 write_fp_sreg_merging(s, rd, rd, tcg_single);
9066 break;
9067
9068 case MO_16:
9069 tcg_single = tcg_temp_new_i32();
9070 if (is_signed) {
9071 gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9072 } else {
9073 gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus);
9074 }
9075 write_fp_hreg_merging(s, rd, rd, tcg_single);
9076 break;
9077
9078 default:
9079 g_assert_not_reached();
9080 }
9081 return true;
9082 }
9083
9084 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed)
9085 {
9086 TCGv_i64 tcg_int;
9087 int check = fp_access_check_scalar_hsd(s, a->esz);
9088
9089 if (check <= 0) {
9090 return check == 0;
9091 }
9092
9093 if (a->sf) {
9094 tcg_int = cpu_reg(s, a->rn);
9095 } else {
9096 tcg_int = read_cpu_reg(s, a->rn, true);
9097 if (is_signed) {
9098 tcg_gen_ext32s_i64(tcg_int, tcg_int);
9099 } else {
9100 tcg_gen_ext32u_i64(tcg_int, tcg_int);
9101 }
9102 }
9103 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
9104 }
9105
9106 TRANS(SCVTF_g, do_cvtf_g, a, true)
9107 TRANS(UCVTF_g, do_cvtf_g, a, false)
9108
9109 /*
9110 * [US]CVTF (vector), scalar version.
9111 * Which sounds weird, but really just means input from fp register
9112 * instead of input from general register. Input and output element
9113 * size are always equal.
9114 */
9115 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed)
9116 {
9117 TCGv_i64 tcg_int;
9118 int check = fp_access_check_scalar_hsd(s, a->esz);
9119
9120 if (check <= 0) {
9121 return check == 0;
9122 }
9123
9124 tcg_int = tcg_temp_new_i64();
9125 read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0));
9126 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed);
9127 }
9128
9129 TRANS(SCVTF_f, do_cvtf_f, a, true)
9130 TRANS(UCVTF_f, do_cvtf_f, a, false)
9131
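/*
 * Convert the FP value in element 0 of Vn (size esz) to an integer;
 * 'out' gives the result size and signedness, 'rmode' the rounding
 * mode, and 'shift' the number of fixed-point fraction bits.  The
 * result is zero-extended into tcg_out.
 */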
9132 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz,
9133 TCGv_i64 tcg_out, int shift, int rn,
9134 ARMFPRounding rmode)
9135 {
9136 TCGv_ptr tcg_fpstatus;
9137 TCGv_i32 tcg_shift, tcg_rmode, tcg_single;
9138
9139 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9140 tcg_shift = tcg_constant_i32(shift);
9141 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
9142
9143 switch (esz) {
9144 case MO_64:
9145 read_vec_element(s, tcg_out, rn, 0, MO_64);
9146 switch (out) {
9147 case MO_64 | MO_SIGN:
9148 gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9149 break;
9150 case MO_64:
9151 gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9152 break;
9153 case MO_32 | MO_SIGN:
9154 gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9155 break;
9156 case MO_32:
9157 gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus);
9158 break;
9159 default:
9160 g_assert_not_reached();
9161 }
9162 break;
9163
9164 case MO_32:
9165 tcg_single = read_fp_sreg(s, rn);
9166 switch (out) {
9167 case MO_64 | MO_SIGN:
9168 gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9169 break;
9170 case MO_64:
9171 gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9172 break;
9173 case MO_32 | MO_SIGN:
9174 gen_helper_vfp_tosls(tcg_single, tcg_single,
9175 tcg_shift, tcg_fpstatus);
9176 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9177 break;
9178 case MO_32:
9179 gen_helper_vfp_touls(tcg_single, tcg_single,
9180 tcg_shift, tcg_fpstatus);
9181 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9182 break;
9183 default:
9184 g_assert_not_reached();
9185 }
9186 break;
9187
9188 case MO_16:
9189 tcg_single = read_fp_hreg(s, rn);
9190 switch (out) {
9191 case MO_64 | MO_SIGN:
9192 gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9193 break;
9194 case MO_64:
9195 gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus);
9196 break;
9197 case MO_32 | MO_SIGN:
9198 gen_helper_vfp_toslh(tcg_single, tcg_single,
9199 tcg_shift, tcg_fpstatus);
9200 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9201 break;
9202 case MO_32:
9203 gen_helper_vfp_toulh(tcg_single, tcg_single,
9204 tcg_shift, tcg_fpstatus);
9205 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9206 break;
9207 case MO_16 | MO_SIGN:
9208 gen_helper_vfp_toshh(tcg_single, tcg_single,
9209 tcg_shift, tcg_fpstatus);
9210 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9211 break;
9212 case MO_16:
9213 gen_helper_vfp_touhh(tcg_single, tcg_single,
9214 tcg_shift, tcg_fpstatus);
9215 tcg_gen_extu_i32_i64(tcg_out, tcg_single);
9216 break;
9217 default:
9218 g_assert_not_reached();
9219 }
9220 break;
9221
9222 default:
9223 g_assert_not_reached();
9224 }
9225
9226 gen_restore_rmode(tcg_rmode, tcg_fpstatus);
9227 }
9228
9229 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a,
9230 ARMFPRounding rmode, bool is_signed)
9231 {
9232 TCGv_i64 tcg_int;
9233 int check = fp_access_check_scalar_hsd(s, a->esz);
9234
9235 if (check <= 0) {
9236 return check == 0;
9237 }
9238
9239 tcg_int = cpu_reg(s, a->rd);
9240 do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? MO_SIGN : 0),
9241 a->esz, tcg_int, a->shift, a->rn, rmode);
9242
9243 if (!a->sf) {
9244 tcg_gen_ext32u_i64(tcg_int, tcg_int);
9245 }
9246 return true;
9247 }
9248
9249 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true)
9250 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false)
9251 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true)
9252 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false)
9253 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true)
9254 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false)
9255 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true)
9256 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false)
9257 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true)
9258 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false)
9259
9260 /*
9261 * FCVT* (vector), scalar version.
9262 * Which sounds weird, but really just means output to fp register
9263 * instead of output to general register. Input and output element
9264 * size are always equal.
9265 */
9266 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a,
9267 ARMFPRounding rmode, bool is_signed)
9268 {
9269 TCGv_i64 tcg_int;
9270 int check = fp_access_check_scalar_hsd(s, a->esz);
9271
9272 if (check <= 0) {
9273 return check == 0;
9274 }
9275
9276 tcg_int = tcg_temp_new_i64();
9277 do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0),
9278 a->esz, tcg_int, a->shift, a->rn, rmode);
9279
9280 if (!s->fpcr_nep) {
9281 clear_vec(s, a->rd);
9282 }
9283 write_vec_element(s, tcg_int, a->rd, 0, a->esz);
9284 return true;
9285 }
9286
9287 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true)
9288 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false)
9289 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true)
9290 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false)
9291 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true)
9292 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false)
9293 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true)
9294 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false)
9295 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true)
9296 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false)
9297
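/*
 * FJCVTZS (FEAT_JSCVT): convert double to a 32-bit integer with
 * JavaScript semantics.  The helper returns the Z flag in the high
 * half of its result; N, C and V are always cleared.
 */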
9298 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a)
9299 {
9300 if (!dc_isar_feature(aa64_jscvt, s)) {
9301 return false;
9302 }
9303 if (fp_access_check(s)) {
9304 TCGv_i64 t = read_fp_dreg(s, a->rn);
9305 TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64);
9306
9307 gen_helper_fjcvtzs(t, t, fpstatus);
9308
9309 tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t);
9310 tcg_gen_extrh_i64_i32(cpu_ZF, t);
9311 tcg_gen_movi_i32(cpu_CF, 0);
9312 tcg_gen_movi_i32(cpu_NF, 0);
9313 tcg_gen_movi_i32(cpu_VF, 0);
9314 }
9315 return true;
9316 }
9317
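/*
 * FMOV between general and FP registers: a raw bit copy with no
 * conversion, zero-extended into the wider destination as needed.
 * The _xu/_ux forms access the upper 64 bits of the 128-bit vector
 * register (FMOV Xd, Vn.D[1] and FMOV Vd.D[1], Xn).
 */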
9318 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a)
9319 {
9320 if (!dc_isar_feature(aa64_fp16, s)) {
9321 return false;
9322 }
9323 if (fp_access_check(s)) {
9324 TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9325 TCGv_i64 tmp = tcg_temp_new_i64();
9326 tcg_gen_ext16u_i64(tmp, tcg_rn);
9327 write_fp_dreg(s, a->rd, tmp);
9328 }
9329 return true;
9330 }
9331
9332 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a)
9333 {
9334 if (fp_access_check(s)) {
9335 TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9336 TCGv_i64 tmp = tcg_temp_new_i64();
9337 tcg_gen_ext32u_i64(tmp, tcg_rn);
9338 write_fp_dreg(s, a->rd, tmp);
9339 }
9340 return true;
9341 }
9342
9343 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a)
9344 {
9345 if (fp_access_check(s)) {
9346 TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9347 write_fp_dreg(s, a->rd, tcg_rn);
9348 }
9349 return true;
9350 }
9351
9352 static bool trans_FMOV_ux(DisasContext *s, arg_rr *a)
9353 {
9354 if (fp_access_check(s)) {
9355 TCGv_i64 tcg_rn = cpu_reg(s, a->rn);
9356 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd));
9357 clear_vec_high(s, true, a->rd);
9358 }
9359 return true;
9360 }
9361
9362 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a)
9363 {
9364 if (!dc_isar_feature(aa64_fp16, s)) {
9365 return false;
9366 }
9367 if (fp_access_check(s)) {
9368 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9369 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16));
9370 }
9371 return true;
9372 }
9373
9374 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a)
9375 {
9376 if (fp_access_check(s)) {
9377 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9378 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32));
9379 }
9380 return true;
9381 }
9382
9383 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a)
9384 {
9385 if (fp_access_check(s)) {
9386 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9387 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64));
9388 }
9389 return true;
9390 }
9391
9392 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a)
9393 {
9394 if (fp_access_check(s)) {
9395 TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
9396 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn));
9397 }
9398 return true;
9399 }
9400
9401 typedef struct ENVScalar1 {
9402 NeonGenOneOpEnvFn *gen_bhs[3];
9403 NeonGenOne64OpEnvFn *gen_d;
9404 } ENVScalar1;
9405
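/*
 * Saturating unary ops (SQABS, SQNEG): the helpers take tcg_env so
 * they can set FPSR.QC when the result saturates.
 */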
9406 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f)
9407 {
9408 if (!fp_access_check(s)) {
9409 return true;
9410 }
9411 if (a->esz == MO_64) {
9412 TCGv_i64 t = read_fp_dreg(s, a->rn);
9413 f->gen_d(t, tcg_env, t);
9414 write_fp_dreg(s, a->rd, t);
9415 } else {
9416 TCGv_i32 t = tcg_temp_new_i32();
9417
9418 read_vec_element_i32(s, t, a->rn, 0, a->esz);
9419 f->gen_bhs[a->esz](t, tcg_env, t);
9420 write_fp_sreg(s, a->rd, t);
9421 }
9422 return true;
9423 }
9424
9425 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f)
9426 {
9427 if (a->esz == MO_64 && !a->q) {
9428 return false;
9429 }
9430 if (!fp_access_check(s)) {
9431 return true;
9432 }
9433 if (a->esz == MO_64) {
9434 TCGv_i64 t = tcg_temp_new_i64();
9435
9436 for (int i = 0; i < 2; ++i) {
9437 read_vec_element(s, t, a->rn, i, MO_64);
9438 f->gen_d(t, tcg_env, t);
9439 write_vec_element(s, t, a->rd, i, MO_64);
9440 }
9441 } else {
9442 TCGv_i32 t = tcg_temp_new_i32();
9443 int n = (a->q ? 16 : 8) >> a->esz;
9444
9445 for (int i = 0; i < n; ++i) {
9446 read_vec_element_i32(s, t, a->rn, i, a->esz);
9447 f->gen_bhs[a->esz](t, tcg_env, t);
9448 write_vec_element_i32(s, t, a->rd, i, a->esz);
9449 }
9450 }
9451 clear_vec_high(s, a->q, a->rd);
9452 return true;
9453 }
9454
9455 static const ENVScalar1 f_scalar_sqabs = {
9456 { gen_helper_neon_qabs_s8,
9457 gen_helper_neon_qabs_s16,
9458 gen_helper_neon_qabs_s32 },
9459 gen_helper_neon_qabs_s64,
9460 };
9461 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs)
9462 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs)
9463
9464 static const ENVScalar1 f_scalar_sqneg = {
9465 { gen_helper_neon_qneg_s8,
9466 gen_helper_neon_qneg_s16,
9467 gen_helper_neon_qneg_s32 },
9468 gen_helper_neon_qneg_s64,
9469 };
9470 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg)
9471 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg)
9472
9473 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f)
9474 {
9475 if (fp_access_check(s)) {
9476 TCGv_i64 t = read_fp_dreg(s, a->rn);
9477 f(t, t);
9478 write_fp_dreg(s, a->rd, t);
9479 }
9480 return true;
9481 }
9482
9483 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64)
9484 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64)
9485
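/*
 * Compare-against-zero, scalar 64-bit form: Dd is set to all ones
 * when the comparison holds and to zero otherwise.
 */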
9486 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond)
9487 {
9488 if (fp_access_check(s)) {
9489 TCGv_i64 t = read_fp_dreg(s, a->rn);
9490 tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0));
9491 write_fp_dreg(s, a->rd, t);
9492 }
9493 return true;
9494 }
9495
9496 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT)
9497 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE)
9498 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE)
9499 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT)
9500 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ)
9501
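/*
 * Narrowing ops, scalar form: read a single element of size 2*esz
 * from Vn, narrow it, and write the esz-sized result to Vd with the
 * rest of the destination vector cleared.
 */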
9502 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a,
9503 ArithOneOp * const fn[3])
9504 {
9505 if (a->esz == MO_64) {
9506 return false;
9507 }
9508 if (fp_access_check(s)) {
9509 TCGv_i64 t = tcg_temp_new_i64();
9510
9511 read_vec_element(s, t, a->rn, 0, a->esz + 1);
9512 fn[a->esz](t, t);
9513 clear_vec(s, a->rd);
9514 write_vec_element(s, t, a->rd, 0, a->esz);
9515 }
9516 return true;
9517 }
9518
9519 #define WRAP_ENV(NAME) \
9520 static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \
9521 { gen_helper_##NAME(d, tcg_env, n); }
9522
9523 WRAP_ENV(neon_unarrow_sat8)
9524 WRAP_ENV(neon_unarrow_sat16)
9525 WRAP_ENV(neon_unarrow_sat32)
9526
9527 static ArithOneOp * const f_scalar_sqxtun[] = {
9528 gen_neon_unarrow_sat8,
9529 gen_neon_unarrow_sat16,
9530 gen_neon_unarrow_sat32,
9531 };
9532 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun)
9533
9534 WRAP_ENV(neon_narrow_sat_s8)
9535 WRAP_ENV(neon_narrow_sat_s16)
9536 WRAP_ENV(neon_narrow_sat_s32)
9537
9538 static ArithOneOp * const f_scalar_sqxtn[] = {
9539 gen_neon_narrow_sat_s8,
9540 gen_neon_narrow_sat_s16,
9541 gen_neon_narrow_sat_s32,
9542 };
9543 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn)
9544
9545 WRAP_ENV(neon_narrow_sat_u8)
9546 WRAP_ENV(neon_narrow_sat_u16)
9547 WRAP_ENV(neon_narrow_sat_u32)
9548
9549 static ArithOneOp * const f_scalar_uqxtn[] = {
9550 gen_neon_narrow_sat_u8,
9551 gen_neon_narrow_sat_u16,
9552 gen_neon_narrow_sat_u32,
9553 };
9554 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn)
9555
9556 static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a)
9557 {
9558 if (fp_access_check(s)) {
9559 /*
9560 * 64 bit to 32 bit float conversion
9561 * with von Neumann rounding (round to odd)
9562 */
9563 TCGv_i64 src = read_fp_dreg(s, a->rn);
9564 TCGv_i32 dst = tcg_temp_new_i32();
9565 gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64));
9566 write_fp_sreg_merging(s, a->rd, a->rd, dst);
9567 }
9568 return true;
9569 }
9570
9571 #undef WRAP_ENV
9572
9573 static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9574 {
9575 if (!a->q && a->esz == MO_64) {
9576 return false;
9577 }
9578 if (fp_access_check(s)) {
9579 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9580 }
9581 return true;
9582 }
9583
9584 TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs)
9585 TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg)
9586 TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not)
9587 TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt)
9588 TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit)
9589 TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0)
9590 TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0)
9591 TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0)
9592 TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0)
9593 TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0)
9594 TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16)
9595 TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32)
9596 TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe)
9597 TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte)
9598
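/* As do_gvec_fn2, but only for byte/halfword/word element sizes. */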
9599 static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9600 {
9601 if (a->esz == MO_64) {
9602 return false;
9603 }
9604 if (fp_access_check(s)) {
9605 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9606 }
9607 return true;
9608 }
9609
9610 TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
9611 TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
9612 TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
9613 TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
9614 TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
9615 TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
9616 TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)
9617
9618 static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a,
9619 ArithOneOp * const fn[3])
9620 {
9621 if (a->esz == MO_64) {
9622 return false;
9623 }
9624 if (fp_access_check(s)) {
9625 TCGv_i64 t0 = tcg_temp_new_i64();
9626 TCGv_i64 t1 = tcg_temp_new_i64();
9627
9628 read_vec_element(s, t0, a->rn, 0, MO_64);
9629 read_vec_element(s, t1, a->rn, 1, MO_64);
9630 fn[a->esz](t0, t0);
9631 fn[a->esz](t1, t1);
9632 write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32);
9633 write_vec_element(s, t1, a->rd, a->q ? 3 : 1, MO_32);
9634 clear_vec_high(s, a->q, a->rd);
9635 }
9636 return true;
9637 }
9638
9639 static ArithOneOp * const f_scalar_xtn[] = {
9640 gen_helper_neon_narrow_u8,
9641 gen_helper_neon_narrow_u16,
9642 tcg_gen_ext32u_i64,
9643 };
9644 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn)
9645 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun)
9646 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn)
9647 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn)
9648
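/*
 * Helpers for the narrowing FP conversions: convert the element(s)
 * in 'n' to the next narrower format and pack the result into the
 * low half of 'd'.
 */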
9649 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9650 {
9651 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9652 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9653 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9654 TCGv_i32 ahp = get_ahp_flag();
9655
9656 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n);
9657 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9658 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9659 tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16);
9660 tcg_gen_extu_i32_i64(d, tcg_lo);
9661 }
9662
9663 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n)
9664 {
9665 TCGv_i32 tmp = tcg_temp_new_i32();
9666 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9667
9668 gen_helper_vfp_fcvtsd(tmp, n, fpst);
9669 tcg_gen_extu_i32_i64(d, tmp);
9670 }
9671
9672 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
9673 {
9674 /*
9675 * 64 bit to 32 bit float conversion
9676 * with von Neumann rounding (round to odd)
9677 */
9678 TCGv_i32 tmp = tcg_temp_new_i32();
9679 gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
9680 tcg_gen_extu_i32_i64(d, tmp);
9681 }
9682
9683 static ArithOneOp * const f_vector_fcvtn[] = {
9684 NULL,
9685 gen_fcvtn_hs,
9686 gen_fcvtn_sd,
9687 };
9688 static ArithOneOp * const f_scalar_fcvtxn[] = {
9689 NULL,
9690 NULL,
9691 gen_fcvtxn_sd,
9692 };
9693 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn)
9694 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn)
9695
9696 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
9697 {
9698 TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
9699 TCGv_i32 tmp = tcg_temp_new_i32();
9700 gen_helper_bfcvt_pair(tmp, n, fpst);
9701 tcg_gen_extu_i32_i64(d, tmp);
9702 }
9703
9704 static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n)
9705 {
9706 TCGv_ptr fpst = fpstatus_ptr(FPST_AH);
9707 TCGv_i32 tmp = tcg_temp_new_i32();
9708 gen_helper_bfcvt_pair(tmp, n, fpst);
9709 tcg_gen_extu_i32_i64(d, tmp);
9710 }
9711
9712 static ArithOneOp * const f_vector_bfcvtn[2][3] = {
9713 {
9714 NULL,
9715 gen_bfcvtn_hs,
9716 NULL,
9717 }, {
9718 NULL,
9719 gen_bfcvtn_ah_hs,
9720 NULL,
9721 }
9722 };
9723 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a,
9724 f_vector_bfcvtn[s->fpcr_ah])
9725
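/*
 * SHLL/SHLL2: widen each element of the selected half of Vn to twice
 * its width and shift it left by the original element size in bits.
 */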
9726 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a)
9727 {
9728 static NeonGenWidenFn * const widenfns[3] = {
9729 gen_helper_neon_widen_u8,
9730 gen_helper_neon_widen_u16,
9731 tcg_gen_extu_i32_i64,
9732 };
9733 NeonGenWidenFn *widenfn;
9734 TCGv_i64 tcg_res[2];
9735 TCGv_i32 tcg_op;
9736 int part, pass;
9737
9738 if (a->esz == MO_64) {
9739 return false;
9740 }
9741 if (!fp_access_check(s)) {
9742 return true;
9743 }
9744
9745 tcg_op = tcg_temp_new_i32();
9746 widenfn = widenfns[a->esz];
9747 part = a->q ? 2 : 0;
9748
9749 for (pass = 0; pass < 2; pass++) {
9750 read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32);
9751 tcg_res[pass] = tcg_temp_new_i64();
9752 widenfn(tcg_res[pass], tcg_op);
9753 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz);
9754 }
9755
9756 for (pass = 0; pass < 2; pass++) {
9757 write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
9758 }
9759 return true;
9760 }
9761
9762 static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
9763 {
9764 int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9765
9766 if (check <= 0) {
9767 return check == 0;
9768 }
9769
9770 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz);
9771 return true;
9772 }
9773
9774 TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs)
9775 TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg)
9776
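/*
 * Vector forms of the single-source FP ops: apply the per-element
 * FPScalar1 generator across the vector, optionally forcing a
 * rounding mode for the FRINT* family.
 */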
9777 static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
9778 const FPScalar1 *f, int rmode)
9779 {
9780 TCGv_i32 tcg_rmode = NULL;
9781 TCGv_ptr fpst;
9782 int check = fp_access_check_vector_hsd(s, a->q, a->esz);
9783
9784 if (check <= 0) {
9785 return check == 0;
9786 }
9787
9788 fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
9789 if (rmode >= 0) {
9790 tcg_rmode = gen_set_rmode(rmode, fpst);
9791 }
9792
9793 if (a->esz == MO_64) {
9794 TCGv_i64 t64 = tcg_temp_new_i64();
9795
9796 for (int pass = 0; pass < 2; ++pass) {
9797 read_vec_element(s, t64, a->rn, pass, MO_64);
9798 f->gen_d(t64, t64, fpst);
9799 write_vec_element(s, t64, a->rd, pass, MO_64);
9800 }
9801 } else {
9802 TCGv_i32 t32 = tcg_temp_new_i32();
9803 void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr)
9804 = (a->esz == MO_16 ? f->gen_h : f->gen_s);
9805
9806 for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) {
9807 read_vec_element_i32(s, t32, a->rn, pass, a->esz);
9808 gen(t32, t32, fpst);
9809 write_vec_element_i32(s, t32, a->rd, pass, a->esz);
9810 }
9811 }
9812 clear_vec_high(s, a->q, a->rd);
9813
9814 if (rmode >= 0) {
9815 gen_restore_rmode(tcg_rmode, fpst);
9816 }
9817 return true;
9818 }
9819
9820 TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1)
9821
9822 TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN)
9823 TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF)
9824 TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF)
9825 TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO)
9826 TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY)
9827 TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1)
9828 TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1)
9829
9830 TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a,
9831 &f_scalar_frint32, FPROUNDING_ZERO)
9832 TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1)
9833 TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a,
9834 &f_scalar_frint64, FPROUNDING_ZERO)
9835 TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1)
9836
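/*
 * Emit a two-operand gvec op using an fp status pointer of the given
 * flavour; fns[] is indexed by element size minus one (h, s, d).
 */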
9837 static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz,
9838 bool is_q, int rd, int rn, int data,
9839 gen_helper_gvec_2_ptr * const fns[3],
9840 ARMFPStatusFlavour fpsttype)
9841 {
9842 int check = fp_access_check_vector_hsd(s, is_q, esz);
9843 TCGv_ptr fpst;
9844
9845 if (check <= 0) {
9846 return check == 0;
9847 }
9848
9849 fpst = fpstatus_ptr(fpsttype);
9850 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
9851 vec_full_reg_offset(s, rn), fpst,
9852 is_q ? 16 : 8, vec_full_reg_size(s),
9853 data, fns[esz - 1]);
9854 return true;
9855 }
9856
9857 static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
9858 int rd, int rn, int data,
9859 gen_helper_gvec_2_ptr * const fns[3])
9860 {
9861 return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
9862 esz == MO_16 ? FPST_A64_F16 :
9863 FPST_A64);
9864 }
9865
9866 static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q,
9867 int rd, int rn, int data,
9868 gen_helper_gvec_2_ptr * const fns[3])
9869 {
9870 return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data,
9871 fns, select_ah_fpst(s, esz));
9872 }
9873
9874 static gen_helper_gvec_2_ptr * const f_scvtf_v[] = {
9875 gen_helper_gvec_vcvt_sh,
9876 gen_helper_gvec_vcvt_sf,
9877 gen_helper_gvec_vcvt_sd,
9878 };
9879 TRANS(SCVTF_vi, do_gvec_op2_fpst,
9880 a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v)
9881 TRANS(SCVTF_vf, do_gvec_op2_fpst,
9882 a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v)
9883
9884 static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = {
9885 gen_helper_gvec_vcvt_uh,
9886 gen_helper_gvec_vcvt_uf,
9887 gen_helper_gvec_vcvt_ud,
9888 };
9889 TRANS(UCVTF_vi, do_gvec_op2_fpst,
9890 a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v)
9891 TRANS(UCVTF_vf, do_gvec_op2_fpst,
9892 a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v)
9893
9894 static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = {
9895 gen_helper_gvec_vcvt_rz_hs,
9896 gen_helper_gvec_vcvt_rz_fs,
9897 gen_helper_gvec_vcvt_rz_ds,
9898 };
9899 TRANS(FCVTZS_vf, do_gvec_op2_fpst,
9900 a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf)
9901
9902 static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = {
9903 gen_helper_gvec_vcvt_rz_hu,
9904 gen_helper_gvec_vcvt_rz_fu,
9905 gen_helper_gvec_vcvt_rz_du,
9906 };
9907 TRANS(FCVTZU_vf, do_gvec_op2_fpst,
9908 a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf)
9909
9910 static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = {
9911 gen_helper_gvec_vcvt_rm_sh,
9912 gen_helper_gvec_vcvt_rm_ss,
9913 gen_helper_gvec_vcvt_rm_sd,
9914 };
9915
9916 static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = {
9917 gen_helper_gvec_vcvt_rm_uh,
9918 gen_helper_gvec_vcvt_rm_us,
9919 gen_helper_gvec_vcvt_rm_ud,
9920 };
9921
9922 TRANS(FCVTNS_vi, do_gvec_op2_fpst,
9923 a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi)
9924 TRANS(FCVTNU_vi, do_gvec_op2_fpst,
9925 a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi)
9926 TRANS(FCVTPS_vi, do_gvec_op2_fpst,
9927 a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi)
9928 TRANS(FCVTPU_vi, do_gvec_op2_fpst,
9929 a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi)
9930 TRANS(FCVTMS_vi, do_gvec_op2_fpst,
9931 a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi)
9932 TRANS(FCVTMU_vi, do_gvec_op2_fpst,
9933 a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi)
9934 TRANS(FCVTZS_vi, do_gvec_op2_fpst,
9935 a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi)
9936 TRANS(FCVTZU_vi, do_gvec_op2_fpst,
9937 a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi)
9938 TRANS(FCVTAS_vi, do_gvec_op2_fpst,
9939 a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi)
9940 TRANS(FCVTAU_vi, do_gvec_op2_fpst,
9941 a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi)
9942
9943 static gen_helper_gvec_2_ptr * const f_fceq0[] = {
9944 gen_helper_gvec_fceq0_h,
9945 gen_helper_gvec_fceq0_s,
9946 gen_helper_gvec_fceq0_d,
9947 };
9948 TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0)
9949
9950 static gen_helper_gvec_2_ptr * const f_fcgt0[] = {
9951 gen_helper_gvec_fcgt0_h,
9952 gen_helper_gvec_fcgt0_s,
9953 gen_helper_gvec_fcgt0_d,
9954 };
9955 TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0)
9956
9957 static gen_helper_gvec_2_ptr * const f_fcge0[] = {
9958 gen_helper_gvec_fcge0_h,
9959 gen_helper_gvec_fcge0_s,
9960 gen_helper_gvec_fcge0_d,
9961 };
9962 TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0)
9963
9964 static gen_helper_gvec_2_ptr * const f_fclt0[] = {
9965 gen_helper_gvec_fclt0_h,
9966 gen_helper_gvec_fclt0_s,
9967 gen_helper_gvec_fclt0_d,
9968 };
9969 TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0)
9970
9971 static gen_helper_gvec_2_ptr * const f_fcle0[] = {
9972 gen_helper_gvec_fcle0_h,
9973 gen_helper_gvec_fcle0_s,
9974 gen_helper_gvec_fcle0_d,
9975 };
9976 TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0)
9977
9978 static gen_helper_gvec_2_ptr * const f_frecpe[] = {
9979 gen_helper_gvec_frecpe_h,
9980 gen_helper_gvec_frecpe_s,
9981 gen_helper_gvec_frecpe_d,
9982 };
9983 static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = {
9984 gen_helper_gvec_frecpe_h,
9985 gen_helper_gvec_frecpe_rpres_s,
9986 gen_helper_gvec_frecpe_d,
9987 };
9988 TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
9989 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe)
9990
9991 static gen_helper_gvec_2_ptr * const f_frsqrte[] = {
9992 gen_helper_gvec_frsqrte_h,
9993 gen_helper_gvec_frsqrte_s,
9994 gen_helper_gvec_frsqrte_d,
9995 };
9996 static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = {
9997 gen_helper_gvec_frsqrte_h,
9998 gen_helper_gvec_frsqrte_rpres_s,
9999 gen_helper_gvec_frsqrte_d,
10000 };
10001 TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0,
10002 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte)
10003
10004 static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
10005 {
10006 /* Handle 2-reg-misc ops which are widening (so each size element
10007 * in the source becomes a 2*size element in the destination).
10008 * The only instruction like this is FCVTL.
10009 */
10010 int pass;
10011 TCGv_ptr fpst;
10012
10013 if (!fp_access_check(s)) {
10014 return true;
10015 }
10016
10017 if (a->esz == MO_64) {
10018 /* 32 -> 64 bit fp conversion */
10019 TCGv_i64 tcg_res[2];
10020 TCGv_i32 tcg_op = tcg_temp_new_i32();
10021 int srcelt = a->q ? 2 : 0;
10022
10023 fpst = fpstatus_ptr(FPST_A64);
10024
10025 for (pass = 0; pass < 2; pass++) {
10026 tcg_res[pass] = tcg_temp_new_i64();
10027 read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32);
10028 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst);
10029 }
10030 for (pass = 0; pass < 2; pass++) {
10031 write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64);
10032 }
10033 } else {
10034 /* 16 -> 32 bit fp conversion */
10035 int srcelt = a->q ? 4 : 0;
10036 TCGv_i32 tcg_res[4];
10037 TCGv_i32 ahp = get_ahp_flag();
10038
10039 fpst = fpstatus_ptr(FPST_A64_F16);
10040
10041 for (pass = 0; pass < 4; pass++) {
10042 tcg_res[pass] = tcg_temp_new_i32();
10043 read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16);
10044 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
10045 fpst, ahp);
10046 }
10047 for (pass = 0; pass < 4; pass++) {
10048 write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32);
10049 }
10050 }
10051 clear_vec_high(s, true, a->rd);
10052 return true;
10053 }
10054
10055 static bool trans_OK(DisasContext *s, arg_OK *a)
10056 {
10057 return true;
10058 }
10059
10060 static bool trans_FAIL(DisasContext *s, arg_OK *a)
10061 {
10062 s->is_nonstreaming = true;
10063 return true;
10064 }
10065
10066 /**
10067 * btype_destination_ok:
10068 * @insn: The instruction at the branch destination
10069 * @bt: SCTLR_ELx.BT
10070 * @btype: PSTATE.BTYPE, and is non-zero
10071 *
10072 * On a guarded page, there are a limited number of insns
10073 * that may be present at the branch target:
10074 * - branch target identifiers,
10075 * - paciasp, pacibsp,
10076 * - BRK insn
10077 * - HLT insn
10078 * Anything else causes a Branch Target Exception.
10079 *
10080 * Return true if the branch is compatible, false to raise BTITRAP.
10081 */
10082 static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
10083 {
10084 if ((insn & 0xfffff01fu) == 0xd503201fu) {
10085 /* HINT space */
10086 switch (extract32(insn, 5, 7)) {
10087 case 0b011001: /* PACIASP */
10088 case 0b011011: /* PACIBSP */
10089 /*
10090 * If SCTLR_ELx.BT, then PACI*SP are not compatible
10091 * with btype == 3. Otherwise all btype are ok.
10092 */
10093 return !bt || btype != 3;
10094 case 0b100000: /* BTI */
10095 /* Not compatible with any btype. */
10096 return false;
10097 case 0b100010: /* BTI c */
10098 /* Not compatible with btype == 3 */
10099 return btype != 3;
10100 case 0b100100: /* BTI j */
10101 /* Not compatible with btype == 2 */
10102 return btype != 2;
10103 case 0b100110: /* BTI jc */
10104 /* Compatible with any btype. */
10105 return true;
10106 }
10107 } else {
10108 switch (insn & 0xffe0001fu) {
10109 case 0xd4200000u: /* BRK */
10110 case 0xd4400000u: /* HLT */
10111 /* Give priority to the breakpoint exception. */
10112 return true;
10113 }
10114 }
10115 return false;
10116 }
10117
10118 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
10119 CPUState *cpu)
10120 {
10121 DisasContext *dc = container_of(dcbase, DisasContext, base);
10122 CPUARMState *env = cpu_env(cpu);
10123 ARMCPU *arm_cpu = env_archcpu(env);
10124 CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
10125 int bound, core_mmu_idx;
10126
10127 dc->isar = &arm_cpu->isar;
10128 dc->condjmp = 0;
10129 dc->pc_save = dc->base.pc_first;
10130 dc->aarch64 = true;
10131 dc->thumb = false;
10132 dc->sctlr_b = 0;
10133 dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
10134 dc->condexec_mask = 0;
10135 dc->condexec_cond = 0;
10136 core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
10137 dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
10138 dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
10139 dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
10140 dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
10141 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10142 #if !defined(CONFIG_USER_ONLY)
10143 dc->user = (dc->current_el == 0);
10144 #endif
10145 dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
10146 dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
10147 dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
10148 dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
10149 dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
10150 dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
10151 dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
10152 dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
10153 dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
10154 dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
10155 dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
10156 dc->bt = EX_TBFLAG_A64(tb_flags, BT);
10157 dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
10158 dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
10159 dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
10160 dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
10161 dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
10162 dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
10163 dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
10164 dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
10165 dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
10166 dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
10167 dc->nv = EX_TBFLAG_A64(tb_flags, NV);
10168 dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
10169 dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
10170 dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
10171 dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
10172 dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH);
10173 dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP);
10174 dc->vec_len = 0;
10175 dc->vec_stride = 0;
10176 dc->cp_regs = arm_cpu->cp_regs;
10177 dc->features = env->features;
10178 dc->dcz_blocksize = arm_cpu->dcz_blocksize;
10179 dc->gm_blocksize = arm_cpu->gm_blocksize;
10180
10181 #ifdef CONFIG_USER_ONLY
10182 /* In sve_probe_page, we assume TBI is enabled. */
10183 tcg_debug_assert(dc->tbid & 1);
10184 #endif
10185
10186 dc->lse2 = dc_isar_feature(aa64_lse2, dc);
10187
10188 /* Single step state. The code-generation logic here is:
10189 * SS_ACTIVE == 0:
10190 * generate code with no special handling for single-stepping (except
10191 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10192 * this happens anyway because those changes are all system register or
10193 * PSTATE writes).
10194 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10195 * emit code for one insn
10196 * emit code to clear PSTATE.SS
10197 * emit code to generate software step exception for completed step
10198 * end TB (as usual for having generated an exception)
10199 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10200 * emit code to generate a software step exception
10201 * end the TB
10202 */
10203 dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
10204 dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
10205 dc->is_ldex = false;
10206
10207 /* Bound the number of insns to execute to those left on the page. */
10208 bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
10209
10210 /* If architectural single step active, limit to 1. */
10211 if (dc->ss_active) {
10212 bound = 1;
10213 }
10214 dc->base.max_insns = MIN(dc->base.max_insns, bound);
10215 }
10216
10217 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
10218 {
10219 }
10220
10221 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
10222 {
10223 DisasContext *dc = container_of(dcbase, DisasContext, base);
10224 target_ulong pc_arg = dc->base.pc_next;
10225
10226 if (tb_cflags(dcbase->tb) & CF_PCREL) {
10227 pc_arg &= ~TARGET_PAGE_MASK;
10228 }
10229 tcg_gen_insn_start(pc_arg, 0, 0);
10230 dc->insn_start_updated = false;
10231 }
10232
10233 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10234 {
10235 DisasContext *s = container_of(dcbase, DisasContext, base);
10236 CPUARMState *env = cpu_env(cpu);
10237 uint64_t pc = s->base.pc_next;
10238 uint32_t insn;
10239
10240 /* Singlestep exceptions have the highest priority. */
10241 if (s->ss_active && !s->pstate_ss) {
10242 /* Singlestep state is Active-pending.
10243 * If we're in this state at the start of a TB then either
10244 * a) we just took an exception to an EL which is being debugged
10245 * and this is the first insn in the exception handler
10246 * b) debug exceptions were masked and we just unmasked them
10247 * without changing EL (eg by clearing PSTATE.D)
10248 * In either case we're going to take a swstep exception in the
10249 * "did not step an insn" case, and so the syndrome ISV and EX
10250 * bits should be zero.
10251 */
10252 assert(s->base.num_insns == 1);
10253 gen_swstep_exception(s, 0, 0);
10254 s->base.is_jmp = DISAS_NORETURN;
10255 s->base.pc_next = pc + 4;
10256 return;
10257 }
10258
10259 if (pc & 3) {
10260 /*
10261 * PC alignment fault. This has priority over the instruction abort
10262 * that we would receive from a translation fault via arm_ldl_code.
10263 * This should only be possible after an indirect branch, at the
10264 * start of the TB.
10265 */
10266 assert(s->base.num_insns == 1);
10267 gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
10268 s->base.is_jmp = DISAS_NORETURN;
10269 s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
10270 return;
10271 }
10272
10273 s->pc_curr = pc;
10274 insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
10275 s->insn = insn;
10276 s->base.pc_next = pc + 4;
10277
10278 s->fp_access_checked = 0;
10279 s->sve_access_checked = 0;
10280
10281 if (s->pstate_il) {
10282 /*
10283 * Illegal execution state. This has priority over BTI
10284 * exceptions, but comes after instruction abort exceptions.
10285 */
10286 gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
10287 return;
10288 }
10289
10290 if (dc_isar_feature(aa64_bti, s)) {
10291 if (s->base.num_insns == 1) {
10292 /* First insn can have btype set to non-zero. */
10293 tcg_debug_assert(s->btype >= 0);
10294
10295 /*
10296 * Note that the Branch Target Exception has fairly high
10297 * priority -- below debugging exceptions but above most
10298 * everything else. This allows us to handle this now
10299 * instead of waiting until the insn is otherwise decoded.
10300 *
10301 * We can check all but the guarded page check here;
10302 * defer the latter to a helper.
10303 */
10304 if (s->btype != 0
10305 && !btype_destination_ok(insn, s->bt, s->btype)) {
10306 gen_helper_guarded_page_check(tcg_env);
10307 }
10308 } else {
10309 /* Not the first insn: btype must be 0. */
10310 tcg_debug_assert(s->btype == 0);
10311 }
10312 }
10313
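    /*
     * disas_sme_fa64() only classifies the insn: encodings that are
     * illegal in streaming SVE mode set s->is_nonstreaming via
     * trans_FAIL() above, and the later access checks raise the SME
     * trap if needed.
     */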
10314 s->is_nonstreaming = false;
10315 if (s->sme_trap_nonstreaming) {
10316 disas_sme_fa64(s, insn);
10317 }
10318
10319 if (!disas_a64(s, insn) &&
10320 !disas_sme(s, insn) &&
10321 !disas_sve(s, insn)) {
10322 unallocated_encoding(s);
10323 }
10324
10325 /*
10326 * After execution of most insns, btype is reset to 0.
10327 * Note that we set btype == -1 when the insn sets btype.
10328 */
10329 if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
10330 reset_btype(s);
10331 }
10332 }
10333
10334 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
10335 {
10336 DisasContext *dc = container_of(dcbase, DisasContext, base);
10337
10338 if (unlikely(dc->ss_active)) {
10339 /* Note that this means single stepping WFI doesn't halt the CPU.
10340 * For conditional branch insns this is harmless unreachable code as
10341 * gen_goto_tb() has already handled emitting the debug exception
10342 * (and thus a tb-jump is not possible when singlestepping).
10343 */
10344 switch (dc->base.is_jmp) {
10345 default:
10346 gen_a64_update_pc(dc, 4);
10347 /* fall through */
10348 case DISAS_EXIT:
10349 case DISAS_JUMP:
10350 gen_step_complete_exception(dc);
10351 break;
10352 case DISAS_NORETURN:
10353 break;
10354 }
10355 } else {
10356 switch (dc->base.is_jmp) {
10357 case DISAS_NEXT:
10358 case DISAS_TOO_MANY:
10359 gen_goto_tb(dc, 1, 4);
10360 break;
10361 default:
10362 case DISAS_UPDATE_EXIT:
10363 gen_a64_update_pc(dc, 4);
10364 /* fall through */
10365 case DISAS_EXIT:
10366 tcg_gen_exit_tb(NULL, 0);
10367 break;
10368 case DISAS_UPDATE_NOCHAIN:
10369 gen_a64_update_pc(dc, 4);
10370 /* fall through */
10371 case DISAS_JUMP:
10372 tcg_gen_lookup_and_goto_ptr();
10373 break;
10374 case DISAS_NORETURN:
10375 case DISAS_SWI:
10376 break;
10377 case DISAS_WFE:
10378 gen_a64_update_pc(dc, 4);
10379 gen_helper_wfe(tcg_env);
10380 break;
10381 case DISAS_YIELD:
10382 gen_a64_update_pc(dc, 4);
10383 gen_helper_yield(tcg_env);
10384 break;
10385 case DISAS_WFI:
10386 /*
10387 * This is a special case because we don't want to just halt
10388 * the CPU if trying to debug across a WFI.
10389 */
10390 gen_a64_update_pc(dc, 4);
10391 gen_helper_wfi(tcg_env, tcg_constant_i32(4));
10392 /*
10393 * The helper doesn't necessarily throw an exception, but we
10394 * must go back to the main loop to check for interrupts anyway.
10395 */
10396 tcg_gen_exit_tb(NULL, 0);
10397 break;
10398 }
10399 }
10400 }
10401
10402 const TranslatorOps aarch64_translator_ops = {
10403 .init_disas_context = aarch64_tr_init_disas_context,
10404 .tb_start = aarch64_tr_tb_start,
10405 .insn_start = aarch64_tr_insn_start,
10406 .translate_insn = aarch64_tr_translate_insn,
10407 .tb_stop = aarch64_tr_tb_stop,
10408 };
10409