1 /* 2 * AArch64 translation 3 * 4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de> 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 #include "qemu/osdep.h" 20 21 #include "exec/exec-all.h" 22 #include "translate.h" 23 #include "translate-a64.h" 24 #include "qemu/log.h" 25 #include "arm_ldst.h" 26 #include "semihosting/semihost.h" 27 #include "cpregs.h" 28 29 static TCGv_i64 cpu_X[32]; 30 static TCGv_i64 cpu_pc; 31 32 /* Load/store exclusive handling */ 33 static TCGv_i64 cpu_exclusive_high; 34 35 static const char *regnames[] = { 36 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 37 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 38 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 39 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp" 40 }; 41 42 enum a64_shift_type { 43 A64_SHIFT_TYPE_LSL = 0, 44 A64_SHIFT_TYPE_LSR = 1, 45 A64_SHIFT_TYPE_ASR = 2, 46 A64_SHIFT_TYPE_ROR = 3 47 }; 48 49 /* 50 * Helpers for extracting complex instruction fields 51 */ 52 53 /* 54 * For load/store with an unsigned 12 bit immediate scaled by the element 55 * size. The input has the immediate field in bits [14:3] and the element 56 * size in [2:0]. 57 */ 58 static int uimm_scaled(DisasContext *s, int x) 59 { 60 unsigned imm = x >> 3; 61 unsigned scale = extract32(x, 0, 3); 62 return imm << scale; 63 } 64 65 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */ 66 static int scale_by_log2_tag_granule(DisasContext *s, int x) 67 { 68 return x << LOG2_TAG_GRANULE; 69 } 70 71 /* 72 * Include the generated decoders. 73 */ 74 75 #include "decode-sme-fa64.c.inc" 76 #include "decode-a64.c.inc" 77 78 /* initialize TCG globals. */ 79 void a64_translate_init(void) 80 { 81 int i; 82 83 cpu_pc = tcg_global_mem_new_i64(tcg_env, 84 offsetof(CPUARMState, pc), 85 "pc"); 86 for (i = 0; i < 32; i++) { 87 cpu_X[i] = tcg_global_mem_new_i64(tcg_env, 88 offsetof(CPUARMState, xregs[i]), 89 regnames[i]); 90 } 91 92 cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env, 93 offsetof(CPUARMState, exclusive_high), "exclusive_high"); 94 } 95 96 /* 97 * Return the core mmu_idx to use for A64 load/store insns which 98 * have a "unprivileged load/store" variant. Those insns access 99 * EL0 if executed from an EL which has control over EL0 (usually 100 * EL1) but behave like normal loads and stores if executed from 101 * elsewhere (eg EL3). 102 * 103 * @unpriv : true for the unprivileged encoding; false for the 104 * normal encoding (in which case we will return the same 105 * thing as get_mem_index(). 106 */ 107 static int get_a64_user_mem_index(DisasContext *s, bool unpriv) 108 { 109 /* 110 * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL, 111 * which is the usual mmu_idx for this cpu state. 112 */ 113 ARMMMUIdx useridx = s->mmu_idx; 114 115 if (unpriv && s->unpriv) { 116 /* 117 * We have pre-computed the condition for AccType_UNPRIV. 
118 * Therefore we should never get here with a mmu_idx for 119 * which we do not know the corresponding user mmu_idx. 120 */ 121 switch (useridx) { 122 case ARMMMUIdx_E10_1: 123 case ARMMMUIdx_E10_1_PAN: 124 useridx = ARMMMUIdx_E10_0; 125 break; 126 case ARMMMUIdx_E20_2: 127 case ARMMMUIdx_E20_2_PAN: 128 useridx = ARMMMUIdx_E20_0; 129 break; 130 default: 131 g_assert_not_reached(); 132 } 133 } 134 return arm_to_core_mmu_idx(useridx); 135 } 136 137 static void set_btype_raw(int val) 138 { 139 tcg_gen_st_i32(tcg_constant_i32(val), tcg_env, 140 offsetof(CPUARMState, btype)); 141 } 142 143 static void set_btype(DisasContext *s, int val) 144 { 145 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */ 146 tcg_debug_assert(val >= 1 && val <= 3); 147 set_btype_raw(val); 148 s->btype = -1; 149 } 150 151 static void reset_btype(DisasContext *s) 152 { 153 if (s->btype != 0) { 154 set_btype_raw(0); 155 s->btype = 0; 156 } 157 } 158 159 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff) 160 { 161 assert(s->pc_save != -1); 162 if (tb_cflags(s->base.tb) & CF_PCREL) { 163 tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff); 164 } else { 165 tcg_gen_movi_i64(dest, s->pc_curr + diff); 166 } 167 } 168 169 void gen_a64_update_pc(DisasContext *s, target_long diff) 170 { 171 gen_pc_plus_diff(s, cpu_pc, diff); 172 s->pc_save = s->pc_curr + diff; 173 } 174 175 /* 176 * Handle Top Byte Ignore (TBI) bits. 177 * 178 * If address tagging is enabled via the TCR TBI bits: 179 * + for EL2 and EL3 there is only one TBI bit, and if it is set 180 * then the address is zero-extended, clearing bits [63:56] 181 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0 182 * and TBI1 controls addresses with bit 55 == 1. 183 * If the appropriate TBI bit is set for the address then 184 * the address is sign-extended from bit 55 into bits [63:56] 185 * 186 * Here We have concatenated TBI{1,0} into tbi. 187 */ 188 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst, 189 TCGv_i64 src, int tbi) 190 { 191 if (tbi == 0) { 192 /* Load unmodified address */ 193 tcg_gen_mov_i64(dst, src); 194 } else if (!regime_has_2_ranges(s->mmu_idx)) { 195 /* Force tag byte to all zero */ 196 tcg_gen_extract_i64(dst, src, 0, 56); 197 } else { 198 /* Sign-extend from bit 55. */ 199 tcg_gen_sextract_i64(dst, src, 0, 56); 200 201 switch (tbi) { 202 case 1: 203 /* tbi0 but !tbi1: only use the extension if positive */ 204 tcg_gen_and_i64(dst, dst, src); 205 break; 206 case 2: 207 /* !tbi0 but tbi1: only use the extension if negative */ 208 tcg_gen_or_i64(dst, dst, src); 209 break; 210 case 3: 211 /* tbi0 and tbi1: always use the extension */ 212 break; 213 default: 214 g_assert_not_reached(); 215 } 216 } 217 } 218 219 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) 220 { 221 /* 222 * If address tagging is enabled for instructions via the TCR TBI bits, 223 * then loading an address into the PC will clear out any tag. 224 */ 225 gen_top_byte_ignore(s, cpu_pc, src, s->tbii); 226 s->pc_save = -1; 227 } 228 229 /* 230 * Handle MTE and/or TBI. 231 * 232 * For TBI, ideally, we would do nothing. Proper behaviour on fault is 233 * for the tag to be present in the FAR_ELx register. But for user-only 234 * mode we do not have a TLB with which to implement this, so we must 235 * remove the top byte now. 236 * 237 * Always return a fresh temporary that we can increment independently 238 * of the write-back address. 
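 * (In the system-mode build, the #else branch below returns the address unchanged so that the tag remains visible for fault reporting; only the user-only build strips the top byte here.)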
239 */ 240 241 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) 242 { 243 TCGv_i64 clean = tcg_temp_new_i64(); 244 #ifdef CONFIG_USER_ONLY 245 gen_top_byte_ignore(s, clean, addr, s->tbid); 246 #else 247 tcg_gen_mov_i64(clean, addr); 248 #endif 249 return clean; 250 } 251 252 /* Insert a zero tag into src, with the result at dst. */ 253 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src) 254 { 255 tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4)); 256 } 257 258 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, 259 MMUAccessType acc, int log2_size) 260 { 261 gen_helper_probe_access(tcg_env, ptr, 262 tcg_constant_i32(acc), 263 tcg_constant_i32(get_mem_index(s)), 264 tcg_constant_i32(1 << log2_size)); 265 } 266 267 /* 268 * For MTE, check a single logical or atomic access. This probes a single 269 * address, the exact one specified. The size and alignment of the access 270 * is not relevant to MTE, per se, but watchpoints do require the size, 271 * and we want to recognize those before making any other changes to state. 272 */ 273 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, 274 bool is_write, bool tag_checked, 275 MemOp memop, bool is_unpriv, 276 int core_idx) 277 { 278 if (tag_checked && s->mte_active[is_unpriv]) { 279 TCGv_i64 ret; 280 int desc = 0; 281 282 desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx); 283 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 284 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 285 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 286 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop)); 287 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1); 288 289 ret = tcg_temp_new_i64(); 290 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); 291 292 return ret; 293 } 294 return clean_data_tbi(s, addr); 295 } 296 297 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, 298 bool tag_checked, MemOp memop) 299 { 300 return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop, 301 false, get_mem_index(s)); 302 } 303 304 /* 305 * For MTE, check multiple logical sequential accesses. 306 */ 307 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, 308 bool tag_checked, int total_size, MemOp single_mop) 309 { 310 if (tag_checked && s->mte_active[0]) { 311 TCGv_i64 ret; 312 int desc = 0; 313 314 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 315 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 316 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 317 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 318 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop)); 319 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1); 320 321 ret = tcg_temp_new_i64(); 322 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); 323 324 return ret; 325 } 326 return clean_data_tbi(s, addr); 327 } 328 329 /* 330 * Generate the special alignment check that applies to AccType_ATOMIC 331 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be 332 * naturally aligned, but it must not cross a 16-byte boundary. 333 * See AArch64.CheckAlignment(). 
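 * Concretely, the code below computes ((addr + imm) & 15) + access_size and calls the unaligned-access helper when that result exceeds 16.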
334 */ 335 static void check_lse2_align(DisasContext *s, int rn, int imm, 336 bool is_write, MemOp mop) 337 { 338 TCGv_i32 tmp; 339 TCGv_i64 addr; 340 TCGLabel *over_label; 341 MMUAccessType type; 342 int mmu_idx; 343 344 tmp = tcg_temp_new_i32(); 345 tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn)); 346 tcg_gen_addi_i32(tmp, tmp, imm & 15); 347 tcg_gen_andi_i32(tmp, tmp, 15); 348 tcg_gen_addi_i32(tmp, tmp, memop_size(mop)); 349 350 over_label = gen_new_label(); 351 tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label); 352 353 addr = tcg_temp_new_i64(); 354 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm); 355 356 type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD, 357 mmu_idx = get_mem_index(s); 358 gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type), 359 tcg_constant_i32(mmu_idx)); 360 361 gen_set_label(over_label); 362 363 } 364 365 /* Handle the alignment check for AccType_ATOMIC instructions. */ 366 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop) 367 { 368 MemOp size = mop & MO_SIZE; 369 370 if (size == MO_8) { 371 return mop; 372 } 373 374 /* 375 * If size == MO_128, this is a LDXP, and the operation is single-copy 376 * atomic for each doubleword, not the entire quadword; it still must 377 * be quadword aligned. 378 */ 379 if (size == MO_128) { 380 return finalize_memop_atom(s, MO_128 | MO_ALIGN, 381 MO_ATOM_IFALIGN_PAIR); 382 } 383 if (dc_isar_feature(aa64_lse2, s)) { 384 check_lse2_align(s, rn, 0, true, mop); 385 } else { 386 mop |= MO_ALIGN; 387 } 388 return finalize_memop(s, mop); 389 } 390 391 /* Handle the alignment check for AccType_ORDERED instructions. */ 392 static MemOp check_ordered_align(DisasContext *s, int rn, int imm, 393 bool is_write, MemOp mop) 394 { 395 MemOp size = mop & MO_SIZE; 396 397 if (size == MO_8) { 398 return mop; 399 } 400 if (size == MO_128) { 401 return finalize_memop_atom(s, MO_128 | MO_ALIGN, 402 MO_ATOM_IFALIGN_PAIR); 403 } 404 if (!dc_isar_feature(aa64_lse2, s)) { 405 mop |= MO_ALIGN; 406 } else if (!s->naa) { 407 check_lse2_align(s, rn, imm, is_write, mop); 408 } 409 return finalize_memop(s, mop); 410 } 411 412 typedef struct DisasCompare64 { 413 TCGCond cond; 414 TCGv_i64 value; 415 } DisasCompare64; 416 417 static void a64_test_cc(DisasCompare64 *c64, int cc) 418 { 419 DisasCompare c32; 420 421 arm_test_cc(&c32, cc); 422 423 /* 424 * Sign-extend the 32-bit value so that the GE/LT comparisons work 425 * properly. The NE/EQ comparisons are also fine with this choice. 426 */ 427 c64->cond = c32.cond; 428 c64->value = tcg_temp_new_i64(); 429 tcg_gen_ext_i32_i64(c64->value, c32.value); 430 } 431 432 static void gen_rebuild_hflags(DisasContext *s) 433 { 434 gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el)); 435 } 436 437 static void gen_exception_internal(int excp) 438 { 439 assert(excp_is_internal(excp)); 440 gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp)); 441 } 442 443 static void gen_exception_internal_insn(DisasContext *s, int excp) 444 { 445 gen_a64_update_pc(s, 0); 446 gen_exception_internal(excp); 447 s->base.is_jmp = DISAS_NORETURN; 448 } 449 450 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome) 451 { 452 gen_a64_update_pc(s, 0); 453 gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome)); 454 s->base.is_jmp = DISAS_NORETURN; 455 } 456 457 static void gen_step_complete_exception(DisasContext *s) 458 { 459 /* We just completed step of an insn. Move from Active-not-pending 460 * to Active-pending, and then also take the swstep exception. 
461 * This corresponds to making the (IMPDEF) choice to prioritize 462 * swstep exceptions over asynchronous exceptions taken to an exception 463 * level where debug is disabled. This choice has the advantage that 464 * we do not need to maintain internal state corresponding to the 465 * ISV/EX syndrome bits between completion of the step and generation 466 * of the exception, and our syndrome information is always correct. 467 */ 468 gen_ss_advance(s); 469 gen_swstep_exception(s, 1, s->is_ldex); 470 s->base.is_jmp = DISAS_NORETURN; 471 } 472 473 static inline bool use_goto_tb(DisasContext *s, uint64_t dest) 474 { 475 if (s->ss_active) { 476 return false; 477 } 478 return translator_use_goto_tb(&s->base, dest); 479 } 480 481 static void gen_goto_tb(DisasContext *s, int n, int64_t diff) 482 { 483 if (use_goto_tb(s, s->pc_curr + diff)) { 484 /* 485 * For pcrel, the pc must always be up-to-date on entry to 486 * the linked TB, so that it can use simple additions for all 487 * further adjustments. For !pcrel, the linked TB is compiled 488 * to know its full virtual address, so we can delay the 489 * update to pc to the unlinked path. A long chain of links 490 * can thus avoid many updates to the PC. 491 */ 492 if (tb_cflags(s->base.tb) & CF_PCREL) { 493 gen_a64_update_pc(s, diff); 494 tcg_gen_goto_tb(n); 495 } else { 496 tcg_gen_goto_tb(n); 497 gen_a64_update_pc(s, diff); 498 } 499 tcg_gen_exit_tb(s->base.tb, n); 500 s->base.is_jmp = DISAS_NORETURN; 501 } else { 502 gen_a64_update_pc(s, diff); 503 if (s->ss_active) { 504 gen_step_complete_exception(s); 505 } else { 506 tcg_gen_lookup_and_goto_ptr(); 507 s->base.is_jmp = DISAS_NORETURN; 508 } 509 } 510 } 511 512 /* 513 * Register access functions 514 * 515 * These functions are used for directly accessing a register in where 516 * changes to the final register value are likely to be made. If you 517 * need to use a register for temporary calculation (e.g. index type 518 * operations) use the read_* form. 519 * 520 * B1.2.1 Register mappings 521 * 522 * In instruction register encoding 31 can refer to ZR (zero register) or 523 * the SP (stack pointer) depending on context. In QEMU's case we map SP 524 * to cpu_X[31] and ZR accesses to a temporary which can be discarded. 525 * This is the point of the _sp forms. 526 */ 527 TCGv_i64 cpu_reg(DisasContext *s, int reg) 528 { 529 if (reg == 31) { 530 TCGv_i64 t = tcg_temp_new_i64(); 531 tcg_gen_movi_i64(t, 0); 532 return t; 533 } else { 534 return cpu_X[reg]; 535 } 536 } 537 538 /* register access for when 31 == SP */ 539 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg) 540 { 541 return cpu_X[reg]; 542 } 543 544 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64 545 * representing the register contents. This TCGv is an auto-freed 546 * temporary so it need not be explicitly freed, and may be modified. 547 */ 548 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf) 549 { 550 TCGv_i64 v = tcg_temp_new_i64(); 551 if (reg != 31) { 552 if (sf) { 553 tcg_gen_mov_i64(v, cpu_X[reg]); 554 } else { 555 tcg_gen_ext32u_i64(v, cpu_X[reg]); 556 } 557 } else { 558 tcg_gen_movi_i64(v, 0); 559 } 560 return v; 561 } 562 563 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf) 564 { 565 TCGv_i64 v = tcg_temp_new_i64(); 566 if (sf) { 567 tcg_gen_mov_i64(v, cpu_X[reg]); 568 } else { 569 tcg_gen_ext32u_i64(v, cpu_X[reg]); 570 } 571 return v; 572 } 573 574 /* Return the offset into CPUARMState of a slice (from 575 * the least significant end) of FP register Qn (ie 576 * Dn, Sn, Hn or Bn). 
577 * (Note that this is not the same mapping as for A32; see cpu.h) 578 */ 579 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size) 580 { 581 return vec_reg_offset(s, regno, 0, size); 582 } 583 584 /* Offset of the high half of the 128 bit vector Qn */ 585 static inline int fp_reg_hi_offset(DisasContext *s, int regno) 586 { 587 return vec_reg_offset(s, regno, 1, MO_64); 588 } 589 590 /* Convenience accessors for reading and writing single and double 591 * FP registers. Writing clears the upper parts of the associated 592 * 128 bit vector register, as required by the architecture. 593 * Note that unlike the GP register accessors, the values returned 594 * by the read functions must be manually freed. 595 */ 596 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg) 597 { 598 TCGv_i64 v = tcg_temp_new_i64(); 599 600 tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64)); 601 return v; 602 } 603 604 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg) 605 { 606 TCGv_i32 v = tcg_temp_new_i32(); 607 608 tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); 609 return v; 610 } 611 612 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) 613 { 614 TCGv_i32 v = tcg_temp_new_i32(); 615 616 tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); 617 return v; 618 } 619 620 static void clear_vec(DisasContext *s, int rd) 621 { 622 unsigned ofs = fp_reg_offset(s, rd, MO_64); 623 unsigned vsz = vec_full_reg_size(s); 624 625 tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0); 626 } 627 628 /* 629 * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64). 630 * If SVE is not enabled, then there are only 128 bits in the vector. 631 */ 632 static void clear_vec_high(DisasContext *s, bool is_q, int rd) 633 { 634 unsigned ofs = fp_reg_offset(s, rd, MO_64); 635 unsigned vsz = vec_full_reg_size(s); 636 637 /* Nop move, with side effect of clearing the tail. */ 638 tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz); 639 } 640 641 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v) 642 { 643 unsigned ofs = fp_reg_offset(s, reg, MO_64); 644 645 tcg_gen_st_i64(v, tcg_env, ofs); 646 clear_vec_high(s, false, reg); 647 } 648 649 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) 650 { 651 TCGv_i64 tmp = tcg_temp_new_i64(); 652 653 tcg_gen_extu_i32_i64(tmp, v); 654 write_fp_dreg(s, reg, tmp); 655 } 656 657 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ 658 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, 659 GVecGen2Fn *gvec_fn, int vece) 660 { 661 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 662 is_q ? 16 : 8, vec_full_reg_size(s)); 663 } 664 665 /* Expand a 2-operand + immediate AdvSIMD vector operation using 666 * an expander function. 667 */ 668 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn, 669 int64_t imm, GVecGen2iFn *gvec_fn, int vece) 670 { 671 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 672 imm, is_q ? 16 : 8, vec_full_reg_size(s)); 673 } 674 675 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */ 676 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm, 677 GVecGen3Fn *gvec_fn, int vece) 678 { 679 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 680 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s)); 681 } 682 683 /* Expand a 4-operand AdvSIMD vector operation using an expander function. 
*/ 684 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm, 685 int rx, GVecGen4Fn *gvec_fn, int vece) 686 { 687 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 688 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx), 689 is_q ? 16 : 8, vec_full_reg_size(s)); 690 } 691 692 /* Expand a 2-operand operation using an out-of-line helper. */ 693 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd, 694 int rn, int data, gen_helper_gvec_2 *fn) 695 { 696 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 697 vec_full_reg_offset(s, rn), 698 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 699 } 700 701 /* Expand a 3-operand operation using an out-of-line helper. */ 702 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, 703 int rn, int rm, int data, gen_helper_gvec_3 *fn) 704 { 705 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 706 vec_full_reg_offset(s, rn), 707 vec_full_reg_offset(s, rm), 708 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 709 } 710 711 /* Expand a 3-operand + fpstatus pointer + simd data value operation using 712 * an out-of-line helper. 713 */ 714 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, 715 int rm, bool is_fp16, int data, 716 gen_helper_gvec_3_ptr *fn) 717 { 718 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); 719 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 720 vec_full_reg_offset(s, rn), 721 vec_full_reg_offset(s, rm), fpst, 722 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 723 } 724 725 /* Expand a 4-operand operation using an out-of-line helper. */ 726 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn, 727 int rm, int ra, int data, gen_helper_gvec_4 *fn) 728 { 729 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 730 vec_full_reg_offset(s, rn), 731 vec_full_reg_offset(s, rm), 732 vec_full_reg_offset(s, ra), 733 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 734 } 735 736 /* 737 * Expand a 4-operand operation using an out-of-line helper that takes 738 * a pointer to the CPU env. 739 */ 740 static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn, 741 int rm, int ra, int data, 742 gen_helper_gvec_4_ptr *fn) 743 { 744 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 745 vec_full_reg_offset(s, rn), 746 vec_full_reg_offset(s, rm), 747 vec_full_reg_offset(s, ra), 748 tcg_env, 749 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 750 } 751 752 /* 753 * Expand a 4-operand + fpstatus pointer + simd data value operation using 754 * an out-of-line helper. 755 */ 756 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, 757 int rm, int ra, bool is_fp16, int data, 758 gen_helper_gvec_4_ptr *fn) 759 { 760 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64); 761 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 762 vec_full_reg_offset(s, rn), 763 vec_full_reg_offset(s, rm), 764 vec_full_reg_offset(s, ra), fpst, 765 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 766 } 767 768 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier 769 * than the 32 bit equivalent. 770 */ 771 static inline void gen_set_NZ64(TCGv_i64 result) 772 { 773 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result); 774 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF); 775 } 776 777 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. 
*/ 778 static inline void gen_logic_CC(int sf, TCGv_i64 result) 779 { 780 if (sf) { 781 gen_set_NZ64(result); 782 } else { 783 tcg_gen_extrl_i64_i32(cpu_ZF, result); 784 tcg_gen_mov_i32(cpu_NF, cpu_ZF); 785 } 786 tcg_gen_movi_i32(cpu_CF, 0); 787 tcg_gen_movi_i32(cpu_VF, 0); 788 } 789 790 /* dest = T0 + T1; compute C, N, V and Z flags */ 791 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 792 { 793 TCGv_i64 result, flag, tmp; 794 result = tcg_temp_new_i64(); 795 flag = tcg_temp_new_i64(); 796 tmp = tcg_temp_new_i64(); 797 798 tcg_gen_movi_i64(tmp, 0); 799 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp); 800 801 tcg_gen_extrl_i64_i32(cpu_CF, flag); 802 803 gen_set_NZ64(result); 804 805 tcg_gen_xor_i64(flag, result, t0); 806 tcg_gen_xor_i64(tmp, t0, t1); 807 tcg_gen_andc_i64(flag, flag, tmp); 808 tcg_gen_extrh_i64_i32(cpu_VF, flag); 809 810 tcg_gen_mov_i64(dest, result); 811 } 812 813 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 814 { 815 TCGv_i32 t0_32 = tcg_temp_new_i32(); 816 TCGv_i32 t1_32 = tcg_temp_new_i32(); 817 TCGv_i32 tmp = tcg_temp_new_i32(); 818 819 tcg_gen_movi_i32(tmp, 0); 820 tcg_gen_extrl_i64_i32(t0_32, t0); 821 tcg_gen_extrl_i64_i32(t1_32, t1); 822 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp); 823 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 824 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 825 tcg_gen_xor_i32(tmp, t0_32, t1_32); 826 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 827 tcg_gen_extu_i32_i64(dest, cpu_NF); 828 } 829 830 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 831 { 832 if (sf) { 833 gen_add64_CC(dest, t0, t1); 834 } else { 835 gen_add32_CC(dest, t0, t1); 836 } 837 } 838 839 /* dest = T0 - T1; compute C, N, V and Z flags */ 840 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 841 { 842 /* 64 bit arithmetic */ 843 TCGv_i64 result, flag, tmp; 844 845 result = tcg_temp_new_i64(); 846 flag = tcg_temp_new_i64(); 847 tcg_gen_sub_i64(result, t0, t1); 848 849 gen_set_NZ64(result); 850 851 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1); 852 tcg_gen_extrl_i64_i32(cpu_CF, flag); 853 854 tcg_gen_xor_i64(flag, result, t0); 855 tmp = tcg_temp_new_i64(); 856 tcg_gen_xor_i64(tmp, t0, t1); 857 tcg_gen_and_i64(flag, flag, tmp); 858 tcg_gen_extrh_i64_i32(cpu_VF, flag); 859 tcg_gen_mov_i64(dest, result); 860 } 861 862 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 863 { 864 /* 32 bit arithmetic */ 865 TCGv_i32 t0_32 = tcg_temp_new_i32(); 866 TCGv_i32 t1_32 = tcg_temp_new_i32(); 867 TCGv_i32 tmp; 868 869 tcg_gen_extrl_i64_i32(t0_32, t0); 870 tcg_gen_extrl_i64_i32(t1_32, t1); 871 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32); 872 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 873 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32); 874 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 875 tmp = tcg_temp_new_i32(); 876 tcg_gen_xor_i32(tmp, t0_32, t1_32); 877 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp); 878 tcg_gen_extu_i32_i64(dest, cpu_NF); 879 } 880 881 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 882 { 883 if (sf) { 884 gen_sub64_CC(dest, t0, t1); 885 } else { 886 gen_sub32_CC(dest, t0, t1); 887 } 888 } 889 890 /* dest = T0 + T1 + CF; do not compute flags. 
*/ 891 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 892 { 893 TCGv_i64 flag = tcg_temp_new_i64(); 894 tcg_gen_extu_i32_i64(flag, cpu_CF); 895 tcg_gen_add_i64(dest, t0, t1); 896 tcg_gen_add_i64(dest, dest, flag); 897 898 if (!sf) { 899 tcg_gen_ext32u_i64(dest, dest); 900 } 901 } 902 903 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */ 904 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 905 { 906 if (sf) { 907 TCGv_i64 result = tcg_temp_new_i64(); 908 TCGv_i64 cf_64 = tcg_temp_new_i64(); 909 TCGv_i64 vf_64 = tcg_temp_new_i64(); 910 TCGv_i64 tmp = tcg_temp_new_i64(); 911 TCGv_i64 zero = tcg_constant_i64(0); 912 913 tcg_gen_extu_i32_i64(cf_64, cpu_CF); 914 tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero); 915 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero); 916 tcg_gen_extrl_i64_i32(cpu_CF, cf_64); 917 gen_set_NZ64(result); 918 919 tcg_gen_xor_i64(vf_64, result, t0); 920 tcg_gen_xor_i64(tmp, t0, t1); 921 tcg_gen_andc_i64(vf_64, vf_64, tmp); 922 tcg_gen_extrh_i64_i32(cpu_VF, vf_64); 923 924 tcg_gen_mov_i64(dest, result); 925 } else { 926 TCGv_i32 t0_32 = tcg_temp_new_i32(); 927 TCGv_i32 t1_32 = tcg_temp_new_i32(); 928 TCGv_i32 tmp = tcg_temp_new_i32(); 929 TCGv_i32 zero = tcg_constant_i32(0); 930 931 tcg_gen_extrl_i64_i32(t0_32, t0); 932 tcg_gen_extrl_i64_i32(t1_32, t1); 933 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero); 934 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero); 935 936 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 937 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 938 tcg_gen_xor_i32(tmp, t0_32, t1_32); 939 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 940 tcg_gen_extu_i32_i64(dest, cpu_NF); 941 } 942 } 943 944 /* 945 * Load/Store generators 946 */ 947 948 /* 949 * Store from GPR register to memory. 
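 * When iss_valid is set, an ISS syndrome describing the transfer register (iss_srt) is recorded so that a data abort on this access can be reported with valid syndrome information.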
950 */ 951 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, 952 TCGv_i64 tcg_addr, MemOp memop, int memidx, 953 bool iss_valid, 954 unsigned int iss_srt, 955 bool iss_sf, bool iss_ar) 956 { 957 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop); 958 959 if (iss_valid) { 960 uint32_t syn; 961 962 syn = syn_data_abort_with_iss(0, 963 (memop & MO_SIZE), 964 false, 965 iss_srt, 966 iss_sf, 967 iss_ar, 968 0, 0, 0, 0, 0, false); 969 disas_set_insn_syndrome(s, syn); 970 } 971 } 972 973 static void do_gpr_st(DisasContext *s, TCGv_i64 source, 974 TCGv_i64 tcg_addr, MemOp memop, 975 bool iss_valid, 976 unsigned int iss_srt, 977 bool iss_sf, bool iss_ar) 978 { 979 do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s), 980 iss_valid, iss_srt, iss_sf, iss_ar); 981 } 982 983 /* 984 * Load from memory to GPR register 985 */ 986 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 987 MemOp memop, bool extend, int memidx, 988 bool iss_valid, unsigned int iss_srt, 989 bool iss_sf, bool iss_ar) 990 { 991 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop); 992 993 if (extend && (memop & MO_SIGN)) { 994 g_assert((memop & MO_SIZE) <= MO_32); 995 tcg_gen_ext32u_i64(dest, dest); 996 } 997 998 if (iss_valid) { 999 uint32_t syn; 1000 1001 syn = syn_data_abort_with_iss(0, 1002 (memop & MO_SIZE), 1003 (memop & MO_SIGN) != 0, 1004 iss_srt, 1005 iss_sf, 1006 iss_ar, 1007 0, 0, 0, 0, 0, false); 1008 disas_set_insn_syndrome(s, syn); 1009 } 1010 } 1011 1012 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 1013 MemOp memop, bool extend, 1014 bool iss_valid, unsigned int iss_srt, 1015 bool iss_sf, bool iss_ar) 1016 { 1017 do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s), 1018 iss_valid, iss_srt, iss_sf, iss_ar); 1019 } 1020 1021 /* 1022 * Store from FP register to memory 1023 */ 1024 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop) 1025 { 1026 /* This writes the bottom N bits of a 128 bit wide vector to memory */ 1027 TCGv_i64 tmplo = tcg_temp_new_i64(); 1028 1029 tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64)); 1030 1031 if ((mop & MO_SIZE) < MO_128) { 1032 tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1033 } else { 1034 TCGv_i64 tmphi = tcg_temp_new_i64(); 1035 TCGv_i128 t16 = tcg_temp_new_i128(); 1036 1037 tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx)); 1038 tcg_gen_concat_i64_i128(t16, tmplo, tmphi); 1039 1040 tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop); 1041 } 1042 } 1043 1044 /* 1045 * Load from memory to FP register 1046 */ 1047 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop) 1048 { 1049 /* This always zero-extends and writes to a full 128 bit wide vector */ 1050 TCGv_i64 tmplo = tcg_temp_new_i64(); 1051 TCGv_i64 tmphi = NULL; 1052 1053 if ((mop & MO_SIZE) < MO_128) { 1054 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1055 } else { 1056 TCGv_i128 t16 = tcg_temp_new_i128(); 1057 1058 tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop); 1059 1060 tmphi = tcg_temp_new_i64(); 1061 tcg_gen_extr_i128_i64(tmplo, tmphi, t16); 1062 } 1063 1064 tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64)); 1065 1066 if (tmphi) { 1067 tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx)); 1068 } 1069 clear_vec_high(s, tmphi != NULL, destidx); 1070 } 1071 1072 /* 1073 * Vector load/store helpers. 
1074 * 1075 * The principal difference between this and a FP load is that we don't 1076 * zero extend as we are filling a partial chunk of the vector register. 1077 * These functions don't support 128 bit loads/stores, which would be 1078 * normal load/store operations. 1079 * 1080 * The _i32 versions are useful when operating on 32 bit quantities 1081 * (eg for floating point single or using Neon helper functions). 1082 */ 1083 1084 /* Get value of an element within a vector register */ 1085 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx, 1086 int element, MemOp memop) 1087 { 1088 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1089 switch ((unsigned)memop) { 1090 case MO_8: 1091 tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off); 1092 break; 1093 case MO_16: 1094 tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off); 1095 break; 1096 case MO_32: 1097 tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off); 1098 break; 1099 case MO_8|MO_SIGN: 1100 tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off); 1101 break; 1102 case MO_16|MO_SIGN: 1103 tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off); 1104 break; 1105 case MO_32|MO_SIGN: 1106 tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off); 1107 break; 1108 case MO_64: 1109 case MO_64|MO_SIGN: 1110 tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off); 1111 break; 1112 default: 1113 g_assert_not_reached(); 1114 } 1115 } 1116 1117 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx, 1118 int element, MemOp memop) 1119 { 1120 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1121 switch (memop) { 1122 case MO_8: 1123 tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off); 1124 break; 1125 case MO_16: 1126 tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off); 1127 break; 1128 case MO_8|MO_SIGN: 1129 tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off); 1130 break; 1131 case MO_16|MO_SIGN: 1132 tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off); 1133 break; 1134 case MO_32: 1135 case MO_32|MO_SIGN: 1136 tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off); 1137 break; 1138 default: 1139 g_assert_not_reached(); 1140 } 1141 } 1142 1143 /* Set value of an element within a vector register */ 1144 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx, 1145 int element, MemOp memop) 1146 { 1147 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1148 switch (memop) { 1149 case MO_8: 1150 tcg_gen_st8_i64(tcg_src, tcg_env, vect_off); 1151 break; 1152 case MO_16: 1153 tcg_gen_st16_i64(tcg_src, tcg_env, vect_off); 1154 break; 1155 case MO_32: 1156 tcg_gen_st32_i64(tcg_src, tcg_env, vect_off); 1157 break; 1158 case MO_64: 1159 tcg_gen_st_i64(tcg_src, tcg_env, vect_off); 1160 break; 1161 default: 1162 g_assert_not_reached(); 1163 } 1164 } 1165 1166 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, 1167 int destidx, int element, MemOp memop) 1168 { 1169 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1170 switch (memop) { 1171 case MO_8: 1172 tcg_gen_st8_i32(tcg_src, tcg_env, vect_off); 1173 break; 1174 case MO_16: 1175 tcg_gen_st16_i32(tcg_src, tcg_env, vect_off); 1176 break; 1177 case MO_32: 1178 tcg_gen_st_i32(tcg_src, tcg_env, vect_off); 1179 break; 1180 default: 1181 g_assert_not_reached(); 1182 } 1183 } 1184 1185 /* Store from vector register to memory */ 1186 static void do_vec_st(DisasContext *s, int srcidx, int element, 1187 TCGv_i64 tcg_addr, MemOp mop) 1188 { 1189 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1190 1191 read_vec_element(s, tcg_tmp, srcidx, element, 
mop & MO_SIZE); 1192 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1193 } 1194 1195 /* Load from memory to vector register */ 1196 static void do_vec_ld(DisasContext *s, int destidx, int element, 1197 TCGv_i64 tcg_addr, MemOp mop) 1198 { 1199 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1200 1201 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1202 write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE); 1203 } 1204 1205 /* Check that FP/Neon access is enabled. If it is, return 1206 * true. If not, emit code to generate an appropriate exception, 1207 * and return false; the caller should not emit any code for 1208 * the instruction. Note that this check must happen after all 1209 * unallocated-encoding checks (otherwise the syndrome information 1210 * for the resulting exception will be incorrect). 1211 */ 1212 static bool fp_access_check_only(DisasContext *s) 1213 { 1214 if (s->fp_excp_el) { 1215 assert(!s->fp_access_checked); 1216 s->fp_access_checked = true; 1217 1218 gen_exception_insn_el(s, 0, EXCP_UDEF, 1219 syn_fp_access_trap(1, 0xe, false, 0), 1220 s->fp_excp_el); 1221 return false; 1222 } 1223 s->fp_access_checked = true; 1224 return true; 1225 } 1226 1227 static bool fp_access_check(DisasContext *s) 1228 { 1229 if (!fp_access_check_only(s)) { 1230 return false; 1231 } 1232 if (s->sme_trap_nonstreaming && s->is_nonstreaming) { 1233 gen_exception_insn(s, 0, EXCP_UDEF, 1234 syn_smetrap(SME_ET_Streaming, false)); 1235 return false; 1236 } 1237 return true; 1238 } 1239 1240 /* 1241 * Return <0 for non-supported element sizes, with MO_16 controlled by 1242 * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success. 1243 */ 1244 static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz) 1245 { 1246 switch (esz) { 1247 case MO_64: 1248 case MO_32: 1249 break; 1250 case MO_16: 1251 if (!dc_isar_feature(aa64_fp16, s)) { 1252 return -1; 1253 } 1254 break; 1255 default: 1256 return -1; 1257 } 1258 return fp_access_check(s); 1259 } 1260 1261 /* Likewise, but vector MO_64 must have two elements. */ 1262 static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz) 1263 { 1264 switch (esz) { 1265 case MO_64: 1266 if (!is_q) { 1267 return -1; 1268 } 1269 break; 1270 case MO_32: 1271 break; 1272 case MO_16: 1273 if (!dc_isar_feature(aa64_fp16, s)) { 1274 return -1; 1275 } 1276 break; 1277 default: 1278 return -1; 1279 } 1280 return fp_access_check(s); 1281 } 1282 1283 /* 1284 * Check that SVE access is enabled. If it is, return true. 1285 * If not, emit code to generate an appropriate exception and return false. 1286 * This function corresponds to CheckSVEEnabled(). 1287 */ 1288 bool sve_access_check(DisasContext *s) 1289 { 1290 if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) { 1291 assert(dc_isar_feature(aa64_sme, s)); 1292 if (!sme_sm_enabled_check(s)) { 1293 goto fail_exit; 1294 } 1295 } else if (s->sve_excp_el) { 1296 gen_exception_insn_el(s, 0, EXCP_UDEF, 1297 syn_sve_access_trap(), s->sve_excp_el); 1298 goto fail_exit; 1299 } 1300 s->sve_access_checked = true; 1301 return fp_access_check(s); 1302 1303 fail_exit: 1304 /* Assert that we only raise one exception per instruction. */ 1305 assert(!s->sve_access_checked); 1306 s->sve_access_checked = true; 1307 return false; 1308 } 1309 1310 /* 1311 * Check that SME access is enabled, raise an exception if not. 1312 * Note that this function corresponds to CheckSMEAccess and is 1313 * only used directly for cpregs. 
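 * (Unlike sve_access_check above, this does not itself perform the FP access check; sme_enabled_check below handles the interaction with the FP access check.)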
1314 */ 1315 static bool sme_access_check(DisasContext *s) 1316 { 1317 if (s->sme_excp_el) { 1318 gen_exception_insn_el(s, 0, EXCP_UDEF, 1319 syn_smetrap(SME_ET_AccessTrap, false), 1320 s->sme_excp_el); 1321 return false; 1322 } 1323 return true; 1324 } 1325 1326 /* This function corresponds to CheckSMEEnabled. */ 1327 bool sme_enabled_check(DisasContext *s) 1328 { 1329 /* 1330 * Note that unlike sve_excp_el, we have not constrained sme_excp_el 1331 * to be zero when fp_excp_el has priority. This is because we need 1332 * sme_excp_el by itself for cpregs access checks. 1333 */ 1334 if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) { 1335 s->fp_access_checked = true; 1336 return sme_access_check(s); 1337 } 1338 return fp_access_check_only(s); 1339 } 1340 1341 /* Common subroutine for CheckSMEAnd*Enabled. */ 1342 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req) 1343 { 1344 if (!sme_enabled_check(s)) { 1345 return false; 1346 } 1347 if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) { 1348 gen_exception_insn(s, 0, EXCP_UDEF, 1349 syn_smetrap(SME_ET_NotStreaming, false)); 1350 return false; 1351 } 1352 if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) { 1353 gen_exception_insn(s, 0, EXCP_UDEF, 1354 syn_smetrap(SME_ET_InactiveZA, false)); 1355 return false; 1356 } 1357 return true; 1358 } 1359 1360 /* 1361 * Expanders for AdvSIMD translation functions. 1362 */ 1363 1364 static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data, 1365 gen_helper_gvec_2 *fn) 1366 { 1367 if (!a->q && a->esz == MO_64) { 1368 return false; 1369 } 1370 if (fp_access_check(s)) { 1371 gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn); 1372 } 1373 return true; 1374 } 1375 1376 static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data, 1377 gen_helper_gvec_3 *fn) 1378 { 1379 if (!a->q && a->esz == MO_64) { 1380 return false; 1381 } 1382 if (fp_access_check(s)) { 1383 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn); 1384 } 1385 return true; 1386 } 1387 1388 static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1389 { 1390 if (!a->q && a->esz == MO_64) { 1391 return false; 1392 } 1393 if (fp_access_check(s)) { 1394 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz); 1395 } 1396 return true; 1397 } 1398 1399 static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1400 { 1401 if (a->esz == MO_64) { 1402 return false; 1403 } 1404 if (fp_access_check(s)) { 1405 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz); 1406 } 1407 return true; 1408 } 1409 1410 static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1411 { 1412 if (a->esz == MO_8) { 1413 return false; 1414 } 1415 return do_gvec_fn3_no64(s, a, fn); 1416 } 1417 1418 static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn) 1419 { 1420 if (!a->q && a->esz == MO_64) { 1421 return false; 1422 } 1423 if (fp_access_check(s)) { 1424 gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz); 1425 } 1426 return true; 1427 } 1428 1429 /* 1430 * This utility function is for doing register extension with an 1431 * optional shift. You will likely want to pass a temporary for the 1432 * destination register. See DecodeRegExtend() in the ARM ARM. 1433 */ 1434 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in, 1435 int option, unsigned int shift) 1436 { 1437 int extsize = extract32(option, 0, 2); 1438 bool is_signed = extract32(option, 2, 1); 1439 1440 tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? 
MO_SIGN : 0)); 1441 tcg_gen_shli_i64(tcg_out, tcg_out, shift); 1442 } 1443 1444 static inline void gen_check_sp_alignment(DisasContext *s) 1445 { 1446 /* The AArch64 architecture mandates that (if enabled via PSTATE 1447 * or SCTLR bits) there is a check that SP is 16-aligned on every 1448 * SP-relative load or store (with an exception generated if it is not). 1449 * In line with general QEMU practice regarding misaligned accesses, 1450 * we omit these checks for the sake of guest program performance. 1451 * This function is provided as a hook so we can more easily add these 1452 * checks in future (possibly as a "favour catching guest program bugs 1453 * over speed" user selectable option). 1454 */ 1455 } 1456 1457 /* 1458 * The instruction disassembly implemented here matches 1459 * the instruction encoding classifications in chapter C4 1460 * of the ARM Architecture Reference Manual (DDI0487B_a); 1461 * classification names and decode diagrams here should generally 1462 * match up with those in the manual. 1463 */ 1464 1465 static bool trans_B(DisasContext *s, arg_i *a) 1466 { 1467 reset_btype(s); 1468 gen_goto_tb(s, 0, a->imm); 1469 return true; 1470 } 1471 1472 static bool trans_BL(DisasContext *s, arg_i *a) 1473 { 1474 gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s)); 1475 reset_btype(s); 1476 gen_goto_tb(s, 0, a->imm); 1477 return true; 1478 } 1479 1480 1481 static bool trans_CBZ(DisasContext *s, arg_cbz *a) 1482 { 1483 DisasLabel match; 1484 TCGv_i64 tcg_cmp; 1485 1486 tcg_cmp = read_cpu_reg(s, a->rt, a->sf); 1487 reset_btype(s); 1488 1489 match = gen_disas_label(s); 1490 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1491 tcg_cmp, 0, match.label); 1492 gen_goto_tb(s, 0, 4); 1493 set_disas_label(s, match); 1494 gen_goto_tb(s, 1, a->imm); 1495 return true; 1496 } 1497 1498 static bool trans_TBZ(DisasContext *s, arg_tbz *a) 1499 { 1500 DisasLabel match; 1501 TCGv_i64 tcg_cmp; 1502 1503 tcg_cmp = tcg_temp_new_i64(); 1504 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos); 1505 1506 reset_btype(s); 1507 1508 match = gen_disas_label(s); 1509 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1510 tcg_cmp, 0, match.label); 1511 gen_goto_tb(s, 0, 4); 1512 set_disas_label(s, match); 1513 gen_goto_tb(s, 1, a->imm); 1514 return true; 1515 } 1516 1517 static bool trans_B_cond(DisasContext *s, arg_B_cond *a) 1518 { 1519 /* BC.cond is only present with FEAT_HBC */ 1520 if (a->c && !dc_isar_feature(aa64_hbc, s)) { 1521 return false; 1522 } 1523 reset_btype(s); 1524 if (a->cond < 0x0e) { 1525 /* genuinely conditional branches */ 1526 DisasLabel match = gen_disas_label(s); 1527 arm_gen_test_cc(a->cond, match.label); 1528 gen_goto_tb(s, 0, 4); 1529 set_disas_label(s, match); 1530 gen_goto_tb(s, 1, a->imm); 1531 } else { 1532 /* 0xe and 0xf are both "always" conditions */ 1533 gen_goto_tb(s, 0, a->imm); 1534 } 1535 return true; 1536 } 1537 1538 static void set_btype_for_br(DisasContext *s, int rn) 1539 { 1540 if (dc_isar_feature(aa64_bti, s)) { 1541 /* BR to {x16,x17} or !guard -> 1, else 3. */ 1542 if (rn == 16 || rn == 17) { 1543 set_btype(s, 1); 1544 } else { 1545 TCGv_i64 pc = tcg_temp_new_i64(); 1546 gen_pc_plus_diff(s, pc, 0); 1547 gen_helper_guarded_page_br(tcg_env, pc); 1548 s->btype = -1; 1549 } 1550 } 1551 } 1552 1553 static void set_btype_for_blr(DisasContext *s) 1554 { 1555 if (dc_isar_feature(aa64_bti, s)) { 1556 /* BLR sets BTYPE to 2, regardless of source guarded page. 
*/ 1557 set_btype(s, 2); 1558 } 1559 } 1560 1561 static bool trans_BR(DisasContext *s, arg_r *a) 1562 { 1563 set_btype_for_br(s, a->rn); 1564 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1565 s->base.is_jmp = DISAS_JUMP; 1566 return true; 1567 } 1568 1569 static bool trans_BLR(DisasContext *s, arg_r *a) 1570 { 1571 TCGv_i64 dst = cpu_reg(s, a->rn); 1572 TCGv_i64 lr = cpu_reg(s, 30); 1573 if (dst == lr) { 1574 TCGv_i64 tmp = tcg_temp_new_i64(); 1575 tcg_gen_mov_i64(tmp, dst); 1576 dst = tmp; 1577 } 1578 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1579 gen_a64_set_pc(s, dst); 1580 set_btype_for_blr(s); 1581 s->base.is_jmp = DISAS_JUMP; 1582 return true; 1583 } 1584 1585 static bool trans_RET(DisasContext *s, arg_r *a) 1586 { 1587 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1588 s->base.is_jmp = DISAS_JUMP; 1589 return true; 1590 } 1591 1592 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst, 1593 TCGv_i64 modifier, bool use_key_a) 1594 { 1595 TCGv_i64 truedst; 1596 /* 1597 * Return the branch target for a BRAA/RETA/etc, which is either 1598 * just the destination dst, or that value with the pauth check 1599 * done and the code removed from the high bits. 1600 */ 1601 if (!s->pauth_active) { 1602 return dst; 1603 } 1604 1605 truedst = tcg_temp_new_i64(); 1606 if (use_key_a) { 1607 gen_helper_autia_combined(truedst, tcg_env, dst, modifier); 1608 } else { 1609 gen_helper_autib_combined(truedst, tcg_env, dst, modifier); 1610 } 1611 return truedst; 1612 } 1613 1614 static bool trans_BRAZ(DisasContext *s, arg_braz *a) 1615 { 1616 TCGv_i64 dst; 1617 1618 if (!dc_isar_feature(aa64_pauth, s)) { 1619 return false; 1620 } 1621 1622 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1623 set_btype_for_br(s, a->rn); 1624 gen_a64_set_pc(s, dst); 1625 s->base.is_jmp = DISAS_JUMP; 1626 return true; 1627 } 1628 1629 static bool trans_BLRAZ(DisasContext *s, arg_braz *a) 1630 { 1631 TCGv_i64 dst, lr; 1632 1633 if (!dc_isar_feature(aa64_pauth, s)) { 1634 return false; 1635 } 1636 1637 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1638 lr = cpu_reg(s, 30); 1639 if (dst == lr) { 1640 TCGv_i64 tmp = tcg_temp_new_i64(); 1641 tcg_gen_mov_i64(tmp, dst); 1642 dst = tmp; 1643 } 1644 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1645 gen_a64_set_pc(s, dst); 1646 set_btype_for_blr(s); 1647 s->base.is_jmp = DISAS_JUMP; 1648 return true; 1649 } 1650 1651 static bool trans_RETA(DisasContext *s, arg_reta *a) 1652 { 1653 TCGv_i64 dst; 1654 1655 dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m); 1656 gen_a64_set_pc(s, dst); 1657 s->base.is_jmp = DISAS_JUMP; 1658 return true; 1659 } 1660 1661 static bool trans_BRA(DisasContext *s, arg_bra *a) 1662 { 1663 TCGv_i64 dst; 1664 1665 if (!dc_isar_feature(aa64_pauth, s)) { 1666 return false; 1667 } 1668 dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m); 1669 gen_a64_set_pc(s, dst); 1670 set_btype_for_br(s, a->rn); 1671 s->base.is_jmp = DISAS_JUMP; 1672 return true; 1673 } 1674 1675 static bool trans_BLRA(DisasContext *s, arg_bra *a) 1676 { 1677 TCGv_i64 dst, lr; 1678 1679 if (!dc_isar_feature(aa64_pauth, s)) { 1680 return false; 1681 } 1682 dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m); 1683 lr = cpu_reg(s, 30); 1684 if (dst == lr) { 1685 TCGv_i64 tmp = tcg_temp_new_i64(); 1686 tcg_gen_mov_i64(tmp, dst); 1687 dst = tmp; 1688 } 1689 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1690 gen_a64_set_pc(s, dst); 1691 set_btype_for_blr(s); 1692 s->base.is_jmp = DISAS_JUMP; 1693 
return true; 1694 } 1695 1696 static bool trans_ERET(DisasContext *s, arg_ERET *a) 1697 { 1698 TCGv_i64 dst; 1699 1700 if (s->current_el == 0) { 1701 return false; 1702 } 1703 if (s->trap_eret) { 1704 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2); 1705 return true; 1706 } 1707 dst = tcg_temp_new_i64(); 1708 tcg_gen_ld_i64(dst, tcg_env, 1709 offsetof(CPUARMState, elr_el[s->current_el])); 1710 1711 translator_io_start(&s->base); 1712 1713 gen_helper_exception_return(tcg_env, dst); 1714 /* Must exit loop to check un-masked IRQs */ 1715 s->base.is_jmp = DISAS_EXIT; 1716 return true; 1717 } 1718 1719 static bool trans_ERETA(DisasContext *s, arg_reta *a) 1720 { 1721 TCGv_i64 dst; 1722 1723 if (!dc_isar_feature(aa64_pauth, s)) { 1724 return false; 1725 } 1726 if (s->current_el == 0) { 1727 return false; 1728 } 1729 /* The FGT trap takes precedence over an auth trap. */ 1730 if (s->trap_eret) { 1731 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2); 1732 return true; 1733 } 1734 dst = tcg_temp_new_i64(); 1735 tcg_gen_ld_i64(dst, tcg_env, 1736 offsetof(CPUARMState, elr_el[s->current_el])); 1737 1738 dst = auth_branch_target(s, dst, cpu_X[31], !a->m); 1739 1740 translator_io_start(&s->base); 1741 1742 gen_helper_exception_return(tcg_env, dst); 1743 /* Must exit loop to check un-masked IRQs */ 1744 s->base.is_jmp = DISAS_EXIT; 1745 return true; 1746 } 1747 1748 static bool trans_NOP(DisasContext *s, arg_NOP *a) 1749 { 1750 return true; 1751 } 1752 1753 static bool trans_YIELD(DisasContext *s, arg_YIELD *a) 1754 { 1755 /* 1756 * When running in MTTCG we don't generate jumps to the yield and 1757 * WFE helpers as it won't affect the scheduling of other vCPUs. 1758 * If we wanted to more completely model WFE/SEV so we don't busy 1759 * spin unnecessarily we would need to do something more involved. 1760 */ 1761 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1762 s->base.is_jmp = DISAS_YIELD; 1763 } 1764 return true; 1765 } 1766 1767 static bool trans_WFI(DisasContext *s, arg_WFI *a) 1768 { 1769 s->base.is_jmp = DISAS_WFI; 1770 return true; 1771 } 1772 1773 static bool trans_WFE(DisasContext *s, arg_WFI *a) 1774 { 1775 /* 1776 * When running in MTTCG we don't generate jumps to the yield and 1777 * WFE helpers as it won't affect the scheduling of other vCPUs. 1778 * If we wanted to more completely model WFE/SEV so we don't busy 1779 * spin unnecessarily we would need to do something more involved. 1780 */ 1781 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1782 s->base.is_jmp = DISAS_WFE; 1783 } 1784 return true; 1785 } 1786 1787 static bool trans_WFIT(DisasContext *s, arg_WFIT *a) 1788 { 1789 if (!dc_isar_feature(aa64_wfxt, s)) { 1790 return false; 1791 } 1792 1793 /* 1794 * Because we need to pass the register value to the helper, 1795 * it's easier to emit the code now, unlike trans_WFI which 1796 * defers it to aarch64_tr_tb_stop(). That means we need to 1797 * check ss_active so that single-stepping a WFIT doesn't halt. 
1798 */ 1799 if (s->ss_active) { 1800 /* Act like a NOP under architectural singlestep */ 1801 return true; 1802 } 1803 1804 gen_a64_update_pc(s, 4); 1805 gen_helper_wfit(tcg_env, cpu_reg(s, a->rd)); 1806 /* Go back to the main loop to check for interrupts */ 1807 s->base.is_jmp = DISAS_EXIT; 1808 return true; 1809 } 1810 1811 static bool trans_WFET(DisasContext *s, arg_WFET *a) 1812 { 1813 if (!dc_isar_feature(aa64_wfxt, s)) { 1814 return false; 1815 } 1816 1817 /* 1818 * We rely here on our WFE implementation being a NOP, so we 1819 * don't need to do anything different to handle the WFET timeout 1820 * from what trans_WFE does. 1821 */ 1822 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1823 s->base.is_jmp = DISAS_WFE; 1824 } 1825 return true; 1826 } 1827 1828 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) 1829 { 1830 if (s->pauth_active) { 1831 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]); 1832 } 1833 return true; 1834 } 1835 1836 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) 1837 { 1838 if (s->pauth_active) { 1839 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1840 } 1841 return true; 1842 } 1843 1844 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) 1845 { 1846 if (s->pauth_active) { 1847 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1848 } 1849 return true; 1850 } 1851 1852 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) 1853 { 1854 if (s->pauth_active) { 1855 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1856 } 1857 return true; 1858 } 1859 1860 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) 1861 { 1862 if (s->pauth_active) { 1863 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1864 } 1865 return true; 1866 } 1867 1868 static bool trans_ESB(DisasContext *s, arg_ESB *a) 1869 { 1870 /* Without RAS, we must implement this as NOP. */ 1871 if (dc_isar_feature(aa64_ras, s)) { 1872 /* 1873 * QEMU does not have a source of physical SErrors, 1874 * so we are only concerned with virtual SErrors. 1875 * The pseudocode in the ARM for this case is 1876 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then 1877 * AArch64.vESBOperation(); 1878 * Most of the condition can be evaluated at translation time. 1879 * Test for EL2 present, and defer test for SEL2 to runtime. 
1880 */ 1881 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { 1882 gen_helper_vesb(tcg_env); 1883 } 1884 } 1885 return true; 1886 } 1887 1888 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) 1889 { 1890 if (s->pauth_active) { 1891 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1892 } 1893 return true; 1894 } 1895 1896 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) 1897 { 1898 if (s->pauth_active) { 1899 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1900 } 1901 return true; 1902 } 1903 1904 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) 1905 { 1906 if (s->pauth_active) { 1907 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1908 } 1909 return true; 1910 } 1911 1912 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) 1913 { 1914 if (s->pauth_active) { 1915 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1916 } 1917 return true; 1918 } 1919 1920 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) 1921 { 1922 if (s->pauth_active) { 1923 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1924 } 1925 return true; 1926 } 1927 1928 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) 1929 { 1930 if (s->pauth_active) { 1931 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1932 } 1933 return true; 1934 } 1935 1936 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) 1937 { 1938 if (s->pauth_active) { 1939 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1940 } 1941 return true; 1942 } 1943 1944 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) 1945 { 1946 if (s->pauth_active) { 1947 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1948 } 1949 return true; 1950 } 1951 1952 static bool trans_CLREX(DisasContext *s, arg_CLREX *a) 1953 { 1954 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 1955 return true; 1956 } 1957 1958 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) 1959 { 1960 /* We handle DSB and DMB the same way */ 1961 TCGBar bar; 1962 1963 switch (a->types) { 1964 case 1: /* MBReqTypes_Reads */ 1965 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; 1966 break; 1967 case 2: /* MBReqTypes_Writes */ 1968 bar = TCG_BAR_SC | TCG_MO_ST_ST; 1969 break; 1970 default: /* MBReqTypes_All */ 1971 bar = TCG_BAR_SC | TCG_MO_ALL; 1972 break; 1973 } 1974 tcg_gen_mb(bar); 1975 return true; 1976 } 1977 1978 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a) 1979 { 1980 if (!dc_isar_feature(aa64_xs, s)) { 1981 return false; 1982 } 1983 tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); 1984 return true; 1985 } 1986 1987 static bool trans_ISB(DisasContext *s, arg_ISB *a) 1988 { 1989 /* 1990 * We need to break the TB after this insn to execute 1991 * self-modifying code correctly and also to take 1992 * any pending interrupts immediately. 1993 */ 1994 reset_btype(s); 1995 gen_goto_tb(s, 0, 4); 1996 return true; 1997 } 1998 1999 static bool trans_SB(DisasContext *s, arg_SB *a) 2000 { 2001 if (!dc_isar_feature(aa64_sb, s)) { 2002 return false; 2003 } 2004 /* 2005 * TODO: There is no speculation barrier opcode for TCG; 2006 * MB and end the TB instead. 
2007 */ 2008 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); 2009 gen_goto_tb(s, 0, 4); 2010 return true; 2011 } 2012 2013 static bool trans_CFINV(DisasContext *s, arg_CFINV *a) 2014 { 2015 if (!dc_isar_feature(aa64_condm_4, s)) { 2016 return false; 2017 } 2018 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); 2019 return true; 2020 } 2021 2022 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) 2023 { 2024 TCGv_i32 z; 2025 2026 if (!dc_isar_feature(aa64_condm_5, s)) { 2027 return false; 2028 } 2029 2030 z = tcg_temp_new_i32(); 2031 2032 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); 2033 2034 /* 2035 * (!C & !Z) << 31 2036 * (!(C | Z)) << 31 2037 * ~((C | Z) << 31) 2038 * ~-(C | Z) 2039 * (C | Z) - 1 2040 */ 2041 tcg_gen_or_i32(cpu_NF, cpu_CF, z); 2042 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1); 2043 2044 /* !(Z & C) */ 2045 tcg_gen_and_i32(cpu_ZF, z, cpu_CF); 2046 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1); 2047 2048 /* (!C & Z) << 31 -> -(Z & ~C) */ 2049 tcg_gen_andc_i32(cpu_VF, z, cpu_CF); 2050 tcg_gen_neg_i32(cpu_VF, cpu_VF); 2051 2052 /* C | Z */ 2053 tcg_gen_or_i32(cpu_CF, cpu_CF, z); 2054 2055 return true; 2056 } 2057 2058 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) 2059 { 2060 if (!dc_isar_feature(aa64_condm_5, s)) { 2061 return false; 2062 } 2063 2064 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */ 2065 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ 2066 2067 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ 2068 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF); 2069 2070 tcg_gen_movi_i32(cpu_NF, 0); 2071 tcg_gen_movi_i32(cpu_VF, 0); 2072 2073 return true; 2074 } 2075 2076 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) 2077 { 2078 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { 2079 return false; 2080 } 2081 if (a->imm & 1) { 2082 set_pstate_bits(PSTATE_UAO); 2083 } else { 2084 clear_pstate_bits(PSTATE_UAO); 2085 } 2086 gen_rebuild_hflags(s); 2087 s->base.is_jmp = DISAS_TOO_MANY; 2088 return true; 2089 } 2090 2091 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) 2092 { 2093 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { 2094 return false; 2095 } 2096 if (a->imm & 1) { 2097 set_pstate_bits(PSTATE_PAN); 2098 } else { 2099 clear_pstate_bits(PSTATE_PAN); 2100 } 2101 gen_rebuild_hflags(s); 2102 s->base.is_jmp = DISAS_TOO_MANY; 2103 return true; 2104 } 2105 2106 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) 2107 { 2108 if (s->current_el == 0) { 2109 return false; 2110 } 2111 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP)); 2112 s->base.is_jmp = DISAS_TOO_MANY; 2113 return true; 2114 } 2115 2116 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) 2117 { 2118 if (!dc_isar_feature(aa64_ssbs, s)) { 2119 return false; 2120 } 2121 if (a->imm & 1) { 2122 set_pstate_bits(PSTATE_SSBS); 2123 } else { 2124 clear_pstate_bits(PSTATE_SSBS); 2125 } 2126 /* Don't need to rebuild hflags since SSBS is a nop */ 2127 s->base.is_jmp = DISAS_TOO_MANY; 2128 return true; 2129 } 2130 2131 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) 2132 { 2133 if (!dc_isar_feature(aa64_dit, s)) { 2134 return false; 2135 } 2136 if (a->imm & 1) { 2137 set_pstate_bits(PSTATE_DIT); 2138 } else { 2139 clear_pstate_bits(PSTATE_DIT); 2140 } 2141 /* There's no need to rebuild hflags because DIT is a nop */ 2142 s->base.is_jmp = DISAS_TOO_MANY; 2143 return true; 2144 } 2145 2146 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a) 2147 { 2148 if (dc_isar_feature(aa64_mte, s)) { 2149 /* Full MTE is enabled -- set the TCO bit as directed. 
*/ 2150 if (a->imm & 1) { 2151 set_pstate_bits(PSTATE_TCO); 2152 } else { 2153 clear_pstate_bits(PSTATE_TCO); 2154 } 2155 gen_rebuild_hflags(s); 2156 /* Many factors, including TCO, go into MTE_ACTIVE. */ 2157 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2158 return true; 2159 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { 2160 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ 2161 return true; 2162 } else { 2163 /* Insn not present */ 2164 return false; 2165 } 2166 } 2167 2168 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) 2169 { 2170 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm)); 2171 s->base.is_jmp = DISAS_TOO_MANY; 2172 return true; 2173 } 2174 2175 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) 2176 { 2177 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm)); 2178 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2179 s->base.is_jmp = DISAS_UPDATE_EXIT; 2180 return true; 2181 } 2182 2183 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a) 2184 { 2185 if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) { 2186 return false; 2187 } 2188 2189 if (a->imm == 0) { 2190 clear_pstate_bits(PSTATE_ALLINT); 2191 } else if (s->current_el > 1) { 2192 set_pstate_bits(PSTATE_ALLINT); 2193 } else { 2194 gen_helper_msr_set_allint_el1(tcg_env); 2195 } 2196 2197 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2198 s->base.is_jmp = DISAS_UPDATE_EXIT; 2199 return true; 2200 } 2201 2202 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) 2203 { 2204 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { 2205 return false; 2206 } 2207 if (sme_access_check(s)) { 2208 int old = s->pstate_sm | (s->pstate_za << 1); 2209 int new = a->imm * 3; 2210 2211 if ((old ^ new) & a->mask) { 2212 /* At least one bit changes. */ 2213 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new), 2214 tcg_constant_i32(a->mask)); 2215 s->base.is_jmp = DISAS_TOO_MANY; 2216 } 2217 } 2218 return true; 2219 } 2220 2221 static void gen_get_nzcv(TCGv_i64 tcg_rt) 2222 { 2223 TCGv_i32 tmp = tcg_temp_new_i32(); 2224 TCGv_i32 nzcv = tcg_temp_new_i32(); 2225 2226 /* build bit 31, N */ 2227 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31)); 2228 /* build bit 30, Z */ 2229 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0); 2230 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1); 2231 /* build bit 29, C */ 2232 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1); 2233 /* build bit 28, V */ 2234 tcg_gen_shri_i32(tmp, cpu_VF, 31); 2235 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); 2236 /* generate result */ 2237 tcg_gen_extu_i32_i64(tcg_rt, nzcv); 2238 } 2239 2240 static void gen_set_nzcv(TCGv_i64 tcg_rt) 2241 { 2242 TCGv_i32 nzcv = tcg_temp_new_i32(); 2243 2244 /* take NZCV from R[t] */ 2245 tcg_gen_extrl_i64_i32(nzcv, tcg_rt); 2246 2247 /* bit 31, N */ 2248 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); 2249 /* bit 30, Z */ 2250 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30)); 2251 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0); 2252 /* bit 29, C */ 2253 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29)); 2254 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29); 2255 /* bit 28, V */ 2256 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); 2257 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); 2258 } 2259 2260 static void gen_sysreg_undef(DisasContext *s, bool isread, 2261 uint8_t op0, uint8_t op1, uint8_t op2, 2262 uint8_t crn, uint8_t crm, uint8_t rt) 2263 { 2264 /* 2265 * Generate code to emit an UNDEF with correct syndrome 2266 * information for a failed system register access. 
2267 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases, 2268 * but if FEAT_IDST is implemented then read accesses to registers 2269 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP 2270 * syndrome. 2271 */ 2272 uint32_t syndrome; 2273 2274 if (isread && dc_isar_feature(aa64_ids, s) && 2275 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { 2276 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2277 } else { 2278 syndrome = syn_uncategorized(); 2279 } 2280 gen_exception_insn(s, 0, EXCP_UDEF, syndrome); 2281 } 2282 2283 /* MRS - move from system register 2284 * MSR (register) - move to system register 2285 * SYS 2286 * SYSL 2287 * These are all essentially the same insn in 'read' and 'write' 2288 * versions, with varying op0 fields. 2289 */ 2290 static void handle_sys(DisasContext *s, bool isread, 2291 unsigned int op0, unsigned int op1, unsigned int op2, 2292 unsigned int crn, unsigned int crm, unsigned int rt) 2293 { 2294 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2295 crn, crm, op0, op1, op2); 2296 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 2297 bool need_exit_tb = false; 2298 bool nv_trap_to_el2 = false; 2299 bool nv_redirect_reg = false; 2300 bool skip_fp_access_checks = false; 2301 bool nv2_mem_redirect = false; 2302 TCGv_ptr tcg_ri = NULL; 2303 TCGv_i64 tcg_rt; 2304 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2305 2306 if (crn == 11 || crn == 15) { 2307 /* 2308 * Check for TIDCP trap, which must take precedence over 2309 * the UNDEF for "no such register" etc. 2310 */ 2311 switch (s->current_el) { 2312 case 0: 2313 if (dc_isar_feature(aa64_tidcp1, s)) { 2314 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome)); 2315 } 2316 break; 2317 case 1: 2318 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome)); 2319 break; 2320 } 2321 } 2322 2323 if (!ri) { 2324 /* Unknown register; this might be a guest error or a QEMU 2325 * unimplemented feature. 2326 */ 2327 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " 2328 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", 2329 isread ? "read" : "write", op0, op1, crn, crm, op2); 2330 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2331 return; 2332 } 2333 2334 if (s->nv2 && ri->nv2_redirect_offset) { 2335 /* 2336 * Some registers always redirect to memory; some only do so if 2337 * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in 2338 * pairs which share an offset; see the table in R_CSRPQ). 2339 */ 2340 if (ri->nv2_redirect_offset & NV2_REDIR_NV1) { 2341 nv2_mem_redirect = s->nv1; 2342 } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) { 2343 nv2_mem_redirect = !s->nv1; 2344 } else { 2345 nv2_mem_redirect = true; 2346 } 2347 } 2348 2349 /* Check access permissions */ 2350 if (!cp_access_ok(s->current_el, ri, isread)) { 2351 /* 2352 * FEAT_NV/NV2 handling does not do the usual FP access checks 2353 * for registers only accessible at EL2 (though it *does* do them 2354 * for registers accessible at EL1). 2355 */ 2356 skip_fp_access_checks = true; 2357 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) { 2358 /* 2359 * This is one of the few EL2 registers which should redirect 2360 * to the equivalent EL1 register. We do that after running 2361 * the EL2 register's accessfn. 2362 */ 2363 nv_redirect_reg = true; 2364 assert(!nv2_mem_redirect); 2365 } else if (nv2_mem_redirect) { 2366 /* 2367 * NV2 redirect-to-memory takes precedence over trap to EL2 or 2368 * UNDEF to EL1. 
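 * (Nothing is generated at this point; the redirected memory access
 * itself is emitted further down, once the remaining access checks
 * have been handled.)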
2369 */ 2370 } else if (s->nv && arm_cpreg_traps_in_nv(ri)) { 2371 /* 2372 * This register / instruction exists and is an EL2 register, so 2373 * we must trap to EL2 if accessed in nested virtualization EL1 2374 * instead of UNDEFing. We'll do that after the usual access checks. 2375 * (This makes a difference only for a couple of registers like 2376 * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority 2377 * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have 2378 * an accessfn which does nothing when called from EL1, because 2379 * the trap-to-EL3 controls which would apply to that register 2380 * at EL2 don't take priority over the FEAT_NV trap-to-EL2.) 2381 */ 2382 nv_trap_to_el2 = true; 2383 } else { 2384 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2385 return; 2386 } 2387 } 2388 2389 if (ri->accessfn || (ri->fgt && s->fgt_active)) { 2390 /* Emit code to perform further access permissions checks at 2391 * runtime; this may result in an exception. 2392 */ 2393 gen_a64_update_pc(s, 0); 2394 tcg_ri = tcg_temp_new_ptr(); 2395 gen_helper_access_check_cp_reg(tcg_ri, tcg_env, 2396 tcg_constant_i32(key), 2397 tcg_constant_i32(syndrome), 2398 tcg_constant_i32(isread)); 2399 } else if (ri->type & ARM_CP_RAISES_EXC) { 2400 /* 2401 * The readfn or writefn might raise an exception; 2402 * synchronize the CPU state in case it does. 2403 */ 2404 gen_a64_update_pc(s, 0); 2405 } 2406 2407 if (!skip_fp_access_checks) { 2408 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 2409 return; 2410 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 2411 return; 2412 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 2413 return; 2414 } 2415 } 2416 2417 if (nv_trap_to_el2) { 2418 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2419 return; 2420 } 2421 2422 if (nv_redirect_reg) { 2423 /* 2424 * FEAT_NV2 redirection of an EL2 register to an EL1 register. 2425 * Conveniently in all cases the encoding of the EL1 register is 2426 * identical to the EL2 register except that opc1 is 0. 2427 * Get the reginfo for the EL1 register to use for the actual access. 2428 * We don't use the EL1 register's access function, and 2429 * fine-grained-traps on EL1 also do not apply here. 2430 */ 2431 key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2432 crn, crm, op0, 0, op2); 2433 ri = get_arm_cp_reginfo(s->cp_regs, key); 2434 assert(ri); 2435 assert(cp_access_ok(s->current_el, ri, isread)); 2436 /* 2437 * We might not have done an update_pc earlier, so check we don't 2438 * need it. We could support this in future if necessary. 2439 */ 2440 assert(!(ri->type & ARM_CP_RAISES_EXC)); 2441 } 2442 2443 if (nv2_mem_redirect) { 2444 /* 2445 * This system register is being redirected into an EL2 memory access. 2446 * This means it is not an IO operation, doesn't change hflags, 2447 * and need not end the TB, because it has no side effects. 2448 * 2449 * The access is 64-bit single copy atomic, guaranteed aligned because 2450 * of the definition of VCNR_EL2. Its endianness depends on 2451 * SCTLR_EL2.EE, not on the data endianness of EL1. 2452 * It is done under either the EL2 translation regime or the EL2&0 2453 * translation regime, depending on HCR_EL2.E2H. It behaves as if 2454 * PSTATE.PAN is 0. 2455 */ 2456 TCGv_i64 ptr = tcg_temp_new_i64(); 2457 MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN; 2458 ARMMMUIdx armmemidx = s->nv2_mem_e20 ? 
ARMMMUIdx_E20_2 : ARMMMUIdx_E2; 2459 int memidx = arm_to_core_mmu_idx(armmemidx); 2460 uint32_t syn; 2461 2462 mop |= (s->nv2_mem_be ? MO_BE : MO_LE); 2463 2464 tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2)); 2465 tcg_gen_addi_i64(ptr, ptr, 2466 (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK)); 2467 tcg_rt = cpu_reg(s, rt); 2468 2469 syn = syn_data_abort_vncr(0, !isread, 0); 2470 disas_set_insn_syndrome(s, syn); 2471 if (isread) { 2472 tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop); 2473 } else { 2474 tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop); 2475 } 2476 return; 2477 } 2478 2479 /* Handle special cases first */ 2480 switch (ri->type & ARM_CP_SPECIAL_MASK) { 2481 case 0: 2482 break; 2483 case ARM_CP_NOP: 2484 return; 2485 case ARM_CP_NZCV: 2486 tcg_rt = cpu_reg(s, rt); 2487 if (isread) { 2488 gen_get_nzcv(tcg_rt); 2489 } else { 2490 gen_set_nzcv(tcg_rt); 2491 } 2492 return; 2493 case ARM_CP_CURRENTEL: 2494 { 2495 /* 2496 * Reads as current EL value from pstate, which is 2497 * guaranteed to be constant by the tb flags. 2498 * For nested virt we should report EL2. 2499 */ 2500 int el = s->nv ? 2 : s->current_el; 2501 tcg_rt = cpu_reg(s, rt); 2502 tcg_gen_movi_i64(tcg_rt, el << 2); 2503 return; 2504 } 2505 case ARM_CP_DC_ZVA: 2506 /* Writes clear the aligned block of memory which rt points into. */ 2507 if (s->mte_active[0]) { 2508 int desc = 0; 2509 2510 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 2511 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 2512 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 2513 2514 tcg_rt = tcg_temp_new_i64(); 2515 gen_helper_mte_check_zva(tcg_rt, tcg_env, 2516 tcg_constant_i32(desc), cpu_reg(s, rt)); 2517 } else { 2518 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 2519 } 2520 gen_helper_dc_zva(tcg_env, tcg_rt); 2521 return; 2522 case ARM_CP_DC_GVA: 2523 { 2524 TCGv_i64 clean_addr, tag; 2525 2526 /* 2527 * DC_GVA, like DC_ZVA, requires that we supply the original 2528 * pointer for an invalid page. Probe that address first. 2529 */ 2530 tcg_rt = cpu_reg(s, rt); 2531 clean_addr = clean_data_tbi(s, tcg_rt); 2532 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 2533 2534 if (s->ata[0]) { 2535 /* Extract the tag from the register to match STZGM. */ 2536 tag = tcg_temp_new_i64(); 2537 tcg_gen_shri_i64(tag, tcg_rt, 56); 2538 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2539 } 2540 } 2541 return; 2542 case ARM_CP_DC_GZVA: 2543 { 2544 TCGv_i64 clean_addr, tag; 2545 2546 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 2547 tcg_rt = cpu_reg(s, rt); 2548 clean_addr = clean_data_tbi(s, tcg_rt); 2549 gen_helper_dc_zva(tcg_env, clean_addr); 2550 2551 if (s->ata[0]) { 2552 /* Extract the tag from the register to match STZGM. 
*/ 2553 tag = tcg_temp_new_i64(); 2554 tcg_gen_shri_i64(tag, tcg_rt, 56); 2555 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2556 } 2557 } 2558 return; 2559 default: 2560 g_assert_not_reached(); 2561 } 2562 2563 if (ri->type & ARM_CP_IO) { 2564 /* I/O operations must end the TB here (whether read or write) */ 2565 need_exit_tb = translator_io_start(&s->base); 2566 } 2567 2568 tcg_rt = cpu_reg(s, rt); 2569 2570 if (isread) { 2571 if (ri->type & ARM_CP_CONST) { 2572 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2573 } else if (ri->readfn) { 2574 if (!tcg_ri) { 2575 tcg_ri = gen_lookup_cp_reg(key); 2576 } 2577 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri); 2578 } else { 2579 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset); 2580 } 2581 } else { 2582 if (ri->type & ARM_CP_CONST) { 2583 /* If not forbidden by access permissions, treat as WI */ 2584 return; 2585 } else if (ri->writefn) { 2586 if (!tcg_ri) { 2587 tcg_ri = gen_lookup_cp_reg(key); 2588 } 2589 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt); 2590 } else { 2591 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset); 2592 } 2593 } 2594 2595 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2596 /* 2597 * A write to any coprocessor register that ends a TB 2598 * must rebuild the hflags for the next TB. 2599 */ 2600 gen_rebuild_hflags(s); 2601 /* 2602 * We default to ending the TB on a coprocessor register write, 2603 * but allow this to be suppressed by the register definition 2604 * (usually only necessary to work around guest bugs). 2605 */ 2606 need_exit_tb = true; 2607 } 2608 if (need_exit_tb) { 2609 s->base.is_jmp = DISAS_UPDATE_EXIT; 2610 } 2611 } 2612 2613 static bool trans_SYS(DisasContext *s, arg_SYS *a) 2614 { 2615 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); 2616 return true; 2617 } 2618 2619 static bool trans_SVC(DisasContext *s, arg_i *a) 2620 { 2621 /* 2622 * For SVC, HVC and SMC we advance the single-step state 2623 * machine before taking the exception. This is architecturally 2624 * mandated, to ensure that single-stepping a system call 2625 * instruction works properly. 2626 */ 2627 uint32_t syndrome = syn_aa64_svc(a->imm); 2628 if (s->fgt_svc) { 2629 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2630 return true; 2631 } 2632 gen_ss_advance(s); 2633 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2634 return true; 2635 } 2636 2637 static bool trans_HVC(DisasContext *s, arg_i *a) 2638 { 2639 int target_el = s->current_el == 3 ? 3 : 2; 2640 2641 if (s->current_el == 0) { 2642 unallocated_encoding(s); 2643 return true; 2644 } 2645 /* 2646 * The pre HVC helper handles cases when HVC gets trapped 2647 * as an undefined insn by runtime configuration. 
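 * (The configuration in question is assumed to be controls such as
 * SCR_EL3.HCE and HCR_EL2.HCD; when HVC is disabled the helper raises
 * the UNDEF itself, so nothing further is needed here.)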
2648 */ 2649 gen_a64_update_pc(s, 0); 2650 gen_helper_pre_hvc(tcg_env); 2651 /* Architecture requires ss advance before we do the actual work */ 2652 gen_ss_advance(s); 2653 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el); 2654 return true; 2655 } 2656 2657 static bool trans_SMC(DisasContext *s, arg_i *a) 2658 { 2659 if (s->current_el == 0) { 2660 unallocated_encoding(s); 2661 return true; 2662 } 2663 gen_a64_update_pc(s, 0); 2664 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm))); 2665 /* Architecture requires ss advance before we do the actual work */ 2666 gen_ss_advance(s); 2667 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); 2668 return true; 2669 } 2670 2671 static bool trans_BRK(DisasContext *s, arg_i *a) 2672 { 2673 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); 2674 return true; 2675 } 2676 2677 static bool trans_HLT(DisasContext *s, arg_i *a) 2678 { 2679 /* 2680 * HLT. This has two purposes. 2681 * First, architecturally it is an external halting debug instruction. 2682 * Since QEMU doesn't implement external debug, we treat it as the 2683 * architecture requires when halting debug is disabled: it UNDEFs. 2684 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2685 */ 2686 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { 2687 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2688 } else { 2689 unallocated_encoding(s); 2690 } 2691 return true; 2692 } 2693 2694 /* 2695 * Load/Store exclusive instructions are implemented by remembering 2696 * the value/address loaded, and seeing if these are the same 2697 * when the store is performed. This is not actually the architecturally 2698 * mandated semantics, but it works for typical guest code sequences 2699 * and avoids having to monitor regular stores. 2700 * 2701 * The store exclusive uses the atomic cmpxchg primitives to avoid 2702 * races in multi-threaded linux-user and when MTTCG softmmu is 2703 * enabled.
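 *
 * Very roughly, as a sketch that ignores pairs, sizes and endianness:
 *   LDXR Rt, [Rn]:     exclusive_addr = addr; exclusive_val = [addr];
 *                      Rt = exclusive_val;
 *   STXR Rd, Rt, [Rn]: if addr matches exclusive_addr, cmpxchg the old
 *                      exclusive_val against Rt at that address; Rd = 0
 *                      on success, 1 otherwise; then clear exclusive_addr.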
2704 */ 2705 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, 2706 int size, bool is_pair) 2707 { 2708 int idx = get_mem_index(s); 2709 TCGv_i64 dirty_addr, clean_addr; 2710 MemOp memop = check_atomic_align(s, rn, size + is_pair); 2711 2712 s->is_ldex = true; 2713 dirty_addr = cpu_reg_sp(s, rn); 2714 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); 2715 2716 g_assert(size <= 3); 2717 if (is_pair) { 2718 g_assert(size >= 2); 2719 if (size == 2) { 2720 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2721 if (s->be_data == MO_LE) { 2722 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2723 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2724 } else { 2725 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2726 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2727 } 2728 } else { 2729 TCGv_i128 t16 = tcg_temp_new_i128(); 2730 2731 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); 2732 2733 if (s->be_data == MO_LE) { 2734 tcg_gen_extr_i128_i64(cpu_exclusive_val, 2735 cpu_exclusive_high, t16); 2736 } else { 2737 tcg_gen_extr_i128_i64(cpu_exclusive_high, 2738 cpu_exclusive_val, t16); 2739 } 2740 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2741 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2742 } 2743 } else { 2744 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2745 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2746 } 2747 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); 2748 } 2749 2750 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2751 int rn, int size, int is_pair) 2752 { 2753 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2754 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2755 * [addr] = {Rt}; 2756 * if (is_pair) { 2757 * [addr + datasize] = {Rt2}; 2758 * } 2759 * {Rd} = 0; 2760 * } else { 2761 * {Rd} = 1; 2762 * } 2763 * env->exclusive_addr = -1; 2764 */ 2765 TCGLabel *fail_label = gen_new_label(); 2766 TCGLabel *done_label = gen_new_label(); 2767 TCGv_i64 tmp, clean_addr; 2768 MemOp memop; 2769 2770 /* 2771 * FIXME: We are out of spec here. We have recorded only the address 2772 * from load_exclusive, not the entire range, and we assume that the 2773 * size of the access on both sides match. The architecture allows the 2774 * store to be smaller than the load, so long as the stored bytes are 2775 * within the range recorded by the load. 2776 */ 2777 2778 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */ 2779 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); 2780 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); 2781 2782 /* 2783 * The write, and any associated faults, only happen if the virtual 2784 * and physical addresses pass the exclusive monitor check. These 2785 * faults are exceedingly unlikely, because normally the guest uses 2786 * the exact same address register for the load_exclusive, and we 2787 * would have recognized these faults there. 2788 * 2789 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an 2790 * unaligned 4-byte write within the range of an aligned 8-byte load. 2791 * With LSE2, the store would need to cross a 16-byte boundary when the 2792 * load did not, which would mean the store is outside the range 2793 * recorded for the monitor, which would have failed a corrected monitor 2794 * check above. 
For now, we assume no size change and retain the 2795 * MO_ALIGN to let tcg know what we checked in the load_exclusive. 2796 * 2797 * It is possible to trigger an MTE fault, by performing the load with 2798 * a virtual address with a valid tag and performing the store with the 2799 * same virtual address and a different invalid tag. 2800 */ 2801 memop = size + is_pair; 2802 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { 2803 memop |= MO_ALIGN; 2804 } 2805 memop = finalize_memop(s, memop); 2806 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2807 2808 tmp = tcg_temp_new_i64(); 2809 if (is_pair) { 2810 if (size == 2) { 2811 if (s->be_data == MO_LE) { 2812 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2813 } else { 2814 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2815 } 2816 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2817 cpu_exclusive_val, tmp, 2818 get_mem_index(s), memop); 2819 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2820 } else { 2821 TCGv_i128 t16 = tcg_temp_new_i128(); 2822 TCGv_i128 c16 = tcg_temp_new_i128(); 2823 TCGv_i64 a, b; 2824 2825 if (s->be_data == MO_LE) { 2826 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2827 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2828 cpu_exclusive_high); 2829 } else { 2830 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 2831 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 2832 cpu_exclusive_val); 2833 } 2834 2835 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 2836 get_mem_index(s), memop); 2837 2838 a = tcg_temp_new_i64(); 2839 b = tcg_temp_new_i64(); 2840 if (s->be_data == MO_LE) { 2841 tcg_gen_extr_i128_i64(a, b, t16); 2842 } else { 2843 tcg_gen_extr_i128_i64(b, a, t16); 2844 } 2845 2846 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 2847 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 2848 tcg_gen_or_i64(tmp, a, b); 2849 2850 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 2851 } 2852 } else { 2853 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 2854 cpu_reg(s, rt), get_mem_index(s), memop); 2855 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2856 } 2857 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 2858 tcg_gen_br(done_label); 2859 2860 gen_set_label(fail_label); 2861 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 2862 gen_set_label(done_label); 2863 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2864 } 2865 2866 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 2867 int rn, int size) 2868 { 2869 TCGv_i64 tcg_rs = cpu_reg(s, rs); 2870 TCGv_i64 tcg_rt = cpu_reg(s, rt); 2871 int memidx = get_mem_index(s); 2872 TCGv_i64 clean_addr; 2873 MemOp memop; 2874 2875 if (rn == 31) { 2876 gen_check_sp_alignment(s); 2877 } 2878 memop = check_atomic_align(s, rn, size); 2879 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2880 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, 2881 memidx, memop); 2882 } 2883 2884 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 2885 int rn, int size) 2886 { 2887 TCGv_i64 s1 = cpu_reg(s, rs); 2888 TCGv_i64 s2 = cpu_reg(s, rs + 1); 2889 TCGv_i64 t1 = cpu_reg(s, rt); 2890 TCGv_i64 t2 = cpu_reg(s, rt + 1); 2891 TCGv_i64 clean_addr; 2892 int memidx = get_mem_index(s); 2893 MemOp memop; 2894 2895 if (rn == 31) { 2896 gen_check_sp_alignment(s); 2897 } 2898 2899 /* This is a single atomic access, despite the "pair". 
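 * (Hence check_atomic_align below is passed size + 1: the two registers
 * are concatenated and compared/swapped as one 2*size-wide value.)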
*/ 2900 memop = check_atomic_align(s, rn, size + 1); 2901 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2902 2903 if (size == 2) { 2904 TCGv_i64 cmp = tcg_temp_new_i64(); 2905 TCGv_i64 val = tcg_temp_new_i64(); 2906 2907 if (s->be_data == MO_LE) { 2908 tcg_gen_concat32_i64(val, t1, t2); 2909 tcg_gen_concat32_i64(cmp, s1, s2); 2910 } else { 2911 tcg_gen_concat32_i64(val, t2, t1); 2912 tcg_gen_concat32_i64(cmp, s2, s1); 2913 } 2914 2915 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); 2916 2917 if (s->be_data == MO_LE) { 2918 tcg_gen_extr32_i64(s1, s2, cmp); 2919 } else { 2920 tcg_gen_extr32_i64(s2, s1, cmp); 2921 } 2922 } else { 2923 TCGv_i128 cmp = tcg_temp_new_i128(); 2924 TCGv_i128 val = tcg_temp_new_i128(); 2925 2926 if (s->be_data == MO_LE) { 2927 tcg_gen_concat_i64_i128(val, t1, t2); 2928 tcg_gen_concat_i64_i128(cmp, s1, s2); 2929 } else { 2930 tcg_gen_concat_i64_i128(val, t2, t1); 2931 tcg_gen_concat_i64_i128(cmp, s2, s1); 2932 } 2933 2934 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); 2935 2936 if (s->be_data == MO_LE) { 2937 tcg_gen_extr_i128_i64(s1, s2, cmp); 2938 } else { 2939 tcg_gen_extr_i128_i64(s2, s1, cmp); 2940 } 2941 } 2942 } 2943 2944 /* 2945 * Compute the ISS.SF bit for syndrome information if an exception 2946 * is taken on a load or store. This indicates whether the instruction 2947 * is accessing a 32-bit or 64-bit register. This logic is derived 2948 * from the ARMv8 specs for LDR (Shared decode for all encodings). 2949 */ 2950 static bool ldst_iss_sf(int size, bool sign, bool ext) 2951 { 2952 2953 if (sign) { 2954 /* 2955 * Signed loads are 64 bit results if we are not going to 2956 * do a zero-extend from 32 to 64 after the load. 2957 * (For a store, sign and ext are always false.) 2958 */ 2959 return !ext; 2960 } else { 2961 /* Unsigned loads/stores work at the specified size */ 2962 return size == MO_64; 2963 } 2964 } 2965 2966 static bool trans_STXR(DisasContext *s, arg_stxr *a) 2967 { 2968 if (a->rn == 31) { 2969 gen_check_sp_alignment(s); 2970 } 2971 if (a->lasr) { 2972 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2973 } 2974 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); 2975 return true; 2976 } 2977 2978 static bool trans_LDXR(DisasContext *s, arg_stxr *a) 2979 { 2980 if (a->rn == 31) { 2981 gen_check_sp_alignment(s); 2982 } 2983 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); 2984 if (a->lasr) { 2985 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2986 } 2987 return true; 2988 } 2989 2990 static bool trans_STLR(DisasContext *s, arg_stlr *a) 2991 { 2992 TCGv_i64 clean_addr; 2993 MemOp memop; 2994 bool iss_sf = ldst_iss_sf(a->sz, false, false); 2995 2996 /* 2997 * StoreLORelease is the same as Store-Release for QEMU, but 2998 * needs the feature-test. 2999 */ 3000 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3001 return false; 3002 } 3003 /* Generate ISS for non-exclusive accesses including LASR. 
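 * (a->lasr is passed through to do_gpr_st below so that the ISS
 * syndrome reflects the acquire/release form of the store.)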
*/ 3004 if (a->rn == 31) { 3005 gen_check_sp_alignment(s); 3006 } 3007 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3008 memop = check_ordered_align(s, a->rn, 0, true, a->sz); 3009 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3010 true, a->rn != 31, memop); 3011 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, 3012 iss_sf, a->lasr); 3013 return true; 3014 } 3015 3016 static bool trans_LDAR(DisasContext *s, arg_stlr *a) 3017 { 3018 TCGv_i64 clean_addr; 3019 MemOp memop; 3020 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3021 3022 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 3023 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3024 return false; 3025 } 3026 /* Generate ISS for non-exclusive accesses including LASR. */ 3027 if (a->rn == 31) { 3028 gen_check_sp_alignment(s); 3029 } 3030 memop = check_ordered_align(s, a->rn, 0, false, a->sz); 3031 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3032 false, a->rn != 31, memop); 3033 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, 3034 a->rt, iss_sf, a->lasr); 3035 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3036 return true; 3037 } 3038 3039 static bool trans_STXP(DisasContext *s, arg_stxr *a) 3040 { 3041 if (a->rn == 31) { 3042 gen_check_sp_alignment(s); 3043 } 3044 if (a->lasr) { 3045 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3046 } 3047 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); 3048 return true; 3049 } 3050 3051 static bool trans_LDXP(DisasContext *s, arg_stxr *a) 3052 { 3053 if (a->rn == 31) { 3054 gen_check_sp_alignment(s); 3055 } 3056 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); 3057 if (a->lasr) { 3058 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3059 } 3060 return true; 3061 } 3062 3063 static bool trans_CASP(DisasContext *s, arg_CASP *a) 3064 { 3065 if (!dc_isar_feature(aa64_atomics, s)) { 3066 return false; 3067 } 3068 if (((a->rt | a->rs) & 1) != 0) { 3069 return false; 3070 } 3071 3072 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); 3073 return true; 3074 } 3075 3076 static bool trans_CAS(DisasContext *s, arg_CAS *a) 3077 { 3078 if (!dc_isar_feature(aa64_atomics, s)) { 3079 return false; 3080 } 3081 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); 3082 return true; 3083 } 3084 3085 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) 3086 { 3087 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); 3088 TCGv_i64 tcg_rt = cpu_reg(s, a->rt); 3089 TCGv_i64 clean_addr = tcg_temp_new_i64(); 3090 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3091 3092 gen_pc_plus_diff(s, clean_addr, a->imm); 3093 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3094 false, true, a->rt, iss_sf, false); 3095 return true; 3096 } 3097 3098 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) 3099 { 3100 /* Load register (literal), vector version */ 3101 TCGv_i64 clean_addr; 3102 MemOp memop; 3103 3104 if (!fp_access_check(s)) { 3105 return true; 3106 } 3107 memop = finalize_memop_asimd(s, a->sz); 3108 clean_addr = tcg_temp_new_i64(); 3109 gen_pc_plus_diff(s, clean_addr, a->imm); 3110 do_fp_ld(s, a->rt, clean_addr, memop); 3111 return true; 3112 } 3113 3114 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, 3115 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3116 uint64_t offset, bool is_store, MemOp mop) 3117 { 3118 if (a->rn == 31) { 3119 gen_check_sp_alignment(s); 3120 } 3121 3122 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3123 if (!a->p) { 3124 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3125 } 3126 3127 *clean_addr = gen_mte_checkN(s, 
*dirty_addr, is_store, 3128 (a->w || a->rn != 31), 2 << a->sz, mop); 3129 } 3130 3131 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, 3132 TCGv_i64 dirty_addr, uint64_t offset) 3133 { 3134 if (a->w) { 3135 if (a->p) { 3136 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3137 } 3138 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3139 } 3140 } 3141 3142 static bool trans_STP(DisasContext *s, arg_ldstpair *a) 3143 { 3144 uint64_t offset = a->imm << a->sz; 3145 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3146 MemOp mop = finalize_memop(s, a->sz); 3147 3148 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3149 tcg_rt = cpu_reg(s, a->rt); 3150 tcg_rt2 = cpu_reg(s, a->rt2); 3151 /* 3152 * We built mop above for the single logical access -- rebuild it 3153 * now for the paired operation. 3154 * 3155 * With LSE2, non-sign-extending pairs are treated atomically if 3156 * aligned, and if unaligned one of the pair will be completely 3157 * within a 16-byte block and that element will be atomic. 3158 * Otherwise each element is separately atomic. 3159 * In all cases, issue one operation with the correct atomicity. 3160 */ 3161 mop = a->sz + 1; 3162 if (s->align_mem) { 3163 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3164 } 3165 mop = finalize_memop_pair(s, mop); 3166 if (a->sz == 2) { 3167 TCGv_i64 tmp = tcg_temp_new_i64(); 3168 3169 if (s->be_data == MO_LE) { 3170 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); 3171 } else { 3172 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); 3173 } 3174 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); 3175 } else { 3176 TCGv_i128 tmp = tcg_temp_new_i128(); 3177 3178 if (s->be_data == MO_LE) { 3179 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3180 } else { 3181 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3182 } 3183 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3184 } 3185 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3186 return true; 3187 } 3188 3189 static bool trans_LDP(DisasContext *s, arg_ldstpair *a) 3190 { 3191 uint64_t offset = a->imm << a->sz; 3192 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3193 MemOp mop = finalize_memop(s, a->sz); 3194 3195 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3196 tcg_rt = cpu_reg(s, a->rt); 3197 tcg_rt2 = cpu_reg(s, a->rt2); 3198 3199 /* 3200 * We built mop above for the single logical access -- rebuild it 3201 * now for the paired operation. 3202 * 3203 * With LSE2, non-sign-extending pairs are treated atomically if 3204 * aligned, and if unaligned one of the pair will be completely 3205 * within a 16-byte block and that element will be atomic. 3206 * Otherwise each element is separately atomic. 3207 * In all cases, issue one operation with the correct atomicity. 3208 * 3209 * This treats sign-extending loads like zero-extending loads, 3210 * since that reuses the most code below. 3211 */ 3212 mop = a->sz + 1; 3213 if (s->align_mem) { 3214 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3215 } 3216 mop = finalize_memop_pair(s, mop); 3217 if (a->sz == 2) { 3218 int o2 = s->be_data == MO_LE ? 
32 : 0; 3219 int o1 = o2 ^ 32; 3220 3221 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); 3222 if (a->sign) { 3223 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); 3224 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); 3225 } else { 3226 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); 3227 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); 3228 } 3229 } else { 3230 TCGv_i128 tmp = tcg_temp_new_i128(); 3231 3232 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); 3233 if (s->be_data == MO_LE) { 3234 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); 3235 } else { 3236 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); 3237 } 3238 } 3239 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3240 return true; 3241 } 3242 3243 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) 3244 { 3245 uint64_t offset = a->imm << a->sz; 3246 TCGv_i64 clean_addr, dirty_addr; 3247 MemOp mop; 3248 3249 if (!fp_access_check(s)) { 3250 return true; 3251 } 3252 3253 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3254 mop = finalize_memop_asimd(s, a->sz); 3255 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3256 do_fp_st(s, a->rt, clean_addr, mop); 3257 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3258 do_fp_st(s, a->rt2, clean_addr, mop); 3259 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3260 return true; 3261 } 3262 3263 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) 3264 { 3265 uint64_t offset = a->imm << a->sz; 3266 TCGv_i64 clean_addr, dirty_addr; 3267 MemOp mop; 3268 3269 if (!fp_access_check(s)) { 3270 return true; 3271 } 3272 3273 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3274 mop = finalize_memop_asimd(s, a->sz); 3275 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3276 do_fp_ld(s, a->rt, clean_addr, mop); 3277 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3278 do_fp_ld(s, a->rt2, clean_addr, mop); 3279 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3280 return true; 3281 } 3282 3283 static bool trans_STGP(DisasContext *s, arg_ldstpair *a) 3284 { 3285 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3286 uint64_t offset = a->imm << LOG2_TAG_GRANULE; 3287 MemOp mop; 3288 TCGv_i128 tmp; 3289 3290 /* STGP only comes in one size. */ 3291 tcg_debug_assert(a->sz == MO_64); 3292 3293 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3294 return false; 3295 } 3296 3297 if (a->rn == 31) { 3298 gen_check_sp_alignment(s); 3299 } 3300 3301 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3302 if (!a->p) { 3303 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3304 } 3305 3306 clean_addr = clean_data_tbi(s, dirty_addr); 3307 tcg_rt = cpu_reg(s, a->rt); 3308 tcg_rt2 = cpu_reg(s, a->rt2); 3309 3310 /* 3311 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, 3312 * and one tag operation. We implement it as one single aligned 16-byte 3313 * memory operation for convenience. Note that the alignment ensures 3314 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. 3315 */ 3316 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); 3317 3318 tmp = tcg_temp_new_i128(); 3319 if (s->be_data == MO_LE) { 3320 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3321 } else { 3322 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3323 } 3324 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3325 3326 /* Perform the tag store, if tag access enabled. 
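 * (If tag access is not enabled, s->ata[0] is false and the tag write is
 * simply skipped; only the data store above takes effect.)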
*/ 3327 if (s->ata[0]) { 3328 if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3329 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr); 3330 } else { 3331 gen_helper_stg(tcg_env, dirty_addr, dirty_addr); 3332 } 3333 } 3334 3335 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3336 return true; 3337 } 3338 3339 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, 3340 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3341 uint64_t offset, bool is_store, MemOp mop) 3342 { 3343 int memidx; 3344 3345 if (a->rn == 31) { 3346 gen_check_sp_alignment(s); 3347 } 3348 3349 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3350 if (!a->p) { 3351 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3352 } 3353 memidx = get_a64_user_mem_index(s, a->unpriv); 3354 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, 3355 a->w || a->rn != 31, 3356 mop, a->unpriv, memidx); 3357 } 3358 3359 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, 3360 TCGv_i64 dirty_addr, uint64_t offset) 3361 { 3362 if (a->w) { 3363 if (a->p) { 3364 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3365 } 3366 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3367 } 3368 } 3369 3370 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) 3371 { 3372 bool iss_sf, iss_valid = !a->w; 3373 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3374 int memidx = get_a64_user_mem_index(s, a->unpriv); 3375 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3376 3377 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3378 3379 tcg_rt = cpu_reg(s, a->rt); 3380 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3381 3382 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, 3383 iss_valid, a->rt, iss_sf, false); 3384 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3385 return true; 3386 } 3387 3388 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) 3389 { 3390 bool iss_sf, iss_valid = !a->w; 3391 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3392 int memidx = get_a64_user_mem_index(s, a->unpriv); 3393 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3394 3395 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3396 3397 tcg_rt = cpu_reg(s, a->rt); 3398 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3399 3400 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, 3401 a->ext, memidx, iss_valid, a->rt, iss_sf, false); 3402 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3403 return true; 3404 } 3405 3406 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) 3407 { 3408 TCGv_i64 clean_addr, dirty_addr; 3409 MemOp mop; 3410 3411 if (!fp_access_check(s)) { 3412 return true; 3413 } 3414 mop = finalize_memop_asimd(s, a->sz); 3415 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3416 do_fp_st(s, a->rt, clean_addr, mop); 3417 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3418 return true; 3419 } 3420 3421 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) 3422 { 3423 TCGv_i64 clean_addr, dirty_addr; 3424 MemOp mop; 3425 3426 if (!fp_access_check(s)) { 3427 return true; 3428 } 3429 mop = finalize_memop_asimd(s, a->sz); 3430 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3431 do_fp_ld(s, a->rt, clean_addr, mop); 3432 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3433 return true; 3434 } 3435 3436 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, 3437 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3438 bool is_store, MemOp memop) 3439 { 3440 TCGv_i64 tcg_rm; 3441 3442 if (a->rn == 31) { 3443 
gen_check_sp_alignment(s); 3444 } 3445 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3446 3447 tcg_rm = read_cpu_reg(s, a->rm, 1); 3448 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); 3449 3450 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); 3451 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); 3452 } 3453 3454 static bool trans_LDR(DisasContext *s, arg_ldst *a) 3455 { 3456 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3457 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3458 MemOp memop; 3459 3460 if (extract32(a->opt, 1, 1) == 0) { 3461 return false; 3462 } 3463 3464 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3465 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3466 tcg_rt = cpu_reg(s, a->rt); 3467 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3468 a->ext, true, a->rt, iss_sf, false); 3469 return true; 3470 } 3471 3472 static bool trans_STR(DisasContext *s, arg_ldst *a) 3473 { 3474 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3475 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3476 MemOp memop; 3477 3478 if (extract32(a->opt, 1, 1) == 0) { 3479 return false; 3480 } 3481 3482 memop = finalize_memop(s, a->sz); 3483 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3484 tcg_rt = cpu_reg(s, a->rt); 3485 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); 3486 return true; 3487 } 3488 3489 static bool trans_LDR_v(DisasContext *s, arg_ldst *a) 3490 { 3491 TCGv_i64 clean_addr, dirty_addr; 3492 MemOp memop; 3493 3494 if (extract32(a->opt, 1, 1) == 0) { 3495 return false; 3496 } 3497 3498 if (!fp_access_check(s)) { 3499 return true; 3500 } 3501 3502 memop = finalize_memop_asimd(s, a->sz); 3503 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3504 do_fp_ld(s, a->rt, clean_addr, memop); 3505 return true; 3506 } 3507 3508 static bool trans_STR_v(DisasContext *s, arg_ldst *a) 3509 { 3510 TCGv_i64 clean_addr, dirty_addr; 3511 MemOp memop; 3512 3513 if (extract32(a->opt, 1, 1) == 0) { 3514 return false; 3515 } 3516 3517 if (!fp_access_check(s)) { 3518 return true; 3519 } 3520 3521 memop = finalize_memop_asimd(s, a->sz); 3522 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3523 do_fp_st(s, a->rt, clean_addr, memop); 3524 return true; 3525 } 3526 3527 3528 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, 3529 int sign, bool invert) 3530 { 3531 MemOp mop = a->sz | sign; 3532 TCGv_i64 clean_addr, tcg_rs, tcg_rt; 3533 3534 if (a->rn == 31) { 3535 gen_check_sp_alignment(s); 3536 } 3537 mop = check_atomic_align(s, a->rn, mop); 3538 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3539 a->rn != 31, mop); 3540 tcg_rs = read_cpu_reg(s, a->rs, true); 3541 tcg_rt = cpu_reg(s, a->rt); 3542 if (invert) { 3543 tcg_gen_not_i64(tcg_rs, tcg_rs); 3544 } 3545 /* 3546 * The tcg atomic primitives are all full barriers. Therefore we 3547 * can ignore the Acquire and Release bits of this instruction. 
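 * (They also provide the atomicity itself, so the exclusive-monitor
 * machinery used by LDXR/STXR is not involved for these instructions.)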
3548 */ 3549 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3550 3551 if (mop & MO_SIGN) { 3552 switch (a->sz) { 3553 case MO_8: 3554 tcg_gen_ext8u_i64(tcg_rt, tcg_rt); 3555 break; 3556 case MO_16: 3557 tcg_gen_ext16u_i64(tcg_rt, tcg_rt); 3558 break; 3559 case MO_32: 3560 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3561 break; 3562 case MO_64: 3563 break; 3564 default: 3565 g_assert_not_reached(); 3566 } 3567 } 3568 return true; 3569 } 3570 3571 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) 3572 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) 3573 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) 3574 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) 3575 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) 3576 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) 3577 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 3578 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) 3579 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) 3580 3581 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) 3582 { 3583 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3584 TCGv_i64 clean_addr; 3585 MemOp mop; 3586 3587 if (!dc_isar_feature(aa64_atomics, s) || 3588 !dc_isar_feature(aa64_rcpc_8_3, s)) { 3589 return false; 3590 } 3591 if (a->rn == 31) { 3592 gen_check_sp_alignment(s); 3593 } 3594 mop = check_ordered_align(s, a->rn, 0, false, a->sz); 3595 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3596 a->rn != 31, mop); 3597 /* 3598 * LDAPR* are a special case because they are a simple load, not a 3599 * fetch-and-do-something op. 3600 * The architectural consistency requirements here are weaker than 3601 * full load-acquire (we only need "load-acquire processor consistent"), 3602 * but we choose to implement them as full LDAQ. 3603 */ 3604 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, 3605 true, a->rt, iss_sf, true); 3606 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3607 return true; 3608 } 3609 3610 static bool trans_LDRA(DisasContext *s, arg_LDRA *a) 3611 { 3612 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3613 MemOp memop; 3614 3615 /* Load with pointer authentication */ 3616 if (!dc_isar_feature(aa64_pauth, s)) { 3617 return false; 3618 } 3619 3620 if (a->rn == 31) { 3621 gen_check_sp_alignment(s); 3622 } 3623 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3624 3625 if (s->pauth_active) { 3626 if (!a->m) { 3627 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr, 3628 tcg_constant_i64(0)); 3629 } else { 3630 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr, 3631 tcg_constant_i64(0)); 3632 } 3633 } 3634 3635 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3636 3637 memop = finalize_memop(s, MO_64); 3638 3639 /* Note that "clean" and "dirty" here refer to TBI not PAC. 
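 * (Any pointer authentication has already been handled by the
 * autda/autdb helpers above; gen_mte_check1 below is only concerned
 * with the TBI/MTE view of the address.)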
*/ 3640 clean_addr = gen_mte_check1(s, dirty_addr, false, 3641 a->w || a->rn != 31, memop); 3642 3643 tcg_rt = cpu_reg(s, a->rt); 3644 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3645 /* extend */ false, /* iss_valid */ !a->w, 3646 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); 3647 3648 if (a->w) { 3649 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3650 } 3651 return true; 3652 } 3653 3654 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3655 { 3656 TCGv_i64 clean_addr, dirty_addr; 3657 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); 3658 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3659 3660 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3661 return false; 3662 } 3663 3664 if (a->rn == 31) { 3665 gen_check_sp_alignment(s); 3666 } 3667 3668 mop = check_ordered_align(s, a->rn, a->imm, false, mop); 3669 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3670 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3671 clean_addr = clean_data_tbi(s, dirty_addr); 3672 3673 /* 3674 * Load-AcquirePC semantics; we implement as the slightly more 3675 * restrictive Load-Acquire. 3676 */ 3677 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, 3678 a->rt, iss_sf, true); 3679 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3680 return true; 3681 } 3682 3683 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3684 { 3685 TCGv_i64 clean_addr, dirty_addr; 3686 MemOp mop = a->sz; 3687 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3688 3689 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3690 return false; 3691 } 3692 3693 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3694 3695 if (a->rn == 31) { 3696 gen_check_sp_alignment(s); 3697 } 3698 3699 mop = check_ordered_align(s, a->rn, a->imm, true, mop); 3700 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3701 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3702 clean_addr = clean_data_tbi(s, dirty_addr); 3703 3704 /* Store-Release semantics */ 3705 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3706 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); 3707 return true; 3708 } 3709 3710 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) 3711 { 3712 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3713 MemOp endian, align, mop; 3714 3715 int total; /* total bytes */ 3716 int elements; /* elements per vector */ 3717 int r; 3718 int size = a->sz; 3719 3720 if (!a->p && a->rm != 0) { 3721 /* For non-postindexed accesses the Rm field must be 0 */ 3722 return false; 3723 } 3724 if (size == 3 && !a->q && a->selem != 1) { 3725 return false; 3726 } 3727 if (!fp_access_check(s)) { 3728 return true; 3729 } 3730 3731 if (a->rn == 31) { 3732 gen_check_sp_alignment(s); 3733 } 3734 3735 /* For our purposes, bytes are always little-endian. */ 3736 endian = s->be_data; 3737 if (size == 0) { 3738 endian = MO_LE; 3739 } 3740 3741 total = a->rpt * a->selem * (a->q ? 16 : 8); 3742 tcg_rn = cpu_reg_sp(s, a->rn); 3743 3744 /* 3745 * Issue the MTE check vs the logical repeat count, before we 3746 * promote consecutive little-endian elements below. 3747 */ 3748 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, 3749 finalize_memop_asimd(s, size)); 3750 3751 /* 3752 * Consecutive little-endian elements from a single register 3753 * can be promoted to a larger little-endian operation. 3754 */ 3755 align = MO_ALIGN; 3756 if (a->selem == 1 && endian == MO_LE) { 3757 align = pow2_align(size); 3758 size = 3; 3759 } 3760 if (!s->align_mem) { 3761 align = 0; 3762 } 3763 mop = endian | size | align; 3764 3765 elements = (a->q ? 
16 : 8) >> size; 3766 tcg_ebytes = tcg_constant_i64(1 << size); 3767 for (r = 0; r < a->rpt; r++) { 3768 int e; 3769 for (e = 0; e < elements; e++) { 3770 int xs; 3771 for (xs = 0; xs < a->selem; xs++) { 3772 int tt = (a->rt + r + xs) % 32; 3773 do_vec_ld(s, tt, e, clean_addr, mop); 3774 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3775 } 3776 } 3777 } 3778 3779 /* 3780 * For non-quad operations, setting a slice of the low 64 bits of 3781 * the register clears the high 64 bits (in the ARM ARM pseudocode 3782 * this is implicit in the fact that 'rval' is a 64 bit wide 3783 * variable). For quad operations, we might still need to zero 3784 * the high bits of SVE. 3785 */ 3786 for (r = 0; r < a->rpt * a->selem; r++) { 3787 int tt = (a->rt + r) % 32; 3788 clear_vec_high(s, a->q, tt); 3789 } 3790 3791 if (a->p) { 3792 if (a->rm == 31) { 3793 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3794 } else { 3795 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3796 } 3797 } 3798 return true; 3799 } 3800 3801 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) 3802 { 3803 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3804 MemOp endian, align, mop; 3805 3806 int total; /* total bytes */ 3807 int elements; /* elements per vector */ 3808 int r; 3809 int size = a->sz; 3810 3811 if (!a->p && a->rm != 0) { 3812 /* For non-postindexed accesses the Rm field must be 0 */ 3813 return false; 3814 } 3815 if (size == 3 && !a->q && a->selem != 1) { 3816 return false; 3817 } 3818 if (!fp_access_check(s)) { 3819 return true; 3820 } 3821 3822 if (a->rn == 31) { 3823 gen_check_sp_alignment(s); 3824 } 3825 3826 /* For our purposes, bytes are always little-endian. */ 3827 endian = s->be_data; 3828 if (size == 0) { 3829 endian = MO_LE; 3830 } 3831 3832 total = a->rpt * a->selem * (a->q ? 16 : 8); 3833 tcg_rn = cpu_reg_sp(s, a->rn); 3834 3835 /* 3836 * Issue the MTE check vs the logical repeat count, before we 3837 * promote consecutive little-endian elements below. 3838 */ 3839 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, 3840 finalize_memop_asimd(s, size)); 3841 3842 /* 3843 * Consecutive little-endian elements from a single register 3844 * can be promoted to a larger little-endian operation. 3845 */ 3846 align = MO_ALIGN; 3847 if (a->selem == 1 && endian == MO_LE) { 3848 align = pow2_align(size); 3849 size = 3; 3850 } 3851 if (!s->align_mem) { 3852 align = 0; 3853 } 3854 mop = endian | size | align; 3855 3856 elements = (a->q ? 
16 : 8) >> size; 3857 tcg_ebytes = tcg_constant_i64(1 << size); 3858 for (r = 0; r < a->rpt; r++) { 3859 int e; 3860 for (e = 0; e < elements; e++) { 3861 int xs; 3862 for (xs = 0; xs < a->selem; xs++) { 3863 int tt = (a->rt + r + xs) % 32; 3864 do_vec_st(s, tt, e, clean_addr, mop); 3865 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3866 } 3867 } 3868 } 3869 3870 if (a->p) { 3871 if (a->rm == 31) { 3872 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3873 } else { 3874 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3875 } 3876 } 3877 return true; 3878 } 3879 3880 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) 3881 { 3882 int xs, total, rt; 3883 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3884 MemOp mop; 3885 3886 if (!a->p && a->rm != 0) { 3887 return false; 3888 } 3889 if (!fp_access_check(s)) { 3890 return true; 3891 } 3892 3893 if (a->rn == 31) { 3894 gen_check_sp_alignment(s); 3895 } 3896 3897 total = a->selem << a->scale; 3898 tcg_rn = cpu_reg_sp(s, a->rn); 3899 3900 mop = finalize_memop_asimd(s, a->scale); 3901 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, 3902 total, mop); 3903 3904 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3905 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3906 do_vec_st(s, rt, a->index, clean_addr, mop); 3907 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3908 } 3909 3910 if (a->p) { 3911 if (a->rm == 31) { 3912 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3913 } else { 3914 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3915 } 3916 } 3917 return true; 3918 } 3919 3920 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) 3921 { 3922 int xs, total, rt; 3923 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3924 MemOp mop; 3925 3926 if (!a->p && a->rm != 0) { 3927 return false; 3928 } 3929 if (!fp_access_check(s)) { 3930 return true; 3931 } 3932 3933 if (a->rn == 31) { 3934 gen_check_sp_alignment(s); 3935 } 3936 3937 total = a->selem << a->scale; 3938 tcg_rn = cpu_reg_sp(s, a->rn); 3939 3940 mop = finalize_memop_asimd(s, a->scale); 3941 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3942 total, mop); 3943 3944 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3945 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3946 do_vec_ld(s, rt, a->index, clean_addr, mop); 3947 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3948 } 3949 3950 if (a->p) { 3951 if (a->rm == 31) { 3952 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3953 } else { 3954 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3955 } 3956 } 3957 return true; 3958 } 3959 3960 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) 3961 { 3962 int xs, total, rt; 3963 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3964 MemOp mop; 3965 3966 if (!a->p && a->rm != 0) { 3967 return false; 3968 } 3969 if (!fp_access_check(s)) { 3970 return true; 3971 } 3972 3973 if (a->rn == 31) { 3974 gen_check_sp_alignment(s); 3975 } 3976 3977 total = a->selem << a->scale; 3978 tcg_rn = cpu_reg_sp(s, a->rn); 3979 3980 mop = finalize_memop_asimd(s, a->scale); 3981 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3982 total, mop); 3983 3984 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3985 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3986 /* Load and replicate to all elements */ 3987 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 3988 3989 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 3990 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), 3991 (a->q + 
1) * 8, vec_full_reg_size(s), tcg_tmp);
        tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
    }

    if (a->p) {
        if (a->rm == 31) {
            tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
        } else {
            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm));
        }
    }
    return true;
}

static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a)
{
    TCGv_i64 addr, clean_addr, tcg_rt;
    int size = 4 << s->dcz_blocksize;

    if (!dc_isar_feature(aa64_mte, s)) {
        return false;
    }
    if (s->current_el == 0) {
        return false;
    }

    if (a->rn == 31) {
        gen_check_sp_alignment(s);
    }

    addr = read_cpu_reg_sp(s, a->rn, true);
    tcg_gen_addi_i64(addr, addr, a->imm);
    tcg_rt = cpu_reg(s, a->rt);

    if (s->ata[0]) {
        gen_helper_stzgm_tags(tcg_env, addr, tcg_rt);
    }
    /*
     * The non-tags portion of STZGM is mostly like DC_ZVA,
     * except the alignment happens before the access.
     */
    clean_addr = clean_data_tbi(s, addr);
    tcg_gen_andi_i64(clean_addr, clean_addr, -size);
    gen_helper_dc_zva(tcg_env, clean_addr);
    return true;
}

static bool trans_STGM(DisasContext *s, arg_ldst_tag *a)
{
    TCGv_i64 addr, clean_addr, tcg_rt;

    if (!dc_isar_feature(aa64_mte, s)) {
        return false;
    }
    if (s->current_el == 0) {
        return false;
    }

    if (a->rn == 31) {
        gen_check_sp_alignment(s);
    }

    addr = read_cpu_reg_sp(s, a->rn, true);
    tcg_gen_addi_i64(addr, addr, a->imm);
    tcg_rt = cpu_reg(s, a->rt);

    if (s->ata[0]) {
        gen_helper_stgm(tcg_env, addr, tcg_rt);
    } else {
        MMUAccessType acc = MMU_DATA_STORE;
        int size = 4 << s->gm_blocksize;

        clean_addr = clean_data_tbi(s, addr);
        tcg_gen_andi_i64(clean_addr, clean_addr, -size);
        gen_probe_access(s, clean_addr, acc, size);
    }
    return true;
}

static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a)
{
    TCGv_i64 addr, clean_addr, tcg_rt;

    if (!dc_isar_feature(aa64_mte, s)) {
        return false;
    }
    if (s->current_el == 0) {
        return false;
    }

    if (a->rn == 31) {
        gen_check_sp_alignment(s);
    }

    addr = read_cpu_reg_sp(s, a->rn, true);
    tcg_gen_addi_i64(addr, addr, a->imm);
    tcg_rt = cpu_reg(s, a->rt);

    if (s->ata[0]) {
        gen_helper_ldgm(tcg_rt, tcg_env, addr);
    } else {
        MMUAccessType acc = MMU_DATA_LOAD;
        int size = 4 << s->gm_blocksize;

        clean_addr = clean_data_tbi(s, addr);
        tcg_gen_andi_i64(clean_addr, clean_addr, -size);
        gen_probe_access(s, clean_addr, acc, size);
        /* The result tags are zeros. */
        tcg_gen_movi_i64(tcg_rt, 0);
    }
    return true;
}

static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
{
    TCGv_i64 addr, clean_addr, tcg_rt;

    if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
        return false;
    }

    if (a->rn == 31) {
        gen_check_sp_alignment(s);
    }

    addr = read_cpu_reg_sp(s, a->rn, true);
    if (!a->p) {
        /* pre-index or signed offset */
        tcg_gen_addi_i64(addr, addr, a->imm);
    }

    tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
    tcg_rt = cpu_reg(s, a->rt);
    if (s->ata[0]) {
        gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
    } else {
        /*
         * Tag access disabled: we must check for aborts on the
         * load from [rn+offset], and then insert a 0 tag into rt.
4130 */ 4131 clean_addr = clean_data_tbi(s, addr); 4132 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 4133 gen_address_with_allocation_tag0(tcg_rt, tcg_rt); 4134 } 4135 4136 if (a->w) { 4137 /* pre-index or post-index */ 4138 if (a->p) { 4139 /* post-index */ 4140 tcg_gen_addi_i64(addr, addr, a->imm); 4141 } 4142 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4143 } 4144 return true; 4145 } 4146 4147 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) 4148 { 4149 TCGv_i64 addr, tcg_rt; 4150 4151 if (a->rn == 31) { 4152 gen_check_sp_alignment(s); 4153 } 4154 4155 addr = read_cpu_reg_sp(s, a->rn, true); 4156 if (!a->p) { 4157 /* pre-index or signed offset */ 4158 tcg_gen_addi_i64(addr, addr, a->imm); 4159 } 4160 tcg_rt = cpu_reg_sp(s, a->rt); 4161 if (!s->ata[0]) { 4162 /* 4163 * For STG and ST2G, we need to check alignment and probe memory. 4164 * TODO: For STZG and STZ2G, we could rely on the stores below, 4165 * at least for system mode; user-only won't enforce alignment. 4166 */ 4167 if (is_pair) { 4168 gen_helper_st2g_stub(tcg_env, addr); 4169 } else { 4170 gen_helper_stg_stub(tcg_env, addr); 4171 } 4172 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 4173 if (is_pair) { 4174 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt); 4175 } else { 4176 gen_helper_stg_parallel(tcg_env, addr, tcg_rt); 4177 } 4178 } else { 4179 if (is_pair) { 4180 gen_helper_st2g(tcg_env, addr, tcg_rt); 4181 } else { 4182 gen_helper_stg(tcg_env, addr, tcg_rt); 4183 } 4184 } 4185 4186 if (is_zero) { 4187 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 4188 TCGv_i64 zero64 = tcg_constant_i64(0); 4189 TCGv_i128 zero128 = tcg_temp_new_i128(); 4190 int mem_index = get_mem_index(s); 4191 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN); 4192 4193 tcg_gen_concat_i64_i128(zero128, zero64, zero64); 4194 4195 /* This is 1 or 2 atomic 16-byte operations. */ 4196 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4197 if (is_pair) { 4198 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4199 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4200 } 4201 } 4202 4203 if (a->w) { 4204 /* pre-index or post-index */ 4205 if (a->p) { 4206 /* post-index */ 4207 tcg_gen_addi_i64(addr, addr, a->imm); 4208 } 4209 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4210 } 4211 return true; 4212 } 4213 4214 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) 4215 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) 4216 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) 4217 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) 4218 4219 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32); 4220 4221 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, 4222 bool is_setg, SetFn fn) 4223 { 4224 int memidx; 4225 uint32_t syndrome, desc = 0; 4226 4227 if (is_setg && !dc_isar_feature(aa64_mte, s)) { 4228 return false; 4229 } 4230 4231 /* 4232 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4233 * us to pull this check before the CheckMOPSEnabled() test 4234 * (which we do in the helper function) 4235 */ 4236 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4237 a->rd == 31 || a->rn == 31) { 4238 return false; 4239 } 4240 4241 memidx = get_a64_user_mem_index(s, a->unpriv); 4242 4243 /* 4244 * We pass option_a == true, matching our implementation; 4245 * we pass wrong_option == false: helper function may set that bit. 
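     * (option_a records that this implementation follows the architected
     * 'option A' FEAT_MOPS algorithm; wrong_option is reported when a main
     * or epilogue insn finds register state laid out for the other option.)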
4246 */ 4247 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv, 4248 is_epilogue, false, true, a->rd, a->rs, a->rn); 4249 4250 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) { 4251 /* We may need to do MTE tag checking, so assemble the descriptor */ 4252 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4253 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4254 desc = FIELD_DP32(desc, MTEDESC, WRITE, true); 4255 /* SIZEM1 and ALIGN we leave 0 (byte write) */ 4256 } 4257 /* The helper function always needs the memidx even with MTE disabled */ 4258 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx); 4259 4260 /* 4261 * The helper needs the register numbers, but since they're in 4262 * the syndrome anyway, we let it extract them from there rather 4263 * than passing in an extra three integer arguments. 4264 */ 4265 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc)); 4266 return true; 4267 } 4268 4269 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp) 4270 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm) 4271 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete) 4272 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp) 4273 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm) 4274 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge) 4275 4276 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32); 4277 4278 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) 4279 { 4280 int rmemidx, wmemidx; 4281 uint32_t syndrome, rdesc = 0, wdesc = 0; 4282 bool wunpriv = extract32(a->options, 0, 1); 4283 bool runpriv = extract32(a->options, 1, 1); 4284 4285 /* 4286 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4287 * us to pull this check before the CheckMOPSEnabled() test 4288 * (which we do in the helper function) 4289 */ 4290 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4291 a->rd == 31 || a->rs == 31 || a->rn == 31) { 4292 return false; 4293 } 4294 4295 rmemidx = get_a64_user_mem_index(s, runpriv); 4296 wmemidx = get_a64_user_mem_index(s, wunpriv); 4297 4298 /* 4299 * We pass option_a == true, matching our implementation; 4300 * we pass wrong_option == false: helper function may set that bit. 4301 */ 4302 syndrome = syn_mop(false, false, a->options, is_epilogue, 4303 false, true, a->rd, a->rs, a->rn); 4304 4305 /* If we need to do MTE tag checking, assemble the descriptors */ 4306 if (s->mte_active[runpriv]) { 4307 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid); 4308 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma); 4309 } 4310 if (s->mte_active[wunpriv]) { 4311 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid); 4312 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma); 4313 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true); 4314 } 4315 /* The helper function needs these parts of the descriptor regardless */ 4316 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx); 4317 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx); 4318 4319 /* 4320 * The helper needs the register numbers, but since they're in 4321 * the syndrome anyway, we let it extract them from there rather 4322 * than passing in an extra three integer arguments. 
4323 */ 4324 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc), 4325 tcg_constant_i32(rdesc)); 4326 return true; 4327 } 4328 4329 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp) 4330 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym) 4331 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye) 4332 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp) 4333 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm) 4334 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe) 4335 4336 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); 4337 4338 static bool gen_rri(DisasContext *s, arg_rri_sf *a, 4339 bool rd_sp, bool rn_sp, ArithTwoOp *fn) 4340 { 4341 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn); 4342 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd); 4343 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm); 4344 4345 fn(tcg_rd, tcg_rn, tcg_imm); 4346 if (!a->sf) { 4347 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4348 } 4349 return true; 4350 } 4351 4352 /* 4353 * PC-rel. addressing 4354 */ 4355 4356 static bool trans_ADR(DisasContext *s, arg_ri *a) 4357 { 4358 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm); 4359 return true; 4360 } 4361 4362 static bool trans_ADRP(DisasContext *s, arg_ri *a) 4363 { 4364 int64_t offset = (int64_t)a->imm << 12; 4365 4366 /* The page offset is ok for CF_PCREL. */ 4367 offset -= s->pc_curr & 0xfff; 4368 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset); 4369 return true; 4370 } 4371 4372 /* 4373 * Add/subtract (immediate) 4374 */ 4375 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64) 4376 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64) 4377 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) 4378 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) 4379 4380 /* 4381 * Add/subtract (immediate, with tags) 4382 */ 4383 4384 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, 4385 bool sub_op) 4386 { 4387 TCGv_i64 tcg_rn, tcg_rd; 4388 int imm; 4389 4390 imm = a->uimm6 << LOG2_TAG_GRANULE; 4391 if (sub_op) { 4392 imm = -imm; 4393 } 4394 4395 tcg_rn = cpu_reg_sp(s, a->rn); 4396 tcg_rd = cpu_reg_sp(s, a->rd); 4397 4398 if (s->ata[0]) { 4399 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn, 4400 tcg_constant_i32(imm), 4401 tcg_constant_i32(a->uimm4)); 4402 } else { 4403 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 4404 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 4405 } 4406 return true; 4407 } 4408 4409 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false) 4410 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true) 4411 4412 /* The input should be a value in the bottom e bits (with higher 4413 * bits zero); returns that value replicated into every element 4414 * of size e in a 64 bit integer. 4415 */ 4416 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 4417 { 4418 assert(e != 0); 4419 while (e < 64) { 4420 mask |= mask << e; 4421 e *= 2; 4422 } 4423 return mask; 4424 } 4425 4426 /* 4427 * Logical (immediate) 4428 */ 4429 4430 /* 4431 * Simplified variant of pseudocode DecodeBitMasks() for the case where we 4432 * only require the wmask. Returns false if the imms/immr/immn are a reserved 4433 * value (ie should cause a guest UNDEF exception), and true if they are 4434 * valid, in which case the decoded bit pattern is written to result. 
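 *
 * For example, immn=0, imms=0b111100, immr=0 selects 2-bit elements each
 * containing a single set bit with no rotation, which decodes to the
 * bit pattern 0x5555555555555555.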
4435 */ 4436 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, 4437 unsigned int imms, unsigned int immr) 4438 { 4439 uint64_t mask; 4440 unsigned e, levels, s, r; 4441 int len; 4442 4443 assert(immn < 2 && imms < 64 && immr < 64); 4444 4445 /* The bit patterns we create here are 64 bit patterns which 4446 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or 4447 * 64 bits each. Each element contains the same value: a run 4448 * of between 1 and e-1 non-zero bits, rotated within the 4449 * element by between 0 and e-1 bits. 4450 * 4451 * The element size and run length are encoded into immn (1 bit) 4452 * and imms (6 bits) as follows: 4453 * 64 bit elements: immn = 1, imms = <length of run - 1> 4454 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1> 4455 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1> 4456 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1> 4457 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1> 4458 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1> 4459 * Notice that immn = 0, imms = 11111x is the only combination 4460 * not covered by one of the above options; this is reserved. 4461 * Further, <length of run - 1> all-ones is a reserved pattern. 4462 * 4463 * In all cases the rotation is by immr % e (and immr is 6 bits). 4464 */ 4465 4466 /* First determine the element size */ 4467 len = 31 - clz32((immn << 6) | (~imms & 0x3f)); 4468 if (len < 1) { 4469 /* This is the immn == 0, imms == 0x11111x case */ 4470 return false; 4471 } 4472 e = 1 << len; 4473 4474 levels = e - 1; 4475 s = imms & levels; 4476 r = immr & levels; 4477 4478 if (s == levels) { 4479 /* <length of run - 1> mustn't be all-ones. */ 4480 return false; 4481 } 4482 4483 /* Create the value of one element: s+1 set bits rotated 4484 * by r within the element (which is e bits wide)... 4485 */ 4486 mask = MAKE_64BIT_MASK(0, s + 1); 4487 if (r) { 4488 mask = (mask >> r) | (mask << (e - r)); 4489 mask &= MAKE_64BIT_MASK(0, e); 4490 } 4491 /* ...then replicate the element over the whole 64 bit value */ 4492 mask = bitfield_replicate(mask, e); 4493 *result = mask; 4494 return true; 4495 } 4496 4497 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc, 4498 void (*fn)(TCGv_i64, TCGv_i64, int64_t)) 4499 { 4500 TCGv_i64 tcg_rd, tcg_rn; 4501 uint64_t imm; 4502 4503 /* Some immediate field values are reserved. */ 4504 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 4505 extract32(a->dbm, 0, 6), 4506 extract32(a->dbm, 6, 6))) { 4507 return false; 4508 } 4509 if (!a->sf) { 4510 imm &= 0xffffffffull; 4511 } 4512 4513 tcg_rd = set_cc ? 
cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
    tcg_rn = cpu_reg(s, a->rn);

    fn(tcg_rd, tcg_rn, imm);
    if (set_cc) {
        gen_logic_CC(a->sf, tcg_rd);
    }
    if (!a->sf) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
    return true;
}

TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)

/*
 * Move wide (immediate)
 */

static bool trans_MOVZ(DisasContext *s, arg_movw *a)
{
    int pos = a->hw << 4;
    tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
    return true;
}

static bool trans_MOVN(DisasContext *s, arg_movw *a)
{
    int pos = a->hw << 4;
    uint64_t imm = a->imm;

    imm = ~(imm << pos);
    if (!a->sf) {
        imm = (uint32_t)imm;
    }
    tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
    return true;
}

static bool trans_MOVK(DisasContext *s, arg_movw *a)
{
    int pos = a->hw << 4;
    TCGv_i64 tcg_rd, tcg_im;

    tcg_rd = cpu_reg(s, a->rd);
    tcg_im = tcg_constant_i64(a->imm);
    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
    if (!a->sf) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
    return true;
}

/*
 * Bitfield
 */

static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
{
    TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
    TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
    unsigned int bitsize = a->sf ? 64 : 32;
    unsigned int ri = a->immr;
    unsigned int si = a->imms;
    unsigned int pos, len;

    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        len = (si - ri) + 1;
        tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
        if (!a->sf) {
            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
        }
    } else {
        /* Wd<32+s-r,32-r> = Wn<s:0> */
        len = si + 1;
        pos = (bitsize - ri) & (bitsize - 1);

        if (len < ri) {
            /*
             * Sign extend the destination field from len to fill the
             * balance of the word. Let the deposit below insert all
             * of those sign bits.
             */
            tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
            len = ri;
        }

        /*
         * We start with zero, and we haven't modified any bits outside
         * bitsize, therefore no final zero-extension is needed for !sf.
         */
        tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
    }
    return true;
}

static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
{
    TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
    TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
    unsigned int bitsize = a->sf ? 64 : 32;
    unsigned int ri = a->immr;
    unsigned int si = a->imms;
    unsigned int pos, len;

    tcg_rd = cpu_reg(s, a->rd);
    tcg_tmp = read_cpu_reg(s, a->rn, 1);

    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        len = (si - ri) + 1;
        tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
    } else {
        /* Wd<32+s-r,32-r> = Wn<s:0> */
        len = si + 1;
        pos = (bitsize - ri) & (bitsize - 1);
        tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
    }
    return true;
}

static bool trans_BFM(DisasContext *s, arg_BFM *a)
{
    TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
    TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
    unsigned int bitsize = a->sf ?
64 : 32; 4643 unsigned int ri = a->immr; 4644 unsigned int si = a->imms; 4645 unsigned int pos, len; 4646 4647 tcg_rd = cpu_reg(s, a->rd); 4648 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4649 4650 if (si >= ri) { 4651 /* Wd<s-r:0> = Wn<s:r> */ 4652 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); 4653 len = (si - ri) + 1; 4654 pos = 0; 4655 } else { 4656 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4657 len = si + 1; 4658 pos = (bitsize - ri) & (bitsize - 1); 4659 } 4660 4661 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 4662 if (!a->sf) { 4663 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4664 } 4665 return true; 4666 } 4667 4668 static bool trans_EXTR(DisasContext *s, arg_extract *a) 4669 { 4670 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 4671 4672 tcg_rd = cpu_reg(s, a->rd); 4673 4674 if (unlikely(a->imm == 0)) { 4675 /* 4676 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 4677 * so an extract from bit 0 is a special case. 4678 */ 4679 if (a->sf) { 4680 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm)); 4681 } else { 4682 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm)); 4683 } 4684 } else { 4685 tcg_rm = cpu_reg(s, a->rm); 4686 tcg_rn = cpu_reg(s, a->rn); 4687 4688 if (a->sf) { 4689 /* Specialization to ROR happens in EXTRACT2. */ 4690 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm); 4691 } else { 4692 TCGv_i32 t0 = tcg_temp_new_i32(); 4693 4694 tcg_gen_extrl_i64_i32(t0, tcg_rm); 4695 if (a->rm == a->rn) { 4696 tcg_gen_rotri_i32(t0, t0, a->imm); 4697 } else { 4698 TCGv_i32 t1 = tcg_temp_new_i32(); 4699 tcg_gen_extrl_i64_i32(t1, tcg_rn); 4700 tcg_gen_extract2_i32(t0, t0, t1, a->imm); 4701 } 4702 tcg_gen_extu_i32_i64(tcg_rd, t0); 4703 } 4704 } 4705 return true; 4706 } 4707 4708 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a) 4709 { 4710 if (fp_access_check(s)) { 4711 int len = (a->len + 1) * 16; 4712 4713 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd), 4714 vec_full_reg_offset(s, a->rm), tcg_env, 4715 a->q ? 16 : 8, vec_full_reg_size(s), 4716 (len << 6) | (a->tbx << 5) | a->rn, 4717 gen_helper_simd_tblx); 4718 } 4719 return true; 4720 } 4721 4722 typedef int simd_permute_idx_fn(int i, int part, int elements); 4723 4724 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a, 4725 simd_permute_idx_fn *fn, int part) 4726 { 4727 MemOp esz = a->esz; 4728 int datasize = a->q ? 16 : 8; 4729 int elements = datasize >> esz; 4730 TCGv_i64 tcg_res[2], tcg_ele; 4731 4732 if (esz == MO_64 && !a->q) { 4733 return false; 4734 } 4735 if (!fp_access_check(s)) { 4736 return true; 4737 } 4738 4739 tcg_res[0] = tcg_temp_new_i64(); 4740 tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL; 4741 tcg_ele = tcg_temp_new_i64(); 4742 4743 for (int i = 0; i < elements; i++) { 4744 int o, w, idx; 4745 4746 idx = fn(i, part, elements); 4747 read_vec_element(s, tcg_ele, (idx & elements ? 
a->rm : a->rn), 4748 idx & (elements - 1), esz); 4749 4750 w = (i << (esz + 3)) / 64; 4751 o = (i << (esz + 3)) % 64; 4752 if (o == 0) { 4753 tcg_gen_mov_i64(tcg_res[w], tcg_ele); 4754 } else { 4755 tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz); 4756 } 4757 } 4758 4759 for (int i = a->q; i >= 0; --i) { 4760 write_vec_element(s, tcg_res[i], a->rd, i, MO_64); 4761 } 4762 clear_vec_high(s, a->q, a->rd); 4763 return true; 4764 } 4765 4766 static int permute_load_uzp(int i, int part, int elements) 4767 { 4768 return 2 * i + part; 4769 } 4770 4771 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0) 4772 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1) 4773 4774 static int permute_load_trn(int i, int part, int elements) 4775 { 4776 return (i & 1) * elements + (i & ~1) + part; 4777 } 4778 4779 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0) 4780 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1) 4781 4782 static int permute_load_zip(int i, int part, int elements) 4783 { 4784 return (i & 1) * elements + ((part * elements + i) >> 1); 4785 } 4786 4787 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0) 4788 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1) 4789 4790 /* 4791 * Cryptographic AES, SHA, SHA512 4792 */ 4793 4794 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese) 4795 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd) 4796 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc) 4797 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc) 4798 4799 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c) 4800 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p) 4801 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m) 4802 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0) 4803 4804 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h) 4805 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2) 4806 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1) 4807 4808 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h) 4809 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1) 4810 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0) 4811 4812 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h) 4813 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2) 4814 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1) 4815 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1) 4816 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1) 4817 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2) 4818 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey) 4819 4820 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0) 4821 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e) 4822 4823 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3) 4824 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax) 4825 4826 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a) 4827 { 4828 if (!dc_isar_feature(aa64_sm3, s)) { 4829 return false; 4830 } 4831 if (fp_access_check(s)) { 4832 
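        /*
         * SS1 = rol32(rol32(Vn.S[3], 12) + Vm.S[3] + Va.S[3], 7);
         * the rotate-rights by 20 and 25 below are rotate-lefts by 12 and 7.
         */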
TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 4833 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 4834 TCGv_i32 tcg_op3 = tcg_temp_new_i32(); 4835 TCGv_i32 tcg_res = tcg_temp_new_i32(); 4836 4837 read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32); 4838 read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32); 4839 read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32); 4840 4841 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); 4842 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); 4843 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); 4844 tcg_gen_rotri_i32(tcg_res, tcg_res, 25); 4845 4846 /* Clear the whole register first, then store bits [127:96]. */ 4847 clear_vec(s, a->rd); 4848 write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32); 4849 } 4850 return true; 4851 } 4852 4853 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn) 4854 { 4855 if (fp_access_check(s)) { 4856 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn); 4857 } 4858 return true; 4859 } 4860 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a) 4861 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b) 4862 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a) 4863 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b) 4864 4865 static bool trans_XAR(DisasContext *s, arg_XAR *a) 4866 { 4867 if (!dc_isar_feature(aa64_sha3, s)) { 4868 return false; 4869 } 4870 if (fp_access_check(s)) { 4871 gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd), 4872 vec_full_reg_offset(s, a->rn), 4873 vec_full_reg_offset(s, a->rm), a->imm, 16, 4874 vec_full_reg_size(s)); 4875 } 4876 return true; 4877 } 4878 4879 /* 4880 * Advanced SIMD copy 4881 */ 4882 4883 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx) 4884 { 4885 unsigned esz = ctz32(imm); 4886 if (esz <= MO_64) { 4887 *pesz = esz; 4888 *pidx = imm >> (esz + 1); 4889 return true; 4890 } 4891 return false; 4892 } 4893 4894 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a) 4895 { 4896 MemOp esz; 4897 unsigned idx; 4898 4899 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4900 return false; 4901 } 4902 if (fp_access_check(s)) { 4903 /* 4904 * This instruction just extracts the specified element and 4905 * zero-extends it into the bottom of the destination register. 4906 */ 4907 TCGv_i64 tmp = tcg_temp_new_i64(); 4908 read_vec_element(s, tmp, a->rn, idx, esz); 4909 write_fp_dreg(s, a->rd, tmp); 4910 } 4911 return true; 4912 } 4913 4914 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a) 4915 { 4916 MemOp esz; 4917 unsigned idx; 4918 4919 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4920 return false; 4921 } 4922 if (esz == MO_64 && !a->q) { 4923 return false; 4924 } 4925 if (fp_access_check(s)) { 4926 tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd), 4927 vec_reg_offset(s, a->rn, idx, esz), 4928 a->q ? 16 : 8, vec_full_reg_size(s)); 4929 } 4930 return true; 4931 } 4932 4933 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a) 4934 { 4935 MemOp esz; 4936 unsigned idx; 4937 4938 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4939 return false; 4940 } 4941 if (esz == MO_64 && !a->q) { 4942 return false; 4943 } 4944 if (fp_access_check(s)) { 4945 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 4946 a->q ? 
16 : 8, vec_full_reg_size(s), 4947 cpu_reg(s, a->rn)); 4948 } 4949 return true; 4950 } 4951 4952 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed) 4953 { 4954 MemOp esz; 4955 unsigned idx; 4956 4957 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4958 return false; 4959 } 4960 if (is_signed) { 4961 if (esz == MO_64 || (esz == MO_32 && !a->q)) { 4962 return false; 4963 } 4964 } else { 4965 if (esz == MO_64 ? !a->q : a->q) { 4966 return false; 4967 } 4968 } 4969 if (fp_access_check(s)) { 4970 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4971 read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed); 4972 if (is_signed && !a->q) { 4973 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4974 } 4975 } 4976 return true; 4977 } 4978 4979 TRANS(SMOV, do_smov_umov, a, MO_SIGN) 4980 TRANS(UMOV, do_smov_umov, a, 0) 4981 4982 static bool trans_INS_general(DisasContext *s, arg_INS_general *a) 4983 { 4984 MemOp esz; 4985 unsigned idx; 4986 4987 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4988 return false; 4989 } 4990 if (fp_access_check(s)) { 4991 write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz); 4992 clear_vec_high(s, true, a->rd); 4993 } 4994 return true; 4995 } 4996 4997 static bool trans_INS_element(DisasContext *s, arg_INS_element *a) 4998 { 4999 MemOp esz; 5000 unsigned didx, sidx; 5001 5002 if (!decode_esz_idx(a->di, &esz, &didx)) { 5003 return false; 5004 } 5005 sidx = a->si >> esz; 5006 if (fp_access_check(s)) { 5007 TCGv_i64 tmp = tcg_temp_new_i64(); 5008 5009 read_vec_element(s, tmp, a->rn, sidx, esz); 5010 write_vec_element(s, tmp, a->rd, didx, esz); 5011 5012 /* INS is considered a 128-bit write for SVE. */ 5013 clear_vec_high(s, true, a->rd); 5014 } 5015 return true; 5016 } 5017 5018 /* 5019 * Advanced SIMD three same 5020 */ 5021 5022 typedef struct FPScalar { 5023 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5024 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5025 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); 5026 } FPScalar; 5027 5028 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) 5029 { 5030 switch (a->esz) { 5031 case MO_64: 5032 if (fp_access_check(s)) { 5033 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5034 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5035 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5036 write_fp_dreg(s, a->rd, t0); 5037 } 5038 break; 5039 case MO_32: 5040 if (fp_access_check(s)) { 5041 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5042 TCGv_i32 t1 = read_fp_sreg(s, a->rm); 5043 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5044 write_fp_sreg(s, a->rd, t0); 5045 } 5046 break; 5047 case MO_16: 5048 if (!dc_isar_feature(aa64_fp16, s)) { 5049 return false; 5050 } 5051 if (fp_access_check(s)) { 5052 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5053 TCGv_i32 t1 = read_fp_hreg(s, a->rm); 5054 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 5055 write_fp_sreg(s, a->rd, t0); 5056 } 5057 break; 5058 default: 5059 return false; 5060 } 5061 return true; 5062 } 5063 5064 static const FPScalar f_scalar_fadd = { 5065 gen_helper_vfp_addh, 5066 gen_helper_vfp_adds, 5067 gen_helper_vfp_addd, 5068 }; 5069 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd) 5070 5071 static const FPScalar f_scalar_fsub = { 5072 gen_helper_vfp_subh, 5073 gen_helper_vfp_subs, 5074 gen_helper_vfp_subd, 5075 }; 5076 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub) 5077 5078 static const FPScalar f_scalar_fdiv = { 5079 gen_helper_vfp_divh, 5080 gen_helper_vfp_divs, 5081 gen_helper_vfp_divd, 5082 }; 5083 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv) 5084 5085 static const 
FPScalar f_scalar_fmul = { 5086 gen_helper_vfp_mulh, 5087 gen_helper_vfp_muls, 5088 gen_helper_vfp_muld, 5089 }; 5090 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul) 5091 5092 static const FPScalar f_scalar_fmax = { 5093 gen_helper_vfp_maxh, 5094 gen_helper_vfp_maxs, 5095 gen_helper_vfp_maxd, 5096 }; 5097 TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax) 5098 5099 static const FPScalar f_scalar_fmin = { 5100 gen_helper_vfp_minh, 5101 gen_helper_vfp_mins, 5102 gen_helper_vfp_mind, 5103 }; 5104 TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin) 5105 5106 static const FPScalar f_scalar_fmaxnm = { 5107 gen_helper_vfp_maxnumh, 5108 gen_helper_vfp_maxnums, 5109 gen_helper_vfp_maxnumd, 5110 }; 5111 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm) 5112 5113 static const FPScalar f_scalar_fminnm = { 5114 gen_helper_vfp_minnumh, 5115 gen_helper_vfp_minnums, 5116 gen_helper_vfp_minnumd, 5117 }; 5118 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm) 5119 5120 static const FPScalar f_scalar_fmulx = { 5121 gen_helper_advsimd_mulxh, 5122 gen_helper_vfp_mulxs, 5123 gen_helper_vfp_mulxd, 5124 }; 5125 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx) 5126 5127 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5128 { 5129 gen_helper_vfp_mulh(d, n, m, s); 5130 gen_vfp_negh(d, d); 5131 } 5132 5133 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5134 { 5135 gen_helper_vfp_muls(d, n, m, s); 5136 gen_vfp_negs(d, d); 5137 } 5138 5139 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5140 { 5141 gen_helper_vfp_muld(d, n, m, s); 5142 gen_vfp_negd(d, d); 5143 } 5144 5145 static const FPScalar f_scalar_fnmul = { 5146 gen_fnmul_h, 5147 gen_fnmul_s, 5148 gen_fnmul_d, 5149 }; 5150 TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul) 5151 5152 static const FPScalar f_scalar_fcmeq = { 5153 gen_helper_advsimd_ceq_f16, 5154 gen_helper_neon_ceq_f32, 5155 gen_helper_neon_ceq_f64, 5156 }; 5157 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq) 5158 5159 static const FPScalar f_scalar_fcmge = { 5160 gen_helper_advsimd_cge_f16, 5161 gen_helper_neon_cge_f32, 5162 gen_helper_neon_cge_f64, 5163 }; 5164 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge) 5165 5166 static const FPScalar f_scalar_fcmgt = { 5167 gen_helper_advsimd_cgt_f16, 5168 gen_helper_neon_cgt_f32, 5169 gen_helper_neon_cgt_f64, 5170 }; 5171 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt) 5172 5173 static const FPScalar f_scalar_facge = { 5174 gen_helper_advsimd_acge_f16, 5175 gen_helper_neon_acge_f32, 5176 gen_helper_neon_acge_f64, 5177 }; 5178 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge) 5179 5180 static const FPScalar f_scalar_facgt = { 5181 gen_helper_advsimd_acgt_f16, 5182 gen_helper_neon_acgt_f32, 5183 gen_helper_neon_acgt_f64, 5184 }; 5185 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt) 5186 5187 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5188 { 5189 gen_helper_vfp_subh(d, n, m, s); 5190 gen_vfp_absh(d, d); 5191 } 5192 5193 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5194 { 5195 gen_helper_vfp_subs(d, n, m, s); 5196 gen_vfp_abss(d, d); 5197 } 5198 5199 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5200 { 5201 gen_helper_vfp_subd(d, n, m, s); 5202 gen_vfp_absd(d, d); 5203 } 5204 5205 static const FPScalar f_scalar_fabd = { 5206 gen_fabd_h, 5207 gen_fabd_s, 5208 gen_fabd_d, 5209 }; 5210 TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) 5211 5212 static const FPScalar f_scalar_frecps = { 5213 gen_helper_recpsf_f16, 
5214 gen_helper_recpsf_f32, 5215 gen_helper_recpsf_f64, 5216 }; 5217 TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps) 5218 5219 static const FPScalar f_scalar_frsqrts = { 5220 gen_helper_rsqrtsf_f16, 5221 gen_helper_rsqrtsf_f32, 5222 gen_helper_rsqrtsf_f64, 5223 }; 5224 TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) 5225 5226 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, 5227 const FPScalar *f, bool swap) 5228 { 5229 switch (a->esz) { 5230 case MO_64: 5231 if (fp_access_check(s)) { 5232 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5233 TCGv_i64 t1 = tcg_constant_i64(0); 5234 if (swap) { 5235 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64)); 5236 } else { 5237 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5238 } 5239 write_fp_dreg(s, a->rd, t0); 5240 } 5241 break; 5242 case MO_32: 5243 if (fp_access_check(s)) { 5244 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5245 TCGv_i32 t1 = tcg_constant_i32(0); 5246 if (swap) { 5247 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64)); 5248 } else { 5249 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5250 } 5251 write_fp_sreg(s, a->rd, t0); 5252 } 5253 break; 5254 case MO_16: 5255 if (!dc_isar_feature(aa64_fp16, s)) { 5256 return false; 5257 } 5258 if (fp_access_check(s)) { 5259 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5260 TCGv_i32 t1 = tcg_constant_i32(0); 5261 if (swap) { 5262 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16)); 5263 } else { 5264 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 5265 } 5266 write_fp_sreg(s, a->rd, t0); 5267 } 5268 break; 5269 default: 5270 return false; 5271 } 5272 return true; 5273 } 5274 5275 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false) 5276 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false) 5277 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false) 5278 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true) 5279 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true) 5280 5281 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a, 5282 MemOp sgn_n, MemOp sgn_m, 5283 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp), 5284 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 5285 { 5286 TCGv_i64 t0, t1, t2, qc; 5287 MemOp esz = a->esz; 5288 5289 if (!fp_access_check(s)) { 5290 return true; 5291 } 5292 5293 t0 = tcg_temp_new_i64(); 5294 t1 = tcg_temp_new_i64(); 5295 t2 = tcg_temp_new_i64(); 5296 qc = tcg_temp_new_i64(); 5297 read_vec_element(s, t1, a->rn, 0, esz | sgn_n); 5298 read_vec_element(s, t2, a->rm, 0, esz | sgn_m); 5299 tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5300 5301 if (esz == MO_64) { 5302 gen_d(t0, qc, t1, t2); 5303 } else { 5304 gen_bhs(t0, qc, t1, t2, esz); 5305 tcg_gen_ext_i64(t0, t0, esz); 5306 } 5307 5308 write_fp_dreg(s, a->rd, t0); 5309 tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5310 return true; 5311 } 5312 5313 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d) 5314 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d) 5315 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d) 5316 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d) 5317 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d) 5318 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d) 5319 5320 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a, 5321 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64)) 5322 { 5323 if (fp_access_check(s)) { 5324 TCGv_i64 t0 = tcg_temp_new_i64(); 5325 TCGv_i64 t1 = tcg_temp_new_i64(); 5326 5327 read_vec_element(s, t0, a->rn, 0, MO_64); 5328 
read_vec_element(s, t1, a->rm, 0, MO_64); 5329 fn(t0, t0, t1); 5330 write_fp_dreg(s, a->rd, t0); 5331 } 5332 return true; 5333 } 5334 5335 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64) 5336 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64) 5337 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64) 5338 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64) 5339 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64) 5340 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64) 5341 5342 typedef struct ENVScalar2 { 5343 NeonGenTwoOpEnvFn *gen_bhs[3]; 5344 NeonGenTwo64OpEnvFn *gen_d; 5345 } ENVScalar2; 5346 5347 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f) 5348 { 5349 if (!fp_access_check(s)) { 5350 return true; 5351 } 5352 if (a->esz == MO_64) { 5353 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5354 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5355 f->gen_d(t0, tcg_env, t0, t1); 5356 write_fp_dreg(s, a->rd, t0); 5357 } else { 5358 TCGv_i32 t0 = tcg_temp_new_i32(); 5359 TCGv_i32 t1 = tcg_temp_new_i32(); 5360 5361 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5362 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5363 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 5364 write_fp_sreg(s, a->rd, t0); 5365 } 5366 return true; 5367 } 5368 5369 static const ENVScalar2 f_scalar_sqshl = { 5370 { gen_helper_neon_qshl_s8, 5371 gen_helper_neon_qshl_s16, 5372 gen_helper_neon_qshl_s32 }, 5373 gen_helper_neon_qshl_s64, 5374 }; 5375 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl) 5376 5377 static const ENVScalar2 f_scalar_uqshl = { 5378 { gen_helper_neon_qshl_u8, 5379 gen_helper_neon_qshl_u16, 5380 gen_helper_neon_qshl_u32 }, 5381 gen_helper_neon_qshl_u64, 5382 }; 5383 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl) 5384 5385 static const ENVScalar2 f_scalar_sqrshl = { 5386 { gen_helper_neon_qrshl_s8, 5387 gen_helper_neon_qrshl_s16, 5388 gen_helper_neon_qrshl_s32 }, 5389 gen_helper_neon_qrshl_s64, 5390 }; 5391 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl) 5392 5393 static const ENVScalar2 f_scalar_uqrshl = { 5394 { gen_helper_neon_qrshl_u8, 5395 gen_helper_neon_qrshl_u16, 5396 gen_helper_neon_qrshl_u32 }, 5397 gen_helper_neon_qrshl_u64, 5398 }; 5399 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl) 5400 5401 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a, 5402 const ENVScalar2 *f) 5403 { 5404 if (a->esz == MO_16 || a->esz == MO_32) { 5405 return do_env_scalar2(s, a, f); 5406 } 5407 return false; 5408 } 5409 5410 static const ENVScalar2 f_scalar_sqdmulh = { 5411 { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 } 5412 }; 5413 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh) 5414 5415 static const ENVScalar2 f_scalar_sqrdmulh = { 5416 { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 } 5417 }; 5418 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh) 5419 5420 typedef struct ENVScalar3 { 5421 NeonGenThreeOpEnvFn *gen_hs[2]; 5422 } ENVScalar3; 5423 5424 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a, 5425 const ENVScalar3 *f) 5426 { 5427 TCGv_i32 t0, t1, t2; 5428 5429 if (a->esz != MO_16 && a->esz != MO_32) { 5430 return false; 5431 } 5432 if (!fp_access_check(s)) { 5433 return true; 5434 } 5435 5436 t0 = tcg_temp_new_i32(); 5437 t1 = tcg_temp_new_i32(); 5438 t2 = tcg_temp_new_i32(); 5439 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5440 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5441 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 5442 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 5443 
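    /* write_fp_sreg writes the 32-bit result and zeroes the rest of Vd. */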
write_fp_sreg(s, a->rd, t0); 5444 return true; 5445 } 5446 5447 static const ENVScalar3 f_scalar_sqrdmlah = { 5448 { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 } 5449 }; 5450 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah) 5451 5452 static const ENVScalar3 f_scalar_sqrdmlsh = { 5453 { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 } 5454 }; 5455 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh) 5456 5457 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond) 5458 { 5459 if (fp_access_check(s)) { 5460 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5461 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5462 tcg_gen_negsetcond_i64(cond, t0, t0, t1); 5463 write_fp_dreg(s, a->rd, t0); 5464 } 5465 return true; 5466 } 5467 5468 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT) 5469 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU) 5470 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE) 5471 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) 5472 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) 5473 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) 5474 5475 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, 5476 gen_helper_gvec_3_ptr * const fns[3]) 5477 { 5478 MemOp esz = a->esz; 5479 int check = fp_access_check_vector_hsd(s, a->q, esz); 5480 5481 if (check <= 0) { 5482 return check == 0; 5483 } 5484 5485 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 5486 esz == MO_16, data, fns[esz - 1]); 5487 return true; 5488 } 5489 5490 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { 5491 gen_helper_gvec_fadd_h, 5492 gen_helper_gvec_fadd_s, 5493 gen_helper_gvec_fadd_d, 5494 }; 5495 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd) 5496 5497 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = { 5498 gen_helper_gvec_fsub_h, 5499 gen_helper_gvec_fsub_s, 5500 gen_helper_gvec_fsub_d, 5501 }; 5502 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub) 5503 5504 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = { 5505 gen_helper_gvec_fdiv_h, 5506 gen_helper_gvec_fdiv_s, 5507 gen_helper_gvec_fdiv_d, 5508 }; 5509 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv) 5510 5511 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = { 5512 gen_helper_gvec_fmul_h, 5513 gen_helper_gvec_fmul_s, 5514 gen_helper_gvec_fmul_d, 5515 }; 5516 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul) 5517 5518 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { 5519 gen_helper_gvec_fmax_h, 5520 gen_helper_gvec_fmax_s, 5521 gen_helper_gvec_fmax_d, 5522 }; 5523 TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax) 5524 5525 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { 5526 gen_helper_gvec_fmin_h, 5527 gen_helper_gvec_fmin_s, 5528 gen_helper_gvec_fmin_d, 5529 }; 5530 TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin) 5531 5532 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { 5533 gen_helper_gvec_fmaxnum_h, 5534 gen_helper_gvec_fmaxnum_s, 5535 gen_helper_gvec_fmaxnum_d, 5536 }; 5537 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm) 5538 5539 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = { 5540 gen_helper_gvec_fminnum_h, 5541 gen_helper_gvec_fminnum_s, 5542 gen_helper_gvec_fminnum_d, 5543 }; 5544 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm) 5545 5546 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = { 5547 gen_helper_gvec_fmulx_h, 5548 gen_helper_gvec_fmulx_s, 5549 gen_helper_gvec_fmulx_d, 5550 }; 5551 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx) 5552 5553 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = { 5554 
gen_helper_gvec_vfma_h, 5555 gen_helper_gvec_vfma_s, 5556 gen_helper_gvec_vfma_d, 5557 }; 5558 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla) 5559 5560 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { 5561 gen_helper_gvec_vfms_h, 5562 gen_helper_gvec_vfms_s, 5563 gen_helper_gvec_vfms_d, 5564 }; 5565 TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls) 5566 5567 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { 5568 gen_helper_gvec_fceq_h, 5569 gen_helper_gvec_fceq_s, 5570 gen_helper_gvec_fceq_d, 5571 }; 5572 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq) 5573 5574 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = { 5575 gen_helper_gvec_fcge_h, 5576 gen_helper_gvec_fcge_s, 5577 gen_helper_gvec_fcge_d, 5578 }; 5579 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge) 5580 5581 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = { 5582 gen_helper_gvec_fcgt_h, 5583 gen_helper_gvec_fcgt_s, 5584 gen_helper_gvec_fcgt_d, 5585 }; 5586 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt) 5587 5588 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = { 5589 gen_helper_gvec_facge_h, 5590 gen_helper_gvec_facge_s, 5591 gen_helper_gvec_facge_d, 5592 }; 5593 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge) 5594 5595 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = { 5596 gen_helper_gvec_facgt_h, 5597 gen_helper_gvec_facgt_s, 5598 gen_helper_gvec_facgt_d, 5599 }; 5600 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt) 5601 5602 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { 5603 gen_helper_gvec_fabd_h, 5604 gen_helper_gvec_fabd_s, 5605 gen_helper_gvec_fabd_d, 5606 }; 5607 TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd) 5608 5609 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { 5610 gen_helper_gvec_recps_h, 5611 gen_helper_gvec_recps_s, 5612 gen_helper_gvec_recps_d, 5613 }; 5614 TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps) 5615 5616 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { 5617 gen_helper_gvec_rsqrts_h, 5618 gen_helper_gvec_rsqrts_s, 5619 gen_helper_gvec_rsqrts_d, 5620 }; 5621 TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts) 5622 5623 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { 5624 gen_helper_gvec_faddp_h, 5625 gen_helper_gvec_faddp_s, 5626 gen_helper_gvec_faddp_d, 5627 }; 5628 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp) 5629 5630 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { 5631 gen_helper_gvec_fmaxp_h, 5632 gen_helper_gvec_fmaxp_s, 5633 gen_helper_gvec_fmaxp_d, 5634 }; 5635 TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp) 5636 5637 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { 5638 gen_helper_gvec_fminp_h, 5639 gen_helper_gvec_fminp_s, 5640 gen_helper_gvec_fminp_d, 5641 }; 5642 TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp) 5643 5644 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { 5645 gen_helper_gvec_fmaxnump_h, 5646 gen_helper_gvec_fmaxnump_s, 5647 gen_helper_gvec_fmaxnump_d, 5648 }; 5649 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp) 5650 5651 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = { 5652 gen_helper_gvec_fminnump_h, 5653 gen_helper_gvec_fminnump_s, 5654 gen_helper_gvec_fminnump_d, 5655 }; 5656 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp) 5657 5658 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) 5659 { 5660 if (fp_access_check(s)) { 5661 int data = (is_2 << 1) | is_s; 5662 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 5663 vec_full_reg_offset(s, a->rn), 
5664 vec_full_reg_offset(s, a->rm), tcg_env, 5665 a->q ? 16 : 8, vec_full_reg_size(s), 5666 data, gen_helper_gvec_fmlal_a64); 5667 } 5668 return true; 5669 } 5670 5671 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false) 5672 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false) 5673 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true) 5674 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true) 5675 5676 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp) 5677 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp) 5678 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp) 5679 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp) 5680 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp) 5681 5682 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and) 5683 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc) 5684 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or) 5685 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc) 5686 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor) 5687 5688 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c) 5689 { 5690 if (fp_access_check(s)) { 5691 gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0); 5692 } 5693 return true; 5694 } 5695 5696 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm) 5697 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd) 5698 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn) 5699 5700 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc) 5701 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc) 5702 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc) 5703 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc) 5704 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc) 5705 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc) 5706 5707 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl) 5708 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl) 5709 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl) 5710 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl) 5711 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl) 5712 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl) 5713 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl) 5714 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl) 5715 5716 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add) 5717 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub) 5718 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd) 5719 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd) 5720 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub) 5721 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub) 5722 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd) 5723 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd) 5724 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax) 5725 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax) 5726 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin) 5727 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin) 5728 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba) 5729 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba) 5730 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd) 5731 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd) 5732 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul) 5733 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b) 5734 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla) 5735 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls) 5736 5737 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond) 5738 { 5739 if (a->esz == MO_64 && !a->q) { 5740 return false; 5741 } 5742 if (fp_access_check(s)) { 5743 tcg_gen_gvec_cmp(cond, a->esz, 5744 vec_full_reg_offset(s, a->rd), 5745 vec_full_reg_offset(s, 
a->rn), 5746 vec_full_reg_offset(s, a->rm), 5747 a->q ? 16 : 8, vec_full_reg_size(s)); 5748 } 5749 return true; 5750 } 5751 5752 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT) 5753 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU) 5754 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE) 5755 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU) 5756 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ) 5757 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst) 5758 5759 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc) 5760 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc) 5761 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc) 5762 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc) 5763 5764 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a, 5765 gen_helper_gvec_4 *fn) 5766 { 5767 if (fp_access_check(s)) { 5768 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 5769 } 5770 return true; 5771 } 5772 5773 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a, 5774 gen_helper_gvec_4_ptr *fn) 5775 { 5776 if (fp_access_check(s)) { 5777 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 5778 } 5779 return true; 5780 } 5781 5782 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b) 5783 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b) 5784 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b) 5785 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot) 5786 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla) 5787 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b) 5788 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b) 5789 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b) 5790 5791 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) 5792 { 5793 if (!dc_isar_feature(aa64_bf16, s)) { 5794 return false; 5795 } 5796 if (fp_access_check(s)) { 5797 /* Q bit selects BFMLALB vs BFMLALT. */ 5798 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q, 5799 gen_helper_gvec_bfmlal); 5800 } 5801 return true; 5802 } 5803 5804 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { 5805 gen_helper_gvec_fcaddh, 5806 gen_helper_gvec_fcadds, 5807 gen_helper_gvec_fcaddd, 5808 }; 5809 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd) 5810 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd) 5811 5812 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) 5813 { 5814 static gen_helper_gvec_4_ptr * const fn[] = { 5815 [MO_16] = gen_helper_gvec_fcmlah, 5816 [MO_32] = gen_helper_gvec_fcmlas, 5817 [MO_64] = gen_helper_gvec_fcmlad, 5818 }; 5819 int check; 5820 5821 if (!dc_isar_feature(aa64_fcma, s)) { 5822 return false; 5823 } 5824 5825 check = fp_access_check_vector_hsd(s, a->q, a->esz); 5826 if (check <= 0) { 5827 return check == 0; 5828 } 5829 5830 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 5831 a->esz == MO_16, a->rot, fn[a->esz]); 5832 return true; 5833 } 5834 5835 /* 5836 * Widening vector x vector/indexed. 5837 * 5838 * These read from the top or bottom half of a 128-bit vector. 5839 * After widening, optionally accumulate with a 128-bit vector. 5840 * Implement these inline, as the number of elements is limited 5841 * and the related SVE and SME operations on larger vectors use 5842 * even/odd elements instead of top/bottom half. 5843 * 5844 * If idx >= 0, operand 2 is indexed, otherwise vector.
5845 * If acc, operand 0 is loaded with rd. 5846 */ 5847 5848 /* For low half, iterating up. */ 5849 static bool do_3op_widening(DisasContext *s, MemOp memop, int top, 5850 int rd, int rn, int rm, int idx, 5851 NeonGenTwo64OpFn *fn, bool acc) 5852 { 5853 TCGv_i64 tcg_op0 = tcg_temp_new_i64(); 5854 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 5855 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 5856 MemOp esz = memop & MO_SIZE; 5857 int half = 8 >> esz; 5858 int top_swap, top_half; 5859 5860 /* There are no 64x64->128 bit operations. */ 5861 if (esz >= MO_64) { 5862 return false; 5863 } 5864 if (!fp_access_check(s)) { 5865 return true; 5866 } 5867 5868 if (idx >= 0) { 5869 read_vec_element(s, tcg_op2, rm, idx, memop); 5870 } 5871 5872 /* 5873 * For top half inputs, iterate forward; backward for bottom half. 5874 * This means the store to the destination will not occur until 5875 * overlapping inputs are consumed. 5876 * Use top_swap to conditionally invert the forward iteration index. 5877 */ 5878 top_swap = top ? 0 : half - 1; 5879 top_half = top ? half : 0; 5880 5881 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 5882 int elt = elt_fwd ^ top_swap; 5883 5884 read_vec_element(s, tcg_op1, rn, elt + top_half, memop); 5885 if (idx < 0) { 5886 read_vec_element(s, tcg_op2, rm, elt + top_half, memop); 5887 } 5888 if (acc) { 5889 read_vec_element(s, tcg_op0, rd, elt, memop + 1); 5890 } 5891 fn(tcg_op0, tcg_op1, tcg_op2); 5892 write_vec_element(s, tcg_op0, rd, elt, esz + 1); 5893 } 5894 clear_vec_high(s, 1, rd); 5895 return true; 5896 } 5897 5898 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5899 { 5900 TCGv_i64 t = tcg_temp_new_i64(); 5901 tcg_gen_mul_i64(t, n, m); 5902 tcg_gen_add_i64(d, d, t); 5903 } 5904 5905 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5906 { 5907 TCGv_i64 t = tcg_temp_new_i64(); 5908 tcg_gen_mul_i64(t, n, m); 5909 tcg_gen_sub_i64(d, d, t); 5910 } 5911 5912 TRANS(SMULL_v, do_3op_widening, 5913 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5914 tcg_gen_mul_i64, false) 5915 TRANS(UMULL_v, do_3op_widening, 5916 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5917 tcg_gen_mul_i64, false) 5918 TRANS(SMLAL_v, do_3op_widening, 5919 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5920 gen_muladd_i64, true) 5921 TRANS(UMLAL_v, do_3op_widening, 5922 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5923 gen_muladd_i64, true) 5924 TRANS(SMLSL_v, do_3op_widening, 5925 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5926 gen_mulsub_i64, true) 5927 TRANS(UMLSL_v, do_3op_widening, 5928 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5929 gen_mulsub_i64, true) 5930 5931 TRANS(SMULL_vi, do_3op_widening, 5932 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5933 tcg_gen_mul_i64, false) 5934 TRANS(UMULL_vi, do_3op_widening, 5935 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 5936 tcg_gen_mul_i64, false) 5937 TRANS(SMLAL_vi, do_3op_widening, 5938 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5939 gen_muladd_i64, true) 5940 TRANS(UMLAL_vi, do_3op_widening, 5941 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 5942 gen_muladd_i64, true) 5943 TRANS(SMLSL_vi, do_3op_widening, 5944 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5945 gen_mulsub_i64, true) 5946 TRANS(UMLSL_vi, do_3op_widening, 5947 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 5948 gen_mulsub_i64, true) 5949 5950 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5951 { 5952 TCGv_i64 t1 = tcg_temp_new_i64(); 5953 TCGv_i64 t2 = tcg_temp_new_i64(); 5954 5955 tcg_gen_sub_i64(t1, n, m); 5956 tcg_gen_sub_i64(t2, m, n);
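    /* Signed absolute difference: d = (n >= m ? n - m : m - n). */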
5957 tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2); 5958 } 5959 5960 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5961 { 5962 TCGv_i64 t1 = tcg_temp_new_i64(); 5963 TCGv_i64 t2 = tcg_temp_new_i64(); 5964 5965 tcg_gen_sub_i64(t1, n, m); 5966 tcg_gen_sub_i64(t2, m, n); 5967 tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2); 5968 } 5969 5970 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5971 { 5972 TCGv_i64 t = tcg_temp_new_i64(); 5973 gen_sabd_i64(t, n, m); 5974 tcg_gen_add_i64(d, d, t); 5975 } 5976 5977 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5978 { 5979 TCGv_i64 t = tcg_temp_new_i64(); 5980 gen_uabd_i64(t, n, m); 5981 tcg_gen_add_i64(d, d, t); 5982 } 5983 5984 TRANS(SADDL_v, do_3op_widening, 5985 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5986 tcg_gen_add_i64, false) 5987 TRANS(UADDL_v, do_3op_widening, 5988 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5989 tcg_gen_add_i64, false) 5990 TRANS(SSUBL_v, do_3op_widening, 5991 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5992 tcg_gen_sub_i64, false) 5993 TRANS(USUBL_v, do_3op_widening, 5994 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5995 tcg_gen_sub_i64, false) 5996 TRANS(SABDL_v, do_3op_widening, 5997 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5998 gen_sabd_i64, false) 5999 TRANS(UABDL_v, do_3op_widening, 6000 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6001 gen_uabd_i64, false) 6002 TRANS(SABAL_v, do_3op_widening, 6003 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6004 gen_saba_i64, true) 6005 TRANS(UABAL_v, do_3op_widening, 6006 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6007 gen_uaba_i64, true) 6008 6009 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6010 { 6011 tcg_gen_mul_i64(d, n, m); 6012 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d); 6013 } 6014 6015 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6016 { 6017 tcg_gen_mul_i64(d, n, m); 6018 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d); 6019 } 6020 6021 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6022 { 6023 TCGv_i64 t = tcg_temp_new_i64(); 6024 6025 tcg_gen_mul_i64(t, n, m); 6026 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 6027 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 6028 } 6029 6030 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6031 { 6032 TCGv_i64 t = tcg_temp_new_i64(); 6033 6034 tcg_gen_mul_i64(t, n, m); 6035 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 6036 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 6037 } 6038 6039 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6040 { 6041 TCGv_i64 t = tcg_temp_new_i64(); 6042 6043 tcg_gen_mul_i64(t, n, m); 6044 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 6045 tcg_gen_neg_i64(t, t); 6046 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 6047 } 6048 6049 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6050 { 6051 TCGv_i64 t = tcg_temp_new_i64(); 6052 6053 tcg_gen_mul_i64(t, n, m); 6054 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 6055 tcg_gen_neg_i64(t, t); 6056 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 6057 } 6058 6059 TRANS(SQDMULL_v, do_3op_widening, 6060 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6061 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6062 TRANS(SQDMLAL_v, do_3op_widening, 6063 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6064 a->esz == MO_16 ? 
gen_sqdmlal_h : gen_sqdmlal_s, true) 6065 TRANS(SQDMLSL_v, do_3op_widening, 6066 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6067 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6068 6069 TRANS(SQDMULL_vi, do_3op_widening, 6070 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6071 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6072 TRANS(SQDMLAL_vi, do_3op_widening, 6073 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6074 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6075 TRANS(SQDMLSL_vi, do_3op_widening, 6076 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6077 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6078 6079 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a, 6080 MemOp sign, bool sub) 6081 { 6082 TCGv_i64 tcg_op0, tcg_op1; 6083 MemOp esz = a->esz; 6084 int half = 8 >> esz; 6085 bool top = a->q; 6086 int top_swap = top ? 0 : half - 1; 6087 int top_half = top ? half : 0; 6088 6089 /* There are no 64x64->128 bit operations. */ 6090 if (esz >= MO_64) { 6091 return false; 6092 } 6093 if (!fp_access_check(s)) { 6094 return true; 6095 } 6096 tcg_op0 = tcg_temp_new_i64(); 6097 tcg_op1 = tcg_temp_new_i64(); 6098 6099 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6100 int elt = elt_fwd ^ top_swap; 6101 6102 read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign); 6103 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); 6104 if (sub) { 6105 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); 6106 } else { 6107 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); 6108 } 6109 write_vec_element(s, tcg_op0, a->rd, elt, esz + 1); 6110 } 6111 clear_vec_high(s, 1, a->rd); 6112 return true; 6113 } 6114 6115 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false) 6116 TRANS(UADDW, do_addsub_wide, a, 0, false) 6117 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true) 6118 TRANS(USUBW, do_addsub_wide, a, 0, true) 6119 6120 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a, 6121 bool sub, bool round) 6122 { 6123 TCGv_i64 tcg_op0, tcg_op1; 6124 MemOp esz = a->esz; 6125 int half = 8 >> esz; 6126 bool top = a->q; 6127 int ebits = 8 << esz; 6128 uint64_t rbit = 1ull << (ebits - 1); 6129 int top_swap, top_half; 6130 6131 /* There are no 128x128->64 bit operations. */ 6132 if (esz >= MO_64) { 6133 return false; 6134 } 6135 if (!fp_access_check(s)) { 6136 return true; 6137 } 6138 tcg_op0 = tcg_temp_new_i64(); 6139 tcg_op1 = tcg_temp_new_i64(); 6140 6141 /* 6142 * For top half inputs, iterate backward; forward for bottom half. 6143 * This means the store to the destination will not occur until 6144 * overlapping inputs are consumed. 6145 */ 6146 top_swap = top ? half - 1 : 0; 6147 top_half = top ?
half : 0; 6148 6149 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6150 int elt = elt_fwd ^ top_swap; 6151 6152 read_vec_element(s, tcg_op1, a->rm, elt, esz + 1); 6153 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); 6154 if (sub) { 6155 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); 6156 } else { 6157 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); 6158 } 6159 if (round) { 6160 tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit); 6161 } 6162 tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits); 6163 write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz); 6164 } 6165 clear_vec_high(s, top, a->rd); 6166 return true; 6167 } 6168 6169 TRANS(ADDHN, do_addsub_highnarrow, a, false, false) 6170 TRANS(SUBHN, do_addsub_highnarrow, a, true, false) 6171 TRANS(RADDHN, do_addsub_highnarrow, a, false, true) 6172 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true) 6173 6174 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn) 6175 { 6176 if (fp_access_check(s)) { 6177 /* The Q field specifies lo/hi half input for these insns. */ 6178 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn); 6179 } 6180 return true; 6181 } 6182 6183 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h) 6184 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q) 6185 6186 /* 6187 * Advanced SIMD scalar/vector x indexed element 6188 */ 6189 6190 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) 6191 { 6192 switch (a->esz) { 6193 case MO_64: 6194 if (fp_access_check(s)) { 6195 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 6196 TCGv_i64 t1 = tcg_temp_new_i64(); 6197 6198 read_vec_element(s, t1, a->rm, a->idx, MO_64); 6199 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6200 write_fp_dreg(s, a->rd, t0); 6201 } 6202 break; 6203 case MO_32: 6204 if (fp_access_check(s)) { 6205 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 6206 TCGv_i32 t1 = tcg_temp_new_i32(); 6207 6208 read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); 6209 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6210 write_fp_sreg(s, a->rd, t0); 6211 } 6212 break; 6213 case MO_16: 6214 if (!dc_isar_feature(aa64_fp16, s)) { 6215 return false; 6216 } 6217 if (fp_access_check(s)) { 6218 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 6219 TCGv_i32 t1 = tcg_temp_new_i32(); 6220 6221 read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); 6222 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 6223 write_fp_sreg(s, a->rd, t0); 6224 } 6225 break; 6226 default: 6227 g_assert_not_reached(); 6228 } 6229 return true; 6230 } 6231 6232 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul) 6233 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx) 6234 6235 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) 6236 { 6237 switch (a->esz) { 6238 case MO_64: 6239 if (fp_access_check(s)) { 6240 TCGv_i64 t0 = read_fp_dreg(s, a->rd); 6241 TCGv_i64 t1 = read_fp_dreg(s, a->rn); 6242 TCGv_i64 t2 = tcg_temp_new_i64(); 6243 6244 read_vec_element(s, t2, a->rm, a->idx, MO_64); 6245 if (neg) { 6246 gen_vfp_negd(t1, t1); 6247 } 6248 gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); 6249 write_fp_dreg(s, a->rd, t0); 6250 } 6251 break; 6252 case MO_32: 6253 if (fp_access_check(s)) { 6254 TCGv_i32 t0 = read_fp_sreg(s, a->rd); 6255 TCGv_i32 t1 = read_fp_sreg(s, a->rn); 6256 TCGv_i32 t2 = tcg_temp_new_i32(); 6257 6258 read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); 6259 if (neg) { 6260 gen_vfp_negs(t1, t1); 6261 } 6262 gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); 6263 write_fp_sreg(s, a->rd, t0); 6264 } 6265 break; 6266 case MO_16: 
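        /* The half-precision variant requires FEAT_FP16. */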
6267 if (!dc_isar_feature(aa64_fp16, s)) { 6268 return false; 6269 } 6270 if (fp_access_check(s)) { 6271 TCGv_i32 t0 = read_fp_hreg(s, a->rd); 6272 TCGv_i32 t1 = read_fp_hreg(s, a->rn); 6273 TCGv_i32 t2 = tcg_temp_new_i32(); 6274 6275 read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); 6276 if (neg) { 6277 gen_vfp_negh(t1, t1); 6278 } 6279 gen_helper_advsimd_muladdh(t0, t1, t2, t0, 6280 fpstatus_ptr(FPST_A64_F16)); 6281 write_fp_sreg(s, a->rd, t0); 6282 } 6283 break; 6284 default: 6285 g_assert_not_reached(); 6286 } 6287 return true; 6288 } 6289 6290 TRANS(FMLA_si, do_fmla_scalar_idx, a, false) 6291 TRANS(FMLS_si, do_fmla_scalar_idx, a, true) 6292 6293 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a, 6294 const ENVScalar2 *f) 6295 { 6296 if (a->esz < MO_16 || a->esz > MO_32) { 6297 return false; 6298 } 6299 if (fp_access_check(s)) { 6300 TCGv_i32 t0 = tcg_temp_new_i32(); 6301 TCGv_i32 t1 = tcg_temp_new_i32(); 6302 6303 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6304 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6305 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 6306 write_fp_sreg(s, a->rd, t0); 6307 } 6308 return true; 6309 } 6310 6311 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh) 6312 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh) 6313 6314 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a, 6315 const ENVScalar3 *f) 6316 { 6317 if (a->esz < MO_16 || a->esz > MO_32) { 6318 return false; 6319 } 6320 if (fp_access_check(s)) { 6321 TCGv_i32 t0 = tcg_temp_new_i32(); 6322 TCGv_i32 t1 = tcg_temp_new_i32(); 6323 TCGv_i32 t2 = tcg_temp_new_i32(); 6324 6325 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6326 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6327 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 6328 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 6329 write_fp_sreg(s, a->rd, t0); 6330 } 6331 return true; 6332 } 6333 6334 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah) 6335 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh) 6336 6337 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a, 6338 NeonGenTwo64OpFn *fn, bool acc) 6339 { 6340 if (fp_access_check(s)) { 6341 TCGv_i64 t0 = tcg_temp_new_i64(); 6342 TCGv_i64 t1 = tcg_temp_new_i64(); 6343 TCGv_i64 t2 = tcg_temp_new_i64(); 6344 6345 if (acc) { 6346 read_vec_element(s, t0, a->rd, 0, a->esz + 1); 6347 } 6348 read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN); 6349 read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN); 6350 fn(t0, t1, t2); 6351 6352 /* Clear the whole register first, then store scalar. */ 6353 clear_vec(s, a->rd); 6354 write_vec_element(s, t0, a->rd, 0, a->esz + 1); 6355 } 6356 return true; 6357 } 6358 6359 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a, 6360 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6361 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a, 6362 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6363 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a, 6364 a->esz == MO_16 ? 
gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6365 6366 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6367 gen_helper_gvec_3_ptr * const fns[3]) 6368 { 6369 MemOp esz = a->esz; 6370 int check = fp_access_check_vector_hsd(s, a->q, esz); 6371 6372 if (check <= 0) { 6373 return check == 0; 6374 } 6375 6376 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 6377 esz == MO_16, a->idx, fns[esz - 1]); 6378 return true; 6379 } 6380 6381 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = { 6382 gen_helper_gvec_fmul_idx_h, 6383 gen_helper_gvec_fmul_idx_s, 6384 gen_helper_gvec_fmul_idx_d, 6385 }; 6386 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul) 6387 6388 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = { 6389 gen_helper_gvec_fmulx_idx_h, 6390 gen_helper_gvec_fmulx_idx_s, 6391 gen_helper_gvec_fmulx_idx_d, 6392 }; 6393 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) 6394 6395 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) 6396 { 6397 static gen_helper_gvec_4_ptr * const fns[3] = { 6398 gen_helper_gvec_fmla_idx_h, 6399 gen_helper_gvec_fmla_idx_s, 6400 gen_helper_gvec_fmla_idx_d, 6401 }; 6402 MemOp esz = a->esz; 6403 int check = fp_access_check_vector_hsd(s, a->q, esz); 6404 6405 if (check <= 0) { 6406 return check == 0; 6407 } 6408 6409 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6410 esz == MO_16, (a->idx << 1) | neg, 6411 fns[esz - 1]); 6412 return true; 6413 } 6414 6415 TRANS(FMLA_vi, do_fmla_vector_idx, a, false) 6416 TRANS(FMLS_vi, do_fmla_vector_idx, a, true) 6417 6418 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2) 6419 { 6420 if (fp_access_check(s)) { 6421 int data = (a->idx << 2) | (is_2 << 1) | is_s; 6422 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 6423 vec_full_reg_offset(s, a->rn), 6424 vec_full_reg_offset(s, a->rm), tcg_env, 6425 a->q ? 
16 : 8, vec_full_reg_size(s), 6426 data, gen_helper_gvec_fmlal_idx_a64); 6427 } 6428 return true; 6429 } 6430 6431 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false) 6432 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false) 6433 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true) 6434 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true) 6435 6436 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6437 gen_helper_gvec_3 * const fns[2]) 6438 { 6439 assert(a->esz == MO_16 || a->esz == MO_32); 6440 if (fp_access_check(s)) { 6441 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]); 6442 } 6443 return true; 6444 } 6445 6446 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = { 6447 gen_helper_gvec_mul_idx_h, 6448 gen_helper_gvec_mul_idx_s, 6449 }; 6450 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul) 6451 6452 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub) 6453 { 6454 static gen_helper_gvec_4 * const fns[2][2] = { 6455 { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h }, 6456 { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s }, 6457 }; 6458 6459 assert(a->esz == MO_16 || a->esz == MO_32); 6460 if (fp_access_check(s)) { 6461 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 6462 a->idx, fns[a->esz - 1][sub]); 6463 } 6464 return true; 6465 } 6466 6467 TRANS(MLA_vi, do_mla_vector_idx, a, false) 6468 TRANS(MLS_vi, do_mla_vector_idx, a, true) 6469 6470 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a, 6471 gen_helper_gvec_4 * const fns[2]) 6472 { 6473 assert(a->esz == MO_16 || a->esz == MO_32); 6474 if (fp_access_check(s)) { 6475 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), 6476 vec_full_reg_offset(s, a->rn), 6477 vec_full_reg_offset(s, a->rm), 6478 offsetof(CPUARMState, vfp.qc), 6479 a->q ? 
16 : 8, vec_full_reg_size(s), 6480 a->idx, fns[a->esz - 1]); 6481 } 6482 return true; 6483 } 6484 6485 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = { 6486 gen_helper_neon_sqdmulh_idx_h, 6487 gen_helper_neon_sqdmulh_idx_s, 6488 }; 6489 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh) 6490 6491 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = { 6492 gen_helper_neon_sqrdmulh_idx_h, 6493 gen_helper_neon_sqrdmulh_idx_s, 6494 }; 6495 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh) 6496 6497 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = { 6498 gen_helper_neon_sqrdmlah_idx_h, 6499 gen_helper_neon_sqrdmlah_idx_s, 6500 }; 6501 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6502 f_vector_idx_sqrdmlah) 6503 6504 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = { 6505 gen_helper_neon_sqrdmlsh_idx_h, 6506 gen_helper_neon_sqrdmlsh_idx_s, 6507 }; 6508 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6509 f_vector_idx_sqrdmlsh) 6510 6511 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a, 6512 gen_helper_gvec_4 *fn) 6513 { 6514 if (fp_access_check(s)) { 6515 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6516 } 6517 return true; 6518 } 6519 6520 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a, 6521 gen_helper_gvec_4_ptr *fn) 6522 { 6523 if (fp_access_check(s)) { 6524 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6525 } 6526 return true; 6527 } 6528 6529 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b) 6530 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b) 6531 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6532 gen_helper_gvec_sudot_idx_b) 6533 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6534 gen_helper_gvec_usdot_idx_b) 6535 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a, 6536 gen_helper_gvec_bfdot_idx) 6537 6538 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) 6539 { 6540 if (!dc_isar_feature(aa64_bf16, s)) { 6541 return false; 6542 } 6543 if (fp_access_check(s)) { 6544 /* Q bit selects BFMLALB vs BFMLALT. 
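       The index and the Q bit are packed into the helper's data argument.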
*/ 6545 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0, 6546 (a->idx << 1) | a->q, 6547 gen_helper_gvec_bfmlal_idx); 6548 } 6549 return true; 6550 } 6551 6552 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) 6553 { 6554 gen_helper_gvec_4_ptr *fn; 6555 6556 if (!dc_isar_feature(aa64_fcma, s)) { 6557 return false; 6558 } 6559 switch (a->esz) { 6560 case MO_16: 6561 if (!dc_isar_feature(aa64_fp16, s)) { 6562 return false; 6563 } 6564 fn = gen_helper_gvec_fcmlah_idx; 6565 break; 6566 case MO_32: 6567 fn = gen_helper_gvec_fcmlas_idx; 6568 break; 6569 default: 6570 g_assert_not_reached(); 6571 } 6572 if (fp_access_check(s)) { 6573 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6574 a->esz == MO_16, (a->idx << 2) | a->rot, fn); 6575 } 6576 return true; 6577 } 6578 6579 /* 6580 * Advanced SIMD scalar pairwise 6581 */ 6582 6583 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) 6584 { 6585 switch (a->esz) { 6586 case MO_64: 6587 if (fp_access_check(s)) { 6588 TCGv_i64 t0 = tcg_temp_new_i64(); 6589 TCGv_i64 t1 = tcg_temp_new_i64(); 6590 6591 read_vec_element(s, t0, a->rn, 0, MO_64); 6592 read_vec_element(s, t1, a->rn, 1, MO_64); 6593 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6594 write_fp_dreg(s, a->rd, t0); 6595 } 6596 break; 6597 case MO_32: 6598 if (fp_access_check(s)) { 6599 TCGv_i32 t0 = tcg_temp_new_i32(); 6600 TCGv_i32 t1 = tcg_temp_new_i32(); 6601 6602 read_vec_element_i32(s, t0, a->rn, 0, MO_32); 6603 read_vec_element_i32(s, t1, a->rn, 1, MO_32); 6604 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6605 write_fp_sreg(s, a->rd, t0); 6606 } 6607 break; 6608 case MO_16: 6609 if (!dc_isar_feature(aa64_fp16, s)) { 6610 return false; 6611 } 6612 if (fp_access_check(s)) { 6613 TCGv_i32 t0 = tcg_temp_new_i32(); 6614 TCGv_i32 t1 = tcg_temp_new_i32(); 6615 6616 read_vec_element_i32(s, t0, a->rn, 0, MO_16); 6617 read_vec_element_i32(s, t1, a->rn, 1, MO_16); 6618 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 6619 write_fp_sreg(s, a->rd, t0); 6620 } 6621 break; 6622 default: 6623 g_assert_not_reached(); 6624 } 6625 return true; 6626 } 6627 6628 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) 6629 TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax) 6630 TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin) 6631 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) 6632 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) 6633 6634 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a) 6635 { 6636 if (fp_access_check(s)) { 6637 TCGv_i64 t0 = tcg_temp_new_i64(); 6638 TCGv_i64 t1 = tcg_temp_new_i64(); 6639 6640 read_vec_element(s, t0, a->rn, 0, MO_64); 6641 read_vec_element(s, t1, a->rn, 1, MO_64); 6642 tcg_gen_add_i64(t0, t0, t1); 6643 write_fp_dreg(s, a->rd, t0); 6644 } 6645 return true; 6646 } 6647 6648 /* 6649 * Floating-point conditional select 6650 */ 6651 6652 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a) 6653 { 6654 TCGv_i64 t_true, t_false; 6655 DisasCompare64 c; 6656 int check = fp_access_check_scalar_hsd(s, a->esz); 6657 6658 if (check <= 0) { 6659 return check == 0; 6660 } 6661 6662 /* Zero extend sreg & hreg inputs to 64 bits now. 
*/ 6663 t_true = tcg_temp_new_i64(); 6664 t_false = tcg_temp_new_i64(); 6665 read_vec_element(s, t_true, a->rn, 0, a->esz); 6666 read_vec_element(s, t_false, a->rm, 0, a->esz); 6667 6668 a64_test_cc(&c, a->cond); 6669 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 6670 t_true, t_false); 6671 6672 /* 6673 * Note that sregs & hregs write back zeros to the high bits, 6674 * and we've already done the zero-extension. 6675 */ 6676 write_fp_dreg(s, a->rd, t_true); 6677 return true; 6678 } 6679 6680 /* 6681 * Advanced SIMD Extract 6682 */ 6683 6684 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a) 6685 { 6686 if (fp_access_check(s)) { 6687 TCGv_i64 lo = read_fp_dreg(s, a->rn); 6688 if (a->imm != 0) { 6689 TCGv_i64 hi = read_fp_dreg(s, a->rm); 6690 tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8); 6691 } 6692 write_fp_dreg(s, a->rd, lo); 6693 } 6694 return true; 6695 } 6696 6697 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a) 6698 { 6699 TCGv_i64 lo, hi; 6700 int pos = (a->imm & 7) * 8; 6701 int elt = a->imm >> 3; 6702 6703 if (!fp_access_check(s)) { 6704 return true; 6705 } 6706 6707 lo = tcg_temp_new_i64(); 6708 hi = tcg_temp_new_i64(); 6709 6710 read_vec_element(s, lo, a->rn, elt, MO_64); 6711 elt++; 6712 read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64); 6713 elt++; 6714 6715 if (pos != 0) { 6716 TCGv_i64 hh = tcg_temp_new_i64(); 6717 tcg_gen_extract2_i64(lo, lo, hi, pos); 6718 read_vec_element(s, hh, a->rm, elt & 1, MO_64); 6719 tcg_gen_extract2_i64(hi, hi, hh, pos); 6720 } 6721 6722 write_vec_element(s, lo, a->rd, 0, MO_64); 6723 write_vec_element(s, hi, a->rd, 1, MO_64); 6724 clear_vec_high(s, true, a->rd); 6725 return true; 6726 } 6727 6728 /* 6729 * Floating-point data-processing (3 source) 6730 */ 6731 6732 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) 6733 { 6734 TCGv_ptr fpst; 6735 6736 /* 6737 * These are fused multiply-add. Note that doing the negations here 6738 * as separate steps is correct: an input NaN should come out with 6739 * its sign bit flipped if it is a negated-input. 
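 * Here neg_a negates the addend (ra) and neg_n negates the product
 * (via rn): FMADD passes neither, FMSUB sets neg_n, FNMADD sets both,
 * and FNMSUB sets only neg_a.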
6740 */ 6741 switch (a->esz) { 6742 case MO_64: 6743 if (fp_access_check(s)) { 6744 TCGv_i64 tn = read_fp_dreg(s, a->rn); 6745 TCGv_i64 tm = read_fp_dreg(s, a->rm); 6746 TCGv_i64 ta = read_fp_dreg(s, a->ra); 6747 6748 if (neg_a) { 6749 gen_vfp_negd(ta, ta); 6750 } 6751 if (neg_n) { 6752 gen_vfp_negd(tn, tn); 6753 } 6754 fpst = fpstatus_ptr(FPST_A64); 6755 gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); 6756 write_fp_dreg(s, a->rd, ta); 6757 } 6758 break; 6759 6760 case MO_32: 6761 if (fp_access_check(s)) { 6762 TCGv_i32 tn = read_fp_sreg(s, a->rn); 6763 TCGv_i32 tm = read_fp_sreg(s, a->rm); 6764 TCGv_i32 ta = read_fp_sreg(s, a->ra); 6765 6766 if (neg_a) { 6767 gen_vfp_negs(ta, ta); 6768 } 6769 if (neg_n) { 6770 gen_vfp_negs(tn, tn); 6771 } 6772 fpst = fpstatus_ptr(FPST_A64); 6773 gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); 6774 write_fp_sreg(s, a->rd, ta); 6775 } 6776 break; 6777 6778 case MO_16: 6779 if (!dc_isar_feature(aa64_fp16, s)) { 6780 return false; 6781 } 6782 if (fp_access_check(s)) { 6783 TCGv_i32 tn = read_fp_hreg(s, a->rn); 6784 TCGv_i32 tm = read_fp_hreg(s, a->rm); 6785 TCGv_i32 ta = read_fp_hreg(s, a->ra); 6786 6787 if (neg_a) { 6788 gen_vfp_negh(ta, ta); 6789 } 6790 if (neg_n) { 6791 gen_vfp_negh(tn, tn); 6792 } 6793 fpst = fpstatus_ptr(FPST_A64_F16); 6794 gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); 6795 write_fp_sreg(s, a->rd, ta); 6796 } 6797 break; 6798 6799 default: 6800 return false; 6801 } 6802 return true; 6803 } 6804 6805 TRANS(FMADD, do_fmadd, a, false, false) 6806 TRANS(FNMADD, do_fmadd, a, true, true) 6807 TRANS(FMSUB, do_fmadd, a, false, true) 6808 TRANS(FNMSUB, do_fmadd, a, true, false) 6809 6810 /* 6811 * Advanced SIMD Across Lanes 6812 */ 6813 6814 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen, 6815 MemOp src_sign, NeonGenTwo64OpFn *fn) 6816 { 6817 TCGv_i64 tcg_res, tcg_elt; 6818 MemOp src_mop = a->esz | src_sign; 6819 int elements = (a->q ? 16 : 8) >> a->esz; 6820 6821 /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */ 6822 if (elements < 4) { 6823 return false; 6824 } 6825 if (!fp_access_check(s)) { 6826 return true; 6827 } 6828 6829 tcg_res = tcg_temp_new_i64(); 6830 tcg_elt = tcg_temp_new_i64(); 6831 6832 read_vec_element(s, tcg_res, a->rn, 0, src_mop); 6833 for (int i = 1; i < elements; i++) { 6834 read_vec_element(s, tcg_elt, a->rn, i, src_mop); 6835 fn(tcg_res, tcg_res, tcg_elt); 6836 } 6837 6838 tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen); 6839 write_fp_dreg(s, a->rd, tcg_res); 6840 return true; 6841 } 6842 6843 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64) 6844 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64) 6845 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64) 6846 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64) 6847 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64) 6848 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64) 6849 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64) 6850 6851 /* 6852 * do_fp_reduction helper 6853 * 6854 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 6855 * important for correct NaN propagation that we do these 6856 * operations in exactly the order specified by the pseudocode. 6857 * 6858 * This is a recursive function. 
6859 */ 6860 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, 6861 int ebase, int ecount, TCGv_ptr fpst, 6862 NeonGenTwoSingleOpFn *fn) 6863 { 6864 if (ecount == 1) { 6865 TCGv_i32 tcg_elem = tcg_temp_new_i32(); 6866 read_vec_element_i32(s, tcg_elem, rn, ebase, esz); 6867 return tcg_elem; 6868 } else { 6869 int half = ecount >> 1; 6870 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 6871 6872 tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn); 6873 tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn); 6874 tcg_res = tcg_temp_new_i32(); 6875 6876 fn(tcg_res, tcg_lo, tcg_hi, fpst); 6877 return tcg_res; 6878 } 6879 } 6880 6881 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, 6882 NeonGenTwoSingleOpFn *fn) 6883 { 6884 if (fp_access_check(s)) { 6885 MemOp esz = a->esz; 6886 int elts = (a->q ? 16 : 8) >> esz; 6887 TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); 6888 TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn); 6889 write_fp_sreg(s, a->rd, res); 6890 } 6891 return true; 6892 } 6893 6894 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxnumh) 6895 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minnumh) 6896 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_maxh) 6897 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_vfp_minh) 6898 6899 TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums) 6900 TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums) 6901 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs) 6902 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins) 6903 6904 /* 6905 * Floating-point Immediate 6906 */ 6907 6908 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a) 6909 { 6910 int check = fp_access_check_scalar_hsd(s, a->esz); 6911 uint64_t imm; 6912 6913 if (check <= 0) { 6914 return check == 0; 6915 } 6916 6917 imm = vfp_expand_imm(a->esz, a->imm); 6918 write_fp_dreg(s, a->rd, tcg_constant_i64(imm)); 6919 return true; 6920 } 6921 6922 /* 6923 * Floating point compare, conditional compare 6924 */ 6925 6926 static void handle_fp_compare(DisasContext *s, int size, 6927 unsigned int rn, unsigned int rm, 6928 bool cmp_with_zero, bool signal_all_nans) 6929 { 6930 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 6931 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? 
FPST_A64_F16 : FPST_A64); 6932 6933 if (size == MO_64) { 6934 TCGv_i64 tcg_vn, tcg_vm; 6935 6936 tcg_vn = read_fp_dreg(s, rn); 6937 if (cmp_with_zero) { 6938 tcg_vm = tcg_constant_i64(0); 6939 } else { 6940 tcg_vm = read_fp_dreg(s, rm); 6941 } 6942 if (signal_all_nans) { 6943 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6944 } else { 6945 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6946 } 6947 } else { 6948 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 6949 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 6950 6951 read_vec_element_i32(s, tcg_vn, rn, 0, size); 6952 if (cmp_with_zero) { 6953 tcg_gen_movi_i32(tcg_vm, 0); 6954 } else { 6955 read_vec_element_i32(s, tcg_vm, rm, 0, size); 6956 } 6957 6958 switch (size) { 6959 case MO_32: 6960 if (signal_all_nans) { 6961 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6962 } else { 6963 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6964 } 6965 break; 6966 case MO_16: 6967 if (signal_all_nans) { 6968 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6969 } else { 6970 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6971 } 6972 break; 6973 default: 6974 g_assert_not_reached(); 6975 } 6976 } 6977 6978 gen_set_nzcv(tcg_flags); 6979 } 6980 6981 /* FCMP, FCMPE */ 6982 static bool trans_FCMP(DisasContext *s, arg_FCMP *a) 6983 { 6984 int check = fp_access_check_scalar_hsd(s, a->esz); 6985 6986 if (check <= 0) { 6987 return check == 0; 6988 } 6989 6990 handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e); 6991 return true; 6992 } 6993 6994 /* FCCMP, FCCMPE */ 6995 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a) 6996 { 6997 TCGLabel *label_continue = NULL; 6998 int check = fp_access_check_scalar_hsd(s, a->esz); 6999 7000 if (check <= 0) { 7001 return check == 0; 7002 } 7003 7004 if (a->cond < 0x0e) { /* not always */ 7005 TCGLabel *label_match = gen_new_label(); 7006 label_continue = gen_new_label(); 7007 arm_gen_test_cc(a->cond, label_match); 7008 /* nomatch: */ 7009 gen_set_nzcv(tcg_constant_i64(a->nzcv << 28)); 7010 tcg_gen_br(label_continue); 7011 gen_set_label(label_match); 7012 } 7013 7014 handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e); 7015 7016 if (label_continue) { 7017 gen_set_label(label_continue); 7018 } 7019 return true; 7020 } 7021 7022 /* 7023 * Advanced SIMD Modified Immediate 7024 */ 7025 7026 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a) 7027 { 7028 if (!dc_isar_feature(aa64_fp16, s)) { 7029 return false; 7030 } 7031 if (fp_access_check(s)) { 7032 tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd), 7033 a->q ? 16 : 8, vec_full_reg_size(s), 7034 vfp_expand_imm(MO_16, a->abcdefgh)); 7035 } 7036 return true; 7037 } 7038 7039 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs, 7040 int64_t c, uint32_t oprsz, uint32_t maxsz) 7041 { 7042 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c); 7043 } 7044 7045 static bool trans_Vimm(DisasContext *s, arg_Vimm *a) 7046 { 7047 GVecGen2iFn *fn; 7048 7049 /* Handle decode of cmode/op here between ORR/BIC/MOVI */ 7050 if ((a->cmode & 1) && a->cmode < 12) { 7051 /* For op=1, the imm will be inverted, so BIC becomes AND. */ 7052 fn = a->op ? 
tcg_gen_gvec_andi : tcg_gen_gvec_ori; 7053 } else { 7054 /* There is one unallocated cmode/op combination in this space */ 7055 if (a->cmode == 15 && a->op == 1 && a->q == 0) { 7056 return false; 7057 } 7058 fn = gen_movi; 7059 } 7060 7061 if (fp_access_check(s)) { 7062 uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op); 7063 gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64); 7064 } 7065 return true; 7066 } 7067 7068 /* 7069 * Advanced SIMD Shift by Immediate 7070 */ 7071 7072 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn) 7073 { 7074 if (fp_access_check(s)) { 7075 gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz); 7076 } 7077 return true; 7078 } 7079 7080 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr) 7081 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr) 7082 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra) 7083 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra) 7084 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr) 7085 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr) 7086 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra) 7087 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra) 7088 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri) 7089 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli) 7090 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli); 7091 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli) 7092 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli) 7093 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui) 7094 7095 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u) 7096 { 7097 TCGv_i64 tcg_rn, tcg_rd; 7098 int esz = a->esz; 7099 int esize; 7100 7101 if (!fp_access_check(s)) { 7102 return true; 7103 } 7104 7105 /* 7106 * For the LL variants the store is larger than the load, 7107 * so if rd == rn we would overwrite parts of our input. 7108 * So load everything right now and use shifts in the main loop. 
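 * (The SSHLL/USHLL destination is a full 128-bit vector even though
 * only one 64-bit half of the source register is read.)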
7109 */ 7110 tcg_rd = tcg_temp_new_i64(); 7111 tcg_rn = tcg_temp_new_i64(); 7112 read_vec_element(s, tcg_rn, a->rn, a->q, MO_64); 7113 7114 esize = 8 << esz; 7115 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7116 if (is_u) { 7117 tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize); 7118 } else { 7119 tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize); 7120 } 7121 tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm); 7122 write_vec_element(s, tcg_rd, a->rd, i, esz + 1); 7123 } 7124 clear_vec_high(s, true, a->rd); 7125 return true; 7126 } 7127 7128 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false) 7129 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true) 7130 7131 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7132 { 7133 assert(shift >= 0 && shift <= 64); 7134 tcg_gen_sari_i64(dst, src, MIN(shift, 63)); 7135 } 7136 7137 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7138 { 7139 assert(shift >= 0 && shift <= 64); 7140 if (shift == 64) { 7141 tcg_gen_movi_i64(dst, 0); 7142 } else { 7143 tcg_gen_shri_i64(dst, src, shift); 7144 } 7145 } 7146 7147 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7148 { 7149 gen_sshr_d(src, src, shift); 7150 tcg_gen_add_i64(dst, dst, src); 7151 } 7152 7153 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7154 { 7155 gen_ushr_d(src, src, shift); 7156 tcg_gen_add_i64(dst, dst, src); 7157 } 7158 7159 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7160 { 7161 assert(shift >= 0 && shift <= 32); 7162 if (shift) { 7163 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7164 tcg_gen_add_i64(dst, src, rnd); 7165 tcg_gen_sari_i64(dst, dst, shift); 7166 } else { 7167 tcg_gen_mov_i64(dst, src); 7168 } 7169 } 7170 7171 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7172 { 7173 assert(shift >= 0 && shift <= 32); 7174 if (shift) { 7175 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7176 tcg_gen_add_i64(dst, src, rnd); 7177 tcg_gen_shri_i64(dst, dst, shift); 7178 } else { 7179 tcg_gen_mov_i64(dst, src); 7180 } 7181 } 7182 7183 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7184 { 7185 assert(shift >= 0 && shift <= 64); 7186 if (shift == 0) { 7187 tcg_gen_mov_i64(dst, src); 7188 } else if (shift == 64) { 7189 /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */ 7190 tcg_gen_movi_i64(dst, 0); 7191 } else { 7192 TCGv_i64 rnd = tcg_temp_new_i64(); 7193 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7194 tcg_gen_sari_i64(dst, src, shift); 7195 tcg_gen_add_i64(dst, dst, rnd); 7196 } 7197 } 7198 7199 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7200 { 7201 assert(shift >= 0 && shift <= 64); 7202 if (shift == 0) { 7203 tcg_gen_mov_i64(dst, src); 7204 } else if (shift == 64) { 7205 /* Rounding will propagate bit 63 into bit 64. */ 7206 tcg_gen_shri_i64(dst, src, 63); 7207 } else { 7208 TCGv_i64 rnd = tcg_temp_new_i64(); 7209 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7210 tcg_gen_shri_i64(dst, src, shift); 7211 tcg_gen_add_i64(dst, dst, rnd); 7212 } 7213 } 7214 7215 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7216 { 7217 gen_srshr_d(src, src, shift); 7218 tcg_gen_add_i64(dst, dst, src); 7219 } 7220 7221 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7222 { 7223 gen_urshr_d(src, src, shift); 7224 tcg_gen_add_i64(dst, dst, src); 7225 } 7226 7227 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7228 { 7229 /* If shift is 64, dst is unchanged. 
*/ 7230 if (shift != 64) { 7231 tcg_gen_shri_i64(src, src, shift); 7232 tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift); 7233 } 7234 } 7235 7236 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7237 { 7238 tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift); 7239 } 7240 7241 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a, 7242 WideShiftImmFn * const fns[3], MemOp sign) 7243 { 7244 TCGv_i64 tcg_rn, tcg_rd; 7245 int esz = a->esz; 7246 int esize; 7247 WideShiftImmFn *fn; 7248 7249 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7250 7251 if (!fp_access_check(s)) { 7252 return true; 7253 } 7254 7255 tcg_rn = tcg_temp_new_i64(); 7256 tcg_rd = tcg_temp_new_i64(); 7257 tcg_gen_movi_i64(tcg_rd, 0); 7258 7259 fn = fns[esz]; 7260 esize = 8 << esz; 7261 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7262 read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign); 7263 fn(tcg_rn, tcg_rn, a->imm); 7264 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize); 7265 } 7266 7267 write_vec_element(s, tcg_rd, a->rd, a->q, MO_64); 7268 clear_vec_high(s, a->q, a->rd); 7269 return true; 7270 } 7271 7272 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7273 { 7274 tcg_gen_sari_i64(d, s, i); 7275 tcg_gen_ext16u_i64(d, d); 7276 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7277 } 7278 7279 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7280 { 7281 tcg_gen_sari_i64(d, s, i); 7282 tcg_gen_ext32u_i64(d, d); 7283 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7284 } 7285 7286 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7287 { 7288 gen_sshr_d(d, s, i); 7289 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7290 } 7291 7292 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7293 { 7294 tcg_gen_shri_i64(d, s, i); 7295 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7296 } 7297 7298 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7299 { 7300 tcg_gen_shri_i64(d, s, i); 7301 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7302 } 7303 7304 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7305 { 7306 gen_ushr_d(d, s, i); 7307 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7308 } 7309 7310 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7311 { 7312 tcg_gen_sari_i64(d, s, i); 7313 tcg_gen_ext16u_i64(d, d); 7314 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7315 } 7316 7317 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7318 { 7319 tcg_gen_sari_i64(d, s, i); 7320 tcg_gen_ext32u_i64(d, d); 7321 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7322 } 7323 7324 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7325 { 7326 gen_sshr_d(d, s, i); 7327 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7328 } 7329 7330 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7331 { 7332 gen_srshr_bhs(d, s, i); 7333 tcg_gen_ext16u_i64(d, d); 7334 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7335 } 7336 7337 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7338 { 7339 gen_srshr_bhs(d, s, i); 7340 tcg_gen_ext32u_i64(d, d); 7341 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7342 } 7343 7344 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7345 { 7346 gen_srshr_d(d, s, i); 7347 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7348 } 7349 7350 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7351 { 7352 gen_urshr_bhs(d, s, i); 7353 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7354 } 7355 7356 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7357 
{ 7358 gen_urshr_bhs(d, s, i); 7359 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7360 } 7361 7362 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7363 { 7364 gen_urshr_d(d, s, i); 7365 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7366 } 7367 7368 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7369 { 7370 gen_srshr_bhs(d, s, i); 7371 tcg_gen_ext16u_i64(d, d); 7372 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7373 } 7374 7375 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7376 { 7377 gen_srshr_bhs(d, s, i); 7378 tcg_gen_ext32u_i64(d, d); 7379 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7380 } 7381 7382 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7383 { 7384 gen_srshr_d(d, s, i); 7385 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7386 } 7387 7388 static WideShiftImmFn * const shrn_fns[] = { 7389 tcg_gen_shri_i64, 7390 tcg_gen_shri_i64, 7391 gen_ushr_d, 7392 }; 7393 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0) 7394 7395 static WideShiftImmFn * const rshrn_fns[] = { 7396 gen_urshr_bhs, 7397 gen_urshr_bhs, 7398 gen_urshr_d, 7399 }; 7400 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0) 7401 7402 static WideShiftImmFn * const sqshrn_fns[] = { 7403 gen_sqshrn_b, 7404 gen_sqshrn_h, 7405 gen_sqshrn_s, 7406 }; 7407 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN) 7408 7409 static WideShiftImmFn * const uqshrn_fns[] = { 7410 gen_uqshrn_b, 7411 gen_uqshrn_h, 7412 gen_uqshrn_s, 7413 }; 7414 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0) 7415 7416 static WideShiftImmFn * const sqshrun_fns[] = { 7417 gen_sqshrun_b, 7418 gen_sqshrun_h, 7419 gen_sqshrun_s, 7420 }; 7421 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN) 7422 7423 static WideShiftImmFn * const sqrshrn_fns[] = { 7424 gen_sqrshrn_b, 7425 gen_sqrshrn_h, 7426 gen_sqrshrn_s, 7427 }; 7428 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN) 7429 7430 static WideShiftImmFn * const uqrshrn_fns[] = { 7431 gen_uqrshrn_b, 7432 gen_uqrshrn_h, 7433 gen_uqrshrn_s, 7434 }; 7435 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0) 7436 7437 static WideShiftImmFn * const sqrshrun_fns[] = { 7438 gen_sqrshrun_b, 7439 gen_sqrshrun_h, 7440 gen_sqrshrun_s, 7441 }; 7442 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN) 7443 7444 /* 7445 * Advanced SIMD Scalar Shift by Immediate 7446 */ 7447 7448 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a, 7449 WideShiftImmFn *fn, bool accumulate, 7450 MemOp sign) 7451 { 7452 if (fp_access_check(s)) { 7453 TCGv_i64 rd = tcg_temp_new_i64(); 7454 TCGv_i64 rn = tcg_temp_new_i64(); 7455 7456 read_vec_element(s, rn, a->rn, 0, a->esz | sign); 7457 if (accumulate) { 7458 read_vec_element(s, rd, a->rd, 0, a->esz | sign); 7459 } 7460 fn(rd, rn, a->imm); 7461 write_fp_dreg(s, a->rd, rd); 7462 } 7463 return true; 7464 } 7465 7466 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0) 7467 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0) 7468 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0) 7469 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0) 7470 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0) 7471 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0) 7472 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0) 7473 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0) 7474 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0) 7475 7476 TRANS(SHL_s, do_scalar_shift_imm, a, 
tcg_gen_shli_i64, false, 0) 7477 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0) 7478 7479 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i, 7480 NeonGenTwoOpEnvFn *fn) 7481 { 7482 TCGv_i32 t = tcg_temp_new_i32(); 7483 tcg_gen_extrl_i64_i32(t, s); 7484 fn(t, tcg_env, t, tcg_constant_i32(i)); 7485 tcg_gen_extu_i32_i64(d, t); 7486 } 7487 7488 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7489 { 7490 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8); 7491 } 7492 7493 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7494 { 7495 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16); 7496 } 7497 7498 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7499 { 7500 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32); 7501 } 7502 7503 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7504 { 7505 gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i)); 7506 } 7507 7508 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7509 { 7510 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8); 7511 } 7512 7513 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7514 { 7515 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16); 7516 } 7517 7518 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7519 { 7520 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32); 7521 } 7522 7523 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7524 { 7525 gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i)); 7526 } 7527 7528 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7529 { 7530 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8); 7531 } 7532 7533 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7534 { 7535 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16); 7536 } 7537 7538 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7539 { 7540 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32); 7541 } 7542 7543 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7544 { 7545 gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i)); 7546 } 7547 7548 static WideShiftImmFn * const f_scalar_sqshli[] = { 7549 gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d 7550 }; 7551 7552 static WideShiftImmFn * const f_scalar_uqshli[] = { 7553 gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d 7554 }; 7555 7556 static WideShiftImmFn * const f_scalar_sqshlui[] = { 7557 gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d 7558 }; 7559 7560 /* Note that the helpers sign-extend their inputs, so don't do it here. 
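       The TRANS lines below therefore pass sign = 0 to do_scalar_shift_imm.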
*/ 7561 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0) 7562 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0) 7563 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0) 7564 7565 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a, 7566 WideShiftImmFn * const fns[3], 7567 MemOp sign, bool zext) 7568 { 7569 MemOp esz = a->esz; 7570 7571 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7572 7573 if (fp_access_check(s)) { 7574 TCGv_i64 rd = tcg_temp_new_i64(); 7575 TCGv_i64 rn = tcg_temp_new_i64(); 7576 7577 read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign); 7578 fns[esz](rd, rn, a->imm); 7579 if (zext) { 7580 tcg_gen_ext_i64(rd, rd, esz); 7581 } 7582 write_fp_dreg(s, a->rd, rd); 7583 } 7584 return true; 7585 } 7586 7587 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true) 7588 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true) 7589 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false) 7590 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false) 7591 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false) 7592 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false) 7593 7594 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed) 7595 { 7596 TCGv_i64 tcg_n, tcg_m, tcg_rd; 7597 tcg_rd = cpu_reg(s, a->rd); 7598 7599 if (!a->sf && is_signed) { 7600 tcg_n = tcg_temp_new_i64(); 7601 tcg_m = tcg_temp_new_i64(); 7602 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn)); 7603 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm)); 7604 } else { 7605 tcg_n = read_cpu_reg(s, a->rn, a->sf); 7606 tcg_m = read_cpu_reg(s, a->rm, a->sf); 7607 } 7608 7609 if (is_signed) { 7610 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 7611 } else { 7612 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 7613 } 7614 7615 if (!a->sf) { /* zero extend final result */ 7616 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7617 } 7618 return true; 7619 } 7620 7621 TRANS(SDIV, do_div, a, true) 7622 TRANS(UDIV, do_div, a, false) 7623 7624 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 7625 * Note that it is the caller's responsibility to ensure that the 7626 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 7627 * mandated semantics for out of range shifts. 7628 */ 7629 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 7630 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 7631 { 7632 switch (shift_type) { 7633 case A64_SHIFT_TYPE_LSL: 7634 tcg_gen_shl_i64(dst, src, shift_amount); 7635 break; 7636 case A64_SHIFT_TYPE_LSR: 7637 tcg_gen_shr_i64(dst, src, shift_amount); 7638 break; 7639 case A64_SHIFT_TYPE_ASR: 7640 if (!sf) { 7641 tcg_gen_ext32s_i64(dst, src); 7642 } 7643 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); 7644 break; 7645 case A64_SHIFT_TYPE_ROR: 7646 if (sf) { 7647 tcg_gen_rotr_i64(dst, src, shift_amount); 7648 } else { 7649 TCGv_i32 t0, t1; 7650 t0 = tcg_temp_new_i32(); 7651 t1 = tcg_temp_new_i32(); 7652 tcg_gen_extrl_i64_i32(t0, src); 7653 tcg_gen_extrl_i64_i32(t1, shift_amount); 7654 tcg_gen_rotr_i32(t0, t0, t1); 7655 tcg_gen_extu_i32_i64(dst, t0); 7656 } 7657 break; 7658 default: 7659 assert(FALSE); /* all shift types should be handled */ 7660 break; 7661 } 7662 7663 if (!sf) { /* zero extend final result */ 7664 tcg_gen_ext32u_i64(dst, dst); 7665 } 7666 } 7667 7668 /* Shift a TCGv src by immediate, put result in dst. 
7669 * The shift amount must be in range (this should always be true as the 7670 * relevant instructions will UNDEF on bad shift immediates). 7671 */ 7672 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, 7673 enum a64_shift_type shift_type, unsigned int shift_i) 7674 { 7675 assert(shift_i < (sf ? 64 : 32)); 7676 7677 if (shift_i == 0) { 7678 tcg_gen_mov_i64(dst, src); 7679 } else { 7680 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); 7681 } 7682 } 7683 7684 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a, 7685 enum a64_shift_type shift_type) 7686 { 7687 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 7688 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 7689 TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf); 7690 7691 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31); 7692 shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift); 7693 return true; 7694 } 7695 7696 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL) 7697 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR) 7698 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR) 7699 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR) 7700 7701 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c) 7702 { 7703 TCGv_i64 tcg_acc, tcg_val, tcg_rd; 7704 TCGv_i32 tcg_bytes; 7705 7706 switch (a->esz) { 7707 case MO_8: 7708 case MO_16: 7709 case MO_32: 7710 tcg_val = tcg_temp_new_i64(); 7711 tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz); 7712 break; 7713 case MO_64: 7714 tcg_val = cpu_reg(s, a->rm); 7715 break; 7716 default: 7717 g_assert_not_reached(); 7718 } 7719 tcg_acc = cpu_reg(s, a->rn); 7720 tcg_bytes = tcg_constant_i32(1 << a->esz); 7721 tcg_rd = cpu_reg(s, a->rd); 7722 7723 if (crc32c) { 7724 gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); 7725 } else { 7726 gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); 7727 } 7728 return true; 7729 } 7730 7731 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false) 7732 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true) 7733 7734 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag) 7735 { 7736 TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true); 7737 TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true); 7738 TCGv_i64 tcg_d = cpu_reg(s, a->rd); 7739 7740 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); 7741 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 7742 7743 if (setflag) { 7744 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 7745 } else { 7746 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 7747 } 7748 return true; 7749 } 7750 7751 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false) 7752 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true) 7753 7754 static bool trans_IRG(DisasContext *s, arg_rrr *a) 7755 { 7756 if (dc_isar_feature(aa64_mte_insn_reg, s)) { 7757 TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd); 7758 TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn); 7759 7760 if (s->ata[0]) { 7761 gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm)); 7762 } else { 7763 gen_address_with_allocation_tag0(tcg_rd, tcg_rn); 7764 } 7765 return true; 7766 } 7767 return false; 7768 } 7769 7770 static bool trans_GMI(DisasContext *s, arg_rrr *a) 7771 { 7772 if (dc_isar_feature(aa64_mte_insn_reg, s)) { 7773 TCGv_i64 t = tcg_temp_new_i64(); 7774 7775 tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4); 7776 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 7777 tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t); 7778 return true; 7779 } 7780 return false; 7781 } 7782 7783 static bool trans_PACGA(DisasContext *s, arg_rrr *a) 7784 { 7785 if (dc_isar_feature(aa64_pauth, s)) { 7786 
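        /*
         * PACGA computes a pointer-authentication code for Xn using the
         * generic key, with the value in Xm|SP as the modifier; the
         * architecture defines the result as the PAC in the top 32 bits
         * of Xd with the bottom 32 bits zeroed, which the helper is
         * expected to provide.
         */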
gen_helper_pacga(cpu_reg(s, a->rd), tcg_env, 7787 cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm)); 7788 return true; 7789 } 7790 return false; 7791 } 7792 7793 typedef void ArithOneOp(TCGv_i64, TCGv_i64); 7794 7795 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn) 7796 { 7797 fn(cpu_reg(s, rd), cpu_reg(s, rn)); 7798 return true; 7799 } 7800 7801 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7802 { 7803 TCGv_i32 t32 = tcg_temp_new_i32(); 7804 7805 tcg_gen_extrl_i64_i32(t32, tcg_rn); 7806 gen_helper_rbit(t32, t32); 7807 tcg_gen_extu_i32_i64(tcg_rd, t32); 7808 } 7809 7810 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask) 7811 { 7812 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 7813 7814 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 7815 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 7816 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 7817 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 7818 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 7819 } 7820 7821 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7822 { 7823 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff)); 7824 } 7825 7826 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7827 { 7828 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull)); 7829 } 7830 7831 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7832 { 7833 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 7834 } 7835 7836 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7837 { 7838 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 7839 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 7840 } 7841 7842 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32) 7843 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32) 7844 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32) 7845 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64) 7846 7847 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7848 { 7849 TCGv_i32 t32 = tcg_temp_new_i32(); 7850 7851 tcg_gen_extrl_i64_i32(t32, tcg_rn); 7852 tcg_gen_clzi_i32(t32, t32, 32); 7853 tcg_gen_extu_i32_i64(tcg_rd, t32); 7854 } 7855 7856 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7857 { 7858 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 7859 } 7860 7861 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 7862 { 7863 TCGv_i32 t32 = tcg_temp_new_i32(); 7864 7865 tcg_gen_extrl_i64_i32(t32, tcg_rn); 7866 tcg_gen_clrsb_i32(t32, t32); 7867 tcg_gen_extu_i32_i64(tcg_rd, t32); 7868 } 7869 7870 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32) 7871 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? 
tcg_gen_clrsb_i64 : gen_cls32) 7872 7873 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn) 7874 { 7875 TCGv_i64 tcg_rd, tcg_rn; 7876 7877 if (a->z) { 7878 if (a->rn != 31) { 7879 return false; 7880 } 7881 tcg_rn = tcg_constant_i64(0); 7882 } else { 7883 tcg_rn = cpu_reg_sp(s, a->rn); 7884 } 7885 if (s->pauth_active) { 7886 tcg_rd = cpu_reg(s, a->rd); 7887 fn(tcg_rd, tcg_env, tcg_rd, tcg_rn); 7888 } 7889 return true; 7890 } 7891 7892 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia) 7893 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib) 7894 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda) 7895 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb) 7896 7897 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia) 7898 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib) 7899 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda) 7900 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb) 7901 7902 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn) 7903 { 7904 if (s->pauth_active) { 7905 TCGv_i64 tcg_rd = cpu_reg(s, rd); 7906 fn(tcg_rd, tcg_env, tcg_rd); 7907 } 7908 return true; 7909 } 7910 7911 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci) 7912 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd) 7913 7914 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a, 7915 ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags) 7916 { 7917 TCGv_i64 tcg_rd, tcg_rn, tcg_rm; 7918 7919 if (!a->sf && (a->sa & (1 << 5))) { 7920 return false; 7921 } 7922 7923 tcg_rd = cpu_reg(s, a->rd); 7924 tcg_rn = cpu_reg(s, a->rn); 7925 7926 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 7927 if (a->sa) { 7928 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); 7929 } 7930 7931 (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm); 7932 if (!a->sf) { 7933 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7934 } 7935 if (setflags) { 7936 gen_logic_CC(a->sf, tcg_rd); 7937 } 7938 return true; 7939 } 7940 7941 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a) 7942 { 7943 /* 7944 * Unshifted ORR and ORN with WZR/XZR is the standard encoding for 7945 * register-register MOV and MVN, so it is worth special casing. 
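     * For example, "MOV Xd, Xm" is the preferred alias of "ORR Xd, XZR, Xm"
     * and "MVN Wd, Wm" of "ORN Wd, WZR, Wm", so this path reduces to a
     * plain move or a bitwise NOT.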
7946 */ 7947 if (a->sa == 0 && a->st == 0 && a->rn == 31) { 7948 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 7949 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 7950 7951 if (a->n) { 7952 tcg_gen_not_i64(tcg_rd, tcg_rm); 7953 if (!a->sf) { 7954 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7955 } 7956 } else { 7957 if (a->sf) { 7958 tcg_gen_mov_i64(tcg_rd, tcg_rm); 7959 } else { 7960 tcg_gen_ext32u_i64(tcg_rd, tcg_rm); 7961 } 7962 } 7963 return true; 7964 } 7965 7966 return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false); 7967 } 7968 7969 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false) 7970 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true) 7971 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false) 7972 7973 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a, 7974 bool sub_op, bool setflags) 7975 { 7976 TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result; 7977 7978 if (a->sa > 4) { 7979 return false; 7980 } 7981 7982 /* non-flag setting ops may use SP */ 7983 if (!setflags) { 7984 tcg_rd = cpu_reg_sp(s, a->rd); 7985 } else { 7986 tcg_rd = cpu_reg(s, a->rd); 7987 } 7988 tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf); 7989 7990 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 7991 ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa); 7992 7993 tcg_result = tcg_temp_new_i64(); 7994 if (!setflags) { 7995 if (sub_op) { 7996 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 7997 } else { 7998 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 7999 } 8000 } else { 8001 if (sub_op) { 8002 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8003 } else { 8004 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8005 } 8006 } 8007 8008 if (a->sf) { 8009 tcg_gen_mov_i64(tcg_rd, tcg_result); 8010 } else { 8011 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 8012 } 8013 return true; 8014 } 8015 8016 TRANS(ADD_ext, do_addsub_ext, a, false, false) 8017 TRANS(SUB_ext, do_addsub_ext, a, true, false) 8018 TRANS(ADDS_ext, do_addsub_ext, a, false, true) 8019 TRANS(SUBS_ext, do_addsub_ext, a, true, true) 8020 8021 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a, 8022 bool sub_op, bool setflags) 8023 { 8024 TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result; 8025 8026 if (a->st == 3 || (!a->sf && (a->sa & 32))) { 8027 return false; 8028 } 8029 8030 tcg_rd = cpu_reg(s, a->rd); 8031 tcg_rn = read_cpu_reg(s, a->rn, a->sf); 8032 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8033 8034 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); 8035 8036 tcg_result = tcg_temp_new_i64(); 8037 if (!setflags) { 8038 if (sub_op) { 8039 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 8040 } else { 8041 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 8042 } 8043 } else { 8044 if (sub_op) { 8045 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8046 } else { 8047 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8048 } 8049 } 8050 8051 if (a->sf) { 8052 tcg_gen_mov_i64(tcg_rd, tcg_result); 8053 } else { 8054 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 8055 } 8056 return true; 8057 } 8058 8059 TRANS(ADD_r, do_addsub_reg, a, false, false) 8060 TRANS(SUB_r, do_addsub_reg, a, true, false) 8061 TRANS(ADDS_r, do_addsub_reg, a, false, true) 8062 TRANS(SUBS_r, do_addsub_reg, a, true, true) 8063 8064 static bool do_mulh(DisasContext *s, arg_rrr *a, 8065 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 8066 { 8067 TCGv_i64 discard = tcg_temp_new_i64(); 8068 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8069 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8070 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 8071 8072 fn(discard, tcg_rd, tcg_rn, tcg_rm); 8073 return true; 8074 } 8075 
8076 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64) 8077 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64) 8078 8079 static bool do_muladd(DisasContext *s, arg_rrrr *a, 8080 bool sf, bool is_sub, MemOp mop) 8081 { 8082 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8083 TCGv_i64 tcg_op1, tcg_op2; 8084 8085 if (mop == MO_64) { 8086 tcg_op1 = cpu_reg(s, a->rn); 8087 tcg_op2 = cpu_reg(s, a->rm); 8088 } else { 8089 tcg_op1 = tcg_temp_new_i64(); 8090 tcg_op2 = tcg_temp_new_i64(); 8091 tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop); 8092 tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop); 8093 } 8094 8095 if (a->ra == 31 && !is_sub) { 8096 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 8097 tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2); 8098 } else { 8099 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8100 TCGv_i64 tcg_ra = cpu_reg(s, a->ra); 8101 8102 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 8103 if (is_sub) { 8104 tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp); 8105 } else { 8106 tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp); 8107 } 8108 } 8109 8110 if (!sf) { 8111 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8112 } 8113 return true; 8114 } 8115 8116 TRANS(MADD_w, do_muladd, a, false, false, MO_64) 8117 TRANS(MSUB_w, do_muladd, a, false, true, MO_64) 8118 TRANS(MADD_x, do_muladd, a, true, false, MO_64) 8119 TRANS(MSUB_x, do_muladd, a, true, true, MO_64) 8120 8121 TRANS(SMADDL, do_muladd, a, true, false, MO_SL) 8122 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL) 8123 TRANS(UMADDL, do_muladd, a, true, false, MO_UL) 8124 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL) 8125 8126 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a, 8127 bool is_sub, bool setflags) 8128 { 8129 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 8130 8131 tcg_rd = cpu_reg(s, a->rd); 8132 tcg_rn = cpu_reg(s, a->rn); 8133 8134 if (is_sub) { 8135 tcg_y = tcg_temp_new_i64(); 8136 tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm)); 8137 } else { 8138 tcg_y = cpu_reg(s, a->rm); 8139 } 8140 8141 if (setflags) { 8142 gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y); 8143 } else { 8144 gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y); 8145 } 8146 return true; 8147 } 8148 8149 TRANS(ADC, do_adc_sbc, a, false, false) 8150 TRANS(SBC, do_adc_sbc, a, true, false) 8151 TRANS(ADCS, do_adc_sbc, a, false, true) 8152 TRANS(SBCS, do_adc_sbc, a, true, true) 8153 8154 static bool trans_RMIF(DisasContext *s, arg_RMIF *a) 8155 { 8156 int mask = a->mask; 8157 TCGv_i64 tcg_rn; 8158 TCGv_i32 nzcv; 8159 8160 if (!dc_isar_feature(aa64_condm_4, s)) { 8161 return false; 8162 } 8163 8164 tcg_rn = read_cpu_reg(s, a->rn, 1); 8165 tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm); 8166 8167 nzcv = tcg_temp_new_i32(); 8168 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 8169 8170 if (mask & 8) { /* N */ 8171 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 8172 } 8173 if (mask & 4) { /* Z */ 8174 tcg_gen_not_i32(cpu_ZF, nzcv); 8175 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 8176 } 8177 if (mask & 2) { /* C */ 8178 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 8179 } 8180 if (mask & 1) { /* V */ 8181 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 8182 } 8183 return true; 8184 } 8185 8186 static bool do_setf(DisasContext *s, int rn, int shift) 8187 { 8188 TCGv_i32 tmp = tcg_temp_new_i32(); 8189 8190 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 8191 tcg_gen_shli_i32(cpu_NF, tmp, shift); 8192 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 8193 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 8194 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 8195 return true; 8196 } 8197 8198 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24) 8199 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16) 8200 8201 /* 
CCMP, CCMN */ 8202 static bool trans_CCMP(DisasContext *s, arg_CCMP *a) 8203 { 8204 TCGv_i32 tcg_t0 = tcg_temp_new_i32(); 8205 TCGv_i32 tcg_t1 = tcg_temp_new_i32(); 8206 TCGv_i32 tcg_t2 = tcg_temp_new_i32(); 8207 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8208 TCGv_i64 tcg_rn, tcg_y; 8209 DisasCompare c; 8210 unsigned nzcv; 8211 bool has_andc; 8212 8213 /* Set T0 = !COND. */ 8214 arm_test_cc(&c, a->cond); 8215 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 8216 8217 /* Load the arguments for the new comparison. */ 8218 if (a->imm) { 8219 tcg_y = tcg_constant_i64(a->y); 8220 } else { 8221 tcg_y = cpu_reg(s, a->y); 8222 } 8223 tcg_rn = cpu_reg(s, a->rn); 8224 8225 /* Set the flags for the new comparison. */ 8226 if (a->op) { 8227 gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 8228 } else { 8229 gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 8230 } 8231 8232 /* 8233 * If COND was false, force the flags to #nzcv. Compute two masks 8234 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 8235 * For tcg hosts that support ANDC, we can make do with just T1. 8236 * In either case, allow the tcg optimizer to delete any unused mask. 8237 */ 8238 tcg_gen_neg_i32(tcg_t1, tcg_t0); 8239 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 8240 8241 nzcv = a->nzcv; 8242 has_andc = tcg_op_supported(INDEX_op_andc_i32, TCG_TYPE_I32, 0); 8243 if (nzcv & 8) { /* N */ 8244 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 8245 } else { 8246 if (has_andc) { 8247 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 8248 } else { 8249 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 8250 } 8251 } 8252 if (nzcv & 4) { /* Z */ 8253 if (has_andc) { 8254 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 8255 } else { 8256 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 8257 } 8258 } else { 8259 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 8260 } 8261 if (nzcv & 2) { /* C */ 8262 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 8263 } else { 8264 if (has_andc) { 8265 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 8266 } else { 8267 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 8268 } 8269 } 8270 if (nzcv & 1) { /* V */ 8271 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 8272 } else { 8273 if (has_andc) { 8274 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 8275 } else { 8276 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 8277 } 8278 } 8279 return true; 8280 } 8281 8282 static bool trans_CSEL(DisasContext *s, arg_CSEL *a) 8283 { 8284 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8285 TCGv_i64 zero = tcg_constant_i64(0); 8286 DisasCompare64 c; 8287 8288 a64_test_cc(&c, a->cond); 8289 8290 if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) { 8291 /* CSET & CSETM. 
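         * CSET is the alias of CSINC Rd, ZR, ZR, invert(cond) and CSETM of
         * CSINV Rd, ZR, ZR, invert(cond): the result is 1 or all-ones when
         * the alias condition passes and 0 otherwise, which maps directly
         * onto the setcond/negsetcond below.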
*/ 8292 if (a->else_inv) { 8293 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), 8294 tcg_rd, c.value, zero); 8295 } else { 8296 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), 8297 tcg_rd, c.value, zero); 8298 } 8299 } else { 8300 TCGv_i64 t_true = cpu_reg(s, a->rn); 8301 TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1); 8302 8303 if (a->else_inv && a->else_inc) { 8304 tcg_gen_neg_i64(t_false, t_false); 8305 } else if (a->else_inv) { 8306 tcg_gen_not_i64(t_false, t_false); 8307 } else if (a->else_inc) { 8308 tcg_gen_addi_i64(t_false, t_false, 1); 8309 } 8310 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 8311 } 8312 8313 if (!a->sf) { 8314 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8315 } 8316 return true; 8317 } 8318 8319 typedef struct FPScalar1Int { 8320 void (*gen_h)(TCGv_i32, TCGv_i32); 8321 void (*gen_s)(TCGv_i32, TCGv_i32); 8322 void (*gen_d)(TCGv_i64, TCGv_i64); 8323 } FPScalar1Int; 8324 8325 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, 8326 const FPScalar1Int *f) 8327 { 8328 switch (a->esz) { 8329 case MO_64: 8330 if (fp_access_check(s)) { 8331 TCGv_i64 t = read_fp_dreg(s, a->rn); 8332 f->gen_d(t, t); 8333 write_fp_dreg(s, a->rd, t); 8334 } 8335 break; 8336 case MO_32: 8337 if (fp_access_check(s)) { 8338 TCGv_i32 t = read_fp_sreg(s, a->rn); 8339 f->gen_s(t, t); 8340 write_fp_sreg(s, a->rd, t); 8341 } 8342 break; 8343 case MO_16: 8344 if (!dc_isar_feature(aa64_fp16, s)) { 8345 return false; 8346 } 8347 if (fp_access_check(s)) { 8348 TCGv_i32 t = read_fp_hreg(s, a->rn); 8349 f->gen_h(t, t); 8350 write_fp_sreg(s, a->rd, t); 8351 } 8352 break; 8353 default: 8354 return false; 8355 } 8356 return true; 8357 } 8358 8359 static const FPScalar1Int f_scalar_fmov = { 8360 tcg_gen_mov_i32, 8361 tcg_gen_mov_i32, 8362 tcg_gen_mov_i64, 8363 }; 8364 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov) 8365 8366 static const FPScalar1Int f_scalar_fabs = { 8367 gen_vfp_absh, 8368 gen_vfp_abss, 8369 gen_vfp_absd, 8370 }; 8371 TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs) 8372 8373 static const FPScalar1Int f_scalar_fneg = { 8374 gen_vfp_negh, 8375 gen_vfp_negs, 8376 gen_vfp_negd, 8377 }; 8378 TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg) 8379 8380 typedef struct FPScalar1 { 8381 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); 8382 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr); 8383 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); 8384 } FPScalar1; 8385 8386 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, 8387 const FPScalar1 *f, int rmode) 8388 { 8389 TCGv_i32 tcg_rmode = NULL; 8390 TCGv_ptr fpst; 8391 TCGv_i64 t64; 8392 TCGv_i32 t32; 8393 int check = fp_access_check_scalar_hsd(s, a->esz); 8394 8395 if (check <= 0) { 8396 return check == 0; 8397 } 8398 8399 fpst = fpstatus_ptr(a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 8400 if (rmode >= 0) { 8401 tcg_rmode = gen_set_rmode(rmode, fpst); 8402 } 8403 8404 switch (a->esz) { 8405 case MO_64: 8406 t64 = read_fp_dreg(s, a->rn); 8407 f->gen_d(t64, t64, fpst); 8408 write_fp_dreg(s, a->rd, t64); 8409 break; 8410 case MO_32: 8411 t32 = read_fp_sreg(s, a->rn); 8412 f->gen_s(t32, t32, fpst); 8413 write_fp_sreg(s, a->rd, t32); 8414 break; 8415 case MO_16: 8416 t32 = read_fp_hreg(s, a->rn); 8417 f->gen_h(t32, t32, fpst); 8418 write_fp_sreg(s, a->rd, t32); 8419 break; 8420 default: 8421 g_assert_not_reached(); 8422 } 8423 8424 if (rmode >= 0) { 8425 gen_restore_rmode(tcg_rmode, fpst); 8426 } 8427 return true; 8428 } 8429 8430 static const FPScalar1 f_scalar_fsqrt = { 8431 gen_helper_vfp_sqrth, 8432 gen_helper_vfp_sqrts, 8433 gen_helper_vfp_sqrtd, 8434 }; 8435 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1) 8436 8437 static const FPScalar1 f_scalar_frint = { 8438 gen_helper_advsimd_rinth, 8439 gen_helper_rints, 8440 gen_helper_rintd, 8441 }; 8442 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN) 8443 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF) 8444 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF) 8445 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO) 8446 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY) 8447 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1) 8448 8449 static const FPScalar1 f_scalar_frintx = { 8450 gen_helper_advsimd_rinth_exact, 8451 gen_helper_rints_exact, 8452 gen_helper_rintd_exact, 8453 }; 8454 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) 8455 8456 static const FPScalar1 f_scalar_bfcvt = { 8457 .gen_s = gen_helper_bfcvt, 8458 }; 8459 TRANS_FEAT(BFCVT_s, aa64_bf16, do_fp1_scalar, a, &f_scalar_bfcvt, -1) 8460 8461 static const FPScalar1 f_scalar_frint32 = { 8462 NULL, 8463 gen_helper_frint32_s, 8464 gen_helper_frint32_d, 8465 }; 8466 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a, 8467 &f_scalar_frint32, FPROUNDING_ZERO) 8468 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1) 8469 8470 static const FPScalar1 f_scalar_frint64 = { 8471 NULL, 8472 gen_helper_frint64_s, 8473 gen_helper_frint64_d, 8474 }; 8475 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a, 8476 &f_scalar_frint64, FPROUNDING_ZERO) 8477 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1) 8478 8479 static const FPScalar1 f_scalar_frecpe = { 8480 gen_helper_recpe_f16, 8481 gen_helper_recpe_f32, 8482 gen_helper_recpe_f64, 8483 }; 8484 TRANS(FRECPE_s, do_fp1_scalar, a, &f_scalar_frecpe, -1) 8485 8486 static const FPScalar1 f_scalar_frecpx = { 8487 gen_helper_frecpx_f16, 8488 gen_helper_frecpx_f32, 8489 gen_helper_frecpx_f64, 8490 }; 8491 TRANS(FRECPX_s, do_fp1_scalar, a, &f_scalar_frecpx, -1) 8492 8493 static const FPScalar1 f_scalar_frsqrte = { 8494 gen_helper_rsqrte_f16, 8495 gen_helper_rsqrte_f32, 8496 gen_helper_rsqrte_f64, 8497 }; 8498 TRANS(FRSQRTE_s, do_fp1_scalar, a, &f_scalar_frsqrte, -1) 8499 8500 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) 8501 { 8502 if (fp_access_check(s)) { 8503 TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn); 8504 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 8505 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8506 8507 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); 8508 write_fp_dreg(s, a->rd, tcg_rd); 8509 } 8510 return true; 8511 } 8512 8513 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) 8514 { 8515 if (fp_access_check(s)) { 8516 TCGv_i32 tmp = 
read_fp_sreg(s, a->rn); 8517 TCGv_i32 ahp = get_ahp_flag(); 8518 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8519 8520 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); 8521 /* write_fp_sreg is OK here because top half of result is zero */ 8522 write_fp_sreg(s, a->rd, tmp); 8523 } 8524 return true; 8525 } 8526 8527 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) 8528 { 8529 if (fp_access_check(s)) { 8530 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); 8531 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8532 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8533 8534 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); 8535 write_fp_sreg(s, a->rd, tcg_rd); 8536 } 8537 return true; 8538 } 8539 8540 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) 8541 { 8542 if (fp_access_check(s)) { 8543 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); 8544 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8545 TCGv_i32 ahp = get_ahp_flag(); 8546 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 8547 8548 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 8549 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 8550 write_fp_sreg(s, a->rd, tcg_rd); 8551 } 8552 return true; 8553 } 8554 8555 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) 8556 { 8557 if (fp_access_check(s)) { 8558 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); 8559 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8560 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); 8561 TCGv_i32 tcg_ahp = get_ahp_flag(); 8562 8563 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 8564 write_fp_sreg(s, a->rd, tcg_rd); 8565 } 8566 return true; 8567 } 8568 8569 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) 8570 { 8571 if (fp_access_check(s)) { 8572 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); 8573 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 8574 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); 8575 TCGv_i32 tcg_ahp = get_ahp_flag(); 8576 8577 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 8578 write_fp_dreg(s, a->rd, tcg_rd); 8579 } 8580 return true; 8581 } 8582 8583 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, 8584 TCGv_i64 tcg_int, bool is_signed) 8585 { 8586 TCGv_ptr tcg_fpstatus; 8587 TCGv_i32 tcg_shift, tcg_single; 8588 TCGv_i64 tcg_double; 8589 8590 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 8591 tcg_shift = tcg_constant_i32(shift); 8592 8593 switch (esz) { 8594 case MO_64: 8595 tcg_double = tcg_temp_new_i64(); 8596 if (is_signed) { 8597 gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 8598 } else { 8599 gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 8600 } 8601 write_fp_dreg(s, rd, tcg_double); 8602 break; 8603 8604 case MO_32: 8605 tcg_single = tcg_temp_new_i32(); 8606 if (is_signed) { 8607 gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 8608 } else { 8609 gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 8610 } 8611 write_fp_sreg(s, rd, tcg_single); 8612 break; 8613 8614 case MO_16: 8615 tcg_single = tcg_temp_new_i32(); 8616 if (is_signed) { 8617 gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 8618 } else { 8619 gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 8620 } 8621 write_fp_sreg(s, rd, tcg_single); 8622 break; 8623 8624 default: 8625 g_assert_not_reached(); 8626 } 8627 return true; 8628 } 8629 8630 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed) 8631 { 8632 TCGv_i64 tcg_int; 8633 int check = fp_access_check_scalar_hsd(s, a->esz); 8634 8635 if (check <= 0) { 8636 return check == 0; 8637 } 8638 8639 if (a->sf) { 8640 tcg_int = cpu_reg(s, a->rn); 8641 } else { 8642 tcg_int = read_cpu_reg(s, a->rn, true); 8643 if (is_signed) { 8644 tcg_gen_ext32s_i64(tcg_int, tcg_int); 8645 } else { 8646 tcg_gen_ext32u_i64(tcg_int, tcg_int); 8647 } 8648 } 8649 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 8650 } 8651 8652 TRANS(SCVTF_g, do_cvtf_g, a, true) 8653 TRANS(UCVTF_g, do_cvtf_g, a, false) 8654 8655 /* 8656 * [US]CVTF (vector), scalar version. 8657 * Which sounds weird, but really just means input from fp register 8658 * instead of input from general register. Input and output element 8659 * size are always equal. 8660 */ 8661 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed) 8662 { 8663 TCGv_i64 tcg_int; 8664 int check = fp_access_check_scalar_hsd(s, a->esz); 8665 8666 if (check <= 0) { 8667 return check == 0; 8668 } 8669 8670 tcg_int = tcg_temp_new_i64(); 8671 read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0)); 8672 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 8673 } 8674 8675 TRANS(SCVTF_f, do_cvtf_f, a, true) 8676 TRANS(UCVTF_f, do_cvtf_f, a, false) 8677 8678 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz, 8679 TCGv_i64 tcg_out, int shift, int rn, 8680 ARMFPRounding rmode) 8681 { 8682 TCGv_ptr tcg_fpstatus; 8683 TCGv_i32 tcg_shift, tcg_rmode, tcg_single; 8684 8685 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 8686 tcg_shift = tcg_constant_i32(shift); 8687 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 8688 8689 switch (esz) { 8690 case MO_64: 8691 read_vec_element(s, tcg_out, rn, 0, MO_64); 8692 switch (out) { 8693 case MO_64 | MO_SIGN: 8694 gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 8695 break; 8696 case MO_64: 8697 gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 8698 break; 8699 case MO_32 | MO_SIGN: 8700 gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 8701 break; 8702 case MO_32: 8703 gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 8704 break; 8705 default: 8706 g_assert_not_reached(); 8707 } 8708 break; 8709 8710 case MO_32: 8711 tcg_single = read_fp_sreg(s, rn); 8712 switch (out) { 8713 case MO_64 | MO_SIGN: 8714 gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 8715 break; 8716 case MO_64: 8717 gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 8718 break; 8719 case MO_32 | MO_SIGN: 8720 gen_helper_vfp_tosls(tcg_single, tcg_single, 8721 tcg_shift, tcg_fpstatus); 8722 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8723 break; 8724 case MO_32: 8725 gen_helper_vfp_touls(tcg_single, tcg_single, 8726 tcg_shift, tcg_fpstatus); 8727 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8728 break; 8729 default: 8730 g_assert_not_reached(); 8731 } 8732 break; 8733 8734 case MO_16: 8735 tcg_single = read_fp_hreg(s, rn); 8736 switch (out) { 8737 case MO_64 | MO_SIGN: 8738 gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 8739 break; 8740 case MO_64: 8741 gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 8742 break; 8743 case MO_32 | MO_SIGN: 8744 gen_helper_vfp_toslh(tcg_single, tcg_single, 8745 tcg_shift, tcg_fpstatus); 8746 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8747 break; 8748 case MO_32: 8749 gen_helper_vfp_toulh(tcg_single, tcg_single, 8750 tcg_shift, tcg_fpstatus); 8751 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8752 break; 8753 case MO_16 | MO_SIGN: 8754 gen_helper_vfp_toshh(tcg_single, tcg_single, 8755 tcg_shift, tcg_fpstatus); 8756 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8757 break; 8758 case MO_16: 8759 gen_helper_vfp_touhh(tcg_single, tcg_single, 8760 tcg_shift, tcg_fpstatus); 8761 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 8762 break; 8763 default: 8764 g_assert_not_reached(); 8765 } 8766 break; 8767 8768 default: 8769 g_assert_not_reached(); 8770 } 8771 8772 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 8773 } 8774 8775 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a, 8776 ARMFPRounding rmode, bool is_signed) 8777 { 8778 TCGv_i64 tcg_int; 8779 int check = fp_access_check_scalar_hsd(s, a->esz); 8780 8781 if (check <= 0) { 8782 return check == 0; 8783 } 8784 8785 tcg_int = cpu_reg(s, a->rd); 8786 do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? 
MO_SIGN : 0), 8787 a->esz, tcg_int, a->shift, a->rn, rmode); 8788 8789 if (!a->sf) { 8790 tcg_gen_ext32u_i64(tcg_int, tcg_int); 8791 } 8792 return true; 8793 } 8794 8795 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true) 8796 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false) 8797 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true) 8798 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false) 8799 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true) 8800 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false) 8801 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true) 8802 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false) 8803 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true) 8804 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false) 8805 8806 /* 8807 * FCVT* (vector), scalar version. 8808 * Which sounds weird, but really just means output to fp register 8809 * instead of output to general register. Input and output element 8810 * size are always equal. 8811 */ 8812 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, 8813 ARMFPRounding rmode, bool is_signed) 8814 { 8815 TCGv_i64 tcg_int; 8816 int check = fp_access_check_scalar_hsd(s, a->esz); 8817 8818 if (check <= 0) { 8819 return check == 0; 8820 } 8821 8822 tcg_int = tcg_temp_new_i64(); 8823 do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), 8824 a->esz, tcg_int, a->shift, a->rn, rmode); 8825 8826 clear_vec(s, a->rd); 8827 write_vec_element(s, tcg_int, a->rd, 0, a->esz); 8828 return true; 8829 } 8830 8831 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true) 8832 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false) 8833 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true) 8834 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false) 8835 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true) 8836 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false) 8837 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true) 8838 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false) 8839 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true) 8840 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false) 8841 8842 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a) 8843 { 8844 if (!dc_isar_feature(aa64_jscvt, s)) { 8845 return false; 8846 } 8847 if (fp_access_check(s)) { 8848 TCGv_i64 t = read_fp_dreg(s, a->rn); 8849 TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64); 8850 8851 gen_helper_fjcvtzs(t, t, fpstatus); 8852 8853 tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t); 8854 tcg_gen_extrh_i64_i32(cpu_ZF, t); 8855 tcg_gen_movi_i32(cpu_CF, 0); 8856 tcg_gen_movi_i32(cpu_NF, 0); 8857 tcg_gen_movi_i32(cpu_VF, 0); 8858 } 8859 return true; 8860 } 8861 8862 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a) 8863 { 8864 if (!dc_isar_feature(aa64_fp16, s)) { 8865 return false; 8866 } 8867 if (fp_access_check(s)) { 8868 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8869 TCGv_i64 tmp = tcg_temp_new_i64(); 8870 tcg_gen_ext16u_i64(tmp, tcg_rn); 8871 write_fp_dreg(s, a->rd, tmp); 8872 } 8873 return true; 8874 } 8875 8876 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a) 8877 { 8878 if (fp_access_check(s)) { 8879 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8880 TCGv_i64 tmp = tcg_temp_new_i64(); 8881 tcg_gen_ext32u_i64(tmp, tcg_rn); 8882 write_fp_dreg(s, a->rd, tmp); 8883 } 8884 return true; 8885 } 8886 8887 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a) 8888 { 8889 if (fp_access_check(s)) { 8890 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8891 write_fp_dreg(s, a->rd, tcg_rn); 8892 } 8893 return true; 8894 } 8895 8896 static bool 
trans_FMOV_ux(DisasContext *s, arg_rr *a) 8897 { 8898 if (fp_access_check(s)) { 8899 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8900 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd)); 8901 clear_vec_high(s, true, a->rd); 8902 } 8903 return true; 8904 } 8905 8906 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a) 8907 { 8908 if (!dc_isar_feature(aa64_fp16, s)) { 8909 return false; 8910 } 8911 if (fp_access_check(s)) { 8912 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8913 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16)); 8914 } 8915 return true; 8916 } 8917 8918 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a) 8919 { 8920 if (fp_access_check(s)) { 8921 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8922 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32)); 8923 } 8924 return true; 8925 } 8926 8927 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a) 8928 { 8929 if (fp_access_check(s)) { 8930 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8931 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64)); 8932 } 8933 return true; 8934 } 8935 8936 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a) 8937 { 8938 if (fp_access_check(s)) { 8939 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8940 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn)); 8941 } 8942 return true; 8943 } 8944 8945 typedef struct ENVScalar1 { 8946 NeonGenOneOpEnvFn *gen_bhs[3]; 8947 NeonGenOne64OpEnvFn *gen_d; 8948 } ENVScalar1; 8949 8950 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f) 8951 { 8952 if (!fp_access_check(s)) { 8953 return true; 8954 } 8955 if (a->esz == MO_64) { 8956 TCGv_i64 t = read_fp_dreg(s, a->rn); 8957 f->gen_d(t, tcg_env, t); 8958 write_fp_dreg(s, a->rd, t); 8959 } else { 8960 TCGv_i32 t = tcg_temp_new_i32(); 8961 8962 read_vec_element_i32(s, t, a->rn, 0, a->esz); 8963 f->gen_bhs[a->esz](t, tcg_env, t); 8964 write_fp_sreg(s, a->rd, t); 8965 } 8966 return true; 8967 } 8968 8969 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f) 8970 { 8971 if (a->esz == MO_64 && !a->q) { 8972 return false; 8973 } 8974 if (!fp_access_check(s)) { 8975 return true; 8976 } 8977 if (a->esz == MO_64) { 8978 TCGv_i64 t = tcg_temp_new_i64(); 8979 8980 for (int i = 0; i < 2; ++i) { 8981 read_vec_element(s, t, a->rn, i, MO_64); 8982 f->gen_d(t, tcg_env, t); 8983 write_vec_element(s, t, a->rd, i, MO_64); 8984 } 8985 } else { 8986 TCGv_i32 t = tcg_temp_new_i32(); 8987 int n = (a->q ? 
16 : 8) >> a->esz; 8988 8989 for (int i = 0; i < n; ++i) { 8990 read_vec_element_i32(s, t, a->rn, i, a->esz); 8991 f->gen_bhs[a->esz](t, tcg_env, t); 8992 write_vec_element_i32(s, t, a->rd, i, a->esz); 8993 } 8994 } 8995 clear_vec_high(s, a->q, a->rd); 8996 return true; 8997 } 8998 8999 static const ENVScalar1 f_scalar_sqabs = { 9000 { gen_helper_neon_qabs_s8, 9001 gen_helper_neon_qabs_s16, 9002 gen_helper_neon_qabs_s32 }, 9003 gen_helper_neon_qabs_s64, 9004 }; 9005 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs) 9006 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs) 9007 9008 static const ENVScalar1 f_scalar_sqneg = { 9009 { gen_helper_neon_qneg_s8, 9010 gen_helper_neon_qneg_s16, 9011 gen_helper_neon_qneg_s32 }, 9012 gen_helper_neon_qneg_s64, 9013 }; 9014 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg) 9015 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg) 9016 9017 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f) 9018 { 9019 if (fp_access_check(s)) { 9020 TCGv_i64 t = read_fp_dreg(s, a->rn); 9021 f(t, t); 9022 write_fp_dreg(s, a->rd, t); 9023 } 9024 return true; 9025 } 9026 9027 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64) 9028 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64) 9029 9030 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond) 9031 { 9032 if (fp_access_check(s)) { 9033 TCGv_i64 t = read_fp_dreg(s, a->rn); 9034 tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0)); 9035 write_fp_dreg(s, a->rd, t); 9036 } 9037 return true; 9038 } 9039 9040 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT) 9041 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE) 9042 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE) 9043 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT) 9044 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ) 9045 9046 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a, 9047 ArithOneOp * const fn[3]) 9048 { 9049 if (a->esz == MO_64) { 9050 return false; 9051 } 9052 if (fp_access_check(s)) { 9053 TCGv_i64 t = tcg_temp_new_i64(); 9054 9055 read_vec_element(s, t, a->rn, 0, a->esz + 1); 9056 fn[a->esz](t, t); 9057 clear_vec(s, a->rd); 9058 write_vec_element(s, t, a->rd, 0, a->esz); 9059 } 9060 return true; 9061 } 9062 9063 #define WRAP_ENV(NAME) \ 9064 static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \ 9065 { gen_helper_##NAME(d, tcg_env, n); } 9066 9067 WRAP_ENV(neon_unarrow_sat8) 9068 WRAP_ENV(neon_unarrow_sat16) 9069 WRAP_ENV(neon_unarrow_sat32) 9070 9071 static ArithOneOp * const f_scalar_sqxtun[] = { 9072 gen_neon_unarrow_sat8, 9073 gen_neon_unarrow_sat16, 9074 gen_neon_unarrow_sat32, 9075 }; 9076 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun) 9077 9078 WRAP_ENV(neon_narrow_sat_s8) 9079 WRAP_ENV(neon_narrow_sat_s16) 9080 WRAP_ENV(neon_narrow_sat_s32) 9081 9082 static ArithOneOp * const f_scalar_sqxtn[] = { 9083 gen_neon_narrow_sat_s8, 9084 gen_neon_narrow_sat_s16, 9085 gen_neon_narrow_sat_s32, 9086 }; 9087 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn) 9088 9089 WRAP_ENV(neon_narrow_sat_u8) 9090 WRAP_ENV(neon_narrow_sat_u16) 9091 WRAP_ENV(neon_narrow_sat_u32) 9092 9093 static ArithOneOp * const f_scalar_uqxtn[] = { 9094 gen_neon_narrow_sat_u8, 9095 gen_neon_narrow_sat_u16, 9096 gen_neon_narrow_sat_u32, 9097 }; 9098 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) 9099 9100 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) 9101 { 9102 /* 9103 * 64 bit to 32 bit float conversion 9104 * with von Neumann rounding (round to odd) 9105 */ 9106 TCGv_i32 tmp = tcg_temp_new_i32(); 9107 gen_helper_fcvtx_f64_to_f32(tmp, n, 
fpstatus_ptr(FPST_A64)); 9108 tcg_gen_extu_i32_i64(d, tmp); 9109 } 9110 9111 static ArithOneOp * const f_scalar_fcvtxn[] = { 9112 NULL, 9113 NULL, 9114 gen_fcvtxn_sd, 9115 }; 9116 TRANS(FCVTXN_s, do_2misc_narrow_scalar, a, f_scalar_fcvtxn) 9117 9118 #undef WRAP_ENV 9119 9120 static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 9121 { 9122 if (!a->q && a->esz == MO_64) { 9123 return false; 9124 } 9125 if (fp_access_check(s)) { 9126 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 9127 } 9128 return true; 9129 } 9130 9131 TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs) 9132 TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg) 9133 TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not) 9134 TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt) 9135 TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit) 9136 TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0) 9137 TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0) 9138 TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0) 9139 TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0) 9140 TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0) 9141 TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16) 9142 TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32) 9143 TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe) 9144 TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte) 9145 9146 static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 9147 { 9148 if (a->esz == MO_64) { 9149 return false; 9150 } 9151 if (fp_access_check(s)) { 9152 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 9153 } 9154 return true; 9155 } 9156 9157 TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls) 9158 TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz) 9159 TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64) 9160 TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp) 9161 TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp) 9162 TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp) 9163 TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp) 9164 9165 static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a, 9166 ArithOneOp * const fn[3]) 9167 { 9168 if (a->esz == MO_64) { 9169 return false; 9170 } 9171 if (fp_access_check(s)) { 9172 TCGv_i64 t0 = tcg_temp_new_i64(); 9173 TCGv_i64 t1 = tcg_temp_new_i64(); 9174 9175 read_vec_element(s, t0, a->rn, 0, MO_64); 9176 read_vec_element(s, t1, a->rn, 1, MO_64); 9177 fn[a->esz](t0, t0); 9178 fn[a->esz](t1, t1); 9179 write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32); 9180 write_vec_element(s, t1, a->rd, a->q ? 
3 : 1, MO_32); 9181 clear_vec_high(s, a->q, a->rd); 9182 } 9183 return true; 9184 } 9185 9186 static ArithOneOp * const f_scalar_xtn[] = { 9187 gen_helper_neon_narrow_u8, 9188 gen_helper_neon_narrow_u16, 9189 tcg_gen_ext32u_i64, 9190 }; 9191 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn) 9192 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun) 9193 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn) 9194 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn) 9195 9196 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n) 9197 { 9198 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 9199 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 9200 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9201 TCGv_i32 ahp = get_ahp_flag(); 9202 9203 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n); 9204 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 9205 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 9206 tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16); 9207 tcg_gen_extu_i32_i64(d, tcg_lo); 9208 } 9209 9210 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) 9211 { 9212 TCGv_i32 tmp = tcg_temp_new_i32(); 9213 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9214 9215 gen_helper_vfp_fcvtsd(tmp, n, fpst); 9216 tcg_gen_extu_i32_i64(d, tmp); 9217 } 9218 9219 static ArithOneOp * const f_vector_fcvtn[] = { 9220 NULL, 9221 gen_fcvtn_hs, 9222 gen_fcvtn_sd, 9223 }; 9224 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) 9225 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) 9226 9227 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) 9228 { 9229 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9230 TCGv_i32 tmp = tcg_temp_new_i32(); 9231 gen_helper_bfcvt_pair(tmp, n, fpst); 9232 tcg_gen_extu_i32_i64(d, tmp); 9233 } 9234 9235 static ArithOneOp * const f_vector_bfcvtn[] = { 9236 NULL, 9237 gen_bfcvtn_hs, 9238 NULL, 9239 }; 9240 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, f_vector_bfcvtn) 9241 9242 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) 9243 { 9244 static NeonGenWidenFn * const widenfns[3] = { 9245 gen_helper_neon_widen_u8, 9246 gen_helper_neon_widen_u16, 9247 tcg_gen_extu_i32_i64, 9248 }; 9249 NeonGenWidenFn *widenfn; 9250 TCGv_i64 tcg_res[2]; 9251 TCGv_i32 tcg_op; 9252 int part, pass; 9253 9254 if (a->esz == MO_64) { 9255 return false; 9256 } 9257 if (!fp_access_check(s)) { 9258 return true; 9259 } 9260 9261 tcg_op = tcg_temp_new_i32(); 9262 widenfn = widenfns[a->esz]; 9263 part = a->q ? 
2 : 0; 9264 9265 for (pass = 0; pass < 2; pass++) { 9266 read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32); 9267 tcg_res[pass] = tcg_temp_new_i64(); 9268 widenfn(tcg_res[pass], tcg_op); 9269 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz); 9270 } 9271 9272 for (pass = 0; pass < 2; pass++) { 9273 write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64); 9274 } 9275 return true; 9276 } 9277 9278 static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 9279 { 9280 int check = fp_access_check_vector_hsd(s, a->q, a->esz); 9281 9282 if (check <= 0) { 9283 return check == 0; 9284 } 9285 9286 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 9287 return true; 9288 } 9289 9290 TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs) 9291 TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg) 9292 9293 static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a, 9294 const FPScalar1 *f, int rmode) 9295 { 9296 TCGv_i32 tcg_rmode = NULL; 9297 TCGv_ptr fpst; 9298 int check = fp_access_check_vector_hsd(s, a->q, a->esz); 9299 9300 if (check <= 0) { 9301 return check == 0; 9302 } 9303 9304 fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 9305 if (rmode >= 0) { 9306 tcg_rmode = gen_set_rmode(rmode, fpst); 9307 } 9308 9309 if (a->esz == MO_64) { 9310 TCGv_i64 t64 = tcg_temp_new_i64(); 9311 9312 for (int pass = 0; pass < 2; ++pass) { 9313 read_vec_element(s, t64, a->rn, pass, MO_64); 9314 f->gen_d(t64, t64, fpst); 9315 write_vec_element(s, t64, a->rd, pass, MO_64); 9316 } 9317 } else { 9318 TCGv_i32 t32 = tcg_temp_new_i32(); 9319 void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr) 9320 = (a->esz == MO_16 ? f->gen_h : f->gen_s); 9321 9322 for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) { 9323 read_vec_element_i32(s, t32, a->rn, pass, a->esz); 9324 gen(t32, t32, fpst); 9325 write_vec_element_i32(s, t32, a->rd, pass, a->esz); 9326 } 9327 } 9328 clear_vec_high(s, a->q, a->rd); 9329 9330 if (rmode >= 0) { 9331 gen_restore_rmode(tcg_rmode, fpst); 9332 } 9333 return true; 9334 } 9335 9336 TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1) 9337 9338 TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN) 9339 TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF) 9340 TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF) 9341 TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO) 9342 TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY) 9343 TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1) 9344 TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1) 9345 9346 TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a, 9347 &f_scalar_frint32, FPROUNDING_ZERO) 9348 TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1) 9349 TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a, 9350 &f_scalar_frint64, FPROUNDING_ZERO) 9351 TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1) 9352 9353 static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, 9354 int rd, int rn, int data, 9355 gen_helper_gvec_2_ptr * const fns[3]) 9356 { 9357 int check = fp_access_check_vector_hsd(s, is_q, esz); 9358 TCGv_ptr fpst; 9359 9360 if (check <= 0) { 9361 return check == 0; 9362 } 9363 9364 fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); 9365 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 9366 vec_full_reg_offset(s, rn), fpst, 9367 is_q ? 
16 : 8, vec_full_reg_size(s), 9368 data, fns[esz - 1]); 9369 return true; 9370 } 9371 9372 static gen_helper_gvec_2_ptr * const f_scvtf_v[] = { 9373 gen_helper_gvec_vcvt_sh, 9374 gen_helper_gvec_vcvt_sf, 9375 gen_helper_gvec_vcvt_sd, 9376 }; 9377 TRANS(SCVTF_vi, do_gvec_op2_fpst, 9378 a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v) 9379 TRANS(SCVTF_vf, do_gvec_op2_fpst, 9380 a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v) 9381 9382 static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = { 9383 gen_helper_gvec_vcvt_uh, 9384 gen_helper_gvec_vcvt_uf, 9385 gen_helper_gvec_vcvt_ud, 9386 }; 9387 TRANS(UCVTF_vi, do_gvec_op2_fpst, 9388 a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v) 9389 TRANS(UCVTF_vf, do_gvec_op2_fpst, 9390 a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v) 9391 9392 static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = { 9393 gen_helper_gvec_vcvt_rz_hs, 9394 gen_helper_gvec_vcvt_rz_fs, 9395 gen_helper_gvec_vcvt_rz_ds, 9396 }; 9397 TRANS(FCVTZS_vf, do_gvec_op2_fpst, 9398 a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf) 9399 9400 static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = { 9401 gen_helper_gvec_vcvt_rz_hu, 9402 gen_helper_gvec_vcvt_rz_fu, 9403 gen_helper_gvec_vcvt_rz_du, 9404 }; 9405 TRANS(FCVTZU_vf, do_gvec_op2_fpst, 9406 a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf) 9407 9408 static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = { 9409 gen_helper_gvec_vcvt_rm_sh, 9410 gen_helper_gvec_vcvt_rm_ss, 9411 gen_helper_gvec_vcvt_rm_sd, 9412 }; 9413 9414 static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = { 9415 gen_helper_gvec_vcvt_rm_uh, 9416 gen_helper_gvec_vcvt_rm_us, 9417 gen_helper_gvec_vcvt_rm_ud, 9418 }; 9419 9420 TRANS(FCVTNS_vi, do_gvec_op2_fpst, 9421 a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi) 9422 TRANS(FCVTNU_vi, do_gvec_op2_fpst, 9423 a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi) 9424 TRANS(FCVTPS_vi, do_gvec_op2_fpst, 9425 a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi) 9426 TRANS(FCVTPU_vi, do_gvec_op2_fpst, 9427 a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi) 9428 TRANS(FCVTMS_vi, do_gvec_op2_fpst, 9429 a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi) 9430 TRANS(FCVTMU_vi, do_gvec_op2_fpst, 9431 a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi) 9432 TRANS(FCVTZS_vi, do_gvec_op2_fpst, 9433 a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi) 9434 TRANS(FCVTZU_vi, do_gvec_op2_fpst, 9435 a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi) 9436 TRANS(FCVTAS_vi, do_gvec_op2_fpst, 9437 a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi) 9438 TRANS(FCVTAU_vi, do_gvec_op2_fpst, 9439 a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi) 9440 9441 static gen_helper_gvec_2_ptr * const f_fceq0[] = { 9442 gen_helper_gvec_fceq0_h, 9443 gen_helper_gvec_fceq0_s, 9444 gen_helper_gvec_fceq0_d, 9445 }; 9446 TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0) 9447 9448 static gen_helper_gvec_2_ptr * const f_fcgt0[] = { 9449 gen_helper_gvec_fcgt0_h, 9450 gen_helper_gvec_fcgt0_s, 9451 gen_helper_gvec_fcgt0_d, 9452 }; 9453 TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0) 9454 9455 static gen_helper_gvec_2_ptr * const f_fcge0[] = { 9456 gen_helper_gvec_fcge0_h, 9457 gen_helper_gvec_fcge0_s, 9458 gen_helper_gvec_fcge0_d, 9459 }; 9460 TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0) 9461 9462 static gen_helper_gvec_2_ptr * const f_fclt0[] = { 9463 gen_helper_gvec_fclt0_h, 9464 gen_helper_gvec_fclt0_s, 9465 
gen_helper_gvec_fclt0_d, 9466 }; 9467 TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0) 9468 9469 static gen_helper_gvec_2_ptr * const f_fcle0[] = { 9470 gen_helper_gvec_fcle0_h, 9471 gen_helper_gvec_fcle0_s, 9472 gen_helper_gvec_fcle0_d, 9473 }; 9474 TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0) 9475 9476 static gen_helper_gvec_2_ptr * const f_frecpe[] = { 9477 gen_helper_gvec_frecpe_h, 9478 gen_helper_gvec_frecpe_s, 9479 gen_helper_gvec_frecpe_d, 9480 }; 9481 TRANS(FRECPE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frecpe) 9482 9483 static gen_helper_gvec_2_ptr * const f_frsqrte[] = { 9484 gen_helper_gvec_frsqrte_h, 9485 gen_helper_gvec_frsqrte_s, 9486 gen_helper_gvec_frsqrte_d, 9487 }; 9488 TRANS(FRSQRTE_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_frsqrte) 9489 9490 static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) 9491 { 9492 /* Handle 2-reg-misc ops which are widening (so each size element 9493 * in the source becomes a 2*size element in the destination. 9494 * The only instruction like this is FCVTL. 9495 */ 9496 int pass; 9497 TCGv_ptr fpst; 9498 9499 if (!fp_access_check(s)) { 9500 return true; 9501 } 9502 9503 if (a->esz == MO_64) { 9504 /* 32 -> 64 bit fp conversion */ 9505 TCGv_i64 tcg_res[2]; 9506 TCGv_i32 tcg_op = tcg_temp_new_i32(); 9507 int srcelt = a->q ? 2 : 0; 9508 9509 fpst = fpstatus_ptr(FPST_A64); 9510 9511 for (pass = 0; pass < 2; pass++) { 9512 tcg_res[pass] = tcg_temp_new_i64(); 9513 read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32); 9514 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst); 9515 } 9516 for (pass = 0; pass < 2; pass++) { 9517 write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64); 9518 } 9519 } else { 9520 /* 16 -> 32 bit fp conversion */ 9521 int srcelt = a->q ? 4 : 0; 9522 TCGv_i32 tcg_res[4]; 9523 TCGv_i32 ahp = get_ahp_flag(); 9524 9525 fpst = fpstatus_ptr(FPST_A64_F16); 9526 9527 for (pass = 0; pass < 4; pass++) { 9528 tcg_res[pass] = tcg_temp_new_i32(); 9529 read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16); 9530 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], 9531 fpst, ahp); 9532 } 9533 for (pass = 0; pass < 4; pass++) { 9534 write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32); 9535 } 9536 } 9537 clear_vec_high(s, true, a->rd); 9538 return true; 9539 } 9540 9541 static bool trans_OK(DisasContext *s, arg_OK *a) 9542 { 9543 return true; 9544 } 9545 9546 static bool trans_FAIL(DisasContext *s, arg_OK *a) 9547 { 9548 s->is_nonstreaming = true; 9549 return true; 9550 } 9551 9552 /** 9553 * btype_destination_ok: 9554 * @insn: The instruction at the branch destination 9555 * @bt: SCTLR_ELx.BT 9556 * @btype: PSTATE.BTYPE, and is non-zero 9557 * 9558 * On a guarded page, there are a limited number of insns 9559 * that may be present at the branch target: 9560 * - branch target identifiers, 9561 * - paciasp, pacibsp, 9562 * - BRK insn 9563 * - HLT insn 9564 * Anything else causes a Branch Target Exception. 9565 * 9566 * Return true if the branch is compatible, false to raise BTITRAP. 9567 */ 9568 static bool btype_destination_ok(uint32_t insn, bool bt, int btype) 9569 { 9570 if ((insn & 0xfffff01fu) == 0xd503201fu) { 9571 /* HINT space */ 9572 switch (extract32(insn, 5, 7)) { 9573 case 0b011001: /* PACIASP */ 9574 case 0b011011: /* PACIBSP */ 9575 /* 9576 * If SCTLR_ELx.BT, then PACI*SP are not compatible 9577 * with btype == 3. Otherwise all btype are ok. 
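             * (With SCTLR_ELx.BT set, PACIASP/PACIBSP behave as an implicit
             * BTI c, which does not accept BTYPE == 3; with BT clear they
             * behave as BTI jc and accept any non-zero BTYPE.)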
             */
            return !bt || btype != 3;
        case 0b100000: /* BTI */
            /* Not compatible with any btype. */
            return false;
        case 0b100010: /* BTI c */
            /* Not compatible with btype == 3 */
            return btype != 3;
        case 0b100100: /* BTI j */
            /* Not compatible with btype == 2 */
            return btype != 2;
        case 0b100110: /* BTI jc */
            /* Compatible with any btype. */
            return true;
        }
    } else {
        switch (insn & 0xffe0001fu) {
        case 0xd4200000u: /* BRK */
        case 0xd4400000u: /* HLT */
            /* Give priority to the breakpoint exception. */
            return true;
        }
    }
    return false;
}

static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    ARMCPU *arm_cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;
    dc->pc_save = dc->base.pc_first;
    dc->aarch64 = true;
    dc->thumb = false;
    dc->sctlr_b = 0;
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
    dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
    dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
    dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
    dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
    dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
    dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
    dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
    dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
    dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
    dc->bt = EX_TBFLAG_A64(tb_flags, BT);
    dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
    dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
    dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
    dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
    dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
    dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
    dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
    dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
    dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
    dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
    dc->nv = EX_TBFLAG_A64(tb_flags, NV);
    dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
    dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
    dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
    dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;
    dc->dcz_blocksize = arm_cpu->dcz_blocksize;
    dc->gm_blocksize = arm_cpu->gm_blocksize;

#ifdef CONFIG_USER_ONLY
    /* In sve_probe_page, we assume TBI is enabled. */
    tcg_debug_assert(dc->tbid & 1);
#endif

    dc->lse2 = dc_isar_feature(aa64_lse2, dc);

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;

    /* Bound the number of insns to execute to those left on the page. */
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;

    /* If architectural single step active, limit to 1. */
    if (dc->ss_active) {
        bound = 1;
    }
    dc->base.max_insns = MIN(dc->base.max_insns, bound);
}

static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}

static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    target_ulong pc_arg = dc->base.pc_next;

    if (tb_cflags(dcbase->tb) & CF_PCREL) {
        pc_arg &= ~TARGET_PAGE_MASK;
    }
    tcg_gen_insn_start(pc_arg, 0, 0);
    dc->insn_start_updated = false;
}

static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *s = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    uint64_t pc = s->base.pc_next;
    uint32_t insn;

    /* Singlestep exceptions have the highest priority. */
    if (s->ss_active && !s->pstate_ss) {
        /* Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(s->base.num_insns == 1);
        gen_swstep_exception(s, 0, 0);
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = pc + 4;
        return;
    }

    if (pc & 3) {
        /*
         * PC alignment fault.  This has priority over the instruction abort
         * that we would receive from a translation fault via arm_ldl_code.
         * This should only be possible after an indirect branch, at the
         * start of the TB.
         */
        assert(s->base.num_insns == 1);
        gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
        return;
    }

    s->pc_curr = pc;
    insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next = pc + 4;

    s->fp_access_checked = false;
    s->sve_access_checked = false;

    if (s->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /* First insn can have btype set to non-zero. */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above most
             * everything else.  This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             *
             * We can check all but the guarded page check here;
             * defer the latter to a helper.
             */
            if (s->btype != 0
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_helper_guarded_page_check(tcg_env);
            }
        } else {
            /* Not the first insn: btype must be 0. */
            tcg_debug_assert(s->btype == 0);
        }
    }

    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }

    if (!disas_a64(s, insn) &&
        !disas_sme(s, insn) &&
        !disas_sve(s, insn)) {
        unallocated_encoding(s);
    }

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}

static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->ss_active)) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            gen_step_complete_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, 4);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_update_pc(dc, 4);
            gen_helper_wfe(tcg_env);
            break;
        case DISAS_YIELD:
            gen_a64_update_pc(dc, 4);
            gen_helper_yield(tcg_env);
            break;
        case DISAS_WFI:
            /*
             * This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            gen_a64_update_pc(dc, 4);
            gen_helper_wfi(tcg_env, tcg_constant_i32(4));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
    }
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
};
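
/*
 * Usage sketch: the hooks collected in aarch64_translator_ops are driven by
 * the generic translator loop, which invokes them in the order
 * init_disas_context -> tb_start -> { insn_start -> translate_insn }* ->
 * tb_stop.  In target/arm the AArch64 ops are chosen over the AArch32 ones
 * roughly as below; the exact selection site and the translator_loop()
 * signature vary between QEMU versions, so treat this as illustrative only
 * (tb_flags, max_insns, pc, host_pc and dc are placeholder names here).
 *
 *     const TranslatorOps *ops = &arm_translator_ops;
 *     if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
 *         ops = &aarch64_translator_ops;
 *     }
 *     translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
 */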