/*
 * AArch64 translation
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "exec/target_page.h"
#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"
#include "cpregs.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_gcspr[4];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Helpers for extracting complex instruction fields
 */

/*
 * For load/store with an unsigned 12 bit immediate scaled by the element
 * size. The input has the immediate field in bits [14:3] and the element
 * size in [2:0].
 */
static int uimm_scaled(DisasContext *s, int x)
{
    unsigned imm = x >> 3;
    unsigned scale = extract32(x, 0, 3);
    return imm << scale;
}

/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
static int scale_by_log2_tag_granule(DisasContext *s, int x)
{
    return x << LOG2_TAG_GRANULE;
}

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* initialize TCG globals. */
void a64_translate_init(void)
{
    static const char gcspr_names[4][12] = {
        "gcspr_el0", "gcspr_el1", "gcspr_el2", "gcspr_el3"
    };

    int i;

    cpu_pc = tcg_global_mem_new_i64(tcg_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");

    for (i = 0; i < 4; i++) {
        cpu_gcspr[i] =
            tcg_global_mem_new_i64(tcg_env,
                                   offsetof(CPUARMState, cp15.gcspr_el[i]),
                                   gcspr_names[i]);
    }
}

/*
 * Return the full arm mmu_idx to use for A64 load/store insns which
 * have an "unprivileged load/store" variant. Those insns access
 * EL0 if executed from an EL which has control over EL0 (usually
 * EL1) but behave like normal loads and stores if executed from
 * elsewhere (eg EL3).
 *
 * @unpriv : true for the unprivileged encoding; false for the
 *           normal encoding (in which case we will return the same
 *           thing as get_mem_index().
 */
static ARMMMUIdx full_a64_user_mem_index(DisasContext *s, bool unpriv)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (unpriv && s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return useridx;
}

/* Return the core mmu_idx per above. */
static int core_a64_user_mem_index(DisasContext *s, bool unpriv)
{
    return arm_to_core_mmu_idx(full_a64_user_mem_index(s, unpriv));
}

/* For a given translation regime, return the core mmu_idx for gcs access. */
static int core_gcs_mem_index(ARMMMUIdx armidx)
{
    return arm_to_core_mmu_idx(regime_to_gcs(armidx));
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55. */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing. Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register. But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(tcg_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access. This probes a single
 * address, the exact one specified. The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      MemOp memop, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, MemOp memop)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int total_size, MemOp single_mop)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

/*
 * Generate the special alignment check that applies to AccType_ATOMIC
 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
 * naturally aligned, but it must not cross a 16-byte boundary.
 * See AArch64.CheckAlignment().
 */
static void check_lse2_align(DisasContext *s, int rn, int imm,
                             bool is_write, MemOp mop)
{
    TCGv_i32 tmp;
    TCGv_i64 addr;
    TCGLabel *over_label;
    MMUAccessType type;
    int mmu_idx;

    tmp = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
    tcg_gen_addi_i32(tmp, tmp, imm & 15);
    tcg_gen_andi_i32(tmp, tmp, 15);
    tcg_gen_addi_i32(tmp, tmp, memop_size(mop));

    over_label = gen_new_label();
    tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);

    type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD,
    mmu_idx = get_mem_index(s);
    gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
                                tcg_constant_i32(mmu_idx));

    gen_set_label(over_label);

}

/* Handle the alignment check for AccType_ATOMIC instructions. */
static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }

    /*
     * If size == MO_128, this is a LDXP, and the operation is single-copy
     * atomic for each doubleword, not the entire quadword; it still must
     * be quadword aligned.
     */
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (dc_isar_feature(aa64_lse2, s)) {
        check_lse2_align(s, rn, 0, true, mop);
    } else {
        mop |= MO_ALIGN;
    }
    return finalize_memop(s, mop);
}

/* Handle the alignment check for AccType_ORDERED instructions. */
static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
                                 bool is_write, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (!dc_isar_feature(aa64_lse2, s)) {
        mop |= MO_ALIGN;
    } else if (!s->naa) {
        check_lse2_align(s, rn, imm, is_write, mop);
    }
    return finalize_memop(s, mop);
}

static void gen_add_gcs_record(DisasContext *s, TCGv_i64 value)
{
    TCGv_i64 addr = tcg_temp_new_i64();
    TCGv_i64 gcspr = cpu_gcspr[s->current_el];
    int mmuidx = core_gcs_mem_index(s->mmu_idx);
    MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN);

    tcg_gen_addi_i64(addr, gcspr, -8);
    tcg_gen_qemu_st_i64(value, clean_data_tbi(s, addr), mmuidx, mop);
    tcg_gen_mov_i64(gcspr, addr);
}

static void gen_load_check_gcs_record(DisasContext *s, TCGv_i64 target,
                                      GCSInstructionType it, int rt)
{
    TCGv_i64 gcspr = cpu_gcspr[s->current_el];
    int mmuidx = core_gcs_mem_index(s->mmu_idx);
    MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN);
    TCGv_i64 rec_va = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(rec_va, clean_data_tbi(s, gcspr), mmuidx, mop);

    if (s->gcs_rvcen) {
        TCGLabel *fail_label =
            delay_exception(s, EXCP_UDEF, syn_gcs_data_check(it, rt));

        tcg_gen_brcond_i64(TCG_COND_NE, rec_va, target, fail_label);
    }

    gen_a64_set_pc(s, rec_va);
    tcg_gen_addi_i64(gcspr, gcspr, 8);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly. The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, unsigned tb_slot_idx, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments. For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path. A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(tb_slot_idx);
        } else {
            tcg_gen_goto_tb(tb_slot_idx);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, tb_slot_idx);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

static void clear_vec(DisasContext *s, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_gvec_dup_imm(MO_64, ofs, vsz, vsz, 0);
}

/*
 * Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, tcg_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/*
 * Write a double result to 128 bit vector register reg, honouring FPCR.NEP:
 * - if FPCR.NEP == 0, clear the high elements of reg
 * - if FPCR.NEP == 1, set the high elements of reg from mergereg
 *   (i.e. merge the result with those high elements)
 * In either case, SVE register bits above 128 are zeroed (per R_WKYLB).
 */
static void write_fp_dreg_merging(DisasContext *s, int reg, int mergereg,
                                  TCGv_i64 v)
{
    if (!s->fpcr_nep) {
        write_fp_dreg(s, reg, v);
        return;
    }

    /*
     * Move from mergereg to reg; this sets the high elements and
     * clears the bits above 128 as a side effect.
     */
    tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
                     vec_full_reg_offset(s, mergereg),
                     16, vec_full_reg_size(s));
    tcg_gen_st_i64(v, tcg_env, vec_full_reg_offset(s, reg));
}

/*
 * Write a single-prec result, but only clear the higher elements
 * of the destination register if FPCR.NEP is 0; otherwise preserve them.
 */
static void write_fp_sreg_merging(DisasContext *s, int reg, int mergereg,
                                  TCGv_i32 v)
{
    if (!s->fpcr_nep) {
        write_fp_sreg(s, reg, v);
        return;
    }

    tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
                     vec_full_reg_offset(s, mergereg),
                     16, vec_full_reg_size(s));
    tcg_gen_st_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
}

/*
 * Write a half-prec result, but only clear the higher elements
 * of the destination register if FPCR.NEP is 0; otherwise preserve them.
 * The caller must ensure that the top 16 bits of v are zero.
 */
static void write_fp_hreg_merging(DisasContext *s, int reg, int mergereg,
                                  TCGv_i32 v)
{
    if (!s->fpcr_nep) {
        write_fp_sreg(s, reg, v);
        return;
    }

    tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, reg),
                     vec_full_reg_offset(s, mergereg),
                     16, vec_full_reg_size(s));
    tcg_gen_st16_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper. */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper. */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, ARMFPStatusFlavour fpsttype, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(fpsttype);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 4-operand operation using an out-of-line helper. */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand operation using an out-of-line helper that takes
 * a pointer to the CPU env.
 */
static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data,
                             gen_helper_gvec_4_ptr *fn)
{
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       tcg_env,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, ARMFPStatusFlavour fpsttype,
                              int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(fpsttype);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN.
 * These functions implement
 *   d = floatN_is_any_nan(s) ? s : floatN_chs(s)
 * which for float32 is
 *   d = (s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
 * and similarly for the other float sizes.
 */
static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();

    gen_vfp_negh(chs_s, s);
    gen_vfp_absh(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7c00),
                        s, chs_s);
}

static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32(), chs_s = tcg_temp_new_i32();

    gen_vfp_negs(chs_s, s);
    gen_vfp_abss(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7f800000UL),
                        s, chs_s);
}

static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
{
    TCGv_i64 abs_s = tcg_temp_new_i64(), chs_s = tcg_temp_new_i64();

    gen_vfp_negd(chs_s, s);
    gen_vfp_absd(abs_s, s);
    tcg_gen_movcond_i64(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
                        s, chs_s);
}

/*
 * These functions implement
 *   d = floatN_is_any_nan(s) ? s : floatN_abs(s)
 * which for float32 is
 *   d = (s & ~(1 << 31)) > 0x7f800000UL) ? s : (s & ~(1 << 31))
 * and similarly for the other float sizes.
 */
static void gen_vfp_ah_absh(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32();

    gen_vfp_absh(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7c00),
                        s, abs_s);
}

static void gen_vfp_ah_abss(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 abs_s = tcg_temp_new_i32();

    gen_vfp_abss(abs_s, s);
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i32(0x7f800000UL),
                        s, abs_s);
}

static void gen_vfp_ah_absd(TCGv_i64 d, TCGv_i64 s)
{
    TCGv_i64 abs_s = tcg_temp_new_i64();

    gen_vfp_absd(abs_s, s);
    tcg_gen_movcond_i64(TCG_COND_GTU, d,
                        abs_s, tcg_constant_i64(0x7ff0000000000000ULL),
                        s, abs_s);
}

static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
{
    if (dc->fpcr_ah) {
        gen_vfp_ah_negh(d, s);
    } else {
        gen_vfp_negh(d, s);
    }
}

static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
{
    if (dc->fpcr_ah) {
        gen_vfp_ah_negs(d, s);
    } else {
        gen_vfp_negs(d, s);
    }
}

static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s)
{
    if (dc->fpcr_ah) {
        gen_vfp_ah_negd(d, s);
    } else {
        gen_vfp_negd(d, s);
    }
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 result, flag, tmp;
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    tcg_gen_movi_i64(tmp, 0);
    tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    gen_set_NZ64(result);

    tcg_gen_xor_i64(flag, result, t0);
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_andc_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);

    tcg_gen_mov_i64(dest, result);
}

static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp = tcg_temp_new_i32();

    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_add64_CC(dest, t0, t1);
    } else {
        gen_add32_CC(dest, t0, t1);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 64 bit arithmetic */
    TCGv_i64 result, flag, tmp;

    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tcg_gen_sub_i64(result, t0, t1);

    gen_set_NZ64(result);

    tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    tcg_gen_xor_i64(flag, result, t0);
    tmp = tcg_temp_new_i64();
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_and_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);
    tcg_gen_mov_i64(dest, result);
}

static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 32 bit arithmetic */
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp;

    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_sub64_CC(dest, t0, t1);
    } else {
        gen_sub32_CC(dest, t0, t1);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_addcio_i64(result, cf_64, t0, t1, cf_64);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_addcio_i32(cpu_NF, cpu_CF, t0_32, t1_32, cpu_CF);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();

    tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i64 tmphi = tcg_temp_new_i64();
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);

        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);

        tmphi = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
    }

    tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = -1;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = 1;
    return true;
}

static bool nonstreaming_check(DisasContext *s)
{
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    return fp_access_check_only(s) && nonstreaming_check(s);
}

/*
 * Return <0 for non-supported element sizes, with MO_16 controlled by
 * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success.
 */
static int fp_access_check_scalar_hsd(DisasContext *s, MemOp esz)
{
    switch (esz) {
    case MO_64:
    case MO_32:
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return -1;
        }
        break;
    default:
        return -1;
    }
    return fp_access_check(s);
}

/* Likewise, but vector MO_64 must have two elements. */
static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz)
{
    switch (esz) {
    case MO_64:
        if (!is_q) {
            return -1;
        }
        break;
    case MO_32:
        break;
    case MO_16:
        if (!dc_isar_feature(aa64_fp16, s)) {
            return -1;
        }
        break;
    default:
        return -1;
    }
    return fp_access_check(s);
}

/*
 * Check that SVE access is enabled. If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (dc_isar_feature(aa64_sme, s)) {
        bool ret;

        if (s->pstate_sm) {
            ret = sme_enabled_check(s);
        } else if (dc_isar_feature(aa64_sve, s)) {
            goto continue_sve;
        } else {
            ret = sme_sm_enabled_check(s);
        }
        if (ret) {
            ret = nonstreaming_check(s);
        }
        s->sve_access_checked = (ret ? 1 : -1);
        return ret;
    }

 continue_sve:
    if (s->sve_excp_el) {
        /* Assert that we only raise one exception per instruction. */
        assert(!s->sve_access_checked);
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        s->sve_access_checked = -1;
        return false;
    }
    s->sve_access_checked = 1;
    return fp_access_check(s);
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority. This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (s->sme_excp_el
        && (!s->fp_excp_el || s->sme_excp_el <= s->fp_excp_el)) {
        bool ret = sme_access_check(s);
        s->fp_access_checked = (ret ? 1 : -1);
        return ret;
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * Expanders for AdvSIMD translation functions.
 */
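
/*
 * Each of the following wrappers rejects the element-size/Q combination
 * that is unallocated for its instruction class (for example 64-bit
 * elements with Q == 0), performs the FP access check (which includes
 * the SME non-streaming check), and only then expands the operation
 * with the supplied gvec expander or out-of-line helper.
 */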

static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
                            gen_helper_gvec_2 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
    }
    return true;
}

static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
    }
    return true;
}

static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_8) {
        return false;
    }
    return do_gvec_fn3_no64(s, a, fn);
}

static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
    tcg_gen_shli_i64(tcg_out, tcg_out, shift);
}

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */
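
/*
 * The trans_* functions below are invoked from the generated decoder
 * (decode-a64.c.inc): each returns true if it handled the encoding
 * (including the case where it emitted an exception) and false if the
 * encoding should be treated as unallocated.
 */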

static bool trans_B(DisasContext *s, arg_i *a)
{
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_BL(DisasContext *s, arg_i *a)
{
    TCGv_i64 link = tcg_temp_new_i64();

    gen_pc_plus_diff(s, link, 4);
    if (s->gcs_en) {
        gen_add_gcs_record(s, link);
    }
    tcg_gen_mov_i64(cpu_reg(s, 30), link);

    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}


static bool trans_CBZ(DisasContext *s, arg_cbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_TBZ(DisasContext *s, arg_tbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
{
    /* BC.cond is only present with FEAT_HBC */
    if (a->c && !dc_isar_feature(aa64_hbc, s)) {
        return false;
    }
    reset_btype(s);
    if (a->cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(a->cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, a->imm);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, a->imm);
    }
    return true;
}

static void set_btype_for_br(DisasContext *s, int rn)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BR to {x16,x17} or !guard -> 1, else 3. */
        if (rn == 16 || rn == 17) {
            set_btype(s, 1);
        } else {
            TCGv_i64 pc = tcg_temp_new_i64();
            gen_pc_plus_diff(s, pc, 0);
            gen_helper_guarded_page_br(tcg_env, pc);
            s->btype = -1;
        }
    }
}

static void set_btype_for_blr(DisasContext *s)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BLR sets BTYPE to 2, regardless of source guarded page. */
        set_btype(s, 2);
    }
}

static bool trans_BR(DisasContext *s, arg_r *a)
{
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLR(DisasContext *s, arg_r *a)
{
    TCGv_i64 link = tcg_temp_new_i64();

    gen_pc_plus_diff(s, link, 4);
    if (s->gcs_en) {
        gen_add_gcs_record(s, link);
    }
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    tcg_gen_mov_i64(cpu_reg(s, 30), link);

    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RET(DisasContext *s, arg_r *a)
{
    TCGv_i64 target = cpu_reg(s, a->rn);

    if (s->gcs_en) {
        gen_load_check_gcs_record(s, target, GCS_IT_RET_nPauth, a->rn);
    } else {
        gen_a64_set_pc(s, target);
    }
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
                                   TCGv_i64 modifier, bool use_key_a)
{
    TCGv_i64 truedst;
    /*
     * Return the branch target for a BRAA/RETA/etc, which is either
     * just the destination dst, or that value with the pauth check
     * done and the code removed from the high bits.
     */
    if (!s->pauth_active) {
        return dst;
    }

    truedst = tcg_temp_new_i64();
    if (use_key_a) {
        gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
    } else {
        gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
    }
    return truedst;
}

static bool trans_BRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst, link;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);

    link = tcg_temp_new_i64();
    gen_pc_plus_diff(s, link, 4);
    if (s->gcs_en) {
        gen_add_gcs_record(s, link);
    }
    gen_a64_set_pc(s, dst);
    tcg_gen_mov_i64(cpu_reg(s, 30), link);

    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
    if (s->gcs_en) {
        GCSInstructionType it = a->m ? GCS_IT_RET_PauthB : GCS_IT_RET_PauthA;
        gen_load_check_gcs_record(s, dst, it, 30);
    } else {
        gen_a64_set_pc(s, dst);
    }
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst, link;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);

    link = tcg_temp_new_i64();
    gen_pc_plus_diff(s, link, 4);
    if (s->gcs_en) {
        gen_add_gcs_record(s, link);
    }
    gen_a64_set_pc(s, dst);
    tcg_gen_mov_i64(cpu_reg(s, 30), link);

    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_ERET(DisasContext *s, arg_ERET *a)
{
#ifdef CONFIG_USER_ONLY
    return false;
#else
    TCGv_i64 dst;

    if (s->current_el == 0) {
        return false;
    }
    if (s->trap_eret) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
        return true;
    }
    dst = tcg_temp_new_i64();
    tcg_gen_ld_i64(dst, tcg_env,
                   offsetof(CPUARMState, elr_el[s->current_el]));

    translator_io_start(&s->base);

    gen_helper_exception_return(tcg_env, dst);
    /* Must exit loop to check un-masked IRQs */
    s->base.is_jmp = DISAS_EXIT;
    return true;
#endif
}

static bool trans_ERETA(DisasContext *s, arg_reta *a)
{
#ifdef CONFIG_USER_ONLY
    return false;
#else
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    if (s->current_el == 0) {
        return false;
    }
    /* The FGT trap takes precedence over an auth trap. */
    if (s->trap_eret) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
        return true;
    }
    dst = tcg_temp_new_i64();
    tcg_gen_ld_i64(dst, tcg_env,
                   offsetof(CPUARMState, elr_el[s->current_el]));

    dst = auth_branch_target(s, dst, cpu_X[31], !a->m);

    translator_io_start(&s->base);

    gen_helper_exception_return(tcg_env, dst);
    /* Must exit loop to check un-masked IRQs */
    s->base.is_jmp = DISAS_EXIT;
    return true;
#endif
}

static bool trans_NOP(DisasContext *s, arg_NOP *a)
{
    return true;
}

static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
{
    /*
     * When running in MTTCG we don't generate jumps to the yield and
     * WFE helpers as it won't affect the scheduling of other vCPUs.
     * If we wanted to more completely model WFE/SEV so we don't busy
     * spin unnecessarily we would need to do something more involved.
     */
    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
        s->base.is_jmp = DISAS_YIELD;
    }
    return true;
}

static bool trans_WFI(DisasContext *s, arg_WFI *a)
{
    s->base.is_jmp = DISAS_WFI;
    return true;
}

static bool trans_WFE(DisasContext *s, arg_WFI *a)
{
    /*
     * When running in MTTCG we don't generate jumps to the yield and
     * WFE helpers as it won't affect the scheduling of other vCPUs.
2037 * If we wanted to more completely model WFE/SEV so we don't busy 2038 * spin unnecessarily we would need to do something more involved. 2039 */ 2040 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 2041 s->base.is_jmp = DISAS_WFE; 2042 } 2043 return true; 2044 } 2045 2046 static bool trans_WFIT(DisasContext *s, arg_WFIT *a) 2047 { 2048 if (!dc_isar_feature(aa64_wfxt, s)) { 2049 return false; 2050 } 2051 2052 /* 2053 * Because we need to pass the register value to the helper, 2054 * it's easier to emit the code now, unlike trans_WFI which 2055 * defers it to aarch64_tr_tb_stop(). That means we need to 2056 * check ss_active so that single-stepping a WFIT doesn't halt. 2057 */ 2058 if (s->ss_active) { 2059 /* Act like a NOP under architectural singlestep */ 2060 return true; 2061 } 2062 2063 gen_a64_update_pc(s, 4); 2064 gen_helper_wfit(tcg_env, cpu_reg(s, a->rd)); 2065 /* Go back to the main loop to check for interrupts */ 2066 s->base.is_jmp = DISAS_EXIT; 2067 return true; 2068 } 2069 2070 static bool trans_WFET(DisasContext *s, arg_WFET *a) 2071 { 2072 if (!dc_isar_feature(aa64_wfxt, s)) { 2073 return false; 2074 } 2075 2076 /* 2077 * We rely here on our WFE implementation being a NOP, so we 2078 * don't need to do anything different to handle the WFET timeout 2079 * from what trans_WFE does. 2080 */ 2081 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 2082 s->base.is_jmp = DISAS_WFE; 2083 } 2084 return true; 2085 } 2086 2087 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) 2088 { 2089 if (s->pauth_active) { 2090 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]); 2091 } 2092 return true; 2093 } 2094 2095 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) 2096 { 2097 if (s->pauth_active) { 2098 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2099 } 2100 return true; 2101 } 2102 2103 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) 2104 { 2105 if (s->pauth_active) { 2106 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2107 } 2108 return true; 2109 } 2110 2111 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) 2112 { 2113 if (s->pauth_active) { 2114 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2115 } 2116 return true; 2117 } 2118 2119 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) 2120 { 2121 if (s->pauth_active) { 2122 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 2123 } 2124 return true; 2125 } 2126 2127 static bool trans_ESB(DisasContext *s, arg_ESB *a) 2128 { 2129 /* Without RAS, we must implement this as NOP. */ 2130 if (dc_isar_feature(aa64_ras, s)) { 2131 /* 2132 * QEMU does not have a source of physical SErrors, 2133 * so we are only concerned with virtual SErrors. 2134 * The pseudocode in the ARM for this case is 2135 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then 2136 * AArch64.vESBOperation(); 2137 * Most of the condition can be evaluated at translation time. 2138 * Test for EL2 present, and defer test for SEL2 to runtime. 
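         * The vesb helper performs those remaining checks at runtime.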
2139 */ 2140 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { 2141 gen_helper_vesb(tcg_env); 2142 } 2143 } 2144 return true; 2145 } 2146 2147 static bool trans_GCSB(DisasContext *s, arg_GCSB *a) 2148 { 2149 if (dc_isar_feature(aa64_gcs, s)) { 2150 tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); 2151 } 2152 return true; 2153 } 2154 2155 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) 2156 { 2157 if (s->pauth_active) { 2158 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2159 } 2160 return true; 2161 } 2162 2163 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) 2164 { 2165 if (s->pauth_active) { 2166 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2167 } 2168 return true; 2169 } 2170 2171 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) 2172 { 2173 if (s->pauth_active) { 2174 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2175 } 2176 return true; 2177 } 2178 2179 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) 2180 { 2181 if (s->pauth_active) { 2182 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2183 } 2184 return true; 2185 } 2186 2187 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) 2188 { 2189 if (s->pauth_active) { 2190 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2191 } 2192 return true; 2193 } 2194 2195 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) 2196 { 2197 if (s->pauth_active) { 2198 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2199 } 2200 return true; 2201 } 2202 2203 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) 2204 { 2205 if (s->pauth_active) { 2206 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 2207 } 2208 return true; 2209 } 2210 2211 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) 2212 { 2213 if (s->pauth_active) { 2214 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 2215 } 2216 return true; 2217 } 2218 2219 static bool trans_CHKFEAT(DisasContext *s, arg_CHKFEAT *a) 2220 { 2221 uint64_t feat_en = 0; 2222 2223 if (s->gcs_en) { 2224 feat_en |= 1 << 0; 2225 } 2226 if (feat_en) { 2227 TCGv_i64 x16 = cpu_reg(s, 16); 2228 tcg_gen_andi_i64(x16, x16, ~feat_en); 2229 } 2230 return true; 2231 } 2232 2233 static bool trans_CLREX(DisasContext *s, arg_CLREX *a) 2234 { 2235 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2236 return true; 2237 } 2238 2239 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) 2240 { 2241 /* We handle DSB and DMB the same way */ 2242 TCGBar bar; 2243 2244 switch (a->types) { 2245 case 1: /* MBReqTypes_Reads */ 2246 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; 2247 break; 2248 case 2: /* MBReqTypes_Writes */ 2249 bar = TCG_BAR_SC | TCG_MO_ST_ST; 2250 break; 2251 default: /* MBReqTypes_All */ 2252 bar = TCG_BAR_SC | TCG_MO_ALL; 2253 break; 2254 } 2255 tcg_gen_mb(bar); 2256 return true; 2257 } 2258 2259 static bool trans_DSB_nXS(DisasContext *s, arg_DSB_nXS *a) 2260 { 2261 if (!dc_isar_feature(aa64_xs, s)) { 2262 return false; 2263 } 2264 tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); 2265 return true; 2266 } 2267 2268 static bool trans_ISB(DisasContext *s, arg_ISB *a) 2269 { 2270 /* 2271 * We need to break the TB after this insn to execute 2272 * self-modifying code correctly and also to take 2273 * any pending interrupts immediately. 
2274 */ 2275 reset_btype(s); 2276 gen_goto_tb(s, 0, 4); 2277 return true; 2278 } 2279 2280 static bool trans_SB(DisasContext *s, arg_SB *a) 2281 { 2282 if (!dc_isar_feature(aa64_sb, s)) { 2283 return false; 2284 } 2285 /* 2286 * TODO: There is no speculation barrier opcode for TCG; 2287 * MB and end the TB instead. 2288 */ 2289 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); 2290 gen_goto_tb(s, 0, 4); 2291 return true; 2292 } 2293 2294 static bool trans_CFINV(DisasContext *s, arg_CFINV *a) 2295 { 2296 if (!dc_isar_feature(aa64_condm_4, s)) { 2297 return false; 2298 } 2299 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); 2300 return true; 2301 } 2302 2303 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) 2304 { 2305 TCGv_i32 z; 2306 2307 if (!dc_isar_feature(aa64_condm_5, s)) { 2308 return false; 2309 } 2310 2311 z = tcg_temp_new_i32(); 2312 2313 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); 2314 2315 /* 2316 * (!C & !Z) << 31 2317 * (!(C | Z)) << 31 2318 * ~((C | Z) << 31) 2319 * ~-(C | Z) 2320 * (C | Z) - 1 2321 */ 2322 tcg_gen_or_i32(cpu_NF, cpu_CF, z); 2323 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1); 2324 2325 /* !(Z & C) */ 2326 tcg_gen_and_i32(cpu_ZF, z, cpu_CF); 2327 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1); 2328 2329 /* (!C & Z) << 31 -> -(Z & ~C) */ 2330 tcg_gen_andc_i32(cpu_VF, z, cpu_CF); 2331 tcg_gen_neg_i32(cpu_VF, cpu_VF); 2332 2333 /* C | Z */ 2334 tcg_gen_or_i32(cpu_CF, cpu_CF, z); 2335 2336 return true; 2337 } 2338 2339 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) 2340 { 2341 if (!dc_isar_feature(aa64_condm_5, s)) { 2342 return false; 2343 } 2344 2345 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */ 2346 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ 2347 2348 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ 2349 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF); 2350 2351 tcg_gen_movi_i32(cpu_NF, 0); 2352 tcg_gen_movi_i32(cpu_VF, 0); 2353 2354 return true; 2355 } 2356 2357 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) 2358 { 2359 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { 2360 return false; 2361 } 2362 if (a->imm & 1) { 2363 set_pstate_bits(PSTATE_UAO); 2364 } else { 2365 clear_pstate_bits(PSTATE_UAO); 2366 } 2367 gen_rebuild_hflags(s); 2368 s->base.is_jmp = DISAS_TOO_MANY; 2369 return true; 2370 } 2371 2372 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) 2373 { 2374 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { 2375 return false; 2376 } 2377 if (a->imm & 1) { 2378 set_pstate_bits(PSTATE_PAN); 2379 } else { 2380 clear_pstate_bits(PSTATE_PAN); 2381 } 2382 gen_rebuild_hflags(s); 2383 s->base.is_jmp = DISAS_TOO_MANY; 2384 return true; 2385 } 2386 2387 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) 2388 { 2389 if (s->current_el == 0) { 2390 return false; 2391 } 2392 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP)); 2393 s->base.is_jmp = DISAS_TOO_MANY; 2394 return true; 2395 } 2396 2397 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) 2398 { 2399 if (!dc_isar_feature(aa64_ssbs, s)) { 2400 return false; 2401 } 2402 if (a->imm & 1) { 2403 set_pstate_bits(PSTATE_SSBS); 2404 } else { 2405 clear_pstate_bits(PSTATE_SSBS); 2406 } 2407 /* Don't need to rebuild hflags since SSBS is a nop */ 2408 s->base.is_jmp = DISAS_TOO_MANY; 2409 return true; 2410 } 2411 2412 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) 2413 { 2414 if (!dc_isar_feature(aa64_dit, s)) { 2415 return false; 2416 } 2417 if (a->imm & 1) { 2418 set_pstate_bits(PSTATE_DIT); 2419 } else { 2420 clear_pstate_bits(PSTATE_DIT); 2421 } 2422 /* 
There's no need to rebuild hflags because DIT is a nop */ 2423 s->base.is_jmp = DISAS_TOO_MANY; 2424 return true; 2425 } 2426 2427 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a) 2428 { 2429 if (dc_isar_feature(aa64_mte, s)) { 2430 /* Full MTE is enabled -- set the TCO bit as directed. */ 2431 if (a->imm & 1) { 2432 set_pstate_bits(PSTATE_TCO); 2433 } else { 2434 clear_pstate_bits(PSTATE_TCO); 2435 } 2436 gen_rebuild_hflags(s); 2437 /* Many factors, including TCO, go into MTE_ACTIVE. */ 2438 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2439 return true; 2440 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { 2441 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ 2442 return true; 2443 } else { 2444 /* Insn not present */ 2445 return false; 2446 } 2447 } 2448 2449 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) 2450 { 2451 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm)); 2452 s->base.is_jmp = DISAS_TOO_MANY; 2453 return true; 2454 } 2455 2456 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) 2457 { 2458 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm)); 2459 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2460 s->base.is_jmp = DISAS_UPDATE_EXIT; 2461 return true; 2462 } 2463 2464 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a) 2465 { 2466 if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) { 2467 return false; 2468 } 2469 2470 if (a->imm == 0) { 2471 clear_pstate_bits(PSTATE_ALLINT); 2472 } else if (s->current_el > 1) { 2473 set_pstate_bits(PSTATE_ALLINT); 2474 } else { 2475 gen_helper_msr_set_allint_el1(tcg_env); 2476 } 2477 2478 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2479 s->base.is_jmp = DISAS_UPDATE_EXIT; 2480 return true; 2481 } 2482 2483 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) 2484 { 2485 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { 2486 return false; 2487 } 2488 if (sme_access_check(s)) { 2489 int old = s->pstate_sm | (s->pstate_za << 1); 2490 int new = a->imm * 3; 2491 2492 if ((old ^ new) & a->mask) { 2493 /* At least one bit changes. 
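             * Updating SM/ZA has side effects beyond the PSTATE bits, so
             * call the helper and end the TB.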
*/ 2494 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new), 2495 tcg_constant_i32(a->mask)); 2496 s->base.is_jmp = DISAS_TOO_MANY; 2497 } 2498 } 2499 return true; 2500 } 2501 2502 static void gen_get_nzcv(TCGv_i64 tcg_rt) 2503 { 2504 TCGv_i32 tmp = tcg_temp_new_i32(); 2505 TCGv_i32 nzcv = tcg_temp_new_i32(); 2506 2507 /* build bit 31, N */ 2508 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31)); 2509 /* build bit 30, Z */ 2510 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0); 2511 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1); 2512 /* build bit 29, C */ 2513 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1); 2514 /* build bit 28, V */ 2515 tcg_gen_shri_i32(tmp, cpu_VF, 31); 2516 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); 2517 /* generate result */ 2518 tcg_gen_extu_i32_i64(tcg_rt, nzcv); 2519 } 2520 2521 static void gen_set_nzcv(TCGv_i64 tcg_rt) 2522 { 2523 TCGv_i32 nzcv = tcg_temp_new_i32(); 2524 2525 /* take NZCV from R[t] */ 2526 tcg_gen_extrl_i64_i32(nzcv, tcg_rt); 2527 2528 /* bit 31, N */ 2529 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); 2530 /* bit 30, Z */ 2531 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30)); 2532 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0); 2533 /* bit 29, C */ 2534 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29)); 2535 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29); 2536 /* bit 28, V */ 2537 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); 2538 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); 2539 } 2540 2541 static void gen_sysreg_undef(DisasContext *s, bool isread, 2542 uint8_t op0, uint8_t op1, uint8_t op2, 2543 uint8_t crn, uint8_t crm, uint8_t rt) 2544 { 2545 /* 2546 * Generate code to emit an UNDEF with correct syndrome 2547 * information for a failed system register access. 2548 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases, 2549 * but if FEAT_IDST is implemented then read accesses to registers 2550 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP 2551 * syndrome. 2552 */ 2553 uint32_t syndrome; 2554 2555 if (isread && dc_isar_feature(aa64_ids, s) && 2556 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { 2557 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2558 } else { 2559 syndrome = syn_uncategorized(); 2560 } 2561 gen_exception_insn(s, 0, EXCP_UDEF, syndrome); 2562 } 2563 2564 static void gen_gcspopm(DisasContext *s, int rt) 2565 { 2566 TCGv_i64 gcspr = cpu_gcspr[s->current_el]; 2567 int mmuidx = core_gcs_mem_index(s->mmu_idx); 2568 MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); 2569 TCGv_i64 value = tcg_temp_new_i64(); 2570 TCGLabel *fail_label = 2571 delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSPOPM, rt)); 2572 2573 /* The value at top-of-stack must have low 2 bits clear. */ 2574 tcg_gen_qemu_ld_i64(value, clean_data_tbi(s, gcspr), mmuidx, mop); 2575 tcg_gen_brcondi_i64(TCG_COND_TSTNE, value, 3, fail_label); 2576 2577 /* Complete the pop and return the value. 
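     * Each GCS record is 8 bytes, so advance GCSPR_ELx by 8.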
*/ 2578 tcg_gen_addi_i64(gcspr, gcspr, 8); 2579 tcg_gen_mov_i64(cpu_reg(s, rt), value); 2580 } 2581 2582 static void gen_gcspushx(DisasContext *s) 2583 { 2584 TCGv_i64 gcspr = cpu_gcspr[s->current_el]; 2585 int spsr_idx = aarch64_banked_spsr_index(s->current_el); 2586 int spsr_off = offsetof(CPUARMState, banked_spsr[spsr_idx]); 2587 int elr_off = offsetof(CPUARMState, elr_el[s->current_el]); 2588 int mmuidx = core_gcs_mem_index(s->mmu_idx); 2589 MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); 2590 TCGv_i64 addr = tcg_temp_new_i64(); 2591 TCGv_i64 tmp = tcg_temp_new_i64(); 2592 2593 tcg_gen_addi_i64(addr, gcspr, -8); 2594 tcg_gen_qemu_st_i64(cpu_reg(s, 30), addr, mmuidx, mop); 2595 2596 tcg_gen_ld_i64(tmp, tcg_env, spsr_off); 2597 tcg_gen_addi_i64(addr, addr, -8); 2598 tcg_gen_qemu_st_i64(tmp, addr, mmuidx, mop); 2599 2600 tcg_gen_ld_i64(tmp, tcg_env, elr_off); 2601 tcg_gen_addi_i64(addr, addr, -8); 2602 tcg_gen_qemu_st_i64(tmp, addr, mmuidx, mop); 2603 2604 tcg_gen_addi_i64(addr, addr, -8); 2605 tcg_gen_qemu_st_i64(tcg_constant_i64(0b1001), addr, mmuidx, mop); 2606 2607 tcg_gen_mov_i64(gcspr, addr); 2608 clear_pstate_bits(PSTATE_EXLOCK); 2609 } 2610 2611 static void gen_gcspopcx(DisasContext *s) 2612 { 2613 TCGv_i64 gcspr = cpu_gcspr[s->current_el]; 2614 int spsr_idx = aarch64_banked_spsr_index(s->current_el); 2615 int spsr_off = offsetof(CPUARMState, banked_spsr[spsr_idx]); 2616 int elr_off = offsetof(CPUARMState, elr_el[s->current_el]); 2617 int gcscr_off = offsetof(CPUARMState, cp15.gcscr_el[s->current_el]); 2618 int pstate_off = offsetof(CPUARMState, pstate); 2619 int mmuidx = core_gcs_mem_index(s->mmu_idx); 2620 MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN); 2621 TCGv_i64 addr = tcg_temp_new_i64(); 2622 TCGv_i64 tmp1 = tcg_temp_new_i64(); 2623 TCGv_i64 tmp2 = tcg_temp_new_i64(); 2624 TCGLabel *fail_label = 2625 delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSPOPCX, 31)); 2626 2627 /* The value at top-of-stack must be an exception token. */ 2628 tcg_gen_qemu_ld_i64(tmp1, gcspr, mmuidx, mop); 2629 tcg_gen_brcondi_i64(TCG_COND_NE, tmp1, 0b1001, fail_label); 2630 2631 /* Validate in turn, ELR ... */ 2632 tcg_gen_addi_i64(addr, gcspr, 8); 2633 tcg_gen_qemu_ld_i64(tmp1, addr, mmuidx, mop); 2634 tcg_gen_ld_i64(tmp2, tcg_env, elr_off); 2635 tcg_gen_brcond_i64(TCG_COND_NE, tmp1, tmp2, fail_label); 2636 2637 /* ... SPSR ... */ 2638 tcg_gen_addi_i64(addr, addr, 8); 2639 tcg_gen_qemu_ld_i64(tmp1, addr, mmuidx, mop); 2640 tcg_gen_ld_i64(tmp2, tcg_env, spsr_off); 2641 tcg_gen_brcond_i64(TCG_COND_NE, tmp1, tmp2, fail_label); 2642 2643 /* ... and LR. */ 2644 tcg_gen_addi_i64(addr, addr, 8); 2645 tcg_gen_qemu_ld_i64(tmp1, addr, mmuidx, mop); 2646 tcg_gen_brcond_i64(TCG_COND_NE, tmp1, cpu_reg(s, 30), fail_label); 2647 2648 /* Writeback stack pointer after pop. */ 2649 tcg_gen_addi_i64(gcspr, addr, 8); 2650 2651 /* PSTATE.EXLOCK = GetCurrentEXLOCKEN(). 
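     * i.e. copy GCSCR_ELx.EXLOCKEN into PSTATE.EXLOCK.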
     */
    tcg_gen_ld_i64(tmp1, tcg_env, gcscr_off);
    tcg_gen_ld_i64(tmp2, tcg_env, pstate_off);
    tcg_gen_shri_i64(tmp1, tmp1, ctz64(GCSCR_EXLOCKEN));
    tcg_gen_deposit_i64(tmp2, tmp2, tmp1, ctz64(PSTATE_EXLOCK), 1);
    tcg_gen_st_i64(tmp2, tcg_env, pstate_off);
}

static void gen_gcspopx(DisasContext *s)
{
    TCGv_i64 gcspr = cpu_gcspr[s->current_el];
    int mmuidx = core_gcs_mem_index(s->mmu_idx);
    MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN);
    TCGv_i64 addr = tcg_temp_new_i64();
    TCGv_i64 tmp = tcg_temp_new_i64();
    TCGLabel *fail_label =
        delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSPOPX, 31));

    /* The value at top-of-stack must be an exception token. */
    tcg_gen_qemu_ld_i64(tmp, gcspr, mmuidx, mop);
    tcg_gen_brcondi_i64(TCG_COND_NE, tmp, 0b1001, fail_label);

    /*
     * The other three values in the exception return record
     * are ignored, but are loaded anyway to raise faults.
     */
    tcg_gen_addi_i64(addr, gcspr, 8);
    tcg_gen_qemu_ld_i64(tmp, addr, mmuidx, mop);
    tcg_gen_addi_i64(addr, addr, 8);
    tcg_gen_qemu_ld_i64(tmp, addr, mmuidx, mop);
    tcg_gen_addi_i64(addr, addr, 8);
    tcg_gen_qemu_ld_i64(tmp, addr, mmuidx, mop);
    tcg_gen_addi_i64(gcspr, addr, 8);
}

static void gen_gcsss1(DisasContext *s, int rt)
{
    TCGv_i64 gcspr = cpu_gcspr[s->current_el];
    int mmuidx = core_gcs_mem_index(s->mmu_idx);
    MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN);
    TCGv_i64 inptr = cpu_reg(s, rt);
    TCGv_i64 cmp = tcg_temp_new_i64();
    TCGv_i64 new = tcg_temp_new_i64();
    TCGv_i64 old = tcg_temp_new_i64();
    TCGLabel *fail_label =
        delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSSS1, rt));

    /* Compute the valid cap entry that the new stack must have. */
    tcg_gen_deposit_i64(cmp, inptr, tcg_constant_i64(1), 0, 12);
    /* Compute the in-progress cap entry for the old stack. */
    tcg_gen_deposit_i64(new, gcspr, tcg_constant_i64(5), 0, 3);

    /* Swap the valid cap with the in-progress cap. */
    tcg_gen_atomic_cmpxchg_i64(old, inptr, cmp, new, mmuidx, mop);
    tcg_gen_brcond_i64(TCG_COND_NE, old, cmp, fail_label);

    /* The new stack had a valid cap: change gcspr. */
    tcg_gen_andi_i64(gcspr, inptr, ~7);
}

static void gen_gcsss2(DisasContext *s, int rt)
{
    TCGv_i64 gcspr = cpu_gcspr[s->current_el];
    int mmuidx = core_gcs_mem_index(s->mmu_idx);
    MemOp mop = finalize_memop(s, MO_64 | MO_ALIGN);
    TCGv_i64 outptr = tcg_temp_new_i64();
    TCGv_i64 tmp = tcg_temp_new_i64();
    TCGLabel *fail_label =
        delay_exception(s, EXCP_UDEF, syn_gcs_data_check(GCS_IT_GCSSS2, rt));

    /* Validate that the new stack has an in-progress cap. */
    tcg_gen_qemu_ld_i64(outptr, gcspr, mmuidx, mop);
    tcg_gen_andi_i64(tmp, outptr, 7);
    tcg_gen_brcondi_i64(TCG_COND_NE, tmp, 5, fail_label);

    /* Push a valid cap to the old stack. */
    tcg_gen_andi_i64(outptr, outptr, ~7);
    tcg_gen_addi_i64(outptr, outptr, -8);
    tcg_gen_deposit_i64(tmp, outptr, tcg_constant_i64(1), 0, 12);
    tcg_gen_qemu_st_i64(tmp, outptr, mmuidx, mop);
    tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL);

    /* Pop the in-progress cap from the new stack. */
    tcg_gen_addi_i64(gcspr, gcspr, 8);

    /* Return a pointer to the old stack cap.
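     * (outptr still holds the address at which the valid cap was stored.)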
*/ 2737 tcg_gen_mov_i64(cpu_reg(s, rt), outptr); 2738 } 2739 2740 /* 2741 * Look up @key, returning the cpreg, which must exist. 2742 * Additionally, the new cpreg must also be accessible. 2743 */ 2744 static const ARMCPRegInfo * 2745 redirect_cpreg(DisasContext *s, uint32_t key, bool isread) 2746 { 2747 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 2748 assert(ri); 2749 assert(cp_access_ok(s->current_el, ri, isread)); 2750 return ri; 2751 } 2752 2753 /* MRS - move from system register 2754 * MSR (register) - move to system register 2755 * SYS 2756 * SYSL 2757 * These are all essentially the same insn in 'read' and 'write' 2758 * versions, with varying op0 fields. 2759 */ 2760 static void handle_sys(DisasContext *s, bool isread, 2761 unsigned int op0, unsigned int op1, unsigned int op2, 2762 unsigned int crn, unsigned int crm, unsigned int rt) 2763 { 2764 uint32_t key = ENCODE_AA64_CP_REG(op0, op1, crn, crm, op2); 2765 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 2766 bool need_exit_tb = false; 2767 bool nv_trap_to_el2 = false; 2768 bool nv_redirect_reg = false; 2769 bool skip_fp_access_checks = false; 2770 bool nv2_mem_redirect = false; 2771 TCGv_ptr tcg_ri = NULL; 2772 TCGv_i64 tcg_rt; 2773 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2774 2775 if (crn == 11 || crn == 15) { 2776 /* 2777 * Check for TIDCP trap, which must take precedence over 2778 * the UNDEF for "no such register" etc. 2779 */ 2780 switch (s->current_el) { 2781 case 0: 2782 if (dc_isar_feature(aa64_tidcp1, s)) { 2783 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome)); 2784 } 2785 break; 2786 case 1: 2787 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome)); 2788 break; 2789 } 2790 } 2791 2792 if (!ri) { 2793 /* Unknown register; this might be a guest error or a QEMU 2794 * unimplemented feature. 2795 */ 2796 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " 2797 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", 2798 isread ? "read" : "write", op0, op1, crn, crm, op2); 2799 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2800 return; 2801 } 2802 2803 if (s->nv2 && ri->nv2_redirect_offset) { 2804 /* 2805 * Some registers always redirect to memory; some only do so if 2806 * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in 2807 * pairs which share an offset; see the table in R_CSRPQ). 2808 */ 2809 if (ri->nv2_redirect_offset & NV2_REDIR_NV1) { 2810 nv2_mem_redirect = s->nv1; 2811 } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) { 2812 nv2_mem_redirect = !s->nv1; 2813 } else { 2814 nv2_mem_redirect = true; 2815 } 2816 } 2817 2818 /* Check access permissions */ 2819 if (!cp_access_ok(s->current_el, ri, isread)) { 2820 /* 2821 * FEAT_NV/NV2 handling does not do the usual FP access checks 2822 * for registers only accessible at EL2 (though it *does* do them 2823 * for registers accessible at EL1). 2824 */ 2825 skip_fp_access_checks = true; 2826 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) { 2827 /* 2828 * This is one of the few EL2 registers which should redirect 2829 * to the equivalent EL1 register. We do that after running 2830 * the EL2 register's accessfn. 2831 */ 2832 nv_redirect_reg = true; 2833 assert(!nv2_mem_redirect); 2834 } else if (nv2_mem_redirect) { 2835 /* 2836 * NV2 redirect-to-memory takes precedence over trap to EL2 or 2837 * UNDEF to EL1. 
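             * Nothing is generated here; the redirected memory access is
             * emitted further down.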
             */
        } else if (s->nv && arm_cpreg_traps_in_nv(ri)) {
            /*
             * This register / instruction exists and is an EL2 register, so
             * we must trap to EL2 if accessed in nested virtualization EL1
             * instead of UNDEFing. We'll do that after the usual access checks.
             * (This makes a difference only for a couple of registers like
             * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority
             * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have
             * an accessfn which does nothing when called from EL1, because
             * the trap-to-EL3 controls which would apply to that register
             * at EL2 don't take priority over the FEAT_NV trap-to-EL2.)
             */
            nv_trap_to_el2 = true;
        } else {
            gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
            return;
        }
    }

    if (ri->vhe_redir_to_el2 && s->current_el == 2 && s->e2h) {
        /*
         * This is one of the FOO_EL1 registers which redirect to FOO_EL2
         * from EL2 when HCR_EL2.E2H is set.
         */
        key = ri->vhe_redir_to_el2;
        ri = redirect_cpreg(s, key, isread);
    } else if (ri->vhe_redir_to_el01 && s->current_el >= 2) {
        /*
         * This is one of the FOO_EL12 or FOO_EL02 registers.
         * With !E2H, they all UNDEF.
         * With E2H, from EL2 or EL3, they redirect to FOO_EL1/FOO_EL0.
         */
        if (!s->e2h) {
            gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
            return;
        }
        key = ri->vhe_redir_to_el01;
        ri = redirect_cpreg(s, key, isread);
    }

    if (ri->accessfn || (ri->fgt && s->fgt_active)) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        gen_a64_update_pc(s, 0);
        tcg_ri = tcg_temp_new_ptr();
        gen_helper_access_check_cp_reg(tcg_ri, tcg_env,
                                       tcg_constant_i32(key),
                                       tcg_constant_i32(syndrome),
                                       tcg_constant_i32(isread));
    } else if (ri->type & ARM_CP_RAISES_EXC) {
        /*
         * The readfn or writefn might raise an exception;
         * synchronize the CPU state in case it does.
         */
        gen_a64_update_pc(s, 0);
    }

    if (!skip_fp_access_checks) {
        if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
            return;
        } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
            return;
        } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
            return;
        }
    }

    if (nv_trap_to_el2) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2);
        return;
    }

    if (nv_redirect_reg) {
        /*
         * FEAT_NV2 redirection of an EL2 register to an EL1 register.
         * Conveniently in all cases the encoding of the EL1 register is
         * identical to the EL2 register except that opc1 is 0.
         * Get the reginfo for the EL1 register to use for the actual access.
         * We don't use the EL1 register's access function, and
         * fine-grained-traps on EL1 also do not apply here.
         */
        key = ENCODE_AA64_CP_REG(op0, 0, crn, crm, op2);
        ri = redirect_cpreg(s, key, isread);
        /*
         * We might not have done an update_pc earlier, so check we don't
         * need it. We could support this in future if necessary.
         */
        assert(!(ri->type & ARM_CP_RAISES_EXC));
    }

    if (nv2_mem_redirect) {
        /*
         * This system register is being redirected into an EL2 memory access.
         * This means it is not an IO operation, doesn't change hflags,
         * and need not end the TB, because it has no side effects.
2935 * 2936 * The access is 64-bit single copy atomic, guaranteed aligned because 2937 * of the definition of VCNR_EL2. Its endianness depends on 2938 * SCTLR_EL2.EE, not on the data endianness of EL1. 2939 * It is done under either the EL2 translation regime or the EL2&0 2940 * translation regime, depending on HCR_EL2.E2H. It behaves as if 2941 * PSTATE.PAN is 0. 2942 */ 2943 TCGv_i64 ptr = tcg_temp_new_i64(); 2944 MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN; 2945 ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2; 2946 int memidx = arm_to_core_mmu_idx(armmemidx); 2947 uint32_t syn; 2948 2949 mop |= (s->nv2_mem_be ? MO_BE : MO_LE); 2950 2951 tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2)); 2952 tcg_gen_addi_i64(ptr, ptr, 2953 (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK)); 2954 tcg_rt = cpu_reg(s, rt); 2955 2956 syn = syn_data_abort_vncr(0, !isread, 0); 2957 disas_set_insn_syndrome(s, syn); 2958 if (isread) { 2959 tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop); 2960 } else { 2961 tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop); 2962 } 2963 return; 2964 } 2965 2966 /* Handle special cases first */ 2967 switch (ri->type & ARM_CP_SPECIAL_MASK) { 2968 case 0: 2969 break; 2970 case ARM_CP_NOP: 2971 return; 2972 case ARM_CP_NZCV: 2973 tcg_rt = cpu_reg(s, rt); 2974 if (isread) { 2975 gen_get_nzcv(tcg_rt); 2976 } else { 2977 gen_set_nzcv(tcg_rt); 2978 } 2979 return; 2980 case ARM_CP_CURRENTEL: 2981 { 2982 /* 2983 * Reads as current EL value from pstate, which is 2984 * guaranteed to be constant by the tb flags. 2985 * For nested virt we should report EL2. 2986 */ 2987 int el = s->nv ? 2 : s->current_el; 2988 tcg_rt = cpu_reg(s, rt); 2989 tcg_gen_movi_i64(tcg_rt, el << 2); 2990 return; 2991 } 2992 case ARM_CP_DC_ZVA: 2993 /* Writes clear the aligned block of memory which rt points into. */ 2994 if (s->mte_active[0]) { 2995 int desc = 0; 2996 2997 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 2998 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 2999 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 3000 3001 tcg_rt = tcg_temp_new_i64(); 3002 gen_helper_mte_check_zva(tcg_rt, tcg_env, 3003 tcg_constant_i32(desc), cpu_reg(s, rt)); 3004 } else { 3005 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 3006 } 3007 gen_helper_dc_zva(tcg_env, tcg_rt); 3008 return; 3009 case ARM_CP_DC_GVA: 3010 { 3011 TCGv_i64 clean_addr, tag; 3012 3013 /* 3014 * DC_GVA, like DC_ZVA, requires that we supply the original 3015 * pointer for an invalid page. Probe that address first. 3016 */ 3017 tcg_rt = cpu_reg(s, rt); 3018 clean_addr = clean_data_tbi(s, tcg_rt); 3019 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 3020 3021 if (s->ata[0]) { 3022 /* Extract the tag from the register to match STZGM. */ 3023 tag = tcg_temp_new_i64(); 3024 tcg_gen_shri_i64(tag, tcg_rt, 56); 3025 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 3026 } 3027 } 3028 return; 3029 case ARM_CP_DC_GZVA: 3030 { 3031 TCGv_i64 clean_addr, tag; 3032 3033 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 3034 tcg_rt = cpu_reg(s, rt); 3035 clean_addr = clean_data_tbi(s, tcg_rt); 3036 gen_helper_dc_zva(tcg_env, clean_addr); 3037 3038 if (s->ata[0]) { 3039 /* Extract the tag from the register to match STZGM. 
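             * The allocation tag is in bits [59:56] of the register value.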
*/ 3040 tag = tcg_temp_new_i64(); 3041 tcg_gen_shri_i64(tag, tcg_rt, 56); 3042 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 3043 } 3044 } 3045 return; 3046 case ARM_CP_GCSPUSHM: 3047 if (s->gcs_en) { 3048 gen_add_gcs_record(s, cpu_reg(s, rt)); 3049 } 3050 return; 3051 case ARM_CP_GCSPOPM: 3052 /* Note that X[rt] is unchanged if !GCSEnabled. */ 3053 if (s->gcs_en) { 3054 gen_gcspopm(s, rt); 3055 } 3056 return; 3057 case ARM_CP_GCSPUSHX: 3058 /* Choose the CONSTRAINED UNPREDICTABLE for UNDEF. */ 3059 if (rt != 31) { 3060 unallocated_encoding(s); 3061 } else if (s->gcs_en) { 3062 gen_gcspushx(s); 3063 } 3064 return; 3065 case ARM_CP_GCSPOPCX: 3066 /* Choose the CONSTRAINED UNPREDICTABLE for UNDEF. */ 3067 if (rt != 31) { 3068 unallocated_encoding(s); 3069 } else if (s->gcs_en) { 3070 gen_gcspopcx(s); 3071 } 3072 return; 3073 case ARM_CP_GCSPOPX: 3074 /* Choose the CONSTRAINED UNPREDICTABLE for UNDEF. */ 3075 if (rt != 31) { 3076 unallocated_encoding(s); 3077 } else if (s->gcs_en) { 3078 gen_gcspopx(s); 3079 } 3080 return; 3081 case ARM_CP_GCSSS1: 3082 if (s->gcs_en) { 3083 gen_gcsss1(s, rt); 3084 } 3085 return; 3086 case ARM_CP_GCSSS2: 3087 if (s->gcs_en) { 3088 gen_gcsss2(s, rt); 3089 } 3090 return; 3091 default: 3092 g_assert_not_reached(); 3093 } 3094 3095 if (ri->type & ARM_CP_IO) { 3096 /* I/O operations must end the TB here (whether read or write) */ 3097 need_exit_tb = translator_io_start(&s->base); 3098 } 3099 3100 tcg_rt = cpu_reg(s, rt); 3101 3102 if (isread) { 3103 if (ri->type & ARM_CP_CONST) { 3104 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 3105 } else if (ri->readfn) { 3106 if (!tcg_ri) { 3107 tcg_ri = gen_lookup_cp_reg(key); 3108 } 3109 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri); 3110 } else { 3111 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset); 3112 } 3113 } else { 3114 if (ri->type & ARM_CP_CONST) { 3115 /* If not forbidden by access permissions, treat as WI */ 3116 return; 3117 } else if (ri->writefn) { 3118 if (!tcg_ri) { 3119 tcg_ri = gen_lookup_cp_reg(key); 3120 } 3121 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt); 3122 } else { 3123 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset); 3124 } 3125 } 3126 3127 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 3128 /* 3129 * A write to any coprocessor register that ends a TB 3130 * must rebuild the hflags for the next TB. 3131 */ 3132 gen_rebuild_hflags(s); 3133 /* 3134 * We default to ending the TB on a coprocessor register write, 3135 * but allow this to be suppressed by the register definition 3136 * (usually only necessary to work around guest bugs). 3137 */ 3138 need_exit_tb = true; 3139 } 3140 if (need_exit_tb) { 3141 s->base.is_jmp = DISAS_UPDATE_EXIT; 3142 } 3143 } 3144 3145 static bool trans_SYS(DisasContext *s, arg_SYS *a) 3146 { 3147 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); 3148 return true; 3149 } 3150 3151 static bool trans_SVC(DisasContext *s, arg_i *a) 3152 { 3153 /* 3154 * For SVC, HVC and SMC we advance the single-step state 3155 * machine before taking the exception. This is architecturally 3156 * mandated, to ensure that single-stepping a system call 3157 * instruction works properly. 3158 */ 3159 uint32_t syndrome = syn_aa64_svc(a->imm); 3160 if (s->fgt_svc) { 3161 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 3162 return true; 3163 } 3164 gen_ss_advance(s); 3165 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 3166 return true; 3167 } 3168 3169 static bool trans_HVC(DisasContext *s, arg_i *a) 3170 { 3171 int target_el = s->current_el == 3 ? 
3 : 2; 3172 3173 if (s->current_el == 0) { 3174 unallocated_encoding(s); 3175 return true; 3176 } 3177 /* 3178 * The pre HVC helper handles cases when HVC gets trapped 3179 * as an undefined insn by runtime configuration. 3180 */ 3181 gen_a64_update_pc(s, 0); 3182 gen_helper_pre_hvc(tcg_env); 3183 /* Architecture requires ss advance before we do the actual work */ 3184 gen_ss_advance(s); 3185 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el); 3186 return true; 3187 } 3188 3189 static bool trans_SMC(DisasContext *s, arg_i *a) 3190 { 3191 if (s->current_el == 0) { 3192 unallocated_encoding(s); 3193 return true; 3194 } 3195 gen_a64_update_pc(s, 0); 3196 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm))); 3197 /* Architecture requires ss advance before we do the actual work */ 3198 gen_ss_advance(s); 3199 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); 3200 return true; 3201 } 3202 3203 static bool trans_BRK(DisasContext *s, arg_i *a) 3204 { 3205 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); 3206 return true; 3207 } 3208 3209 static bool trans_HLT(DisasContext *s, arg_i *a) 3210 { 3211 /* 3212 * HLT. This has two purposes. 3213 * Architecturally, it is an external halting debug instruction. 3214 * Since QEMU doesn't implement external debug, we treat this as 3215 * it is required for halting debug disabled: it will UNDEF. 3216 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 3217 */ 3218 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { 3219 gen_exception_internal_insn(s, EXCP_SEMIHOST); 3220 } else { 3221 unallocated_encoding(s); 3222 } 3223 return true; 3224 } 3225 3226 /* 3227 * Load/Store exclusive instructions are implemented by remembering 3228 * the value/address loaded, and seeing if these are the same 3229 * when the store is performed. This is not actually the architecturally 3230 * mandated semantics, but it works for typical guest code sequences 3231 * and avoids having to monitor regular stores. 3232 * 3233 * The store exclusive uses the atomic cmpxchg primitives to avoid 3234 * races in multi-threaded linux-user and when MTTCG softmmu is 3235 * enabled. 
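 * Note that the cmpxchg cannot detect an intervening store of the same
 * value by another CPU; such a store would clear a real exclusive
 * monitor but goes unnoticed here.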
3236 */ 3237 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, 3238 int size, bool is_pair) 3239 { 3240 int idx = get_mem_index(s); 3241 TCGv_i64 dirty_addr, clean_addr; 3242 MemOp memop = check_atomic_align(s, rn, size + is_pair); 3243 3244 s->is_ldex = true; 3245 dirty_addr = cpu_reg_sp(s, rn); 3246 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); 3247 3248 g_assert(size <= 3); 3249 if (is_pair) { 3250 g_assert(size >= 2); 3251 if (size == 2) { 3252 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 3253 if (s->be_data == MO_LE) { 3254 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 3255 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 3256 } else { 3257 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 3258 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 3259 } 3260 } else { 3261 TCGv_i128 t16 = tcg_temp_new_i128(); 3262 3263 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); 3264 3265 if (s->be_data == MO_LE) { 3266 tcg_gen_extr_i128_i64(cpu_exclusive_val, 3267 cpu_exclusive_high, t16); 3268 } else { 3269 tcg_gen_extr_i128_i64(cpu_exclusive_high, 3270 cpu_exclusive_val, t16); 3271 } 3272 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 3273 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 3274 } 3275 } else { 3276 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 3277 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 3278 } 3279 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); 3280 } 3281 3282 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 3283 int rn, int size, int is_pair) 3284 { 3285 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 3286 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 3287 * [addr] = {Rt}; 3288 * if (is_pair) { 3289 * [addr + datasize] = {Rt2}; 3290 * } 3291 * {Rd} = 0; 3292 * } else { 3293 * {Rd} = 1; 3294 * } 3295 * env->exclusive_addr = -1; 3296 */ 3297 TCGLabel *fail_label = gen_new_label(); 3298 TCGLabel *done_label = gen_new_label(); 3299 TCGv_i64 tmp, clean_addr; 3300 MemOp memop; 3301 3302 /* 3303 * FIXME: We are out of spec here. We have recorded only the address 3304 * from load_exclusive, not the entire range, and we assume that the 3305 * size of the access on both sides match. The architecture allows the 3306 * store to be smaller than the load, so long as the stored bytes are 3307 * within the range recorded by the load. 3308 */ 3309 3310 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */ 3311 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); 3312 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); 3313 3314 /* 3315 * The write, and any associated faults, only happen if the virtual 3316 * and physical addresses pass the exclusive monitor check. These 3317 * faults are exceedingly unlikely, because normally the guest uses 3318 * the exact same address register for the load_exclusive, and we 3319 * would have recognized these faults there. 3320 * 3321 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an 3322 * unaligned 4-byte write within the range of an aligned 8-byte load. 3323 * With LSE2, the store would need to cross a 16-byte boundary when the 3324 * load did not, which would mean the store is outside the range 3325 * recorded for the monitor, which would have failed a corrected monitor 3326 * check above. 
For now, we assume no size change and retain the 3327 * MO_ALIGN to let tcg know what we checked in the load_exclusive. 3328 * 3329 * It is possible to trigger an MTE fault, by performing the load with 3330 * a virtual address with a valid tag and performing the store with the 3331 * same virtual address and a different invalid tag. 3332 */ 3333 memop = size + is_pair; 3334 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { 3335 memop |= MO_ALIGN; 3336 } 3337 memop = finalize_memop(s, memop); 3338 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 3339 3340 tmp = tcg_temp_new_i64(); 3341 if (is_pair) { 3342 if (size == 2) { 3343 if (s->be_data == MO_LE) { 3344 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 3345 } else { 3346 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 3347 } 3348 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 3349 cpu_exclusive_val, tmp, 3350 get_mem_index(s), memop); 3351 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 3352 } else { 3353 TCGv_i128 t16 = tcg_temp_new_i128(); 3354 TCGv_i128 c16 = tcg_temp_new_i128(); 3355 TCGv_i64 a, b; 3356 3357 if (s->be_data == MO_LE) { 3358 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 3359 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 3360 cpu_exclusive_high); 3361 } else { 3362 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 3363 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 3364 cpu_exclusive_val); 3365 } 3366 3367 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 3368 get_mem_index(s), memop); 3369 3370 a = tcg_temp_new_i64(); 3371 b = tcg_temp_new_i64(); 3372 if (s->be_data == MO_LE) { 3373 tcg_gen_extr_i128_i64(a, b, t16); 3374 } else { 3375 tcg_gen_extr_i128_i64(b, a, t16); 3376 } 3377 3378 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 3379 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 3380 tcg_gen_or_i64(tmp, a, b); 3381 3382 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 3383 } 3384 } else { 3385 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 3386 cpu_reg(s, rt), get_mem_index(s), memop); 3387 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 3388 } 3389 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 3390 tcg_gen_br(done_label); 3391 3392 gen_set_label(fail_label); 3393 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 3394 gen_set_label(done_label); 3395 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 3396 } 3397 3398 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 3399 int rn, int size) 3400 { 3401 TCGv_i64 tcg_rs = cpu_reg(s, rs); 3402 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3403 int memidx = get_mem_index(s); 3404 TCGv_i64 clean_addr; 3405 MemOp memop; 3406 3407 if (rn == 31) { 3408 gen_check_sp_alignment(s); 3409 } 3410 memop = check_atomic_align(s, rn, size); 3411 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 3412 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, 3413 memidx, memop); 3414 } 3415 3416 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 3417 int rn, int size) 3418 { 3419 TCGv_i64 s1 = cpu_reg(s, rs); 3420 TCGv_i64 s2 = cpu_reg(s, rs + 1); 3421 TCGv_i64 t1 = cpu_reg(s, rt); 3422 TCGv_i64 t2 = cpu_reg(s, rt + 1); 3423 TCGv_i64 clean_addr; 3424 int memidx = get_mem_index(s); 3425 MemOp memop; 3426 3427 if (rn == 31) { 3428 gen_check_sp_alignment(s); 3429 } 3430 3431 /* This is a single atomic access, despite the "pair". 
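     * so the alignment check below is for the doubled access size.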
*/ 3432 memop = check_atomic_align(s, rn, size + 1); 3433 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 3434 3435 if (size == 2) { 3436 TCGv_i64 cmp = tcg_temp_new_i64(); 3437 TCGv_i64 val = tcg_temp_new_i64(); 3438 3439 if (s->be_data == MO_LE) { 3440 tcg_gen_concat32_i64(val, t1, t2); 3441 tcg_gen_concat32_i64(cmp, s1, s2); 3442 } else { 3443 tcg_gen_concat32_i64(val, t2, t1); 3444 tcg_gen_concat32_i64(cmp, s2, s1); 3445 } 3446 3447 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); 3448 3449 if (s->be_data == MO_LE) { 3450 tcg_gen_extr32_i64(s1, s2, cmp); 3451 } else { 3452 tcg_gen_extr32_i64(s2, s1, cmp); 3453 } 3454 } else { 3455 TCGv_i128 cmp = tcg_temp_new_i128(); 3456 TCGv_i128 val = tcg_temp_new_i128(); 3457 3458 if (s->be_data == MO_LE) { 3459 tcg_gen_concat_i64_i128(val, t1, t2); 3460 tcg_gen_concat_i64_i128(cmp, s1, s2); 3461 } else { 3462 tcg_gen_concat_i64_i128(val, t2, t1); 3463 tcg_gen_concat_i64_i128(cmp, s2, s1); 3464 } 3465 3466 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); 3467 3468 if (s->be_data == MO_LE) { 3469 tcg_gen_extr_i128_i64(s1, s2, cmp); 3470 } else { 3471 tcg_gen_extr_i128_i64(s2, s1, cmp); 3472 } 3473 } 3474 } 3475 3476 /* 3477 * Compute the ISS.SF bit for syndrome information if an exception 3478 * is taken on a load or store. This indicates whether the instruction 3479 * is accessing a 32-bit or 64-bit register. This logic is derived 3480 * from the ARMv8 specs for LDR (Shared decode for all encodings). 3481 */ 3482 static bool ldst_iss_sf(int size, bool sign, bool ext) 3483 { 3484 3485 if (sign) { 3486 /* 3487 * Signed loads are 64 bit results if we are not going to 3488 * do a zero-extend from 32 to 64 after the load. 3489 * (For a store, sign and ext are always false.) 3490 */ 3491 return !ext; 3492 } else { 3493 /* Unsigned loads/stores work at the specified size */ 3494 return size == MO_64; 3495 } 3496 } 3497 3498 static bool trans_STXR(DisasContext *s, arg_stxr *a) 3499 { 3500 if (a->rn == 31) { 3501 gen_check_sp_alignment(s); 3502 } 3503 if (a->lasr) { 3504 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3505 } 3506 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); 3507 return true; 3508 } 3509 3510 static bool trans_LDXR(DisasContext *s, arg_stxr *a) 3511 { 3512 if (a->rn == 31) { 3513 gen_check_sp_alignment(s); 3514 } 3515 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); 3516 if (a->lasr) { 3517 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3518 } 3519 return true; 3520 } 3521 3522 static bool trans_STLR(DisasContext *s, arg_stlr *a) 3523 { 3524 TCGv_i64 clean_addr; 3525 MemOp memop; 3526 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3527 3528 /* 3529 * StoreLORelease is the same as Store-Release for QEMU, but 3530 * needs the feature-test. 3531 */ 3532 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3533 return false; 3534 } 3535 /* Generate ISS for non-exclusive accesses including LASR. 
*/ 3536 if (a->rn == 31) { 3537 gen_check_sp_alignment(s); 3538 } 3539 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3540 memop = check_ordered_align(s, a->rn, 0, true, a->sz); 3541 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3542 true, a->rn != 31, memop); 3543 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, 3544 iss_sf, a->lasr); 3545 return true; 3546 } 3547 3548 static bool trans_LDAR(DisasContext *s, arg_stlr *a) 3549 { 3550 TCGv_i64 clean_addr; 3551 MemOp memop; 3552 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3553 3554 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 3555 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3556 return false; 3557 } 3558 /* Generate ISS for non-exclusive accesses including LASR. */ 3559 if (a->rn == 31) { 3560 gen_check_sp_alignment(s); 3561 } 3562 memop = check_ordered_align(s, a->rn, 0, false, a->sz); 3563 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3564 false, a->rn != 31, memop); 3565 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, 3566 a->rt, iss_sf, a->lasr); 3567 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3568 return true; 3569 } 3570 3571 static bool trans_STXP(DisasContext *s, arg_stxr *a) 3572 { 3573 if (a->rn == 31) { 3574 gen_check_sp_alignment(s); 3575 } 3576 if (a->lasr) { 3577 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3578 } 3579 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); 3580 return true; 3581 } 3582 3583 static bool trans_LDXP(DisasContext *s, arg_stxr *a) 3584 { 3585 if (a->rn == 31) { 3586 gen_check_sp_alignment(s); 3587 } 3588 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); 3589 if (a->lasr) { 3590 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3591 } 3592 return true; 3593 } 3594 3595 static bool trans_CASP(DisasContext *s, arg_CASP *a) 3596 { 3597 if (!dc_isar_feature(aa64_lse, s)) { 3598 return false; 3599 } 3600 if (((a->rt | a->rs) & 1) != 0) { 3601 return false; 3602 } 3603 3604 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); 3605 return true; 3606 } 3607 3608 static bool trans_CAS(DisasContext *s, arg_CAS *a) 3609 { 3610 if (!dc_isar_feature(aa64_lse, s)) { 3611 return false; 3612 } 3613 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); 3614 return true; 3615 } 3616 3617 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) 3618 { 3619 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); 3620 TCGv_i64 tcg_rt = cpu_reg(s, a->rt); 3621 TCGv_i64 clean_addr = tcg_temp_new_i64(); 3622 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3623 3624 gen_pc_plus_diff(s, clean_addr, a->imm); 3625 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3626 false, true, a->rt, iss_sf, false); 3627 return true; 3628 } 3629 3630 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) 3631 { 3632 /* Load register (literal), vector version */ 3633 TCGv_i64 clean_addr; 3634 MemOp memop; 3635 3636 if (!fp_access_check(s)) { 3637 return true; 3638 } 3639 memop = finalize_memop_asimd(s, a->sz); 3640 clean_addr = tcg_temp_new_i64(); 3641 gen_pc_plus_diff(s, clean_addr, a->imm); 3642 do_fp_ld(s, a->rt, clean_addr, memop); 3643 return true; 3644 } 3645 3646 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, 3647 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3648 uint64_t offset, bool is_store, MemOp mop) 3649 { 3650 if (a->rn == 31) { 3651 gen_check_sp_alignment(s); 3652 } 3653 3654 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3655 if (!a->p) { 3656 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3657 } 3658 3659 *clean_addr = gen_mte_checkN(s, 
*dirty_addr, is_store, 3660 (a->w || a->rn != 31), 2 << a->sz, mop); 3661 } 3662 3663 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, 3664 TCGv_i64 dirty_addr, uint64_t offset) 3665 { 3666 if (a->w) { 3667 if (a->p) { 3668 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3669 } 3670 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3671 } 3672 } 3673 3674 static bool trans_STP(DisasContext *s, arg_ldstpair *a) 3675 { 3676 uint64_t offset = a->imm << a->sz; 3677 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3678 MemOp mop = finalize_memop(s, a->sz); 3679 3680 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3681 tcg_rt = cpu_reg(s, a->rt); 3682 tcg_rt2 = cpu_reg(s, a->rt2); 3683 /* 3684 * We built mop above for the single logical access -- rebuild it 3685 * now for the paired operation. 3686 * 3687 * With LSE2, non-sign-extending pairs are treated atomically if 3688 * aligned, and if unaligned one of the pair will be completely 3689 * within a 16-byte block and that element will be atomic. 3690 * Otherwise each element is separately atomic. 3691 * In all cases, issue one operation with the correct atomicity. 3692 */ 3693 mop = a->sz + 1; 3694 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3695 mop |= (s->align_mem ? 0 : MO_ALIGN_TLB_ONLY); 3696 mop = finalize_memop_pair(s, mop); 3697 if (a->sz == 2) { 3698 TCGv_i64 tmp = tcg_temp_new_i64(); 3699 3700 if (s->be_data == MO_LE) { 3701 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); 3702 } else { 3703 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); 3704 } 3705 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); 3706 } else { 3707 TCGv_i128 tmp = tcg_temp_new_i128(); 3708 3709 if (s->be_data == MO_LE) { 3710 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3711 } else { 3712 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3713 } 3714 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3715 } 3716 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3717 return true; 3718 } 3719 3720 static bool trans_LDP(DisasContext *s, arg_ldstpair *a) 3721 { 3722 uint64_t offset = a->imm << a->sz; 3723 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3724 MemOp mop = finalize_memop(s, a->sz); 3725 3726 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3727 tcg_rt = cpu_reg(s, a->rt); 3728 tcg_rt2 = cpu_reg(s, a->rt2); 3729 3730 /* 3731 * We built mop above for the single logical access -- rebuild it 3732 * now for the paired operation. 3733 * 3734 * With LSE2, non-sign-extending pairs are treated atomically if 3735 * aligned, and if unaligned one of the pair will be completely 3736 * within a 16-byte block and that element will be atomic. 3737 * Otherwise each element is separately atomic. 3738 * In all cases, issue one operation with the correct atomicity. 3739 * 3740 * This treats sign-extending loads like zero-extending loads, 3741 * since that reuses the most code below. 3742 */ 3743 mop = a->sz + 1; 3744 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3745 mop |= (s->align_mem ? 0 : MO_ALIGN_TLB_ONLY); 3746 mop = finalize_memop_pair(s, mop); 3747 if (a->sz == 2) { 3748 int o2 = s->be_data == MO_LE ? 
32 : 0; 3749 int o1 = o2 ^ 32; 3750 3751 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); 3752 if (a->sign) { 3753 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); 3754 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); 3755 } else { 3756 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); 3757 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); 3758 } 3759 } else { 3760 TCGv_i128 tmp = tcg_temp_new_i128(); 3761 3762 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); 3763 if (s->be_data == MO_LE) { 3764 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); 3765 } else { 3766 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); 3767 } 3768 } 3769 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3770 return true; 3771 } 3772 3773 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) 3774 { 3775 uint64_t offset = a->imm << a->sz; 3776 TCGv_i64 clean_addr, dirty_addr; 3777 MemOp mop; 3778 3779 if (!fp_access_check(s)) { 3780 return true; 3781 } 3782 3783 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3784 mop = finalize_memop_asimd(s, a->sz); 3785 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3786 do_fp_st(s, a->rt, clean_addr, mop); 3787 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3788 do_fp_st(s, a->rt2, clean_addr, mop); 3789 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3790 return true; 3791 } 3792 3793 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) 3794 { 3795 uint64_t offset = a->imm << a->sz; 3796 TCGv_i64 clean_addr, dirty_addr; 3797 MemOp mop; 3798 3799 if (!fp_access_check(s)) { 3800 return true; 3801 } 3802 3803 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3804 mop = finalize_memop_asimd(s, a->sz); 3805 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3806 do_fp_ld(s, a->rt, clean_addr, mop); 3807 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3808 do_fp_ld(s, a->rt2, clean_addr, mop); 3809 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3810 return true; 3811 } 3812 3813 static bool trans_STGP(DisasContext *s, arg_ldstpair *a) 3814 { 3815 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3816 uint64_t offset = a->imm << LOG2_TAG_GRANULE; 3817 MemOp mop; 3818 TCGv_i128 tmp; 3819 3820 /* STGP only comes in one size. */ 3821 tcg_debug_assert(a->sz == MO_64); 3822 3823 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3824 return false; 3825 } 3826 3827 if (a->rn == 31) { 3828 gen_check_sp_alignment(s); 3829 } 3830 3831 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3832 if (!a->p) { 3833 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3834 } 3835 3836 clean_addr = clean_data_tbi(s, dirty_addr); 3837 tcg_rt = cpu_reg(s, a->rt); 3838 tcg_rt2 = cpu_reg(s, a->rt2); 3839 3840 /* 3841 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, 3842 * and one tag operation. We implement it as one single aligned 16-byte 3843 * memory operation for convenience. Note that the alignment ensures 3844 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. 3845 */ 3846 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); 3847 3848 tmp = tcg_temp_new_i128(); 3849 if (s->be_data == MO_LE) { 3850 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3851 } else { 3852 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3853 } 3854 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3855 3856 /* Perform the tag store, if tag access enabled. 
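     * The tag written is the logical tag of the address register itself,
     * which is why dirty_addr is passed as both arguments to the helper.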
*/ 3857 if (s->ata[0]) { 3858 if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3859 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr); 3860 } else { 3861 gen_helper_stg(tcg_env, dirty_addr, dirty_addr); 3862 } 3863 } 3864 3865 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3866 return true; 3867 } 3868 3869 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, 3870 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3871 uint64_t offset, bool is_store, MemOp mop) 3872 { 3873 int memidx; 3874 3875 if (a->rn == 31) { 3876 gen_check_sp_alignment(s); 3877 } 3878 3879 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3880 if (!a->p) { 3881 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3882 } 3883 memidx = core_a64_user_mem_index(s, a->unpriv); 3884 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, 3885 a->w || a->rn != 31, 3886 mop, a->unpriv, memidx); 3887 } 3888 3889 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, 3890 TCGv_i64 dirty_addr, uint64_t offset) 3891 { 3892 if (a->w) { 3893 if (a->p) { 3894 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3895 } 3896 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3897 } 3898 } 3899 3900 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) 3901 { 3902 bool iss_sf, iss_valid = !a->w; 3903 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3904 int memidx = core_a64_user_mem_index(s, a->unpriv); 3905 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3906 3907 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3908 3909 tcg_rt = cpu_reg(s, a->rt); 3910 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3911 3912 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, 3913 iss_valid, a->rt, iss_sf, false); 3914 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3915 return true; 3916 } 3917 3918 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) 3919 { 3920 bool iss_sf, iss_valid = !a->w; 3921 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3922 int memidx = core_a64_user_mem_index(s, a->unpriv); 3923 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3924 3925 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3926 3927 tcg_rt = cpu_reg(s, a->rt); 3928 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3929 3930 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, 3931 a->ext, memidx, iss_valid, a->rt, iss_sf, false); 3932 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3933 return true; 3934 } 3935 3936 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) 3937 { 3938 TCGv_i64 clean_addr, dirty_addr; 3939 MemOp mop; 3940 3941 if (!fp_access_check(s)) { 3942 return true; 3943 } 3944 mop = finalize_memop_asimd(s, a->sz); 3945 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3946 do_fp_st(s, a->rt, clean_addr, mop); 3947 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3948 return true; 3949 } 3950 3951 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) 3952 { 3953 TCGv_i64 clean_addr, dirty_addr; 3954 MemOp mop; 3955 3956 if (!fp_access_check(s)) { 3957 return true; 3958 } 3959 mop = finalize_memop_asimd(s, a->sz); 3960 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3961 do_fp_ld(s, a->rt, clean_addr, mop); 3962 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3963 return true; 3964 } 3965 3966 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, 3967 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3968 bool is_store, MemOp memop) 3969 { 3970 TCGv_i64 tcg_rm; 3971 3972 if (a->rn == 31) { 3973 
gen_check_sp_alignment(s); 3974 } 3975 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3976 3977 tcg_rm = read_cpu_reg(s, a->rm, 1); 3978 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); 3979 3980 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); 3981 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); 3982 } 3983 3984 static bool trans_LDR(DisasContext *s, arg_ldst *a) 3985 { 3986 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3987 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3988 MemOp memop; 3989 3990 if (extract32(a->opt, 1, 1) == 0) { 3991 return false; 3992 } 3993 3994 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3995 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3996 tcg_rt = cpu_reg(s, a->rt); 3997 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3998 a->ext, true, a->rt, iss_sf, false); 3999 return true; 4000 } 4001 4002 static bool trans_STR(DisasContext *s, arg_ldst *a) 4003 { 4004 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 4005 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 4006 MemOp memop; 4007 4008 if (extract32(a->opt, 1, 1) == 0) { 4009 return false; 4010 } 4011 4012 memop = finalize_memop(s, a->sz); 4013 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 4014 tcg_rt = cpu_reg(s, a->rt); 4015 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); 4016 return true; 4017 } 4018 4019 static bool trans_LDR_v(DisasContext *s, arg_ldst *a) 4020 { 4021 TCGv_i64 clean_addr, dirty_addr; 4022 MemOp memop; 4023 4024 if (extract32(a->opt, 1, 1) == 0) { 4025 return false; 4026 } 4027 4028 if (!fp_access_check(s)) { 4029 return true; 4030 } 4031 4032 memop = finalize_memop_asimd(s, a->sz); 4033 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 4034 do_fp_ld(s, a->rt, clean_addr, memop); 4035 return true; 4036 } 4037 4038 static bool trans_STR_v(DisasContext *s, arg_ldst *a) 4039 { 4040 TCGv_i64 clean_addr, dirty_addr; 4041 MemOp memop; 4042 4043 if (extract32(a->opt, 1, 1) == 0) { 4044 return false; 4045 } 4046 4047 if (!fp_access_check(s)) { 4048 return true; 4049 } 4050 4051 memop = finalize_memop_asimd(s, a->sz); 4052 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 4053 do_fp_st(s, a->rt, clean_addr, memop); 4054 return true; 4055 } 4056 4057 4058 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, 4059 int sign, bool invert) 4060 { 4061 MemOp mop = a->sz | sign; 4062 TCGv_i64 clean_addr, tcg_rs, tcg_rt; 4063 4064 if (a->rn == 31) { 4065 gen_check_sp_alignment(s); 4066 } 4067 mop = check_atomic_align(s, a->rn, mop); 4068 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 4069 a->rn != 31, mop); 4070 tcg_rs = read_cpu_reg(s, a->rs, true); 4071 tcg_rt = cpu_reg(s, a->rt); 4072 if (invert) { 4073 tcg_gen_not_i64(tcg_rs, tcg_rs); 4074 } 4075 /* 4076 * The tcg atomic primitives are all full barriers. Therefore we 4077 * can ignore the Acquire and Release bits of this instruction. 
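 * Note also that the explicit zero-extension after the operation (the
 * MO_SIGN switch below) produces the architectural result: the destination
 * register receives the zero-extended loaded value even for the signed
 * min/max forms; MO_SIGN only affects the comparison inside the atomic
 * operation.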
4078 */ 4079 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 4080 4081 if (mop & MO_SIGN) { 4082 switch (a->sz) { 4083 case MO_8: 4084 tcg_gen_ext8u_i64(tcg_rt, tcg_rt); 4085 break; 4086 case MO_16: 4087 tcg_gen_ext16u_i64(tcg_rt, tcg_rt); 4088 break; 4089 case MO_32: 4090 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 4091 break; 4092 case MO_64: 4093 break; 4094 default: 4095 g_assert_not_reached(); 4096 } 4097 } 4098 return true; 4099 } 4100 4101 TRANS_FEAT(LDADD, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) 4102 TRANS_FEAT(LDCLR, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) 4103 TRANS_FEAT(LDEOR, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) 4104 TRANS_FEAT(LDSET, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) 4105 TRANS_FEAT(LDSMAX, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) 4106 TRANS_FEAT(LDSMIN, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) 4107 TRANS_FEAT(LDUMAX, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 4108 TRANS_FEAT(LDUMIN, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) 4109 TRANS_FEAT(SWP, aa64_lse, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) 4110 4111 typedef void Atomic128ThreeOpFn(TCGv_i128, TCGv_i64, TCGv_i128, TCGArg, MemOp); 4112 4113 static bool do_atomic128_ld(DisasContext *s, arg_atomic128 *a, 4114 Atomic128ThreeOpFn *fn, bool invert) 4115 { 4116 MemOp mop; 4117 int rlo, rhi; 4118 TCGv_i64 clean_addr, tlo, thi; 4119 TCGv_i128 t16; 4120 4121 if (a->rt == 31 || a->rt2 == 31 || a->rt == a->rt2) { 4122 return false; 4123 } 4124 if (a->rn == 31) { 4125 gen_check_sp_alignment(s); 4126 } 4127 mop = check_atomic_align(s, a->rn, MO_128); 4128 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 4129 a->rn != 31, mop); 4130 4131 rlo = (s->be_data == MO_LE ? a->rt : a->rt2); 4132 rhi = (s->be_data == MO_LE ? a->rt2 : a->rt); 4133 4134 tlo = read_cpu_reg(s, rlo, true); 4135 thi = read_cpu_reg(s, rhi, true); 4136 if (invert) { 4137 tcg_gen_not_i64(tlo, tlo); 4138 tcg_gen_not_i64(thi, thi); 4139 } 4140 /* 4141 * The tcg atomic primitives are all full barriers. Therefore we 4142 * can ignore the Acquire and Release bits of this instruction. 4143 */ 4144 t16 = tcg_temp_new_i128(); 4145 tcg_gen_concat_i64_i128(t16, tlo, thi); 4146 4147 fn(t16, clean_addr, t16, get_mem_index(s), mop); 4148 4149 tcg_gen_extr_i128_i64(cpu_reg(s, rlo), cpu_reg(s, rhi), t16); 4150 return true; 4151 } 4152 4153 TRANS_FEAT(LDCLRP, aa64_lse128, do_atomic128_ld, 4154 a, tcg_gen_atomic_fetch_and_i128, true) 4155 TRANS_FEAT(LDSETP, aa64_lse128, do_atomic128_ld, 4156 a, tcg_gen_atomic_fetch_or_i128, false) 4157 TRANS_FEAT(SWPP, aa64_lse128, do_atomic128_ld, 4158 a, tcg_gen_atomic_xchg_i128, false) 4159 4160 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) 4161 { 4162 bool iss_sf = ldst_iss_sf(a->sz, false, false); 4163 TCGv_i64 clean_addr; 4164 MemOp mop; 4165 4166 if (!dc_isar_feature(aa64_lse, s) || 4167 !dc_isar_feature(aa64_rcpc_8_3, s)) { 4168 return false; 4169 } 4170 if (a->rn == 31) { 4171 gen_check_sp_alignment(s); 4172 } 4173 mop = check_ordered_align(s, a->rn, 0, false, a->sz); 4174 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 4175 a->rn != 31, mop); 4176 /* 4177 * LDAPR* are a special case because they are a simple load, not a 4178 * fetch-and-do-something op. 
4179 * The architectural consistency requirements here are weaker than 4180 * full load-acquire (we only need "load-acquire processor consistent"), 4181 * but we choose to implement them as full LDAQ. 4182 */ 4183 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, 4184 true, a->rt, iss_sf, true); 4185 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 4186 return true; 4187 } 4188 4189 static bool trans_LDRA(DisasContext *s, arg_LDRA *a) 4190 { 4191 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 4192 MemOp memop; 4193 4194 /* Load with pointer authentication */ 4195 if (!dc_isar_feature(aa64_pauth, s)) { 4196 return false; 4197 } 4198 4199 if (a->rn == 31) { 4200 gen_check_sp_alignment(s); 4201 } 4202 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 4203 4204 if (s->pauth_active) { 4205 if (!a->m) { 4206 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr, 4207 tcg_constant_i64(0)); 4208 } else { 4209 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr, 4210 tcg_constant_i64(0)); 4211 } 4212 } 4213 4214 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 4215 4216 memop = finalize_memop(s, MO_64); 4217 4218 /* Note that "clean" and "dirty" here refer to TBI not PAC. */ 4219 clean_addr = gen_mte_check1(s, dirty_addr, false, 4220 a->w || a->rn != 31, memop); 4221 4222 tcg_rt = cpu_reg(s, a->rt); 4223 do_gpr_ld(s, tcg_rt, clean_addr, memop, 4224 /* extend */ false, /* iss_valid */ !a->w, 4225 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); 4226 4227 if (a->w) { 4228 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 4229 } 4230 return true; 4231 } 4232 4233 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) 4234 { 4235 TCGv_i64 clean_addr, dirty_addr; 4236 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); 4237 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 4238 4239 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 4240 return false; 4241 } 4242 4243 if (a->rn == 31) { 4244 gen_check_sp_alignment(s); 4245 } 4246 4247 mop = check_ordered_align(s, a->rn, a->imm, false, mop); 4248 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 4249 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 4250 clean_addr = clean_data_tbi(s, dirty_addr); 4251 4252 /* 4253 * Load-AcquirePC semantics; we implement as the slightly more 4254 * restrictive Load-Acquire. 4255 */ 4256 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, 4257 a->rt, iss_sf, true); 4258 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 4259 return true; 4260 } 4261 4262 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) 4263 { 4264 TCGv_i64 clean_addr, dirty_addr; 4265 MemOp mop = a->sz; 4266 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 4267 4268 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 4269 return false; 4270 } 4271 4272 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 4273 4274 if (a->rn == 31) { 4275 gen_check_sp_alignment(s); 4276 } 4277 4278 mop = check_ordered_align(s, a->rn, a->imm, true, mop); 4279 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 4280 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 4281 clean_addr = clean_data_tbi(s, dirty_addr); 4282 4283 /* Store-Release semantics */ 4284 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 4285 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); 4286 return true; 4287 } 4288 4289 static bool trans_GCSSTR(DisasContext *s, arg_GCSSTR *a) 4290 { 4291 ARMMMUIdx armidx; 4292 4293 if (!dc_isar_feature(aa64_gcs, s)) { 4294 return false; 4295 } 4296 4297 /* 4298 * The pseudocode for GCSSTTR is 4299 * 4300 * effective_el = AArch64.IsUnprivAccessPriv() ? 
PSTATE.EL : EL0; 4301 * if (effective_el == PSTATE.EL) CheckGCSSTREnabled(); 4302 * 4303 * We have cached the result of IsUnprivAccessPriv in DisasContext, 4304 * but since we need the result of full_a64_user_mem_index anyway, 4305 * use the mmu_idx test as a proxy for the effective_el test. 4306 */ 4307 armidx = full_a64_user_mem_index(s, a->unpriv); 4308 if (armidx == s->mmu_idx && s->gcsstr_el != 0) { 4309 gen_exception_insn_el(s, 0, EXCP_UDEF, 4310 syn_gcs_gcsstr(a->rn, a->rt), 4311 s->gcsstr_el); 4312 return true; 4313 } 4314 4315 if (a->rn == 31) { 4316 gen_check_sp_alignment(s); 4317 } 4318 tcg_gen_qemu_st_i64(cpu_reg(s, a->rt), 4319 clean_data_tbi(s, cpu_reg_sp(s, a->rn)), 4320 core_gcs_mem_index(armidx), 4321 finalize_memop(s, MO_64 | MO_ALIGN)); 4322 return true; 4323 } 4324 4325 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) 4326 { 4327 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4328 MemOp endian, align, mop; 4329 4330 int total; /* total bytes */ 4331 int elements; /* elements per vector */ 4332 int r; 4333 int size = a->sz; 4334 4335 if (!a->p && a->rm != 0) { 4336 /* For non-postindexed accesses the Rm field must be 0 */ 4337 return false; 4338 } 4339 if (size == 3 && !a->q && a->selem != 1) { 4340 return false; 4341 } 4342 if (!fp_access_check(s)) { 4343 return true; 4344 } 4345 4346 if (a->rn == 31) { 4347 gen_check_sp_alignment(s); 4348 } 4349 4350 /* For our purposes, bytes are always little-endian. */ 4351 endian = s->be_data; 4352 if (size == 0) { 4353 endian = MO_LE; 4354 } 4355 4356 total = a->rpt * a->selem * (a->q ? 16 : 8); 4357 tcg_rn = cpu_reg_sp(s, a->rn); 4358 4359 /* 4360 * Issue the MTE check vs the logical repeat count, before we 4361 * promote consecutive little-endian elements below. 4362 */ 4363 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, 4364 finalize_memop_asimd(s, size)); 4365 4366 /* 4367 * Consecutive little-endian elements from a single register 4368 * can be promoted to a larger little-endian operation. 4369 */ 4370 align = MO_ALIGN; 4371 if (a->selem == 1 && endian == MO_LE) { 4372 align = pow2_align(size); 4373 size = 3; 4374 } 4375 if (!s->align_mem) { 4376 align = 0; 4377 } 4378 mop = endian | size | align; 4379 4380 elements = (a->q ? 16 : 8) >> size; 4381 tcg_ebytes = tcg_constant_i64(1 << size); 4382 for (r = 0; r < a->rpt; r++) { 4383 int e; 4384 for (e = 0; e < elements; e++) { 4385 int xs; 4386 for (xs = 0; xs < a->selem; xs++) { 4387 int tt = (a->rt + r + xs) % 32; 4388 do_vec_ld(s, tt, e, clean_addr, mop); 4389 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4390 } 4391 } 4392 } 4393 4394 /* 4395 * For non-quad operations, setting a slice of the low 64 bits of 4396 * the register clears the high 64 bits (in the ARM ARM pseudocode 4397 * this is implicit in the fact that 'rval' is a 64 bit wide 4398 * variable). For quad operations, we might still need to zero 4399 * the high bits of SVE. 
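 * clear_vec_high() below covers both cases: for !q it zeroes bits [127:64],
 * and in either case it zeroes any vector bits above bit 127 that SVE may
 * define.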
4400 */ 4401 for (r = 0; r < a->rpt * a->selem; r++) { 4402 int tt = (a->rt + r) % 32; 4403 clear_vec_high(s, a->q, tt); 4404 } 4405 4406 if (a->p) { 4407 if (a->rm == 31) { 4408 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4409 } else { 4410 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4411 } 4412 } 4413 return true; 4414 } 4415 4416 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) 4417 { 4418 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4419 MemOp endian, align, mop; 4420 4421 int total; /* total bytes */ 4422 int elements; /* elements per vector */ 4423 int r; 4424 int size = a->sz; 4425 4426 if (!a->p && a->rm != 0) { 4427 /* For non-postindexed accesses the Rm field must be 0 */ 4428 return false; 4429 } 4430 if (size == 3 && !a->q && a->selem != 1) { 4431 return false; 4432 } 4433 if (!fp_access_check(s)) { 4434 return true; 4435 } 4436 4437 if (a->rn == 31) { 4438 gen_check_sp_alignment(s); 4439 } 4440 4441 /* For our purposes, bytes are always little-endian. */ 4442 endian = s->be_data; 4443 if (size == 0) { 4444 endian = MO_LE; 4445 } 4446 4447 total = a->rpt * a->selem * (a->q ? 16 : 8); 4448 tcg_rn = cpu_reg_sp(s, a->rn); 4449 4450 /* 4451 * Issue the MTE check vs the logical repeat count, before we 4452 * promote consecutive little-endian elements below. 4453 */ 4454 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, 4455 finalize_memop_asimd(s, size)); 4456 4457 /* 4458 * Consecutive little-endian elements from a single register 4459 * can be promoted to a larger little-endian operation. 4460 */ 4461 align = MO_ALIGN; 4462 if (a->selem == 1 && endian == MO_LE) { 4463 align = pow2_align(size); 4464 size = 3; 4465 } 4466 if (!s->align_mem) { 4467 align = 0; 4468 } 4469 mop = endian | size | align; 4470 4471 elements = (a->q ? 
16 : 8) >> size; 4472 tcg_ebytes = tcg_constant_i64(1 << size); 4473 for (r = 0; r < a->rpt; r++) { 4474 int e; 4475 for (e = 0; e < elements; e++) { 4476 int xs; 4477 for (xs = 0; xs < a->selem; xs++) { 4478 int tt = (a->rt + r + xs) % 32; 4479 do_vec_st(s, tt, e, clean_addr, mop); 4480 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4481 } 4482 } 4483 } 4484 4485 if (a->p) { 4486 if (a->rm == 31) { 4487 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4488 } else { 4489 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4490 } 4491 } 4492 return true; 4493 } 4494 4495 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) 4496 { 4497 int xs, total, rt; 4498 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4499 MemOp mop; 4500 4501 if (!a->p && a->rm != 0) { 4502 return false; 4503 } 4504 if (!fp_access_check(s)) { 4505 return true; 4506 } 4507 4508 if (a->rn == 31) { 4509 gen_check_sp_alignment(s); 4510 } 4511 4512 total = a->selem << a->scale; 4513 tcg_rn = cpu_reg_sp(s, a->rn); 4514 4515 mop = finalize_memop_asimd(s, a->scale); 4516 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, 4517 total, mop); 4518 4519 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4520 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4521 do_vec_st(s, rt, a->index, clean_addr, mop); 4522 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4523 } 4524 4525 if (a->p) { 4526 if (a->rm == 31) { 4527 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4528 } else { 4529 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4530 } 4531 } 4532 return true; 4533 } 4534 4535 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) 4536 { 4537 int xs, total, rt; 4538 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4539 MemOp mop; 4540 4541 if (!a->p && a->rm != 0) { 4542 return false; 4543 } 4544 if (!fp_access_check(s)) { 4545 return true; 4546 } 4547 4548 if (a->rn == 31) { 4549 gen_check_sp_alignment(s); 4550 } 4551 4552 total = a->selem << a->scale; 4553 tcg_rn = cpu_reg_sp(s, a->rn); 4554 4555 mop = finalize_memop_asimd(s, a->scale); 4556 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 4557 total, mop); 4558 4559 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4560 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4561 do_vec_ld(s, rt, a->index, clean_addr, mop); 4562 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4563 } 4564 4565 if (a->p) { 4566 if (a->rm == 31) { 4567 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4568 } else { 4569 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4570 } 4571 } 4572 return true; 4573 } 4574 4575 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) 4576 { 4577 int xs, total, rt; 4578 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 4579 MemOp mop; 4580 4581 if (!a->p && a->rm != 0) { 4582 return false; 4583 } 4584 if (!fp_access_check(s)) { 4585 return true; 4586 } 4587 4588 if (a->rn == 31) { 4589 gen_check_sp_alignment(s); 4590 } 4591 4592 total = a->selem << a->scale; 4593 tcg_rn = cpu_reg_sp(s, a->rn); 4594 4595 mop = finalize_memop_asimd(s, a->scale); 4596 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 4597 total, mop); 4598 4599 tcg_ebytes = tcg_constant_i64(1 << a->scale); 4600 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 4601 /* Load and replicate to all elements */ 4602 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 4603 4604 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 4605 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), 4606 (a->q + 
1) * 8, vec_full_reg_size(s), tcg_tmp); 4607 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4608 } 4609 4610 if (a->p) { 4611 if (a->rm == 31) { 4612 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4613 } else { 4614 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 4615 } 4616 } 4617 return true; 4618 } 4619 4620 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) 4621 { 4622 TCGv_i64 addr, clean_addr, tcg_rt; 4623 int size = 4 << s->dcz_blocksize; 4624 4625 if (!dc_isar_feature(aa64_mte, s)) { 4626 return false; 4627 } 4628 if (s->current_el == 0) { 4629 return false; 4630 } 4631 4632 if (a->rn == 31) { 4633 gen_check_sp_alignment(s); 4634 } 4635 4636 addr = read_cpu_reg_sp(s, a->rn, true); 4637 tcg_gen_addi_i64(addr, addr, a->imm); 4638 tcg_rt = cpu_reg(s, a->rt); 4639 4640 if (s->ata[0]) { 4641 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt); 4642 } 4643 /* 4644 * The non-tags portion of STZGM is mostly like DC_ZVA, 4645 * except the alignment happens before the access. 4646 */ 4647 clean_addr = clean_data_tbi(s, addr); 4648 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4649 gen_helper_dc_zva(tcg_env, clean_addr); 4650 return true; 4651 } 4652 4653 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) 4654 { 4655 TCGv_i64 addr, clean_addr, tcg_rt; 4656 4657 if (!dc_isar_feature(aa64_mte, s)) { 4658 return false; 4659 } 4660 if (s->current_el == 0) { 4661 return false; 4662 } 4663 4664 if (a->rn == 31) { 4665 gen_check_sp_alignment(s); 4666 } 4667 4668 addr = read_cpu_reg_sp(s, a->rn, true); 4669 tcg_gen_addi_i64(addr, addr, a->imm); 4670 tcg_rt = cpu_reg(s, a->rt); 4671 4672 if (s->ata[0]) { 4673 gen_helper_stgm(tcg_env, addr, tcg_rt); 4674 } else { 4675 MMUAccessType acc = MMU_DATA_STORE; 4676 int size = 4 << s->gm_blocksize; 4677 4678 clean_addr = clean_data_tbi(s, addr); 4679 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4680 gen_probe_access(s, clean_addr, acc, size); 4681 } 4682 return true; 4683 } 4684 4685 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) 4686 { 4687 TCGv_i64 addr, clean_addr, tcg_rt; 4688 4689 if (!dc_isar_feature(aa64_mte, s)) { 4690 return false; 4691 } 4692 if (s->current_el == 0) { 4693 return false; 4694 } 4695 4696 if (a->rn == 31) { 4697 gen_check_sp_alignment(s); 4698 } 4699 4700 addr = read_cpu_reg_sp(s, a->rn, true); 4701 tcg_gen_addi_i64(addr, addr, a->imm); 4702 tcg_rt = cpu_reg(s, a->rt); 4703 4704 if (s->ata[0]) { 4705 gen_helper_ldgm(tcg_rt, tcg_env, addr); 4706 } else { 4707 MMUAccessType acc = MMU_DATA_LOAD; 4708 int size = 4 << s->gm_blocksize; 4709 4710 clean_addr = clean_data_tbi(s, addr); 4711 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4712 gen_probe_access(s, clean_addr, acc, size); 4713 /* The result tags are zeros. */ 4714 tcg_gen_movi_i64(tcg_rt, 0); 4715 } 4716 return true; 4717 } 4718 4719 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) 4720 { 4721 TCGv_i64 addr, clean_addr, tcg_rt; 4722 4723 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 4724 return false; 4725 } 4726 4727 if (a->rn == 31) { 4728 gen_check_sp_alignment(s); 4729 } 4730 4731 addr = read_cpu_reg_sp(s, a->rn, true); 4732 if (!a->p) { 4733 /* pre-index or signed offset */ 4734 tcg_gen_addi_i64(addr, addr, a->imm); 4735 } 4736 4737 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); 4738 tcg_rt = cpu_reg(s, a->rt); 4739 if (s->ata[0]) { 4740 gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt); 4741 } else { 4742 /* 4743 * Tag access disabled: we must check for aborts on the load 4744 * from [rn+offset], and then insert a 0 tag into rt. 
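 * (gen_probe_access() below checks the access for faults without returning
 * any data; gen_address_with_allocation_tag0() then clears the
 * allocation-tag bits of rt, giving the 0 tag.)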
4745 */ 4746 clean_addr = clean_data_tbi(s, addr); 4747 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 4748 gen_address_with_allocation_tag0(tcg_rt, tcg_rt); 4749 } 4750 4751 if (a->w) { 4752 /* pre-index or post-index */ 4753 if (a->p) { 4754 /* post-index */ 4755 tcg_gen_addi_i64(addr, addr, a->imm); 4756 } 4757 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4758 } 4759 return true; 4760 } 4761 4762 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) 4763 { 4764 TCGv_i64 addr, tcg_rt; 4765 4766 if (a->rn == 31) { 4767 gen_check_sp_alignment(s); 4768 } 4769 4770 addr = read_cpu_reg_sp(s, a->rn, true); 4771 if (!a->p) { 4772 /* pre-index or signed offset */ 4773 tcg_gen_addi_i64(addr, addr, a->imm); 4774 } 4775 tcg_rt = cpu_reg_sp(s, a->rt); 4776 if (!s->ata[0]) { 4777 /* 4778 * For STG and ST2G, we need to check alignment and probe memory. 4779 * TODO: For STZG and STZ2G, we could rely on the stores below, 4780 * at least for system mode; user-only won't enforce alignment. 4781 */ 4782 if (is_pair) { 4783 gen_helper_st2g_stub(tcg_env, addr); 4784 } else { 4785 gen_helper_stg_stub(tcg_env, addr); 4786 } 4787 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 4788 if (is_pair) { 4789 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt); 4790 } else { 4791 gen_helper_stg_parallel(tcg_env, addr, tcg_rt); 4792 } 4793 } else { 4794 if (is_pair) { 4795 gen_helper_st2g(tcg_env, addr, tcg_rt); 4796 } else { 4797 gen_helper_stg(tcg_env, addr, tcg_rt); 4798 } 4799 } 4800 4801 if (is_zero) { 4802 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 4803 TCGv_i64 zero64 = tcg_constant_i64(0); 4804 TCGv_i128 zero128 = tcg_temp_new_i128(); 4805 int mem_index = get_mem_index(s); 4806 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN); 4807 4808 tcg_gen_concat_i64_i128(zero128, zero64, zero64); 4809 4810 /* This is 1 or 2 atomic 16-byte operations. */ 4811 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4812 if (is_pair) { 4813 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4814 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4815 } 4816 } 4817 4818 if (a->w) { 4819 /* pre-index or post-index */ 4820 if (a->p) { 4821 /* post-index */ 4822 tcg_gen_addi_i64(addr, addr, a->imm); 4823 } 4824 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4825 } 4826 return true; 4827 } 4828 4829 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) 4830 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) 4831 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) 4832 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) 4833 4834 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32); 4835 4836 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, 4837 bool is_setg, SetFn fn) 4838 { 4839 int memidx; 4840 uint32_t syndrome, desc = 0; 4841 4842 if (is_setg && !dc_isar_feature(aa64_mte, s)) { 4843 return false; 4844 } 4845 4846 /* 4847 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4848 * us to pull this check before the CheckMOPSEnabled() test 4849 * (which we do in the helper function) 4850 */ 4851 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4852 a->rd == 31 || a->rn == 31) { 4853 return false; 4854 } 4855 4856 memidx = core_a64_user_mem_index(s, a->unpriv); 4857 4858 /* 4859 * We pass option_a == true, matching our implementation; 4860 * we pass wrong_option == false: helper function may set that bit. 
4861 */ 4862 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv, 4863 is_epilogue, false, true, a->rd, a->rs, a->rn); 4864 4865 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) { 4866 /* We may need to do MTE tag checking, so assemble the descriptor */ 4867 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4868 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4869 desc = FIELD_DP32(desc, MTEDESC, WRITE, true); 4870 /* SIZEM1 and ALIGN we leave 0 (byte write) */ 4871 } 4872 /* The helper function always needs the memidx even with MTE disabled */ 4873 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx); 4874 4875 /* 4876 * The helper needs the register numbers, but since they're in 4877 * the syndrome anyway, we let it extract them from there rather 4878 * than passing in an extra three integer arguments. 4879 */ 4880 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc)); 4881 return true; 4882 } 4883 4884 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp) 4885 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm) 4886 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete) 4887 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp) 4888 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm) 4889 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge) 4890 4891 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32); 4892 4893 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) 4894 { 4895 int rmemidx, wmemidx; 4896 uint32_t syndrome, rdesc = 0, wdesc = 0; 4897 bool wunpriv = extract32(a->options, 0, 1); 4898 bool runpriv = extract32(a->options, 1, 1); 4899 4900 /* 4901 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4902 * us to pull this check before the CheckMOPSEnabled() test 4903 * (which we do in the helper function) 4904 */ 4905 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4906 a->rd == 31 || a->rs == 31 || a->rn == 31) { 4907 return false; 4908 } 4909 4910 rmemidx = core_a64_user_mem_index(s, runpriv); 4911 wmemidx = core_a64_user_mem_index(s, wunpriv); 4912 4913 /* 4914 * We pass option_a == true, matching our implementation; 4915 * we pass wrong_option == false: helper function may set that bit. 4916 */ 4917 syndrome = syn_mop(false, false, a->options, is_epilogue, 4918 false, true, a->rd, a->rs, a->rn); 4919 4920 /* If we need to do MTE tag checking, assemble the descriptors */ 4921 if (s->mte_active[runpriv]) { 4922 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid); 4923 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma); 4924 } 4925 if (s->mte_active[wunpriv]) { 4926 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid); 4927 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma); 4928 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true); 4929 } 4930 /* The helper function needs these parts of the descriptor regardless */ 4931 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx); 4932 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx); 4933 4934 /* 4935 * The helper needs the register numbers, but since they're in 4936 * the syndrome anyway, we let it extract them from there rather 4937 * than passing in an extra three integer arguments. 
4938 */ 4939 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc), 4940 tcg_constant_i32(rdesc)); 4941 return true; 4942 } 4943 4944 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp) 4945 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym) 4946 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye) 4947 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp) 4948 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm) 4949 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe) 4950 4951 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); 4952 4953 static bool gen_rri(DisasContext *s, arg_rri_sf *a, 4954 bool rd_sp, bool rn_sp, ArithTwoOp *fn) 4955 { 4956 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn); 4957 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd); 4958 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm); 4959 4960 fn(tcg_rd, tcg_rn, tcg_imm); 4961 if (!a->sf) { 4962 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4963 } 4964 return true; 4965 } 4966 4967 /* 4968 * PC-rel. addressing 4969 */ 4970 4971 static bool trans_ADR(DisasContext *s, arg_ri *a) 4972 { 4973 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm); 4974 return true; 4975 } 4976 4977 static bool trans_ADRP(DisasContext *s, arg_ri *a) 4978 { 4979 int64_t offset = (int64_t)a->imm << 12; 4980 4981 /* The page offset is ok for CF_PCREL. */ 4982 offset -= s->pc_curr & 0xfff; 4983 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset); 4984 return true; 4985 } 4986 4987 /* 4988 * Add/subtract (immediate) 4989 */ 4990 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64) 4991 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64) 4992 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) 4993 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) 4994 4995 /* 4996 * Min/Max (immediate) 4997 */ 4998 4999 static void gen_wrap3_i32(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, NeonGenTwoOpFn fn) 5000 { 5001 TCGv_i32 t1 = tcg_temp_new_i32(); 5002 TCGv_i32 t2 = tcg_temp_new_i32(); 5003 5004 tcg_gen_extrl_i64_i32(t1, n); 5005 tcg_gen_extrl_i64_i32(t2, m); 5006 fn(t1, t1, t2); 5007 tcg_gen_extu_i32_i64(d, t1); 5008 } 5009 5010 static void gen_smax32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5011 { 5012 gen_wrap3_i32(d, n, m, tcg_gen_smax_i32); 5013 } 5014 5015 static void gen_smin32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5016 { 5017 gen_wrap3_i32(d, n, m, tcg_gen_smin_i32); 5018 } 5019 5020 static void gen_umax32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5021 { 5022 gen_wrap3_i32(d, n, m, tcg_gen_umax_i32); 5023 } 5024 5025 static void gen_umin32_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5026 { 5027 gen_wrap3_i32(d, n, m, tcg_gen_umin_i32); 5028 } 5029 5030 TRANS_FEAT(SMAX_i, aa64_cssc, gen_rri, a, 0, 0, 5031 a->sf ? tcg_gen_smax_i64 : gen_smax32_i64) 5032 TRANS_FEAT(SMIN_i, aa64_cssc, gen_rri, a, 0, 0, 5033 a->sf ? tcg_gen_smin_i64 : gen_smin32_i64) 5034 TRANS_FEAT(UMAX_i, aa64_cssc, gen_rri, a, 0, 0, 5035 a->sf ? tcg_gen_umax_i64 : gen_umax32_i64) 5036 TRANS_FEAT(UMIN_i, aa64_cssc, gen_rri, a, 0, 0, 5037 a->sf ? 
tcg_gen_umin_i64 : gen_umin32_i64) 5038 5039 /* 5040 * Add/subtract (immediate, with tags) 5041 */ 5042 5043 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, 5044 bool sub_op) 5045 { 5046 TCGv_i64 tcg_rn, tcg_rd; 5047 int imm; 5048 5049 imm = a->uimm6 << LOG2_TAG_GRANULE; 5050 if (sub_op) { 5051 imm = -imm; 5052 } 5053 5054 tcg_rn = cpu_reg_sp(s, a->rn); 5055 tcg_rd = cpu_reg_sp(s, a->rd); 5056 5057 if (s->ata[0]) { 5058 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn, 5059 tcg_constant_i32(imm), 5060 tcg_constant_i32(a->uimm4)); 5061 } else { 5062 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 5063 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 5064 } 5065 return true; 5066 } 5067 5068 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false) 5069 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true) 5070 5071 /* The input should be a value in the bottom e bits (with higher 5072 * bits zero); returns that value replicated into every element 5073 * of size e in a 64 bit integer. 5074 */ 5075 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 5076 { 5077 assert(e != 0); 5078 while (e < 64) { 5079 mask |= mask << e; 5080 e *= 2; 5081 } 5082 return mask; 5083 } 5084 5085 /* 5086 * Logical (immediate) 5087 */ 5088 5089 /* 5090 * Simplified variant of pseudocode DecodeBitMasks() for the case where we 5091 * only require the wmask. Returns false if the imms/immr/immn are a reserved 5092 * value (ie should cause a guest UNDEF exception), and true if they are 5093 * valid, in which case the decoded bit pattern is written to result. 5094 */ 5095 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, 5096 unsigned int imms, unsigned int immr) 5097 { 5098 uint64_t mask; 5099 unsigned e, levels, s, r; 5100 int len; 5101 5102 assert(immn < 2 && imms < 64 && immr < 64); 5103 5104 /* The bit patterns we create here are 64 bit patterns which 5105 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or 5106 * 64 bits each. Each element contains the same value: a run 5107 * of between 1 and e-1 non-zero bits, rotated within the 5108 * element by between 0 and e-1 bits. 5109 * 5110 * The element size and run length are encoded into immn (1 bit) 5111 * and imms (6 bits) as follows: 5112 * 64 bit elements: immn = 1, imms = <length of run - 1> 5113 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1> 5114 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1> 5115 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1> 5116 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1> 5117 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1> 5118 * Notice that immn = 0, imms = 11111x is the only combination 5119 * not covered by one of the above options; this is reserved. 5120 * Further, <length of run - 1> all-ones is a reserved pattern. 5121 * 5122 * In all cases the rotation is by immr % e (and immr is 6 bits). 5123 */ 5124 5125 /* First determine the element size */ 5126 len = 31 - clz32((immn << 6) | (~imms & 0x3f)); 5127 if (len < 1) { 5128 /* This is the immn == 0, imms == 0x11111x case */ 5129 return false; 5130 } 5131 e = 1 << len; 5132 5133 levels = e - 1; 5134 s = imms & levels; 5135 r = immr & levels; 5136 5137 if (s == levels) { 5138 /* <length of run - 1> mustn't be all-ones. */ 5139 return false; 5140 } 5141 5142 /* Create the value of one element: s+1 set bits rotated 5143 * by r within the element (which is e bits wide)... 
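 * For example (illustrative values): immn=0, imms=0b100101, immr=0b000010
 * gives e=16, s=5, r=2; the run of s+1 = 6 ones (0x003f) rotated right by 2
 * within the 16-bit element is 0xc00f, which replicates to
 * 0xc00f_c00f_c00f_c00f.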
5144 */ 5145 mask = MAKE_64BIT_MASK(0, s + 1); 5146 if (r) { 5147 mask = (mask >> r) | (mask << (e - r)); 5148 mask &= MAKE_64BIT_MASK(0, e); 5149 } 5150 /* ...then replicate the element over the whole 64 bit value */ 5151 mask = bitfield_replicate(mask, e); 5152 *result = mask; 5153 return true; 5154 } 5155 5156 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc, 5157 void (*fn)(TCGv_i64, TCGv_i64, int64_t)) 5158 { 5159 TCGv_i64 tcg_rd, tcg_rn; 5160 uint64_t imm; 5161 5162 /* Some immediate field values are reserved. */ 5163 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 5164 extract32(a->dbm, 0, 6), 5165 extract32(a->dbm, 6, 6))) { 5166 return false; 5167 } 5168 if (!a->sf) { 5169 imm &= 0xffffffffull; 5170 } 5171 5172 tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd); 5173 tcg_rn = cpu_reg(s, a->rn); 5174 5175 fn(tcg_rd, tcg_rn, imm); 5176 if (set_cc) { 5177 gen_logic_CC(a->sf, tcg_rd); 5178 } 5179 if (!a->sf) { 5180 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5181 } 5182 return true; 5183 } 5184 5185 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64) 5186 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64) 5187 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64) 5188 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64) 5189 5190 /* 5191 * Move wide (immediate) 5192 */ 5193 5194 static bool trans_MOVZ(DisasContext *s, arg_movw *a) 5195 { 5196 int pos = a->hw << 4; 5197 tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos); 5198 return true; 5199 } 5200 5201 static bool trans_MOVN(DisasContext *s, arg_movw *a) 5202 { 5203 int pos = a->hw << 4; 5204 uint64_t imm = a->imm; 5205 5206 imm = ~(imm << pos); 5207 if (!a->sf) { 5208 imm = (uint32_t)imm; 5209 } 5210 tcg_gen_movi_i64(cpu_reg(s, a->rd), imm); 5211 return true; 5212 } 5213 5214 static bool trans_MOVK(DisasContext *s, arg_movw *a) 5215 { 5216 int pos = a->hw << 4; 5217 TCGv_i64 tcg_rd, tcg_im; 5218 5219 tcg_rd = cpu_reg(s, a->rd); 5220 tcg_im = tcg_constant_i64(a->imm); 5221 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16); 5222 if (!a->sf) { 5223 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5224 } 5225 return true; 5226 } 5227 5228 /* 5229 * Bitfield 5230 */ 5231 5232 static bool trans_SBFM(DisasContext *s, arg_SBFM *a) 5233 { 5234 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 5235 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 5236 unsigned int bitsize = a->sf ? 64 : 32; 5237 unsigned int ri = a->immr; 5238 unsigned int si = a->imms; 5239 unsigned int pos, len; 5240 5241 if (si >= ri) { 5242 /* Wd<s-r:0> = Wn<s:r> */ 5243 len = (si - ri) + 1; 5244 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len); 5245 if (!a->sf) { 5246 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5247 } 5248 } else { 5249 /* Wd<32+s-r,32-r> = Wn<s:0> */ 5250 len = si + 1; 5251 pos = (bitsize - ri) & (bitsize - 1); 5252 5253 if (len < ri) { 5254 /* 5255 * Sign extend the destination field from len to fill the 5256 * balance of the word. Let the deposit below insert all 5257 * of those sign bits. 5258 */ 5259 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len); 5260 len = ri; 5261 } 5262 5263 /* 5264 * We start with zero, and we haven't modified any bits outside 5265 * bitsize, therefore no final zero-extension is unneeded for !sf. 5266 */ 5267 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 5268 } 5269 return true; 5270 } 5271 5272 static bool trans_UBFM(DisasContext *s, arg_UBFM *a) 5273 { 5274 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 5275 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 5276 unsigned int bitsize = a->sf ? 
64 : 32; 5277 unsigned int ri = a->immr; 5278 unsigned int si = a->imms; 5279 unsigned int pos, len; 5280 5281 5282 5283 5284 if (si >= ri) { 5285 /* Wd<s-r:0> = Wn<s:r> */ 5286 len = (si - ri) + 1; 5287 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); 5288 } else { 5289 /* Wd<32+s-r,32-r> = Wn<s:0> */ 5290 len = si + 1; 5291 pos = (bitsize - ri) & (bitsize - 1); 5292 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 5293 } 5294 return true; 5295 } 5296 5297 static bool trans_BFM(DisasContext *s, arg_BFM *a) 5298 { 5299 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 5300 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 5301 unsigned int bitsize = a->sf ? 64 : 32; 5302 unsigned int ri = a->immr; 5303 unsigned int si = a->imms; 5304 unsigned int pos, len; 5305 5306 5307 5308 5309 if (si >= ri) { 5310 /* Wd<s-r:0> = Wn<s:r> */ 5311 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); 5312 len = (si - ri) + 1; 5313 pos = 0; 5314 } else { 5315 /* Wd<32+s-r,32-r> = Wn<s:0> */ 5316 len = si + 1; 5317 pos = (bitsize - ri) & (bitsize - 1); 5318 } 5319 5320 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 5321 if (!a->sf) { 5322 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5323 } 5324 return true; 5325 } 5326 5327 static bool trans_EXTR(DisasContext *s, arg_extract *a) 5328 { 5329 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 5330 5331 tcg_rd = cpu_reg(s, a->rd); 5332 5333 if (unlikely(a->imm == 0)) { 5334 /* 5335 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 5336 * so an extract from bit 0 is a special case. 5337 */ 5338 if (a->sf) { 5339 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm)); 5340 } else { 5341 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm)); 5342 } 5343 } else { 5344 tcg_rm = cpu_reg(s, a->rm); 5345 tcg_rn = cpu_reg(s, a->rn); 5346 5347 if (a->sf) { 5348 /* Specialization to ROR happens in EXTRACT2. */ 5349 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm); 5350 } else { 5351 TCGv_i32 t0 = tcg_temp_new_i32(); 5352 5353 tcg_gen_extrl_i64_i32(t0, tcg_rm); 5354 if (a->rm == a->rn) { 5355 tcg_gen_rotri_i32(t0, t0, a->imm); 5356 } else { 5357 TCGv_i32 t1 = tcg_temp_new_i32(); 5358 tcg_gen_extrl_i64_i32(t1, tcg_rn); 5359 tcg_gen_extract2_i32(t0, t0, t1, a->imm); 5360 } 5361 tcg_gen_extu_i32_i64(tcg_rd, t0); 5362 } 5363 } 5364 return true; 5365 } 5366 5367 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a) 5368 { 5369 if (fp_access_check(s)) { 5370 int len = (a->len + 1) * 16; 5371 5372 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd), 5373 vec_full_reg_offset(s, a->rm), tcg_env, 5374 a->q ? 16 : 8, vec_full_reg_size(s), 5375 (len << 6) | (a->tbx << 5) | a->rn, 5376 gen_helper_simd_tblx); 5377 } 5378 return true; 5379 } 5380 5381 typedef int simd_permute_idx_fn(int i, int part, int elements); 5382 5383 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a, 5384 simd_permute_idx_fn *fn, int part) 5385 { 5386 MemOp esz = a->esz; 5387 int datasize = a->q ? 16 : 8; 5388 int elements = datasize >> esz; 5389 TCGv_i64 tcg_res[2], tcg_ele; 5390 5391 if (esz == MO_64 && !a->q) { 5392 return false; 5393 } 5394 if (!fp_access_check(s)) { 5395 return true; 5396 } 5397 5398 tcg_res[0] = tcg_temp_new_i64(); 5399 tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL; 5400 tcg_ele = tcg_temp_new_i64(); 5401 5402 for (int i = 0; i < elements; i++) { 5403 int o, w, idx; 5404 5405 idx = fn(i, part, elements); 5406 read_vec_element(s, tcg_ele, (idx & elements ? 
a->rm : a->rn), 5407 idx & (elements - 1), esz); 5408 5409 w = (i << (esz + 3)) / 64; 5410 o = (i << (esz + 3)) % 64; 5411 if (o == 0) { 5412 tcg_gen_mov_i64(tcg_res[w], tcg_ele); 5413 } else { 5414 tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz); 5415 } 5416 } 5417 5418 for (int i = a->q; i >= 0; --i) { 5419 write_vec_element(s, tcg_res[i], a->rd, i, MO_64); 5420 } 5421 clear_vec_high(s, a->q, a->rd); 5422 return true; 5423 } 5424 5425 static int permute_load_uzp(int i, int part, int elements) 5426 { 5427 return 2 * i + part; 5428 } 5429 5430 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0) 5431 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1) 5432 5433 static int permute_load_trn(int i, int part, int elements) 5434 { 5435 return (i & 1) * elements + (i & ~1) + part; 5436 } 5437 5438 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0) 5439 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1) 5440 5441 static int permute_load_zip(int i, int part, int elements) 5442 { 5443 return (i & 1) * elements + ((part * elements + i) >> 1); 5444 } 5445 5446 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0) 5447 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1) 5448 5449 /* 5450 * Cryptographic AES, SHA, SHA512 5451 */ 5452 5453 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese) 5454 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd) 5455 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc) 5456 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc) 5457 5458 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c) 5459 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p) 5460 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m) 5461 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0) 5462 5463 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h) 5464 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2) 5465 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1) 5466 5467 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h) 5468 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1) 5469 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0) 5470 5471 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h) 5472 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2) 5473 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1) 5474 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1) 5475 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1) 5476 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2) 5477 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey) 5478 5479 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0) 5480 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e) 5481 5482 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3) 5483 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax) 5484 5485 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a) 5486 { 5487 if (!dc_isar_feature(aa64_sm3, s)) { 5488 return false; 5489 } 5490 if (fp_access_check(s)) { 5491 
TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 5492 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 5493 TCGv_i32 tcg_op3 = tcg_temp_new_i32(); 5494 TCGv_i32 tcg_res = tcg_temp_new_i32(); 5495 5496 read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32); 5497 read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32); 5498 read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32); 5499 5500 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); 5501 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); 5502 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); 5503 tcg_gen_rotri_i32(tcg_res, tcg_res, 25); 5504 5505 /* Clear the whole register first, then store bits [127:96]. */ 5506 clear_vec(s, a->rd); 5507 write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32); 5508 } 5509 return true; 5510 } 5511 5512 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn) 5513 { 5514 if (fp_access_check(s)) { 5515 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn); 5516 } 5517 return true; 5518 } 5519 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a) 5520 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b) 5521 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a) 5522 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b) 5523 5524 static bool trans_XAR(DisasContext *s, arg_XAR *a) 5525 { 5526 if (!dc_isar_feature(aa64_sha3, s)) { 5527 return false; 5528 } 5529 if (fp_access_check(s)) { 5530 gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd), 5531 vec_full_reg_offset(s, a->rn), 5532 vec_full_reg_offset(s, a->rm), a->imm, 16, 5533 vec_full_reg_size(s)); 5534 } 5535 return true; 5536 } 5537 5538 /* 5539 * Advanced SIMD copy 5540 */ 5541 5542 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx) 5543 { 5544 unsigned esz = ctz32(imm); 5545 if (esz <= MO_64) { 5546 *pesz = esz; 5547 *pidx = imm >> (esz + 1); 5548 return true; 5549 } 5550 return false; 5551 } 5552 5553 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a) 5554 { 5555 MemOp esz; 5556 unsigned idx; 5557 5558 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5559 return false; 5560 } 5561 if (fp_access_check(s)) { 5562 /* 5563 * This instruction just extracts the specified element and 5564 * zero-extends it into the bottom of the destination register. 5565 */ 5566 TCGv_i64 tmp = tcg_temp_new_i64(); 5567 read_vec_element(s, tmp, a->rn, idx, esz); 5568 write_fp_dreg(s, a->rd, tmp); 5569 } 5570 return true; 5571 } 5572 5573 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a) 5574 { 5575 MemOp esz; 5576 unsigned idx; 5577 5578 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5579 return false; 5580 } 5581 if (esz == MO_64 && !a->q) { 5582 return false; 5583 } 5584 if (fp_access_check(s)) { 5585 tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd), 5586 vec_reg_offset(s, a->rn, idx, esz), 5587 a->q ? 16 : 8, vec_full_reg_size(s)); 5588 } 5589 return true; 5590 } 5591 5592 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a) 5593 { 5594 MemOp esz; 5595 unsigned idx; 5596 5597 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5598 return false; 5599 } 5600 if (esz == MO_64 && !a->q) { 5601 return false; 5602 } 5603 if (fp_access_check(s)) { 5604 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 5605 a->q ? 
16 : 8, vec_full_reg_size(s), 5606 cpu_reg(s, a->rn)); 5607 } 5608 return true; 5609 } 5610 5611 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed) 5612 { 5613 MemOp esz; 5614 unsigned idx; 5615 5616 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5617 return false; 5618 } 5619 if (is_signed) { 5620 if (esz == MO_64 || (esz == MO_32 && !a->q)) { 5621 return false; 5622 } 5623 } else { 5624 if (esz == MO_64 ? !a->q : a->q) { 5625 return false; 5626 } 5627 } 5628 if (fp_access_check(s)) { 5629 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 5630 read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed); 5631 if (is_signed && !a->q) { 5632 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5633 } 5634 } 5635 return true; 5636 } 5637 5638 TRANS(SMOV, do_smov_umov, a, MO_SIGN) 5639 TRANS(UMOV, do_smov_umov, a, 0) 5640 5641 static bool trans_INS_general(DisasContext *s, arg_INS_general *a) 5642 { 5643 MemOp esz; 5644 unsigned idx; 5645 5646 if (!decode_esz_idx(a->imm, &esz, &idx)) { 5647 return false; 5648 } 5649 if (fp_access_check(s)) { 5650 write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz); 5651 clear_vec_high(s, true, a->rd); 5652 } 5653 return true; 5654 } 5655 5656 static bool trans_INS_element(DisasContext *s, arg_INS_element *a) 5657 { 5658 MemOp esz; 5659 unsigned didx, sidx; 5660 5661 if (!decode_esz_idx(a->di, &esz, &didx)) { 5662 return false; 5663 } 5664 sidx = a->si >> esz; 5665 if (fp_access_check(s)) { 5666 TCGv_i64 tmp = tcg_temp_new_i64(); 5667 5668 read_vec_element(s, tmp, a->rn, sidx, esz); 5669 write_vec_element(s, tmp, a->rd, didx, esz); 5670 5671 /* INS is considered a 128-bit write for SVE. */ 5672 clear_vec_high(s, true, a->rd); 5673 } 5674 return true; 5675 } 5676 5677 /* 5678 * Advanced SIMD three same 5679 */ 5680 5681 typedef struct FPScalar { 5682 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5683 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5684 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); 5685 } FPScalar; 5686 5687 static bool do_fp3_scalar_with_fpsttype(DisasContext *s, arg_rrr_e *a, 5688 const FPScalar *f, int mergereg, 5689 ARMFPStatusFlavour fpsttype) 5690 { 5691 switch (a->esz) { 5692 case MO_64: 5693 if (fp_access_check(s)) { 5694 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5695 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5696 f->gen_d(t0, t0, t1, fpstatus_ptr(fpsttype)); 5697 write_fp_dreg_merging(s, a->rd, mergereg, t0); 5698 } 5699 break; 5700 case MO_32: 5701 if (fp_access_check(s)) { 5702 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5703 TCGv_i32 t1 = read_fp_sreg(s, a->rm); 5704 f->gen_s(t0, t0, t1, fpstatus_ptr(fpsttype)); 5705 write_fp_sreg_merging(s, a->rd, mergereg, t0); 5706 } 5707 break; 5708 case MO_16: 5709 if (!dc_isar_feature(aa64_fp16, s)) { 5710 return false; 5711 } 5712 if (fp_access_check(s)) { 5713 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5714 TCGv_i32 t1 = read_fp_hreg(s, a->rm); 5715 f->gen_h(t0, t0, t1, fpstatus_ptr(fpsttype)); 5716 write_fp_hreg_merging(s, a->rd, mergereg, t0); 5717 } 5718 break; 5719 default: 5720 return false; 5721 } 5722 return true; 5723 } 5724 5725 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, 5726 int mergereg) 5727 { 5728 return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, 5729 a->esz == MO_16 ? 5730 FPST_A64_F16 : FPST_A64); 5731 } 5732 5733 static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, 5734 const FPScalar *fnormal, const FPScalar *fah, 5735 int mergereg) 5736 { 5737 return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? 
fah : fnormal, 5738 mergereg, select_ah_fpst(s, a->esz)); 5739 } 5740 5741 /* Some insns need to call different helpers when FPCR.AH == 1 */ 5742 static bool do_fp3_scalar_2fn(DisasContext *s, arg_rrr_e *a, 5743 const FPScalar *fnormal, 5744 const FPScalar *fah, 5745 int mergereg) 5746 { 5747 return do_fp3_scalar(s, a, s->fpcr_ah ? fah : fnormal, mergereg); 5748 } 5749 5750 static const FPScalar f_scalar_fadd = { 5751 gen_helper_vfp_addh, 5752 gen_helper_vfp_adds, 5753 gen_helper_vfp_addd, 5754 }; 5755 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd, a->rn) 5756 5757 static const FPScalar f_scalar_fsub = { 5758 gen_helper_vfp_subh, 5759 gen_helper_vfp_subs, 5760 gen_helper_vfp_subd, 5761 }; 5762 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub, a->rn) 5763 5764 static const FPScalar f_scalar_fdiv = { 5765 gen_helper_vfp_divh, 5766 gen_helper_vfp_divs, 5767 gen_helper_vfp_divd, 5768 }; 5769 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv, a->rn) 5770 5771 static const FPScalar f_scalar_fmul = { 5772 gen_helper_vfp_mulh, 5773 gen_helper_vfp_muls, 5774 gen_helper_vfp_muld, 5775 }; 5776 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul, a->rn) 5777 5778 static const FPScalar f_scalar_fmax = { 5779 gen_helper_vfp_maxh, 5780 gen_helper_vfp_maxs, 5781 gen_helper_vfp_maxd, 5782 }; 5783 static const FPScalar f_scalar_fmax_ah = { 5784 gen_helper_vfp_ah_maxh, 5785 gen_helper_vfp_ah_maxs, 5786 gen_helper_vfp_ah_maxd, 5787 }; 5788 TRANS(FMAX_s, do_fp3_scalar_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah, a->rn) 5789 5790 static const FPScalar f_scalar_fmin = { 5791 gen_helper_vfp_minh, 5792 gen_helper_vfp_mins, 5793 gen_helper_vfp_mind, 5794 }; 5795 static const FPScalar f_scalar_fmin_ah = { 5796 gen_helper_vfp_ah_minh, 5797 gen_helper_vfp_ah_mins, 5798 gen_helper_vfp_ah_mind, 5799 }; 5800 TRANS(FMIN_s, do_fp3_scalar_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah, a->rn) 5801 5802 static const FPScalar f_scalar_fmaxnm = { 5803 gen_helper_vfp_maxnumh, 5804 gen_helper_vfp_maxnums, 5805 gen_helper_vfp_maxnumd, 5806 }; 5807 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm, a->rn) 5808 5809 static const FPScalar f_scalar_fminnm = { 5810 gen_helper_vfp_minnumh, 5811 gen_helper_vfp_minnums, 5812 gen_helper_vfp_minnumd, 5813 }; 5814 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm, a->rn) 5815 5816 static const FPScalar f_scalar_fmulx = { 5817 gen_helper_advsimd_mulxh, 5818 gen_helper_vfp_mulxs, 5819 gen_helper_vfp_mulxd, 5820 }; 5821 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx, a->rn) 5822 5823 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5824 { 5825 gen_helper_vfp_mulh(d, n, m, s); 5826 gen_vfp_negh(d, d); 5827 } 5828 5829 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5830 { 5831 gen_helper_vfp_muls(d, n, m, s); 5832 gen_vfp_negs(d, d); 5833 } 5834 5835 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5836 { 5837 gen_helper_vfp_muld(d, n, m, s); 5838 gen_vfp_negd(d, d); 5839 } 5840 5841 static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5842 { 5843 gen_helper_vfp_mulh(d, n, m, s); 5844 gen_vfp_ah_negh(d, d); 5845 } 5846 5847 static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5848 { 5849 gen_helper_vfp_muls(d, n, m, s); 5850 gen_vfp_ah_negs(d, d); 5851 } 5852 5853 static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5854 { 5855 gen_helper_vfp_muld(d, n, m, s); 5856 gen_vfp_ah_negd(d, d); 5857 } 5858 5859 static const FPScalar f_scalar_fnmul = { 5860 gen_fnmul_h, 5861 
gen_fnmul_s, 5862 gen_fnmul_d, 5863 }; 5864 static const FPScalar f_scalar_ah_fnmul = { 5865 gen_fnmul_ah_h, 5866 gen_fnmul_ah_s, 5867 gen_fnmul_ah_d, 5868 }; 5869 TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn) 5870 5871 static const FPScalar f_scalar_fcmeq = { 5872 gen_helper_advsimd_ceq_f16, 5873 gen_helper_neon_ceq_f32, 5874 gen_helper_neon_ceq_f64, 5875 }; 5876 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq, a->rm) 5877 5878 static const FPScalar f_scalar_fcmge = { 5879 gen_helper_advsimd_cge_f16, 5880 gen_helper_neon_cge_f32, 5881 gen_helper_neon_cge_f64, 5882 }; 5883 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge, a->rm) 5884 5885 static const FPScalar f_scalar_fcmgt = { 5886 gen_helper_advsimd_cgt_f16, 5887 gen_helper_neon_cgt_f32, 5888 gen_helper_neon_cgt_f64, 5889 }; 5890 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt, a->rm) 5891 5892 static const FPScalar f_scalar_facge = { 5893 gen_helper_advsimd_acge_f16, 5894 gen_helper_neon_acge_f32, 5895 gen_helper_neon_acge_f64, 5896 }; 5897 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge, a->rm) 5898 5899 static const FPScalar f_scalar_facgt = { 5900 gen_helper_advsimd_acgt_f16, 5901 gen_helper_neon_acgt_f32, 5902 gen_helper_neon_acgt_f64, 5903 }; 5904 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt, a->rm) 5905 5906 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5907 { 5908 gen_helper_vfp_subh(d, n, m, s); 5909 gen_vfp_absh(d, d); 5910 } 5911 5912 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5913 { 5914 gen_helper_vfp_subs(d, n, m, s); 5915 gen_vfp_abss(d, d); 5916 } 5917 5918 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5919 { 5920 gen_helper_vfp_subd(d, n, m, s); 5921 gen_vfp_absd(d, d); 5922 } 5923 5924 static void gen_fabd_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5925 { 5926 gen_helper_vfp_subh(d, n, m, s); 5927 gen_vfp_ah_absh(d, d); 5928 } 5929 5930 static void gen_fabd_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5931 { 5932 gen_helper_vfp_subs(d, n, m, s); 5933 gen_vfp_ah_abss(d, d); 5934 } 5935 5936 static void gen_fabd_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5937 { 5938 gen_helper_vfp_subd(d, n, m, s); 5939 gen_vfp_ah_absd(d, d); 5940 } 5941 5942 static const FPScalar f_scalar_fabd = { 5943 gen_fabd_h, 5944 gen_fabd_s, 5945 gen_fabd_d, 5946 }; 5947 static const FPScalar f_scalar_ah_fabd = { 5948 gen_fabd_ah_h, 5949 gen_fabd_ah_s, 5950 gen_fabd_ah_d, 5951 }; 5952 TRANS(FABD_s, do_fp3_scalar_2fn, a, &f_scalar_fabd, &f_scalar_ah_fabd, a->rn) 5953 5954 static const FPScalar f_scalar_frecps = { 5955 gen_helper_recpsf_f16, 5956 gen_helper_recpsf_f32, 5957 gen_helper_recpsf_f64, 5958 }; 5959 static const FPScalar f_scalar_ah_frecps = { 5960 gen_helper_recpsf_ah_f16, 5961 gen_helper_recpsf_ah_f32, 5962 gen_helper_recpsf_ah_f64, 5963 }; 5964 TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a, 5965 &f_scalar_frecps, &f_scalar_ah_frecps, a->rn) 5966 5967 static const FPScalar f_scalar_frsqrts = { 5968 gen_helper_rsqrtsf_f16, 5969 gen_helper_rsqrtsf_f32, 5970 gen_helper_rsqrtsf_f64, 5971 }; 5972 static const FPScalar f_scalar_ah_frsqrts = { 5973 gen_helper_rsqrtsf_ah_f16, 5974 gen_helper_rsqrtsf_ah_f32, 5975 gen_helper_rsqrtsf_ah_f64, 5976 }; 5977 TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a, 5978 &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn) 5979 5980 static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, 5981 const FPScalar *f, bool swap) 5982 { 5983 switch (a->esz) { 5984 case MO_64: 5985 if 
(fp_access_check(s)) { 5986 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5987 TCGv_i64 t1 = tcg_constant_i64(0); 5988 if (swap) { 5989 f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64)); 5990 } else { 5991 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 5992 } 5993 write_fp_dreg(s, a->rd, t0); 5994 } 5995 break; 5996 case MO_32: 5997 if (fp_access_check(s)) { 5998 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5999 TCGv_i32 t1 = tcg_constant_i32(0); 6000 if (swap) { 6001 f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64)); 6002 } else { 6003 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 6004 } 6005 write_fp_sreg(s, a->rd, t0); 6006 } 6007 break; 6008 case MO_16: 6009 if (!dc_isar_feature(aa64_fp16, s)) { 6010 return false; 6011 } 6012 if (fp_access_check(s)) { 6013 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 6014 TCGv_i32 t1 = tcg_constant_i32(0); 6015 if (swap) { 6016 f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16)); 6017 } else { 6018 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 6019 } 6020 write_fp_sreg(s, a->rd, t0); 6021 } 6022 break; 6023 default: 6024 return false; 6025 } 6026 return true; 6027 } 6028 6029 TRANS(FCMEQ0_s, do_fcmp0_s, a, &f_scalar_fcmeq, false) 6030 TRANS(FCMGT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, false) 6031 TRANS(FCMGE0_s, do_fcmp0_s, a, &f_scalar_fcmge, false) 6032 TRANS(FCMLT0_s, do_fcmp0_s, a, &f_scalar_fcmgt, true) 6033 TRANS(FCMLE0_s, do_fcmp0_s, a, &f_scalar_fcmge, true) 6034 6035 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a, 6036 MemOp sgn_n, MemOp sgn_m, 6037 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp), 6038 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 6039 { 6040 TCGv_i64 t0, t1, t2, qc; 6041 MemOp esz = a->esz; 6042 6043 if (!fp_access_check(s)) { 6044 return true; 6045 } 6046 6047 t0 = tcg_temp_new_i64(); 6048 t1 = tcg_temp_new_i64(); 6049 t2 = tcg_temp_new_i64(); 6050 qc = tcg_temp_new_i64(); 6051 read_vec_element(s, t1, a->rn, 0, esz | sgn_n); 6052 read_vec_element(s, t2, a->rm, 0, esz | sgn_m); 6053 tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 6054 6055 if (esz == MO_64) { 6056 gen_d(t0, qc, t1, t2); 6057 } else { 6058 gen_bhs(t0, qc, t1, t2, esz); 6059 tcg_gen_ext_i64(t0, t0, esz); 6060 } 6061 6062 write_fp_dreg(s, a->rd, t0); 6063 tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 6064 return true; 6065 } 6066 6067 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d) 6068 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d) 6069 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d) 6070 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d) 6071 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d) 6072 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d) 6073 6074 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a, 6075 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64)) 6076 { 6077 if (fp_access_check(s)) { 6078 TCGv_i64 t0 = tcg_temp_new_i64(); 6079 TCGv_i64 t1 = tcg_temp_new_i64(); 6080 6081 read_vec_element(s, t0, a->rn, 0, MO_64); 6082 read_vec_element(s, t1, a->rm, 0, MO_64); 6083 fn(t0, t0, t1); 6084 write_fp_dreg(s, a->rd, t0); 6085 } 6086 return true; 6087 } 6088 6089 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64) 6090 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64) 6091 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64) 6092 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64) 6093 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64) 6094 TRANS(SUB_s, do_int3_scalar_d, a, 
tcg_gen_sub_i64) 6095 6096 typedef struct ENVScalar2 { 6097 NeonGenTwoOpEnvFn *gen_bhs[3]; 6098 NeonGenTwo64OpEnvFn *gen_d; 6099 } ENVScalar2; 6100 6101 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f) 6102 { 6103 if (!fp_access_check(s)) { 6104 return true; 6105 } 6106 if (a->esz == MO_64) { 6107 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 6108 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 6109 f->gen_d(t0, tcg_env, t0, t1); 6110 write_fp_dreg(s, a->rd, t0); 6111 } else { 6112 TCGv_i32 t0 = tcg_temp_new_i32(); 6113 TCGv_i32 t1 = tcg_temp_new_i32(); 6114 6115 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6116 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 6117 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 6118 write_fp_sreg(s, a->rd, t0); 6119 } 6120 return true; 6121 } 6122 6123 static const ENVScalar2 f_scalar_sqshl = { 6124 { gen_helper_neon_qshl_s8, 6125 gen_helper_neon_qshl_s16, 6126 gen_helper_neon_qshl_s32 }, 6127 gen_helper_neon_qshl_s64, 6128 }; 6129 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl) 6130 6131 static const ENVScalar2 f_scalar_uqshl = { 6132 { gen_helper_neon_qshl_u8, 6133 gen_helper_neon_qshl_u16, 6134 gen_helper_neon_qshl_u32 }, 6135 gen_helper_neon_qshl_u64, 6136 }; 6137 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl) 6138 6139 static const ENVScalar2 f_scalar_sqrshl = { 6140 { gen_helper_neon_qrshl_s8, 6141 gen_helper_neon_qrshl_s16, 6142 gen_helper_neon_qrshl_s32 }, 6143 gen_helper_neon_qrshl_s64, 6144 }; 6145 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl) 6146 6147 static const ENVScalar2 f_scalar_uqrshl = { 6148 { gen_helper_neon_qrshl_u8, 6149 gen_helper_neon_qrshl_u16, 6150 gen_helper_neon_qrshl_u32 }, 6151 gen_helper_neon_qrshl_u64, 6152 }; 6153 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl) 6154 6155 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a, 6156 const ENVScalar2 *f) 6157 { 6158 if (a->esz == MO_16 || a->esz == MO_32) { 6159 return do_env_scalar2(s, a, f); 6160 } 6161 return false; 6162 } 6163 6164 static const ENVScalar2 f_scalar_sqdmulh = { 6165 { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 } 6166 }; 6167 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh) 6168 6169 static const ENVScalar2 f_scalar_sqrdmulh = { 6170 { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 } 6171 }; 6172 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh) 6173 6174 typedef struct ENVScalar3 { 6175 NeonGenThreeOpEnvFn *gen_hs[2]; 6176 } ENVScalar3; 6177 6178 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a, 6179 const ENVScalar3 *f) 6180 { 6181 TCGv_i32 t0, t1, t2; 6182 6183 if (a->esz != MO_16 && a->esz != MO_32) { 6184 return false; 6185 } 6186 if (!fp_access_check(s)) { 6187 return true; 6188 } 6189 6190 t0 = tcg_temp_new_i32(); 6191 t1 = tcg_temp_new_i32(); 6192 t2 = tcg_temp_new_i32(); 6193 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6194 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 6195 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 6196 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 6197 write_fp_sreg(s, a->rd, t0); 6198 return true; 6199 } 6200 6201 static const ENVScalar3 f_scalar_sqrdmlah = { 6202 { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 } 6203 }; 6204 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah) 6205 6206 static const ENVScalar3 f_scalar_sqrdmlsh = { 6207 { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 } 6208 }; 6209 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh) 6210 6211 
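/*
 * Advanced SIMD scalar three-same: integer compares.
 * These operate only on the low 64 bits of the source registers;
 * tcg_gen_negsetcond_i64() turns the boolean comparison result into
 * the all-ones/all-zeroes value the architecture specifies, and
 * CMTST maps onto the "test bits, not equal to zero" condition.
 */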
static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond) 6212 { 6213 if (fp_access_check(s)) { 6214 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 6215 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 6216 tcg_gen_negsetcond_i64(cond, t0, t0, t1); 6217 write_fp_dreg(s, a->rd, t0); 6218 } 6219 return true; 6220 } 6221 6222 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT) 6223 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU) 6224 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE) 6225 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) 6226 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) 6227 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) 6228 6229 static bool do_fp3_vector_with_fpsttype(DisasContext *s, arg_qrrr_e *a, 6230 int data, 6231 gen_helper_gvec_3_ptr * const fns[3], 6232 ARMFPStatusFlavour fpsttype) 6233 { 6234 MemOp esz = a->esz; 6235 int check = fp_access_check_vector_hsd(s, a->q, esz); 6236 6237 if (check <= 0) { 6238 return check == 0; 6239 } 6240 6241 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, fpsttype, 6242 data, fns[esz - 1]); 6243 return true; 6244 } 6245 6246 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, 6247 gen_helper_gvec_3_ptr * const fns[3]) 6248 { 6249 return do_fp3_vector_with_fpsttype(s, a, data, fns, 6250 a->esz == MO_16 ? 6251 FPST_A64_F16 : FPST_A64); 6252 } 6253 6254 static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data, 6255 gen_helper_gvec_3_ptr * const fnormal[3], 6256 gen_helper_gvec_3_ptr * const fah[3]) 6257 { 6258 return do_fp3_vector(s, a, data, s->fpcr_ah ? fah : fnormal); 6259 } 6260 6261 static bool do_fp3_vector_ah_2fn(DisasContext *s, arg_qrrr_e *a, int data, 6262 gen_helper_gvec_3_ptr * const fnormal[3], 6263 gen_helper_gvec_3_ptr * const fah[3]) 6264 { 6265 return do_fp3_vector_with_fpsttype(s, a, data, s->fpcr_ah ? 
fah : fnormal, 6266 select_ah_fpst(s, a->esz)); 6267 } 6268 6269 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { 6270 gen_helper_gvec_fadd_h, 6271 gen_helper_gvec_fadd_s, 6272 gen_helper_gvec_fadd_d, 6273 }; 6274 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd) 6275 6276 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = { 6277 gen_helper_gvec_fsub_h, 6278 gen_helper_gvec_fsub_s, 6279 gen_helper_gvec_fsub_d, 6280 }; 6281 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub) 6282 6283 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = { 6284 gen_helper_gvec_fdiv_h, 6285 gen_helper_gvec_fdiv_s, 6286 gen_helper_gvec_fdiv_d, 6287 }; 6288 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv) 6289 6290 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = { 6291 gen_helper_gvec_fmul_h, 6292 gen_helper_gvec_fmul_s, 6293 gen_helper_gvec_fmul_d, 6294 }; 6295 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul) 6296 6297 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { 6298 gen_helper_gvec_fmax_h, 6299 gen_helper_gvec_fmax_s, 6300 gen_helper_gvec_fmax_d, 6301 }; 6302 static gen_helper_gvec_3_ptr * const f_vector_fmax_ah[3] = { 6303 gen_helper_gvec_ah_fmax_h, 6304 gen_helper_gvec_ah_fmax_s, 6305 gen_helper_gvec_ah_fmax_d, 6306 }; 6307 TRANS(FMAX_v, do_fp3_vector_2fn, a, 0, f_vector_fmax, f_vector_fmax_ah) 6308 6309 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { 6310 gen_helper_gvec_fmin_h, 6311 gen_helper_gvec_fmin_s, 6312 gen_helper_gvec_fmin_d, 6313 }; 6314 static gen_helper_gvec_3_ptr * const f_vector_fmin_ah[3] = { 6315 gen_helper_gvec_ah_fmin_h, 6316 gen_helper_gvec_ah_fmin_s, 6317 gen_helper_gvec_ah_fmin_d, 6318 }; 6319 TRANS(FMIN_v, do_fp3_vector_2fn, a, 0, f_vector_fmin, f_vector_fmin_ah) 6320 6321 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { 6322 gen_helper_gvec_fmaxnum_h, 6323 gen_helper_gvec_fmaxnum_s, 6324 gen_helper_gvec_fmaxnum_d, 6325 }; 6326 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm) 6327 6328 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = { 6329 gen_helper_gvec_fminnum_h, 6330 gen_helper_gvec_fminnum_s, 6331 gen_helper_gvec_fminnum_d, 6332 }; 6333 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm) 6334 6335 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = { 6336 gen_helper_gvec_fmulx_h, 6337 gen_helper_gvec_fmulx_s, 6338 gen_helper_gvec_fmulx_d, 6339 }; 6340 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx) 6341 6342 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = { 6343 gen_helper_gvec_vfma_h, 6344 gen_helper_gvec_vfma_s, 6345 gen_helper_gvec_vfma_d, 6346 }; 6347 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla) 6348 6349 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { 6350 gen_helper_gvec_vfms_h, 6351 gen_helper_gvec_vfms_s, 6352 gen_helper_gvec_vfms_d, 6353 }; 6354 static gen_helper_gvec_3_ptr * const f_vector_fmls_ah[3] = { 6355 gen_helper_gvec_ah_vfms_h, 6356 gen_helper_gvec_ah_vfms_s, 6357 gen_helper_gvec_ah_vfms_d, 6358 }; 6359 TRANS(FMLS_v, do_fp3_vector_2fn, a, 0, f_vector_fmls, f_vector_fmls_ah) 6360 6361 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { 6362 gen_helper_gvec_fceq_h, 6363 gen_helper_gvec_fceq_s, 6364 gen_helper_gvec_fceq_d, 6365 }; 6366 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq) 6367 6368 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = { 6369 gen_helper_gvec_fcge_h, 6370 gen_helper_gvec_fcge_s, 6371 gen_helper_gvec_fcge_d, 6372 }; 6373 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge) 6374 6375 static gen_helper_gvec_3_ptr * const 
f_vector_fcmgt[3] = { 6376 gen_helper_gvec_fcgt_h, 6377 gen_helper_gvec_fcgt_s, 6378 gen_helper_gvec_fcgt_d, 6379 }; 6380 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt) 6381 6382 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = { 6383 gen_helper_gvec_facge_h, 6384 gen_helper_gvec_facge_s, 6385 gen_helper_gvec_facge_d, 6386 }; 6387 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge) 6388 6389 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = { 6390 gen_helper_gvec_facgt_h, 6391 gen_helper_gvec_facgt_s, 6392 gen_helper_gvec_facgt_d, 6393 }; 6394 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt) 6395 6396 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { 6397 gen_helper_gvec_fabd_h, 6398 gen_helper_gvec_fabd_s, 6399 gen_helper_gvec_fabd_d, 6400 }; 6401 static gen_helper_gvec_3_ptr * const f_vector_ah_fabd[3] = { 6402 gen_helper_gvec_ah_fabd_h, 6403 gen_helper_gvec_ah_fabd_s, 6404 gen_helper_gvec_ah_fabd_d, 6405 }; 6406 TRANS(FABD_v, do_fp3_vector_2fn, a, 0, f_vector_fabd, f_vector_ah_fabd) 6407 6408 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { 6409 gen_helper_gvec_recps_h, 6410 gen_helper_gvec_recps_s, 6411 gen_helper_gvec_recps_d, 6412 }; 6413 static gen_helper_gvec_3_ptr * const f_vector_ah_frecps[3] = { 6414 gen_helper_gvec_ah_recps_h, 6415 gen_helper_gvec_ah_recps_s, 6416 gen_helper_gvec_ah_recps_d, 6417 }; 6418 TRANS(FRECPS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frecps, f_vector_ah_frecps) 6419 6420 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { 6421 gen_helper_gvec_rsqrts_h, 6422 gen_helper_gvec_rsqrts_s, 6423 gen_helper_gvec_rsqrts_d, 6424 }; 6425 static gen_helper_gvec_3_ptr * const f_vector_ah_frsqrts[3] = { 6426 gen_helper_gvec_ah_rsqrts_h, 6427 gen_helper_gvec_ah_rsqrts_s, 6428 gen_helper_gvec_ah_rsqrts_d, 6429 }; 6430 TRANS(FRSQRTS_v, do_fp3_vector_ah_2fn, a, 0, f_vector_frsqrts, f_vector_ah_frsqrts) 6431 6432 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { 6433 gen_helper_gvec_faddp_h, 6434 gen_helper_gvec_faddp_s, 6435 gen_helper_gvec_faddp_d, 6436 }; 6437 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp) 6438 6439 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { 6440 gen_helper_gvec_fmaxp_h, 6441 gen_helper_gvec_fmaxp_s, 6442 gen_helper_gvec_fmaxp_d, 6443 }; 6444 static gen_helper_gvec_3_ptr * const f_vector_ah_fmaxp[3] = { 6445 gen_helper_gvec_ah_fmaxp_h, 6446 gen_helper_gvec_ah_fmaxp_s, 6447 gen_helper_gvec_ah_fmaxp_d, 6448 }; 6449 TRANS(FMAXP_v, do_fp3_vector_2fn, a, 0, f_vector_fmaxp, f_vector_ah_fmaxp) 6450 6451 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { 6452 gen_helper_gvec_fminp_h, 6453 gen_helper_gvec_fminp_s, 6454 gen_helper_gvec_fminp_d, 6455 }; 6456 static gen_helper_gvec_3_ptr * const f_vector_ah_fminp[3] = { 6457 gen_helper_gvec_ah_fminp_h, 6458 gen_helper_gvec_ah_fminp_s, 6459 gen_helper_gvec_ah_fminp_d, 6460 }; 6461 TRANS(FMINP_v, do_fp3_vector_2fn, a, 0, f_vector_fminp, f_vector_ah_fminp) 6462 6463 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { 6464 gen_helper_gvec_fmaxnump_h, 6465 gen_helper_gvec_fmaxnump_s, 6466 gen_helper_gvec_fmaxnump_d, 6467 }; 6468 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp) 6469 6470 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = { 6471 gen_helper_gvec_fminnump_h, 6472 gen_helper_gvec_fminnump_s, 6473 gen_helper_gvec_fminnump_d, 6474 }; 6475 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp) 6476 6477 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) 6478 { 6479 if 
(fp_access_check(s)) { 6480 int data = (is_2 << 1) | is_s; 6481 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 6482 vec_full_reg_offset(s, a->rn), 6483 vec_full_reg_offset(s, a->rm), tcg_env, 6484 a->q ? 16 : 8, vec_full_reg_size(s), 6485 data, gen_helper_gvec_fmlal_a64); 6486 } 6487 return true; 6488 } 6489 6490 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false) 6491 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false) 6492 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true) 6493 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true) 6494 6495 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp) 6496 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp) 6497 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp) 6498 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp) 6499 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp) 6500 6501 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and) 6502 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc) 6503 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or) 6504 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc) 6505 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor) 6506 6507 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c) 6508 { 6509 if (fp_access_check(s)) { 6510 gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0); 6511 } 6512 return true; 6513 } 6514 6515 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm) 6516 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd) 6517 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn) 6518 6519 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc) 6520 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc) 6521 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc) 6522 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc) 6523 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc) 6524 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc) 6525 6526 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl) 6527 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl) 6528 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl) 6529 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl) 6530 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl) 6531 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl) 6532 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl) 6533 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl) 6534 6535 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add) 6536 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub) 6537 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd) 6538 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd) 6539 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub) 6540 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub) 6541 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd) 6542 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd) 6543 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax) 6544 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax) 6545 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin) 6546 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin) 6547 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba) 6548 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba) 6549 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd) 6550 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd) 6551 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul) 6552 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b) 6553 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla) 6554 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls) 6555 6556 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond) 6557 { 6558 if (a->esz == MO_64 && !a->q) { 6559 
return false; 6560 } 6561 if (fp_access_check(s)) { 6562 tcg_gen_gvec_cmp(cond, a->esz, 6563 vec_full_reg_offset(s, a->rd), 6564 vec_full_reg_offset(s, a->rn), 6565 vec_full_reg_offset(s, a->rm), 6566 a->q ? 16 : 8, vec_full_reg_size(s)); 6567 } 6568 return true; 6569 } 6570 6571 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT) 6572 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU) 6573 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE) 6574 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU) 6575 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ) 6576 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst) 6577 6578 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc) 6579 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc) 6580 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc) 6581 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc) 6582 6583 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a, 6584 gen_helper_gvec_4 *fn) 6585 { 6586 if (fp_access_check(s)) { 6587 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 6588 } 6589 return true; 6590 } 6591 6592 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a, 6593 gen_helper_gvec_4_ptr *fn) 6594 { 6595 if (fp_access_check(s)) { 6596 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 6597 } 6598 return true; 6599 } 6600 6601 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_4b) 6602 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_4b) 6603 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_4b) 6604 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot) 6605 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla) 6606 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b) 6607 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b) 6608 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b) 6609 6610 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) 6611 { 6612 if (!dc_isar_feature(aa64_bf16, s)) { 6613 return false; 6614 } 6615 if (fp_access_check(s)) { 6616 /* Q bit selects BFMLALB vs BFMLALT. */ 6617 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 6618 s->fpcr_ah ? FPST_AH : FPST_A64, a->q, 6619 gen_helper_gvec_bfmlal); 6620 } 6621 return true; 6622 } 6623 6624 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { 6625 gen_helper_gvec_fcaddh, 6626 gen_helper_gvec_fcadds, 6627 gen_helper_gvec_fcaddd, 6628 }; 6629 /* 6630 * Encode FPCR.AH into the data so the helper knows whether the 6631 * negations it does should avoid flipping the sign bit on a NaN 6632 */ 6633 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0 | (s->fpcr_ah << 1), 6634 f_vector_fcadd) 6635 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1 | (s->fpcr_ah << 1), 6636 f_vector_fcadd) 6637 6638 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) 6639 { 6640 static gen_helper_gvec_4_ptr * const fn[] = { 6641 [MO_16] = gen_helper_gvec_fcmlah, 6642 [MO_32] = gen_helper_gvec_fcmlas, 6643 [MO_64] = gen_helper_gvec_fcmlad, 6644 }; 6645 int check; 6646 6647 if (!dc_isar_feature(aa64_fcma, s)) { 6648 return false; 6649 } 6650 6651 check = fp_access_check_vector_hsd(s, a->q, a->esz); 6652 if (check <= 0) { 6653 return check == 0; 6654 } 6655 6656 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6657 a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64, 6658 a->rot | (s->fpcr_ah << 2), fn[a->esz]); 6659 return true; 6660 } 6661 6662 /* 6663 * Widening vector x vector/indexed. 6664 * 6665 * These read from the top or bottom half of a 128-bit vector. 6666 * After widening, optionally accumulate with a 128-bit vector. 6667 * Implement these inline, as the number of elements is limited 6668 * and the related SVE and SME operations on larger vectors use 6669 * even/odd elements instead of top/bottom half. 6670 * 6671 * If idx >= 0, operand 2 is indexed, otherwise vector. 6672 * If acc, operand 0 is loaded with rd. 6673 */ 6674 6675 /* For low half, iterating up. */ 6676 static bool do_3op_widening(DisasContext *s, MemOp memop, int top, 6677 int rd, int rn, int rm, int idx, 6678 NeonGenTwo64OpFn *fn, bool acc) 6679 { 6680 TCGv_i64 tcg_op0 = tcg_temp_new_i64(); 6681 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 6682 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 6683 MemOp esz = memop & MO_SIZE; 6684 int half = 8 >> esz; 6685 int top_swap, top_half; 6686 6687 /* There are no 64x64->128 bit operations. */ 6688 if (esz >= MO_64) { 6689 return false; 6690 } 6691 if (!fp_access_check(s)) { 6692 return true; 6693 } 6694 6695 if (idx >= 0) { 6696 read_vec_element(s, tcg_op2, rm, idx, memop); 6697 } 6698 6699 /* 6700 * For top half inputs, iterate forward; backward for bottom half. 6701 * This means the store to the destination will not occur until 6702 * overlapping inputs are consumed. 6703 * Use top_swap to conditionally invert the forward iteration index. 6704 */ 6705 top_swap = top ? 0 : half - 1; 6706 top_half = top ? half : 0; 6707 6708 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6709 int elt = elt_fwd ^ top_swap; 6710 6711 read_vec_element(s, tcg_op1, rn, elt + top_half, memop); 6712 if (idx < 0) { 6713 read_vec_element(s, tcg_op2, rm, elt + top_half, memop); 6714 } 6715 if (acc) { 6716 read_vec_element(s, tcg_op0, rd, elt, memop + 1); 6717 } 6718 fn(tcg_op0, tcg_op1, tcg_op2); 6719 write_vec_element(s, tcg_op0, rd, elt, esz + 1); 6720 } 6721 clear_vec_high(s, 1, rd); 6722 return true; 6723 } 6724 6725 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6726 { 6727 TCGv_i64 t = tcg_temp_new_i64(); 6728 tcg_gen_mul_i64(t, n, m); 6729 tcg_gen_add_i64(d, d, t); 6730 } 6731 6732 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6733 { 6734 TCGv_i64 t = tcg_temp_new_i64(); 6735 tcg_gen_mul_i64(t, n, m); 6736 tcg_gen_sub_i64(d, d, t); 6737 } 6738 6739 TRANS(SMULL_v, do_3op_widening, 6740 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6741 tcg_gen_mul_i64, false) 6742 TRANS(UMULL_v, do_3op_widening, 6743 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6744 tcg_gen_mul_i64, false) 6745 TRANS(SMLAL_v, do_3op_widening, 6746 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6747 gen_muladd_i64, true) 6748 TRANS(UMLAL_v, do_3op_widening, 6749 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6750 gen_muladd_i64, true) 6751 TRANS(SMLSL_v, do_3op_widening, 6752 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6753 gen_mulsub_i64, true) 6754 TRANS(UMLSL_v, do_3op_widening, 6755 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6756 gen_mulsub_i64, true) 6757 6758 TRANS(SMULL_vi, do_3op_widening, 6759 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6760 tcg_gen_mul_i64, false) 6761 TRANS(UMULL_vi, do_3op_widening, 6762 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 6763 tcg_gen_mul_i64, false) 6764 TRANS(SMLAL_vi, do_3op_widening, 6765 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6766 gen_muladd_i64, true) 6767 TRANS(UMLAL_vi,
do_3op_widening, 6768 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 6769 gen_muladd_i64, true) 6770 TRANS(SMLSL_vi, do_3op_widening, 6771 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6772 gen_mulsub_i64, true) 6773 TRANS(UMLSL_vi, do_3op_widening, 6774 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 6775 gen_mulsub_i64, true) 6776 6777 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6778 { 6779 TCGv_i64 t1 = tcg_temp_new_i64(); 6780 TCGv_i64 t2 = tcg_temp_new_i64(); 6781 6782 tcg_gen_sub_i64(t1, n, m); 6783 tcg_gen_sub_i64(t2, m, n); 6784 tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2); 6785 } 6786 6787 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6788 { 6789 TCGv_i64 t1 = tcg_temp_new_i64(); 6790 TCGv_i64 t2 = tcg_temp_new_i64(); 6791 6792 tcg_gen_sub_i64(t1, n, m); 6793 tcg_gen_sub_i64(t2, m, n); 6794 tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2); 6795 } 6796 6797 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6798 { 6799 TCGv_i64 t = tcg_temp_new_i64(); 6800 gen_sabd_i64(t, n, m); 6801 tcg_gen_add_i64(d, d, t); 6802 } 6803 6804 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6805 { 6806 TCGv_i64 t = tcg_temp_new_i64(); 6807 gen_uabd_i64(t, n, m); 6808 tcg_gen_add_i64(d, d, t); 6809 } 6810 6811 TRANS(SADDL_v, do_3op_widening, 6812 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6813 tcg_gen_add_i64, false) 6814 TRANS(UADDL_v, do_3op_widening, 6815 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6816 tcg_gen_add_i64, false) 6817 TRANS(SSUBL_v, do_3op_widening, 6818 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6819 tcg_gen_sub_i64, false) 6820 TRANS(USUBL_v, do_3op_widening, 6821 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6822 tcg_gen_sub_i64, false) 6823 TRANS(SABDL_v, do_3op_widening, 6824 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6825 gen_sabd_i64, false) 6826 TRANS(UABDL_v, do_3op_widening, 6827 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6828 gen_uabd_i64, false) 6829 TRANS(SABAL_v, do_3op_widening, 6830 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6831 gen_saba_i64, true) 6832 TRANS(UABAL_v, do_3op_widening, 6833 a->esz, a->q, a->rd, a->rn, a->rm, -1, 6834 gen_uaba_i64, true) 6835 6836 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6837 { 6838 tcg_gen_mul_i64(d, n, m); 6839 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d); 6840 } 6841 6842 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6843 { 6844 tcg_gen_mul_i64(d, n, m); 6845 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d); 6846 } 6847 6848 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6849 { 6850 TCGv_i64 t = tcg_temp_new_i64(); 6851 6852 tcg_gen_mul_i64(t, n, m); 6853 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 6854 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 6855 } 6856 6857 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6858 { 6859 TCGv_i64 t = tcg_temp_new_i64(); 6860 6861 tcg_gen_mul_i64(t, n, m); 6862 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 6863 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 6864 } 6865 6866 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6867 { 6868 TCGv_i64 t = tcg_temp_new_i64(); 6869 6870 tcg_gen_mul_i64(t, n, m); 6871 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 6872 tcg_gen_neg_i64(t, t); 6873 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 6874 } 6875 6876 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 6877 { 6878 TCGv_i64 t = tcg_temp_new_i64(); 6879 6880 tcg_gen_mul_i64(t, n, m); 6881 
gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 6882 tcg_gen_neg_i64(t, t); 6883 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 6884 } 6885 6886 TRANS(SQDMULL_v, do_3op_widening, 6887 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6888 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6889 TRANS(SQDMLAL_v, do_3op_widening, 6890 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6891 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6892 TRANS(SQDMLSL_v, do_3op_widening, 6893 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6894 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6895 6896 TRANS(SQDMULL_vi, do_3op_widening, 6897 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6898 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6899 TRANS(SQDMLAL_vi, do_3op_widening, 6900 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6901 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6902 TRANS(SQDMLSL_vi, do_3op_widening, 6903 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6904 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6905 6906 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a, 6907 MemOp sign, bool sub) 6908 { 6909 TCGv_i64 tcg_op0, tcg_op1; 6910 MemOp esz = a->esz; 6911 int half = 8 >> esz; 6912 bool top = a->q; 6913 int top_swap = top ? 0 : half - 1; 6914 int top_half = top ? half : 0; 6915 6916 /* There are no 64x64->128 bit operations. */ 6917 if (esz >= MO_64) { 6918 return false; 6919 } 6920 if (!fp_access_check(s)) { 6921 return true; 6922 } 6923 tcg_op0 = tcg_temp_new_i64(); 6924 tcg_op1 = tcg_temp_new_i64(); 6925 6926 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6927 int elt = elt_fwd ^ top_swap; 6928 6929 read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign); 6930 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); 6931 if (sub) { 6932 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); 6933 } else { 6934 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); 6935 } 6936 write_vec_element(s, tcg_op0, a->rd, elt, esz + 1); 6937 } 6938 clear_vec_high(s, 1, a->rd); 6939 return true; 6940 } 6941 6942 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false) 6943 TRANS(UADDW, do_addsub_wide, a, 0, false) 6944 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true) 6945 TRANS(USUBW, do_addsub_wide, a, 0, true) 6946 6947 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a, 6948 bool sub, bool round) 6949 { 6950 TCGv_i64 tcg_op0, tcg_op1; 6951 MemOp esz = a->esz; 6952 int half = 8 >> esz; 6953 bool top = a->q; 6954 int ebits = 8 << esz; 6955 uint64_t rbit = 1ull << (ebits - 1); 6956 int top_swap, top_half; 6957 6958 /* There are no 128x128->64 bit operations. */ 6959 if (esz >= MO_64) { 6960 return false; 6961 } 6962 if (!fp_access_check(s)) { 6963 return true; 6964 } 6965 tcg_op0 = tcg_temp_new_i64(); 6966 tcg_op1 = tcg_temp_new_i64(); 6967 6968 /* 6969 * For top half inputs, iterate backward; forward for bottom half. 6970 * This means the store to the destination will not occur until 6971 * overlapping inputs are consumed. 6972 */ 6973 top_swap = top ? half - 1 : 0; 6974 top_half = top ?
half : 0; 6975 6976 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6977 int elt = elt_fwd ^ top_swap; 6978 6979 read_vec_element(s, tcg_op1, a->rm, elt, esz + 1); 6980 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); 6981 if (sub) { 6982 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); 6983 } else { 6984 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); 6985 } 6986 if (round) { 6987 tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit); 6988 } 6989 tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits); 6990 write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz); 6991 } 6992 clear_vec_high(s, top, a->rd); 6993 return true; 6994 } 6995 6996 TRANS(ADDHN, do_addsub_highnarrow, a, false, false) 6997 TRANS(SUBHN, do_addsub_highnarrow, a, true, false) 6998 TRANS(RADDHN, do_addsub_highnarrow, a, false, true) 6999 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true) 7000 7001 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn) 7002 { 7003 if (fp_access_check(s)) { 7004 /* The Q field specifies lo/hi half input for these insns. */ 7005 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn); 7006 } 7007 return true; 7008 } 7009 7010 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h) 7011 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q) 7012 7013 /* 7014 * Advanced SIMD scalar/vector x indexed element 7015 */ 7016 7017 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) 7018 { 7019 switch (a->esz) { 7020 case MO_64: 7021 if (fp_access_check(s)) { 7022 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 7023 TCGv_i64 t1 = tcg_temp_new_i64(); 7024 7025 read_vec_element(s, t1, a->rm, a->idx, MO_64); 7026 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 7027 write_fp_dreg_merging(s, a->rd, a->rn, t0); 7028 } 7029 break; 7030 case MO_32: 7031 if (fp_access_check(s)) { 7032 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 7033 TCGv_i32 t1 = tcg_temp_new_i32(); 7034 7035 read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); 7036 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 7037 write_fp_sreg_merging(s, a->rd, a->rn, t0); 7038 } 7039 break; 7040 case MO_16: 7041 if (!dc_isar_feature(aa64_fp16, s)) { 7042 return false; 7043 } 7044 if (fp_access_check(s)) { 7045 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 7046 TCGv_i32 t1 = tcg_temp_new_i32(); 7047 7048 read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); 7049 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 7050 write_fp_hreg_merging(s, a->rd, a->rn, t0); 7051 } 7052 break; 7053 default: 7054 g_assert_not_reached(); 7055 } 7056 return true; 7057 } 7058 7059 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul) 7060 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx) 7061 7062 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) 7063 { 7064 switch (a->esz) { 7065 case MO_64: 7066 if (fp_access_check(s)) { 7067 TCGv_i64 t0 = read_fp_dreg(s, a->rd); 7068 TCGv_i64 t1 = read_fp_dreg(s, a->rn); 7069 TCGv_i64 t2 = tcg_temp_new_i64(); 7070 7071 read_vec_element(s, t2, a->rm, a->idx, MO_64); 7072 if (neg) { 7073 gen_vfp_maybe_ah_negd(s, t1, t1); 7074 } 7075 gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64)); 7076 write_fp_dreg_merging(s, a->rd, a->rd, t0); 7077 } 7078 break; 7079 case MO_32: 7080 if (fp_access_check(s)) { 7081 TCGv_i32 t0 = read_fp_sreg(s, a->rd); 7082 TCGv_i32 t1 = read_fp_sreg(s, a->rn); 7083 TCGv_i32 t2 = tcg_temp_new_i32(); 7084 7085 read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); 7086 if (neg) { 7087 gen_vfp_maybe_ah_negs(s, t1, t1); 7088 } 7089 gen_helper_vfp_muladds(t0, t1, t2, t0, 
fpstatus_ptr(FPST_A64)); 7090 write_fp_sreg_merging(s, a->rd, a->rd, t0); 7091 } 7092 break; 7093 case MO_16: 7094 if (!dc_isar_feature(aa64_fp16, s)) { 7095 return false; 7096 } 7097 if (fp_access_check(s)) { 7098 TCGv_i32 t0 = read_fp_hreg(s, a->rd); 7099 TCGv_i32 t1 = read_fp_hreg(s, a->rn); 7100 TCGv_i32 t2 = tcg_temp_new_i32(); 7101 7102 read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); 7103 if (neg) { 7104 gen_vfp_maybe_ah_negh(s, t1, t1); 7105 } 7106 gen_helper_advsimd_muladdh(t0, t1, t2, t0, 7107 fpstatus_ptr(FPST_A64_F16)); 7108 write_fp_hreg_merging(s, a->rd, a->rd, t0); 7109 } 7110 break; 7111 default: 7112 g_assert_not_reached(); 7113 } 7114 return true; 7115 } 7116 7117 TRANS(FMLA_si, do_fmla_scalar_idx, a, false) 7118 TRANS(FMLS_si, do_fmla_scalar_idx, a, true) 7119 7120 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a, 7121 const ENVScalar2 *f) 7122 { 7123 if (a->esz < MO_16 || a->esz > MO_32) { 7124 return false; 7125 } 7126 if (fp_access_check(s)) { 7127 TCGv_i32 t0 = tcg_temp_new_i32(); 7128 TCGv_i32 t1 = tcg_temp_new_i32(); 7129 7130 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 7131 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 7132 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 7133 write_fp_sreg(s, a->rd, t0); 7134 } 7135 return true; 7136 } 7137 7138 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh) 7139 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh) 7140 7141 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a, 7142 const ENVScalar3 *f) 7143 { 7144 if (a->esz < MO_16 || a->esz > MO_32) { 7145 return false; 7146 } 7147 if (fp_access_check(s)) { 7148 TCGv_i32 t0 = tcg_temp_new_i32(); 7149 TCGv_i32 t1 = tcg_temp_new_i32(); 7150 TCGv_i32 t2 = tcg_temp_new_i32(); 7151 7152 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 7153 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 7154 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 7155 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 7156 write_fp_sreg(s, a->rd, t0); 7157 } 7158 return true; 7159 } 7160 7161 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah) 7162 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh) 7163 7164 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a, 7165 NeonGenTwo64OpFn *fn, bool acc) 7166 { 7167 if (fp_access_check(s)) { 7168 TCGv_i64 t0 = tcg_temp_new_i64(); 7169 TCGv_i64 t1 = tcg_temp_new_i64(); 7170 TCGv_i64 t2 = tcg_temp_new_i64(); 7171 7172 if (acc) { 7173 read_vec_element(s, t0, a->rd, 0, a->esz + 1); 7174 } 7175 read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN); 7176 read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN); 7177 fn(t0, t1, t2); 7178 7179 /* Clear the whole register first, then store scalar. */ 7180 clear_vec(s, a->rd); 7181 write_vec_element(s, t0, a->rd, 0, a->esz + 1); 7182 } 7183 return true; 7184 } 7185 7186 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a, 7187 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 7188 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a, 7189 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 7190 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a, 7191 a->esz == MO_16 ? 
gen_sqdmlsl_h : gen_sqdmlsl_s, true) 7192 7193 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, 7194 gen_helper_gvec_3_ptr * const fns[3]) 7195 { 7196 MemOp esz = a->esz; 7197 int check = fp_access_check_vector_hsd(s, a->q, esz); 7198 7199 if (check <= 0) { 7200 return check == 0; 7201 } 7202 7203 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 7204 esz == MO_16 ? FPST_A64_F16 : FPST_A64, 7205 a->idx, fns[esz - 1]); 7206 return true; 7207 } 7208 7209 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = { 7210 gen_helper_gvec_fmul_idx_h, 7211 gen_helper_gvec_fmul_idx_s, 7212 gen_helper_gvec_fmul_idx_d, 7213 }; 7214 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul) 7215 7216 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = { 7217 gen_helper_gvec_fmulx_idx_h, 7218 gen_helper_gvec_fmulx_idx_s, 7219 gen_helper_gvec_fmulx_idx_d, 7220 }; 7221 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) 7222 7223 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) 7224 { 7225 static gen_helper_gvec_4_ptr * const fns[3][3] = { 7226 { gen_helper_gvec_fmla_idx_h, 7227 gen_helper_gvec_fmla_idx_s, 7228 gen_helper_gvec_fmla_idx_d }, 7229 { gen_helper_gvec_fmls_idx_h, 7230 gen_helper_gvec_fmls_idx_s, 7231 gen_helper_gvec_fmls_idx_d }, 7232 { gen_helper_gvec_ah_fmls_idx_h, 7233 gen_helper_gvec_ah_fmls_idx_s, 7234 gen_helper_gvec_ah_fmls_idx_d }, 7235 }; 7236 MemOp esz = a->esz; 7237 int check = fp_access_check_vector_hsd(s, a->q, esz); 7238 7239 if (check <= 0) { 7240 return check == 0; 7241 } 7242 7243 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 7244 esz == MO_16 ? FPST_A64_F16 : FPST_A64, 7245 a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]); 7246 return true; 7247 } 7248 7249 TRANS(FMLA_vi, do_fmla_vector_idx, a, false) 7250 TRANS(FMLS_vi, do_fmla_vector_idx, a, true) 7251 7252 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2) 7253 { 7254 if (fp_access_check(s)) { 7255 int data = (a->idx << 2) | (is_2 << 1) | is_s; 7256 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 7257 vec_full_reg_offset(s, a->rn), 7258 vec_full_reg_offset(s, a->rm), tcg_env, 7259 a->q ? 
16 : 8, vec_full_reg_size(s), 7260 data, gen_helper_gvec_fmlal_idx_a64); 7261 } 7262 return true; 7263 } 7264 7265 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false) 7266 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false) 7267 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true) 7268 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true) 7269 7270 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a, 7271 gen_helper_gvec_3 * const fns[2]) 7272 { 7273 assert(a->esz == MO_16 || a->esz == MO_32); 7274 if (fp_access_check(s)) { 7275 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]); 7276 } 7277 return true; 7278 } 7279 7280 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = { 7281 gen_helper_gvec_mul_idx_h, 7282 gen_helper_gvec_mul_idx_s, 7283 }; 7284 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul) 7285 7286 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub) 7287 { 7288 static gen_helper_gvec_4 * const fns[2][2] = { 7289 { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h }, 7290 { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s }, 7291 }; 7292 7293 assert(a->esz == MO_16 || a->esz == MO_32); 7294 if (fp_access_check(s)) { 7295 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 7296 a->idx, fns[a->esz - 1][sub]); 7297 } 7298 return true; 7299 } 7300 7301 TRANS(MLA_vi, do_mla_vector_idx, a, false) 7302 TRANS(MLS_vi, do_mla_vector_idx, a, true) 7303 7304 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a, 7305 gen_helper_gvec_4 * const fns[2]) 7306 { 7307 assert(a->esz == MO_16 || a->esz == MO_32); 7308 if (fp_access_check(s)) { 7309 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), 7310 vec_full_reg_offset(s, a->rn), 7311 vec_full_reg_offset(s, a->rm), 7312 offsetof(CPUARMState, vfp.qc), 7313 a->q ? 
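/* oprsz: 16 bytes for a full Q vector, 8 for a 64-bit operation */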
16 : 8, vec_full_reg_size(s), 7314 a->idx, fns[a->esz - 1]); 7315 } 7316 return true; 7317 } 7318 7319 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = { 7320 gen_helper_neon_sqdmulh_idx_h, 7321 gen_helper_neon_sqdmulh_idx_s, 7322 }; 7323 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh) 7324 7325 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = { 7326 gen_helper_neon_sqrdmulh_idx_h, 7327 gen_helper_neon_sqrdmulh_idx_s, 7328 }; 7329 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh) 7330 7331 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = { 7332 gen_helper_neon_sqrdmlah_idx_h, 7333 gen_helper_neon_sqrdmlah_idx_s, 7334 }; 7335 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 7336 f_vector_idx_sqrdmlah) 7337 7338 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = { 7339 gen_helper_neon_sqrdmlsh_idx_h, 7340 gen_helper_neon_sqrdmlsh_idx_s, 7341 }; 7342 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 7343 f_vector_idx_sqrdmlsh) 7344 7345 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a, 7346 gen_helper_gvec_4 *fn) 7347 { 7348 if (fp_access_check(s)) { 7349 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 7350 } 7351 return true; 7352 } 7353 7354 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a, 7355 gen_helper_gvec_4_ptr *fn) 7356 { 7357 if (fp_access_check(s)) { 7358 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 7359 } 7360 return true; 7361 } 7362 7363 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_4b) 7364 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_4b) 7365 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 7366 gen_helper_gvec_sudot_idx_4b) 7367 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 7368 gen_helper_gvec_usdot_idx_4b) 7369 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a, 7370 gen_helper_gvec_bfdot_idx) 7371 7372 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) 7373 { 7374 if (!dc_isar_feature(aa64_bf16, s)) { 7375 return false; 7376 } 7377 if (fp_access_check(s)) { 7378 /* Q bit selects BFMLALB vs BFMLALT. */ 7379 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 7380 s->fpcr_ah ? FPST_AH : FPST_A64, 7381 (a->idx << 1) | a->q, 7382 gen_helper_gvec_bfmlal_idx); 7383 } 7384 return true; 7385 } 7386 7387 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) 7388 { 7389 gen_helper_gvec_4_ptr *fn; 7390 7391 if (!dc_isar_feature(aa64_fcma, s)) { 7392 return false; 7393 } 7394 switch (a->esz) { 7395 case MO_16: 7396 if (!dc_isar_feature(aa64_fp16, s)) { 7397 return false; 7398 } 7399 fn = gen_helper_gvec_fcmlah_idx; 7400 break; 7401 case MO_32: 7402 fn = gen_helper_gvec_fcmlas_idx; 7403 break; 7404 default: 7405 g_assert_not_reached(); 7406 } 7407 if (fp_access_check(s)) { 7408 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 7409 a->esz == MO_16 ? 
FPST_A64_F16 : FPST_A64, 7410 (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn); 7411 } 7412 return true; 7413 } 7414 7415 /* 7416 * Advanced SIMD scalar pairwise 7417 */ 7418 7419 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) 7420 { 7421 switch (a->esz) { 7422 case MO_64: 7423 if (fp_access_check(s)) { 7424 TCGv_i64 t0 = tcg_temp_new_i64(); 7425 TCGv_i64 t1 = tcg_temp_new_i64(); 7426 7427 read_vec_element(s, t0, a->rn, 0, MO_64); 7428 read_vec_element(s, t1, a->rn, 1, MO_64); 7429 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64)); 7430 write_fp_dreg(s, a->rd, t0); 7431 } 7432 break; 7433 case MO_32: 7434 if (fp_access_check(s)) { 7435 TCGv_i32 t0 = tcg_temp_new_i32(); 7436 TCGv_i32 t1 = tcg_temp_new_i32(); 7437 7438 read_vec_element_i32(s, t0, a->rn, 0, MO_32); 7439 read_vec_element_i32(s, t1, a->rn, 1, MO_32); 7440 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64)); 7441 write_fp_sreg(s, a->rd, t0); 7442 } 7443 break; 7444 case MO_16: 7445 if (!dc_isar_feature(aa64_fp16, s)) { 7446 return false; 7447 } 7448 if (fp_access_check(s)) { 7449 TCGv_i32 t0 = tcg_temp_new_i32(); 7450 TCGv_i32 t1 = tcg_temp_new_i32(); 7451 7452 read_vec_element_i32(s, t0, a->rn, 0, MO_16); 7453 read_vec_element_i32(s, t1, a->rn, 1, MO_16); 7454 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16)); 7455 write_fp_sreg(s, a->rd, t0); 7456 } 7457 break; 7458 default: 7459 g_assert_not_reached(); 7460 } 7461 return true; 7462 } 7463 7464 static bool do_fp3_scalar_pair_2fn(DisasContext *s, arg_rr_e *a, 7465 const FPScalar *fnormal, 7466 const FPScalar *fah) 7467 { 7468 return do_fp3_scalar_pair(s, a, s->fpcr_ah ? fah : fnormal); 7469 } 7470 7471 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) 7472 TRANS(FMAXP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmax, &f_scalar_fmax_ah) 7473 TRANS(FMINP_s, do_fp3_scalar_pair_2fn, a, &f_scalar_fmin, &f_scalar_fmin_ah) 7474 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) 7475 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) 7476 7477 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a) 7478 { 7479 if (fp_access_check(s)) { 7480 TCGv_i64 t0 = tcg_temp_new_i64(); 7481 TCGv_i64 t1 = tcg_temp_new_i64(); 7482 7483 read_vec_element(s, t0, a->rn, 0, MO_64); 7484 read_vec_element(s, t1, a->rn, 1, MO_64); 7485 tcg_gen_add_i64(t0, t0, t1); 7486 write_fp_dreg(s, a->rd, t0); 7487 } 7488 return true; 7489 } 7490 7491 /* 7492 * Floating-point conditional select 7493 */ 7494 7495 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a) 7496 { 7497 TCGv_i64 t_true, t_false; 7498 DisasCompare64 c; 7499 int check = fp_access_check_scalar_hsd(s, a->esz); 7500 7501 if (check <= 0) { 7502 return check == 0; 7503 } 7504 7505 /* Zero extend sreg & hreg inputs to 64 bits now. */ 7506 t_true = tcg_temp_new_i64(); 7507 t_false = tcg_temp_new_i64(); 7508 read_vec_element(s, t_true, a->rn, 0, a->esz); 7509 read_vec_element(s, t_false, a->rm, 0, a->esz); 7510 7511 a64_test_cc(&c, a->cond); 7512 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 7513 t_true, t_false); 7514 7515 /* 7516 * Note that sregs & hregs write back zeros to the high bits, 7517 * and we've already done the zero-extension. 
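 * The 64-bit write via write_fp_dreg() below also zeroes the high half
 * of the destination vector register, so the result is correct for the
 * h and s cases as well as for d.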
7518 */ 7519 write_fp_dreg(s, a->rd, t_true); 7520 return true; 7521 } 7522 7523 /* 7524 * Advanced SIMD Extract 7525 */ 7526 7527 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a) 7528 { 7529 if (fp_access_check(s)) { 7530 TCGv_i64 lo = read_fp_dreg(s, a->rn); 7531 if (a->imm != 0) { 7532 TCGv_i64 hi = read_fp_dreg(s, a->rm); 7533 tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8); 7534 } 7535 write_fp_dreg(s, a->rd, lo); 7536 } 7537 return true; 7538 } 7539 7540 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a) 7541 { 7542 TCGv_i64 lo, hi; 7543 int pos = (a->imm & 7) * 8; 7544 int elt = a->imm >> 3; 7545 7546 if (!fp_access_check(s)) { 7547 return true; 7548 } 7549 7550 lo = tcg_temp_new_i64(); 7551 hi = tcg_temp_new_i64(); 7552 7553 read_vec_element(s, lo, a->rn, elt, MO_64); 7554 elt++; 7555 read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64); 7556 elt++; 7557 7558 if (pos != 0) { 7559 TCGv_i64 hh = tcg_temp_new_i64(); 7560 tcg_gen_extract2_i64(lo, lo, hi, pos); 7561 read_vec_element(s, hh, a->rm, elt & 1, MO_64); 7562 tcg_gen_extract2_i64(hi, hi, hh, pos); 7563 } 7564 7565 write_vec_element(s, lo, a->rd, 0, MO_64); 7566 write_vec_element(s, hi, a->rd, 1, MO_64); 7567 clear_vec_high(s, true, a->rd); 7568 return true; 7569 } 7570 7571 /* 7572 * Floating-point data-processing (3 source) 7573 */ 7574 7575 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) 7576 { 7577 TCGv_ptr fpst; 7578 7579 /* 7580 * These are fused multiply-add. Note that doing the negations here 7581 * as separate steps is correct: an input NaN should come out with 7582 * its sign bit flipped if it is a negated-input. 7583 */ 7584 switch (a->esz) { 7585 case MO_64: 7586 if (fp_access_check(s)) { 7587 TCGv_i64 tn = read_fp_dreg(s, a->rn); 7588 TCGv_i64 tm = read_fp_dreg(s, a->rm); 7589 TCGv_i64 ta = read_fp_dreg(s, a->ra); 7590 7591 if (neg_a) { 7592 gen_vfp_maybe_ah_negd(s, ta, ta); 7593 } 7594 if (neg_n) { 7595 gen_vfp_maybe_ah_negd(s, tn, tn); 7596 } 7597 fpst = fpstatus_ptr(FPST_A64); 7598 gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); 7599 write_fp_dreg_merging(s, a->rd, a->ra, ta); 7600 } 7601 break; 7602 7603 case MO_32: 7604 if (fp_access_check(s)) { 7605 TCGv_i32 tn = read_fp_sreg(s, a->rn); 7606 TCGv_i32 tm = read_fp_sreg(s, a->rm); 7607 TCGv_i32 ta = read_fp_sreg(s, a->ra); 7608 7609 if (neg_a) { 7610 gen_vfp_maybe_ah_negs(s, ta, ta); 7611 } 7612 if (neg_n) { 7613 gen_vfp_maybe_ah_negs(s, tn, tn); 7614 } 7615 fpst = fpstatus_ptr(FPST_A64); 7616 gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); 7617 write_fp_sreg_merging(s, a->rd, a->ra, ta); 7618 } 7619 break; 7620 7621 case MO_16: 7622 if (!dc_isar_feature(aa64_fp16, s)) { 7623 return false; 7624 } 7625 if (fp_access_check(s)) { 7626 TCGv_i32 tn = read_fp_hreg(s, a->rn); 7627 TCGv_i32 tm = read_fp_hreg(s, a->rm); 7628 TCGv_i32 ta = read_fp_hreg(s, a->ra); 7629 7630 if (neg_a) { 7631 gen_vfp_maybe_ah_negh(s, ta, ta); 7632 } 7633 if (neg_n) { 7634 gen_vfp_maybe_ah_negh(s, tn, tn); 7635 } 7636 fpst = fpstatus_ptr(FPST_A64_F16); 7637 gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); 7638 write_fp_hreg_merging(s, a->rd, a->ra, ta); 7639 } 7640 break; 7641 7642 default: 7643 return false; 7644 } 7645 return true; 7646 } 7647 7648 TRANS(FMADD, do_fmadd, a, false, false) 7649 TRANS(FNMADD, do_fmadd, a, true, true) 7650 TRANS(FMSUB, do_fmadd, a, false, true) 7651 TRANS(FNMSUB, do_fmadd, a, true, false) 7652 7653 /* 7654 * Advanced SIMD Across Lanes 7655 */ 7656 7657 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool 
widen, 7658 MemOp src_sign, NeonGenTwo64OpFn *fn) 7659 { 7660 TCGv_i64 tcg_res, tcg_elt; 7661 MemOp src_mop = a->esz | src_sign; 7662 int elements = (a->q ? 16 : 8) >> a->esz; 7663 7664 /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */ 7665 if (elements < 4) { 7666 return false; 7667 } 7668 if (!fp_access_check(s)) { 7669 return true; 7670 } 7671 7672 tcg_res = tcg_temp_new_i64(); 7673 tcg_elt = tcg_temp_new_i64(); 7674 7675 read_vec_element(s, tcg_res, a->rn, 0, src_mop); 7676 for (int i = 1; i < elements; i++) { 7677 read_vec_element(s, tcg_elt, a->rn, i, src_mop); 7678 fn(tcg_res, tcg_res, tcg_elt); 7679 } 7680 7681 tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen); 7682 write_fp_dreg(s, a->rd, tcg_res); 7683 return true; 7684 } 7685 7686 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64) 7687 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64) 7688 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64) 7689 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64) 7690 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64) 7691 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64) 7692 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64) 7693 7694 /* 7695 * do_fp_reduction helper 7696 * 7697 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 7698 * important for correct NaN propagation that we do these 7699 * operations in exactly the order specified by the pseudocode. 7700 * 7701 * This is a recursive function. 7702 */ 7703 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, 7704 int ebase, int ecount, TCGv_ptr fpst, 7705 NeonGenTwoSingleOpFn *fn) 7706 { 7707 if (ecount == 1) { 7708 TCGv_i32 tcg_elem = tcg_temp_new_i32(); 7709 read_vec_element_i32(s, tcg_elem, rn, ebase, esz); 7710 return tcg_elem; 7711 } else { 7712 int half = ecount >> 1; 7713 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 7714 7715 tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn); 7716 tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn); 7717 tcg_res = tcg_temp_new_i32(); 7718 7719 fn(tcg_res, tcg_lo, tcg_hi, fpst); 7720 return tcg_res; 7721 } 7722 } 7723 7724 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, 7725 NeonGenTwoSingleOpFn *fnormal, 7726 NeonGenTwoSingleOpFn *fah) 7727 { 7728 if (fp_access_check(s)) { 7729 MemOp esz = a->esz; 7730 int elts = (a->q ? 16 : 8) >> esz; 7731 TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64); 7732 TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, 7733 s->fpcr_ah ? 
fah : fnormal); 7734 write_fp_sreg(s, a->rd, res); 7735 } 7736 return true; 7737 } 7738 7739 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, 7740 gen_helper_vfp_maxnumh, gen_helper_vfp_maxnumh) 7741 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, 7742 gen_helper_vfp_minnumh, gen_helper_vfp_minnumh) 7743 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, 7744 gen_helper_vfp_maxh, gen_helper_vfp_ah_maxh) 7745 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, 7746 gen_helper_vfp_minh, gen_helper_vfp_ah_minh) 7747 7748 TRANS(FMAXNMV_s, do_fp_reduction, a, 7749 gen_helper_vfp_maxnums, gen_helper_vfp_maxnums) 7750 TRANS(FMINNMV_s, do_fp_reduction, a, 7751 gen_helper_vfp_minnums, gen_helper_vfp_minnums) 7752 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs, gen_helper_vfp_ah_maxs) 7753 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins, gen_helper_vfp_ah_mins) 7754 7755 /* 7756 * Floating-point Immediate 7757 */ 7758 7759 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a) 7760 { 7761 int check = fp_access_check_scalar_hsd(s, a->esz); 7762 uint64_t imm; 7763 7764 if (check <= 0) { 7765 return check == 0; 7766 } 7767 7768 imm = vfp_expand_imm(a->esz, a->imm); 7769 write_fp_dreg(s, a->rd, tcg_constant_i64(imm)); 7770 return true; 7771 } 7772 7773 /* 7774 * Floating point compare, conditional compare 7775 */ 7776 7777 static void handle_fp_compare(DisasContext *s, int size, 7778 unsigned int rn, unsigned int rm, 7779 bool cmp_with_zero, bool signal_all_nans) 7780 { 7781 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 7782 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64); 7783 7784 if (size == MO_64) { 7785 TCGv_i64 tcg_vn, tcg_vm; 7786 7787 tcg_vn = read_fp_dreg(s, rn); 7788 if (cmp_with_zero) { 7789 tcg_vm = tcg_constant_i64(0); 7790 } else { 7791 tcg_vm = read_fp_dreg(s, rm); 7792 } 7793 if (signal_all_nans) { 7794 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7795 } else { 7796 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7797 } 7798 } else { 7799 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 7800 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 7801 7802 read_vec_element_i32(s, tcg_vn, rn, 0, size); 7803 if (cmp_with_zero) { 7804 tcg_gen_movi_i32(tcg_vm, 0); 7805 } else { 7806 read_vec_element_i32(s, tcg_vm, rm, 0, size); 7807 } 7808 7809 switch (size) { 7810 case MO_32: 7811 if (signal_all_nans) { 7812 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7813 } else { 7814 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7815 } 7816 break; 7817 case MO_16: 7818 if (signal_all_nans) { 7819 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7820 } else { 7821 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7822 } 7823 break; 7824 default: 7825 g_assert_not_reached(); 7826 } 7827 } 7828 7829 gen_set_nzcv(tcg_flags); 7830 } 7831 7832 /* FCMP, FCMPE */ 7833 static bool trans_FCMP(DisasContext *s, arg_FCMP *a) 7834 { 7835 int check = fp_access_check_scalar_hsd(s, a->esz); 7836 7837 if (check <= 0) { 7838 return check == 0; 7839 } 7840 7841 handle_fp_compare(s, a->esz, a->rn, a->rm, a->z, a->e); 7842 return true; 7843 } 7844 7845 /* FCCMP, FCCMPE */ 7846 static bool trans_FCCMP(DisasContext *s, arg_FCCMP *a) 7847 { 7848 TCGLabel *label_continue = NULL; 7849 int check = fp_access_check_scalar_hsd(s, a->esz); 7850 7851 if (check <= 0) { 7852 return check == 0; 7853 } 7854 7855 if (a->cond < 0x0e) { /* not always */ 7856 TCGLabel *label_match = gen_new_label(); 7857 label_continue = gen_new_label(); 7858 
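/* arm_gen_test_cc branches to label_match when the condition holds; on the fall-through (no-match) path we just load the immediate #nzcv into the flags and skip the compare. */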
arm_gen_test_cc(a->cond, label_match); 7859 /* nomatch: */ 7860 gen_set_nzcv(tcg_constant_i64(a->nzcv << 28)); 7861 tcg_gen_br(label_continue); 7862 gen_set_label(label_match); 7863 } 7864 7865 handle_fp_compare(s, a->esz, a->rn, a->rm, false, a->e); 7866 7867 if (label_continue) { 7868 gen_set_label(label_continue); 7869 } 7870 return true; 7871 } 7872 7873 /* 7874 * Advanced SIMD Modified Immediate 7875 */ 7876 7877 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a) 7878 { 7879 if (!dc_isar_feature(aa64_fp16, s)) { 7880 return false; 7881 } 7882 if (fp_access_check(s)) { 7883 tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd), 7884 a->q ? 16 : 8, vec_full_reg_size(s), 7885 vfp_expand_imm(MO_16, a->abcdefgh)); 7886 } 7887 return true; 7888 } 7889 7890 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs, 7891 int64_t c, uint32_t oprsz, uint32_t maxsz) 7892 { 7893 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c); 7894 } 7895 7896 static bool trans_Vimm(DisasContext *s, arg_Vimm *a) 7897 { 7898 GVecGen2iFn *fn; 7899 7900 /* Handle decode of cmode/op here between ORR/BIC/MOVI */ 7901 if ((a->cmode & 1) && a->cmode < 12) { 7902 /* For op=1, the imm will be inverted, so BIC becomes AND. */ 7903 fn = a->op ? tcg_gen_gvec_andi : tcg_gen_gvec_ori; 7904 } else { 7905 /* There is one unallocated cmode/op combination in this space */ 7906 if (a->cmode == 15 && a->op == 1 && a->q == 0) { 7907 return false; 7908 } 7909 fn = gen_movi; 7910 } 7911 7912 if (fp_access_check(s)) { 7913 uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op); 7914 gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64); 7915 } 7916 return true; 7917 } 7918 7919 /* 7920 * Advanced SIMD Shift by Immediate 7921 */ 7922 7923 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn) 7924 { 7925 if (fp_access_check(s)) { 7926 gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz); 7927 } 7928 return true; 7929 } 7930 7931 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr) 7932 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr) 7933 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra) 7934 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra) 7935 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr) 7936 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr) 7937 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra) 7938 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra) 7939 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri) 7940 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli) 7941 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli); 7942 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli) 7943 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli) 7944 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui) 7945 7946 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u) 7947 { 7948 TCGv_i64 tcg_rn, tcg_rd; 7949 int esz = a->esz; 7950 int esize; 7951 7952 if (!fp_access_check(s)) { 7953 return true; 7954 } 7955 7956 /* 7957 * For the LL variants the store is larger than the load, 7958 * so if rd == rn we would overwrite parts of our input. 7959 * So load everything right now and use shifts in the main loop. 
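* Each input element is (sign- or zero-)extracted from that 64-bit load, shifted left by the immediate, and written back at double the element size (esz + 1).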
7960 */ 7961 tcg_rd = tcg_temp_new_i64(); 7962 tcg_rn = tcg_temp_new_i64(); 7963 read_vec_element(s, tcg_rn, a->rn, a->q, MO_64); 7964 7965 esize = 8 << esz; 7966 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7967 if (is_u) { 7968 tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize); 7969 } else { 7970 tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize); 7971 } 7972 tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm); 7973 write_vec_element(s, tcg_rd, a->rd, i, esz + 1); 7974 } 7975 clear_vec_high(s, true, a->rd); 7976 return true; 7977 } 7978 7979 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false) 7980 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true) 7981 7982 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7983 { 7984 assert(shift >= 0 && shift <= 64); 7985 tcg_gen_sari_i64(dst, src, MIN(shift, 63)); 7986 } 7987 7988 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7989 { 7990 assert(shift >= 0 && shift <= 64); 7991 if (shift == 64) { 7992 tcg_gen_movi_i64(dst, 0); 7993 } else { 7994 tcg_gen_shri_i64(dst, src, shift); 7995 } 7996 } 7997 7998 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7999 { 8000 gen_sshr_d(src, src, shift); 8001 tcg_gen_add_i64(dst, dst, src); 8002 } 8003 8004 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 8005 { 8006 gen_ushr_d(src, src, shift); 8007 tcg_gen_add_i64(dst, dst, src); 8008 } 8009 8010 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 8011 { 8012 assert(shift >= 0 && shift <= 32); 8013 if (shift) { 8014 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 8015 tcg_gen_add_i64(dst, src, rnd); 8016 tcg_gen_sari_i64(dst, dst, shift); 8017 } else { 8018 tcg_gen_mov_i64(dst, src); 8019 } 8020 } 8021 8022 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 8023 { 8024 assert(shift >= 0 && shift <= 32); 8025 if (shift) { 8026 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 8027 tcg_gen_add_i64(dst, src, rnd); 8028 tcg_gen_shri_i64(dst, dst, shift); 8029 } else { 8030 tcg_gen_mov_i64(dst, src); 8031 } 8032 } 8033 8034 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 8035 { 8036 assert(shift >= 0 && shift <= 64); 8037 if (shift == 0) { 8038 tcg_gen_mov_i64(dst, src); 8039 } else if (shift == 64) { 8040 /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */ 8041 tcg_gen_movi_i64(dst, 0); 8042 } else { 8043 TCGv_i64 rnd = tcg_temp_new_i64(); 8044 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 8045 tcg_gen_sari_i64(dst, src, shift); 8046 tcg_gen_add_i64(dst, dst, rnd); 8047 } 8048 } 8049 8050 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 8051 { 8052 assert(shift >= 0 && shift <= 64); 8053 if (shift == 0) { 8054 tcg_gen_mov_i64(dst, src); 8055 } else if (shift == 64) { 8056 /* Rounding will propagate bit 63 into bit 64. */ 8057 tcg_gen_shri_i64(dst, src, 63); 8058 } else { 8059 TCGv_i64 rnd = tcg_temp_new_i64(); 8060 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 8061 tcg_gen_shri_i64(dst, src, shift); 8062 tcg_gen_add_i64(dst, dst, rnd); 8063 } 8064 } 8065 8066 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 8067 { 8068 gen_srshr_d(src, src, shift); 8069 tcg_gen_add_i64(dst, dst, src); 8070 } 8071 8072 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 8073 { 8074 gen_urshr_d(src, src, shift); 8075 tcg_gen_add_i64(dst, dst, src); 8076 } 8077 8078 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 8079 { 8080 /* If shift is 64, dst is unchanged. 
*/ 8081 if (shift != 64) { 8082 tcg_gen_shri_i64(src, src, shift); 8083 tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift); 8084 } 8085 } 8086 8087 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 8088 { 8089 tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift); 8090 } 8091 8092 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a, 8093 WideShiftImmFn * const fns[3], MemOp sign) 8094 { 8095 TCGv_i64 tcg_rn, tcg_rd; 8096 int esz = a->esz; 8097 int esize; 8098 WideShiftImmFn *fn; 8099 8100 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 8101 8102 if (!fp_access_check(s)) { 8103 return true; 8104 } 8105 8106 tcg_rn = tcg_temp_new_i64(); 8107 tcg_rd = tcg_temp_new_i64(); 8108 tcg_gen_movi_i64(tcg_rd, 0); 8109 8110 fn = fns[esz]; 8111 esize = 8 << esz; 8112 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 8113 read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign); 8114 fn(tcg_rn, tcg_rn, a->imm); 8115 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize); 8116 } 8117 8118 write_vec_element(s, tcg_rd, a->rd, a->q, MO_64); 8119 clear_vec_high(s, a->q, a->rd); 8120 return true; 8121 } 8122 8123 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 8124 { 8125 tcg_gen_sari_i64(d, s, i); 8126 tcg_gen_ext16u_i64(d, d); 8127 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 8128 } 8129 8130 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 8131 { 8132 tcg_gen_sari_i64(d, s, i); 8133 tcg_gen_ext32u_i64(d, d); 8134 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 8135 } 8136 8137 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 8138 { 8139 gen_sshr_d(d, s, i); 8140 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 8141 } 8142 8143 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 8144 { 8145 tcg_gen_shri_i64(d, s, i); 8146 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 8147 } 8148 8149 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 8150 { 8151 tcg_gen_shri_i64(d, s, i); 8152 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 8153 } 8154 8155 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 8156 { 8157 gen_ushr_d(d, s, i); 8158 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 8159 } 8160 8161 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 8162 { 8163 tcg_gen_sari_i64(d, s, i); 8164 tcg_gen_ext16u_i64(d, d); 8165 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 8166 } 8167 8168 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 8169 { 8170 tcg_gen_sari_i64(d, s, i); 8171 tcg_gen_ext32u_i64(d, d); 8172 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 8173 } 8174 8175 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 8176 { 8177 gen_sshr_d(d, s, i); 8178 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 8179 } 8180 8181 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 8182 { 8183 gen_srshr_bhs(d, s, i); 8184 tcg_gen_ext16u_i64(d, d); 8185 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 8186 } 8187 8188 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 8189 { 8190 gen_srshr_bhs(d, s, i); 8191 tcg_gen_ext32u_i64(d, d); 8192 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 8193 } 8194 8195 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 8196 { 8197 gen_srshr_d(d, s, i); 8198 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 8199 } 8200 8201 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 8202 { 8203 gen_urshr_bhs(d, s, i); 8204 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 8205 } 8206 8207 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 8208 
{ 8209 gen_urshr_bhs(d, s, i); 8210 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 8211 } 8212 8213 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 8214 { 8215 gen_urshr_d(d, s, i); 8216 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 8217 } 8218 8219 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 8220 { 8221 gen_srshr_bhs(d, s, i); 8222 tcg_gen_ext16u_i64(d, d); 8223 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 8224 } 8225 8226 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 8227 { 8228 gen_srshr_bhs(d, s, i); 8229 tcg_gen_ext32u_i64(d, d); 8230 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 8231 } 8232 8233 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 8234 { 8235 gen_srshr_d(d, s, i); 8236 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 8237 } 8238 8239 static WideShiftImmFn * const shrn_fns[] = { 8240 tcg_gen_shri_i64, 8241 tcg_gen_shri_i64, 8242 gen_ushr_d, 8243 }; 8244 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0) 8245 8246 static WideShiftImmFn * const rshrn_fns[] = { 8247 gen_urshr_bhs, 8248 gen_urshr_bhs, 8249 gen_urshr_d, 8250 }; 8251 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0) 8252 8253 static WideShiftImmFn * const sqshrn_fns[] = { 8254 gen_sqshrn_b, 8255 gen_sqshrn_h, 8256 gen_sqshrn_s, 8257 }; 8258 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN) 8259 8260 static WideShiftImmFn * const uqshrn_fns[] = { 8261 gen_uqshrn_b, 8262 gen_uqshrn_h, 8263 gen_uqshrn_s, 8264 }; 8265 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0) 8266 8267 static WideShiftImmFn * const sqshrun_fns[] = { 8268 gen_sqshrun_b, 8269 gen_sqshrun_h, 8270 gen_sqshrun_s, 8271 }; 8272 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN) 8273 8274 static WideShiftImmFn * const sqrshrn_fns[] = { 8275 gen_sqrshrn_b, 8276 gen_sqrshrn_h, 8277 gen_sqrshrn_s, 8278 }; 8279 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN) 8280 8281 static WideShiftImmFn * const uqrshrn_fns[] = { 8282 gen_uqrshrn_b, 8283 gen_uqrshrn_h, 8284 gen_uqrshrn_s, 8285 }; 8286 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0) 8287 8288 static WideShiftImmFn * const sqrshrun_fns[] = { 8289 gen_sqrshrun_b, 8290 gen_sqrshrun_h, 8291 gen_sqrshrun_s, 8292 }; 8293 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN) 8294 8295 /* 8296 * Advanced SIMD Scalar Shift by Immediate 8297 */ 8298 8299 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a, 8300 WideShiftImmFn *fn, bool accumulate, 8301 MemOp sign) 8302 { 8303 if (fp_access_check(s)) { 8304 TCGv_i64 rd = tcg_temp_new_i64(); 8305 TCGv_i64 rn = tcg_temp_new_i64(); 8306 8307 read_vec_element(s, rn, a->rn, 0, a->esz | sign); 8308 if (accumulate) { 8309 read_vec_element(s, rd, a->rd, 0, a->esz | sign); 8310 } 8311 fn(rd, rn, a->imm); 8312 write_fp_dreg(s, a->rd, rd); 8313 } 8314 return true; 8315 } 8316 8317 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0) 8318 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0) 8319 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0) 8320 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0) 8321 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0) 8322 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0) 8323 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0) 8324 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0) 8325 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0) 8326 8327 TRANS(SHL_s, do_scalar_shift_imm, a, 
tcg_gen_shli_i64, false, 0) 8328 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0) 8329 8330 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i, 8331 NeonGenTwoOpEnvFn *fn) 8332 { 8333 TCGv_i32 t = tcg_temp_new_i32(); 8334 tcg_gen_extrl_i64_i32(t, s); 8335 fn(t, tcg_env, t, tcg_constant_i32(i)); 8336 tcg_gen_extu_i32_i64(d, t); 8337 } 8338 8339 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 8340 { 8341 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8); 8342 } 8343 8344 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 8345 { 8346 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16); 8347 } 8348 8349 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 8350 { 8351 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32); 8352 } 8353 8354 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 8355 { 8356 gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i)); 8357 } 8358 8359 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 8360 { 8361 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8); 8362 } 8363 8364 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 8365 { 8366 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16); 8367 } 8368 8369 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 8370 { 8371 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32); 8372 } 8373 8374 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 8375 { 8376 gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i)); 8377 } 8378 8379 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 8380 { 8381 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8); 8382 } 8383 8384 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 8385 { 8386 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16); 8387 } 8388 8389 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 8390 { 8391 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32); 8392 } 8393 8394 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 8395 { 8396 gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i)); 8397 } 8398 8399 static WideShiftImmFn * const f_scalar_sqshli[] = { 8400 gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d 8401 }; 8402 8403 static WideShiftImmFn * const f_scalar_uqshli[] = { 8404 gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d 8405 }; 8406 8407 static WideShiftImmFn * const f_scalar_sqshlui[] = { 8408 gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d 8409 }; 8410 8411 /* Note that the helpers sign-extend their inputs, so don't do it here. 
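* (Hence the TRANS lines below pass 0 for the MemOp sign argument.)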
*/ 8412 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0) 8413 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0) 8414 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0) 8415 8416 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a, 8417 WideShiftImmFn * const fns[3], 8418 MemOp sign, bool zext) 8419 { 8420 MemOp esz = a->esz; 8421 8422 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 8423 8424 if (fp_access_check(s)) { 8425 TCGv_i64 rd = tcg_temp_new_i64(); 8426 TCGv_i64 rn = tcg_temp_new_i64(); 8427 8428 read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign); 8429 fns[esz](rd, rn, a->imm); 8430 if (zext) { 8431 tcg_gen_ext_i64(rd, rd, esz); 8432 } 8433 write_fp_dreg(s, a->rd, rd); 8434 } 8435 return true; 8436 } 8437 8438 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true) 8439 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true) 8440 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false) 8441 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false) 8442 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false) 8443 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false) 8444 8445 static bool do_div(DisasContext *s, arg_rrr_sf *a, bool is_signed) 8446 { 8447 TCGv_i64 tcg_n, tcg_m, tcg_rd; 8448 tcg_rd = cpu_reg(s, a->rd); 8449 8450 if (!a->sf && is_signed) { 8451 tcg_n = tcg_temp_new_i64(); 8452 tcg_m = tcg_temp_new_i64(); 8453 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, a->rn)); 8454 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, a->rm)); 8455 } else { 8456 tcg_n = read_cpu_reg(s, a->rn, a->sf); 8457 tcg_m = read_cpu_reg(s, a->rm, a->sf); 8458 } 8459 8460 if (is_signed) { 8461 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 8462 } else { 8463 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 8464 } 8465 8466 if (!a->sf) { /* zero extend final result */ 8467 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8468 } 8469 return true; 8470 } 8471 8472 TRANS(SDIV, do_div, a, true) 8473 TRANS(UDIV, do_div, a, false) 8474 8475 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 8476 * Note that it is the caller's responsibility to ensure that the 8477 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 8478 * mandated semantics for out of range shifts. 8479 */ 8480 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 8481 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 8482 { 8483 switch (shift_type) { 8484 case A64_SHIFT_TYPE_LSL: 8485 tcg_gen_shl_i64(dst, src, shift_amount); 8486 break; 8487 case A64_SHIFT_TYPE_LSR: 8488 tcg_gen_shr_i64(dst, src, shift_amount); 8489 break; 8490 case A64_SHIFT_TYPE_ASR: 8491 if (!sf) { 8492 tcg_gen_ext32s_i64(dst, src); 8493 } 8494 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); 8495 break; 8496 case A64_SHIFT_TYPE_ROR: 8497 if (sf) { 8498 tcg_gen_rotr_i64(dst, src, shift_amount); 8499 } else { 8500 TCGv_i32 t0, t1; 8501 t0 = tcg_temp_new_i32(); 8502 t1 = tcg_temp_new_i32(); 8503 tcg_gen_extrl_i64_i32(t0, src); 8504 tcg_gen_extrl_i64_i32(t1, shift_amount); 8505 tcg_gen_rotr_i32(t0, t0, t1); 8506 tcg_gen_extu_i32_i64(dst, t0); 8507 } 8508 break; 8509 default: 8510 assert(FALSE); /* all shift types should be handled */ 8511 break; 8512 } 8513 8514 if (!sf) { /* zero extend final result */ 8515 tcg_gen_ext32u_i64(dst, dst); 8516 } 8517 } 8518 8519 /* Shift a TCGv src by immediate, put result in dst. 
8520 * The shift amount must be in range (this should always be true as the 8521 * relevant instructions will UNDEF on bad shift immediates). 8522 */ 8523 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, 8524 enum a64_shift_type shift_type, unsigned int shift_i) 8525 { 8526 assert(shift_i < (sf ? 64 : 32)); 8527 8528 if (shift_i == 0) { 8529 tcg_gen_mov_i64(dst, src); 8530 } else { 8531 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); 8532 } 8533 } 8534 8535 static bool do_shift_reg(DisasContext *s, arg_rrr_sf *a, 8536 enum a64_shift_type shift_type) 8537 { 8538 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 8539 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8540 TCGv_i64 tcg_rn = read_cpu_reg(s, a->rn, a->sf); 8541 8542 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, a->rm), a->sf ? 63 : 31); 8543 shift_reg(tcg_rd, tcg_rn, a->sf, shift_type, tcg_shift); 8544 return true; 8545 } 8546 8547 TRANS(LSLV, do_shift_reg, a, A64_SHIFT_TYPE_LSL) 8548 TRANS(LSRV, do_shift_reg, a, A64_SHIFT_TYPE_LSR) 8549 TRANS(ASRV, do_shift_reg, a, A64_SHIFT_TYPE_ASR) 8550 TRANS(RORV, do_shift_reg, a, A64_SHIFT_TYPE_ROR) 8551 8552 static bool do_crc32(DisasContext *s, arg_rrr_e *a, bool crc32c) 8553 { 8554 TCGv_i64 tcg_acc, tcg_val, tcg_rd; 8555 TCGv_i32 tcg_bytes; 8556 8557 switch (a->esz) { 8558 case MO_8: 8559 case MO_16: 8560 case MO_32: 8561 tcg_val = tcg_temp_new_i64(); 8562 tcg_gen_extract_i64(tcg_val, cpu_reg(s, a->rm), 0, 8 << a->esz); 8563 break; 8564 case MO_64: 8565 tcg_val = cpu_reg(s, a->rm); 8566 break; 8567 default: 8568 g_assert_not_reached(); 8569 } 8570 tcg_acc = cpu_reg(s, a->rn); 8571 tcg_bytes = tcg_constant_i32(1 << a->esz); 8572 tcg_rd = cpu_reg(s, a->rd); 8573 8574 if (crc32c) { 8575 gen_helper_crc32c_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); 8576 } else { 8577 gen_helper_crc32_64(tcg_rd, tcg_acc, tcg_val, tcg_bytes); 8578 } 8579 return true; 8580 } 8581 8582 TRANS_FEAT(CRC32, aa64_crc32, do_crc32, a, false) 8583 TRANS_FEAT(CRC32C, aa64_crc32, do_crc32, a, true) 8584 8585 static bool do_subp(DisasContext *s, arg_rrr *a, bool setflag) 8586 { 8587 TCGv_i64 tcg_n = read_cpu_reg_sp(s, a->rn, true); 8588 TCGv_i64 tcg_m = read_cpu_reg_sp(s, a->rm, true); 8589 TCGv_i64 tcg_d = cpu_reg(s, a->rd); 8590 8591 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); 8592 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 8593 8594 if (setflag) { 8595 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 8596 } else { 8597 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 8598 } 8599 return true; 8600 } 8601 8602 TRANS_FEAT(SUBP, aa64_mte_insn_reg, do_subp, a, false) 8603 TRANS_FEAT(SUBPS, aa64_mte_insn_reg, do_subp, a, true) 8604 8605 static bool trans_IRG(DisasContext *s, arg_rrr *a) 8606 { 8607 if (dc_isar_feature(aa64_mte_insn_reg, s)) { 8608 TCGv_i64 tcg_rd = cpu_reg_sp(s, a->rd); 8609 TCGv_i64 tcg_rn = cpu_reg_sp(s, a->rn); 8610 8611 if (s->ata[0]) { 8612 gen_helper_irg(tcg_rd, tcg_env, tcg_rn, cpu_reg(s, a->rm)); 8613 } else { 8614 gen_address_with_allocation_tag0(tcg_rd, tcg_rn); 8615 } 8616 return true; 8617 } 8618 return false; 8619 } 8620 8621 static bool trans_GMI(DisasContext *s, arg_rrr *a) 8622 { 8623 if (dc_isar_feature(aa64_mte_insn_reg, s)) { 8624 TCGv_i64 t = tcg_temp_new_i64(); 8625 8626 tcg_gen_extract_i64(t, cpu_reg_sp(s, a->rn), 56, 4); 8627 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 8628 tcg_gen_or_i64(cpu_reg(s, a->rd), cpu_reg(s, a->rm), t); 8629 return true; 8630 } 8631 return false; 8632 } 8633 8634 static bool trans_PACGA(DisasContext *s, arg_rrr *a) 8635 { 8636 if (dc_isar_feature(aa64_pauth, s)) { 8637 
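/* PACGA computes the pointer authentication code for Xn with the generic key, using Xm|SP as the modifier; the result is the PAC in the upper 32 bits of Xd with the lower 32 bits zero. */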
gen_helper_pacga(cpu_reg(s, a->rd), tcg_env, 8638 cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm)); 8639 return true; 8640 } 8641 return false; 8642 } 8643 8644 static bool gen_rrr(DisasContext *s, arg_rrr_sf *a, ArithTwoOp fn) 8645 { 8646 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 8647 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8648 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8649 8650 fn(tcg_rd, tcg_rn, tcg_rm); 8651 if (!a->sf) { 8652 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8653 } 8654 return true; 8655 } 8656 8657 TRANS_FEAT(SMAX, aa64_cssc, gen_rrr, a, 8658 a->sf ? tcg_gen_smax_i64 : gen_smax32_i64) 8659 TRANS_FEAT(SMIN, aa64_cssc, gen_rrr, a, 8660 a->sf ? tcg_gen_smin_i64 : gen_smin32_i64) 8661 TRANS_FEAT(UMAX, aa64_cssc, gen_rrr, a, 8662 a->sf ? tcg_gen_umax_i64 : gen_umax32_i64) 8663 TRANS_FEAT(UMIN, aa64_cssc, gen_rrr, a, 8664 a->sf ? tcg_gen_umin_i64 : gen_umin32_i64) 8665 8666 typedef void ArithOneOp(TCGv_i64, TCGv_i64); 8667 8668 static bool gen_rr(DisasContext *s, int rd, int rn, ArithOneOp fn) 8669 { 8670 fn(cpu_reg(s, rd), cpu_reg(s, rn)); 8671 return true; 8672 } 8673 8674 /* 8675 * Perform 32-bit operation fn on the low half of n; 8676 * the high half of the output is zeroed. 8677 */ 8678 static void gen_wrap2_i32(TCGv_i64 d, TCGv_i64 n, NeonGenOneOpFn fn) 8679 { 8680 TCGv_i32 t = tcg_temp_new_i32(); 8681 8682 tcg_gen_extrl_i64_i32(t, n); 8683 fn(t, t); 8684 tcg_gen_extu_i32_i64(d, t); 8685 } 8686 8687 static void gen_rbit32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8688 { 8689 gen_wrap2_i32(tcg_rd, tcg_rn, gen_helper_rbit); 8690 } 8691 8692 static void gen_rev16_xx(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 mask) 8693 { 8694 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8695 8696 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 8697 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 8698 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 8699 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 8700 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 8701 } 8702 8703 static void gen_rev16_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8704 { 8705 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff)); 8706 } 8707 8708 static void gen_rev16_64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8709 { 8710 gen_rev16_xx(tcg_rd, tcg_rn, tcg_constant_i64(0x00ff00ff00ff00ffull)); 8711 } 8712 8713 static void gen_rev_32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8714 { 8715 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 8716 } 8717 8718 static void gen_rev32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8719 { 8720 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 8721 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 8722 } 8723 8724 TRANS(RBIT, gen_rr, a->rd, a->rn, a->sf ? gen_helper_rbit64 : gen_rbit32) 8725 TRANS(REV16, gen_rr, a->rd, a->rn, a->sf ? gen_rev16_64 : gen_rev16_32) 8726 TRANS(REV32, gen_rr, a->rd, a->rn, a->sf ? gen_rev32 : gen_rev_32) 8727 TRANS(REV64, gen_rr, a->rd, a->rn, tcg_gen_bswap64_i64) 8728 8729 static void gen_clz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8730 { 8731 TCGv_i32 t32 = tcg_temp_new_i32(); 8732 8733 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8734 tcg_gen_clzi_i32(t32, t32, 32); 8735 tcg_gen_extu_i32_i64(tcg_rd, t32); 8736 } 8737 8738 static void gen_clz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8739 { 8740 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 8741 } 8742 8743 static void gen_cls32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8744 { 8745 gen_wrap2_i32(tcg_rd, tcg_rn, tcg_gen_clrsb_i32); 8746 } 8747 8748 TRANS(CLZ, gen_rr, a->rd, a->rn, a->sf ? gen_clz64 : gen_clz32) 8749 TRANS(CLS, gen_rr, a->rd, a->rn, a->sf ? 
tcg_gen_clrsb_i64 : gen_cls32) 8750 8751 static void gen_ctz32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8752 { 8753 TCGv_i32 t32 = tcg_temp_new_i32(); 8754 8755 tcg_gen_extrl_i64_i32(t32, tcg_rn); 8756 tcg_gen_ctzi_i32(t32, t32, 32); 8757 tcg_gen_extu_i32_i64(tcg_rd, t32); 8758 } 8759 8760 static void gen_ctz64(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8761 { 8762 tcg_gen_ctzi_i64(tcg_rd, tcg_rn, 64); 8763 } 8764 8765 static void gen_cnt32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8766 { 8767 gen_wrap2_i32(tcg_rd, tcg_rn, tcg_gen_ctpop_i32); 8768 } 8769 8770 static void gen_abs32(TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) 8771 { 8772 gen_wrap2_i32(tcg_rd, tcg_rn, tcg_gen_abs_i32); 8773 } 8774 8775 TRANS_FEAT(CTZ, aa64_cssc, gen_rr, a->rd, a->rn, 8776 a->sf ? gen_ctz64 : gen_ctz32) 8777 TRANS_FEAT(CNT, aa64_cssc, gen_rr, a->rd, a->rn, 8778 a->sf ? tcg_gen_ctpop_i64 : gen_cnt32) 8779 TRANS_FEAT(ABS, aa64_cssc, gen_rr, a->rd, a->rn, 8780 a->sf ? tcg_gen_abs_i64 : gen_abs32) 8781 8782 static bool gen_pacaut(DisasContext *s, arg_pacaut *a, NeonGenTwo64OpEnvFn fn) 8783 { 8784 TCGv_i64 tcg_rd, tcg_rn; 8785 8786 if (a->z) { 8787 if (a->rn != 31) { 8788 return false; 8789 } 8790 tcg_rn = tcg_constant_i64(0); 8791 } else { 8792 tcg_rn = cpu_reg_sp(s, a->rn); 8793 } 8794 if (s->pauth_active) { 8795 tcg_rd = cpu_reg(s, a->rd); 8796 fn(tcg_rd, tcg_env, tcg_rd, tcg_rn); 8797 } 8798 return true; 8799 } 8800 8801 TRANS_FEAT(PACIA, aa64_pauth, gen_pacaut, a, gen_helper_pacia) 8802 TRANS_FEAT(PACIB, aa64_pauth, gen_pacaut, a, gen_helper_pacib) 8803 TRANS_FEAT(PACDA, aa64_pauth, gen_pacaut, a, gen_helper_pacda) 8804 TRANS_FEAT(PACDB, aa64_pauth, gen_pacaut, a, gen_helper_pacdb) 8805 8806 TRANS_FEAT(AUTIA, aa64_pauth, gen_pacaut, a, gen_helper_autia) 8807 TRANS_FEAT(AUTIB, aa64_pauth, gen_pacaut, a, gen_helper_autib) 8808 TRANS_FEAT(AUTDA, aa64_pauth, gen_pacaut, a, gen_helper_autda) 8809 TRANS_FEAT(AUTDB, aa64_pauth, gen_pacaut, a, gen_helper_autdb) 8810 8811 static bool do_xpac(DisasContext *s, int rd, NeonGenOne64OpEnvFn *fn) 8812 { 8813 if (s->pauth_active) { 8814 TCGv_i64 tcg_rd = cpu_reg(s, rd); 8815 fn(tcg_rd, tcg_env, tcg_rd); 8816 } 8817 return true; 8818 } 8819 8820 TRANS_FEAT(XPACI, aa64_pauth, do_xpac, a->rd, gen_helper_xpaci) 8821 TRANS_FEAT(XPACD, aa64_pauth, do_xpac, a->rd, gen_helper_xpacd) 8822 8823 static bool do_logic_reg(DisasContext *s, arg_logic_shift *a, 8824 ArithTwoOp *fn, ArithTwoOp *inv_fn, bool setflags) 8825 { 8826 TCGv_i64 tcg_rd, tcg_rn, tcg_rm; 8827 8828 if (!a->sf && (a->sa & (1 << 5))) { 8829 return false; 8830 } 8831 8832 tcg_rd = cpu_reg(s, a->rd); 8833 tcg_rn = cpu_reg(s, a->rn); 8834 8835 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8836 if (a->sa) { 8837 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); 8838 } 8839 8840 (a->n ? inv_fn : fn)(tcg_rd, tcg_rn, tcg_rm); 8841 if (!a->sf) { 8842 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8843 } 8844 if (setflags) { 8845 gen_logic_CC(a->sf, tcg_rd); 8846 } 8847 return true; 8848 } 8849 8850 static bool trans_ORR_r(DisasContext *s, arg_logic_shift *a) 8851 { 8852 /* 8853 * Unshifted ORR and ORN with WZR/XZR is the standard encoding for 8854 * register-register MOV and MVN, so it is worth special casing. 
8855 */ 8856 if (a->sa == 0 && a->st == 0 && a->rn == 31) { 8857 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8858 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 8859 8860 if (a->n) { 8861 tcg_gen_not_i64(tcg_rd, tcg_rm); 8862 if (!a->sf) { 8863 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8864 } 8865 } else { 8866 if (a->sf) { 8867 tcg_gen_mov_i64(tcg_rd, tcg_rm); 8868 } else { 8869 tcg_gen_ext32u_i64(tcg_rd, tcg_rm); 8870 } 8871 } 8872 return true; 8873 } 8874 8875 return do_logic_reg(s, a, tcg_gen_or_i64, tcg_gen_orc_i64, false); 8876 } 8877 8878 TRANS(AND_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, false) 8879 TRANS(ANDS_r, do_logic_reg, a, tcg_gen_and_i64, tcg_gen_andc_i64, true) 8880 TRANS(EOR_r, do_logic_reg, a, tcg_gen_xor_i64, tcg_gen_eqv_i64, false) 8881 8882 static bool do_addsub_ext(DisasContext *s, arg_addsub_ext *a, 8883 bool sub_op, bool setflags) 8884 { 8885 TCGv_i64 tcg_rm, tcg_rn, tcg_rd, tcg_result; 8886 8887 if (a->sa > 4) { 8888 return false; 8889 } 8890 8891 /* non-flag setting ops may use SP */ 8892 if (!setflags) { 8893 tcg_rd = cpu_reg_sp(s, a->rd); 8894 } else { 8895 tcg_rd = cpu_reg(s, a->rd); 8896 } 8897 tcg_rn = read_cpu_reg_sp(s, a->rn, a->sf); 8898 8899 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8900 ext_and_shift_reg(tcg_rm, tcg_rm, a->st, a->sa); 8901 8902 tcg_result = tcg_temp_new_i64(); 8903 if (!setflags) { 8904 if (sub_op) { 8905 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 8906 } else { 8907 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 8908 } 8909 } else { 8910 if (sub_op) { 8911 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8912 } else { 8913 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8914 } 8915 } 8916 8917 if (a->sf) { 8918 tcg_gen_mov_i64(tcg_rd, tcg_result); 8919 } else { 8920 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 8921 } 8922 return true; 8923 } 8924 8925 TRANS(ADD_ext, do_addsub_ext, a, false, false) 8926 TRANS(SUB_ext, do_addsub_ext, a, true, false) 8927 TRANS(ADDS_ext, do_addsub_ext, a, false, true) 8928 TRANS(SUBS_ext, do_addsub_ext, a, true, true) 8929 8930 static bool do_addsub_reg(DisasContext *s, arg_addsub_shift *a, 8931 bool sub_op, bool setflags) 8932 { 8933 TCGv_i64 tcg_rd, tcg_rn, tcg_rm, tcg_result; 8934 8935 if (a->st == 3 || (!a->sf && (a->sa & 32))) { 8936 return false; 8937 } 8938 8939 tcg_rd = cpu_reg(s, a->rd); 8940 tcg_rn = read_cpu_reg(s, a->rn, a->sf); 8941 tcg_rm = read_cpu_reg(s, a->rm, a->sf); 8942 8943 shift_reg_imm(tcg_rm, tcg_rm, a->sf, a->st, a->sa); 8944 8945 tcg_result = tcg_temp_new_i64(); 8946 if (!setflags) { 8947 if (sub_op) { 8948 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 8949 } else { 8950 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 8951 } 8952 } else { 8953 if (sub_op) { 8954 gen_sub_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8955 } else { 8956 gen_add_CC(a->sf, tcg_result, tcg_rn, tcg_rm); 8957 } 8958 } 8959 8960 if (a->sf) { 8961 tcg_gen_mov_i64(tcg_rd, tcg_result); 8962 } else { 8963 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 8964 } 8965 return true; 8966 } 8967 8968 TRANS(ADD_r, do_addsub_reg, a, false, false) 8969 TRANS(SUB_r, do_addsub_reg, a, true, false) 8970 TRANS(ADDS_r, do_addsub_reg, a, false, true) 8971 TRANS(SUBS_r, do_addsub_reg, a, true, true) 8972 8973 static bool do_mulh(DisasContext *s, arg_rrr *a, 8974 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 8975 { 8976 TCGv_i64 discard = tcg_temp_new_i64(); 8977 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8978 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 8979 TCGv_i64 tcg_rm = cpu_reg(s, a->rm); 8980 8981 fn(discard, tcg_rd, tcg_rn, tcg_rm); 8982 return true; 8983 } 8984 
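/* tcg_gen_muls2_i64/mulu2_i64 return the 128-bit product as two 64-bit halves, low part first; SMULH/UMULH keep only the high half, so the low half goes to a discarded temporary. */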
8985 TRANS(SMULH, do_mulh, a, tcg_gen_muls2_i64) 8986 TRANS(UMULH, do_mulh, a, tcg_gen_mulu2_i64) 8987 8988 static bool do_muladd(DisasContext *s, arg_rrrr *a, 8989 bool sf, bool is_sub, MemOp mop) 8990 { 8991 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 8992 TCGv_i64 tcg_op1, tcg_op2; 8993 8994 if (mop == MO_64) { 8995 tcg_op1 = cpu_reg(s, a->rn); 8996 tcg_op2 = cpu_reg(s, a->rm); 8997 } else { 8998 tcg_op1 = tcg_temp_new_i64(); 8999 tcg_op2 = tcg_temp_new_i64(); 9000 tcg_gen_ext_i64(tcg_op1, cpu_reg(s, a->rn), mop); 9001 tcg_gen_ext_i64(tcg_op2, cpu_reg(s, a->rm), mop); 9002 } 9003 9004 if (a->ra == 31 && !is_sub) { 9005 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 9006 tcg_gen_mul_i64(tcg_rd, tcg_op1, tcg_op2); 9007 } else { 9008 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 9009 TCGv_i64 tcg_ra = cpu_reg(s, a->ra); 9010 9011 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 9012 if (is_sub) { 9013 tcg_gen_sub_i64(tcg_rd, tcg_ra, tcg_tmp); 9014 } else { 9015 tcg_gen_add_i64(tcg_rd, tcg_ra, tcg_tmp); 9016 } 9017 } 9018 9019 if (!sf) { 9020 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 9021 } 9022 return true; 9023 } 9024 9025 TRANS(MADD_w, do_muladd, a, false, false, MO_64) 9026 TRANS(MSUB_w, do_muladd, a, false, true, MO_64) 9027 TRANS(MADD_x, do_muladd, a, true, false, MO_64) 9028 TRANS(MSUB_x, do_muladd, a, true, true, MO_64) 9029 9030 TRANS(SMADDL, do_muladd, a, true, false, MO_SL) 9031 TRANS(SMSUBL, do_muladd, a, true, true, MO_SL) 9032 TRANS(UMADDL, do_muladd, a, true, false, MO_UL) 9033 TRANS(UMSUBL, do_muladd, a, true, true, MO_UL) 9034 9035 static bool do_adc_sbc(DisasContext *s, arg_rrr_sf *a, 9036 bool is_sub, bool setflags) 9037 { 9038 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 9039 9040 tcg_rd = cpu_reg(s, a->rd); 9041 tcg_rn = cpu_reg(s, a->rn); 9042 9043 if (is_sub) { 9044 tcg_y = tcg_temp_new_i64(); 9045 tcg_gen_not_i64(tcg_y, cpu_reg(s, a->rm)); 9046 } else { 9047 tcg_y = cpu_reg(s, a->rm); 9048 } 9049 9050 if (setflags) { 9051 gen_adc_CC(a->sf, tcg_rd, tcg_rn, tcg_y); 9052 } else { 9053 gen_adc(a->sf, tcg_rd, tcg_rn, tcg_y); 9054 } 9055 return true; 9056 } 9057 9058 TRANS(ADC, do_adc_sbc, a, false, false) 9059 TRANS(SBC, do_adc_sbc, a, true, false) 9060 TRANS(ADCS, do_adc_sbc, a, false, true) 9061 TRANS(SBCS, do_adc_sbc, a, true, true) 9062 9063 static bool trans_RMIF(DisasContext *s, arg_RMIF *a) 9064 { 9065 int mask = a->mask; 9066 TCGv_i64 tcg_rn; 9067 TCGv_i32 nzcv; 9068 9069 if (!dc_isar_feature(aa64_condm_4, s)) { 9070 return false; 9071 } 9072 9073 tcg_rn = read_cpu_reg(s, a->rn, 1); 9074 tcg_gen_rotri_i64(tcg_rn, tcg_rn, a->imm); 9075 9076 nzcv = tcg_temp_new_i32(); 9077 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 9078 9079 if (mask & 8) { /* N */ 9080 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 9081 } 9082 if (mask & 4) { /* Z */ 9083 tcg_gen_not_i32(cpu_ZF, nzcv); 9084 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 9085 } 9086 if (mask & 2) { /* C */ 9087 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 9088 } 9089 if (mask & 1) { /* V */ 9090 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 9091 } 9092 return true; 9093 } 9094 9095 static bool do_setf(DisasContext *s, int rn, int shift) 9096 { 9097 TCGv_i32 tmp = tcg_temp_new_i32(); 9098 9099 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 9100 tcg_gen_shli_i32(cpu_NF, tmp, shift); 9101 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 9102 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 9103 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 9104 return true; 9105 } 9106 9107 TRANS_FEAT(SETF8, aa64_condm_4, do_setf, a->rn, 24) 9108 TRANS_FEAT(SETF16, aa64_condm_4, do_setf, a->rn, 16) 9109 9110 /* 
CCMP, CCMN */ 9111 static bool trans_CCMP(DisasContext *s, arg_CCMP *a) 9112 { 9113 TCGv_i32 tcg_t0 = tcg_temp_new_i32(); 9114 TCGv_i32 tcg_t1 = tcg_temp_new_i32(); 9115 TCGv_i32 tcg_t2 = tcg_temp_new_i32(); 9116 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 9117 TCGv_i64 tcg_rn, tcg_y; 9118 DisasCompare c; 9119 unsigned nzcv; 9120 bool has_andc; 9121 9122 /* Set T0 = !COND. */ 9123 arm_test_cc(&c, a->cond); 9124 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 9125 9126 /* Load the arguments for the new comparison. */ 9127 if (a->imm) { 9128 tcg_y = tcg_constant_i64(a->y); 9129 } else { 9130 tcg_y = cpu_reg(s, a->y); 9131 } 9132 tcg_rn = cpu_reg(s, a->rn); 9133 9134 /* Set the flags for the new comparison. */ 9135 if (a->op) { 9136 gen_sub_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 9137 } else { 9138 gen_add_CC(a->sf, tcg_tmp, tcg_rn, tcg_y); 9139 } 9140 9141 /* 9142 * If COND was false, force the flags to #nzcv. Compute two masks 9143 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 9144 * For tcg hosts that support ANDC, we can make do with just T1. 9145 * In either case, allow the tcg optimizer to delete any unused mask. 9146 */ 9147 tcg_gen_neg_i32(tcg_t1, tcg_t0); 9148 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 9149 9150 nzcv = a->nzcv; 9151 has_andc = tcg_op_supported(INDEX_op_andc, TCG_TYPE_I32, 0); 9152 if (nzcv & 8) { /* N */ 9153 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 9154 } else { 9155 if (has_andc) { 9156 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 9157 } else { 9158 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 9159 } 9160 } 9161 if (nzcv & 4) { /* Z */ 9162 if (has_andc) { 9163 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 9164 } else { 9165 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 9166 } 9167 } else { 9168 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 9169 } 9170 if (nzcv & 2) { /* C */ 9171 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 9172 } else { 9173 if (has_andc) { 9174 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 9175 } else { 9176 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 9177 } 9178 } 9179 if (nzcv & 1) { /* V */ 9180 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 9181 } else { 9182 if (has_andc) { 9183 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 9184 } else { 9185 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 9186 } 9187 } 9188 return true; 9189 } 9190 9191 static bool trans_CSEL(DisasContext *s, arg_CSEL *a) 9192 { 9193 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9194 TCGv_i64 zero = tcg_constant_i64(0); 9195 DisasCompare64 c; 9196 9197 a64_test_cc(&c, a->cond); 9198 9199 if (a->rn == 31 && a->rm == 31 && (a->else_inc ^ a->else_inv)) { 9200 /* CSET & CSETM. 
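* With rn == rm == XZR the result is just 0 or 1 (CSINC) resp. 0 or -1 (CSINV) depending on the condition, so setcond/negsetcond on the inverted condition suffices.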
*/ 9201 if (a->else_inv) { 9202 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), 9203 tcg_rd, c.value, zero); 9204 } else { 9205 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), 9206 tcg_rd, c.value, zero); 9207 } 9208 } else { 9209 TCGv_i64 t_true = cpu_reg(s, a->rn); 9210 TCGv_i64 t_false = read_cpu_reg(s, a->rm, 1); 9211 9212 if (a->else_inv && a->else_inc) { 9213 tcg_gen_neg_i64(t_false, t_false); 9214 } else if (a->else_inv) { 9215 tcg_gen_not_i64(t_false, t_false); 9216 } else if (a->else_inc) { 9217 tcg_gen_addi_i64(t_false, t_false, 1); 9218 } 9219 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 9220 } 9221 9222 if (!a->sf) { 9223 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 9224 } 9225 return true; 9226 } 9227 9228 typedef struct FPScalar1Int { 9229 void (*gen_h)(TCGv_i32, TCGv_i32); 9230 void (*gen_s)(TCGv_i32, TCGv_i32); 9231 void (*gen_d)(TCGv_i64, TCGv_i64); 9232 } FPScalar1Int; 9233 9234 static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, 9235 const FPScalar1Int *f, 9236 bool merging) 9237 { 9238 switch (a->esz) { 9239 case MO_64: 9240 if (fp_access_check(s)) { 9241 TCGv_i64 t = read_fp_dreg(s, a->rn); 9242 f->gen_d(t, t); 9243 if (merging) { 9244 write_fp_dreg_merging(s, a->rd, a->rd, t); 9245 } else { 9246 write_fp_dreg(s, a->rd, t); 9247 } 9248 } 9249 break; 9250 case MO_32: 9251 if (fp_access_check(s)) { 9252 TCGv_i32 t = read_fp_sreg(s, a->rn); 9253 f->gen_s(t, t); 9254 if (merging) { 9255 write_fp_sreg_merging(s, a->rd, a->rd, t); 9256 } else { 9257 write_fp_sreg(s, a->rd, t); 9258 } 9259 } 9260 break; 9261 case MO_16: 9262 if (!dc_isar_feature(aa64_fp16, s)) { 9263 return false; 9264 } 9265 if (fp_access_check(s)) { 9266 TCGv_i32 t = read_fp_hreg(s, a->rn); 9267 f->gen_h(t, t); 9268 if (merging) { 9269 write_fp_hreg_merging(s, a->rd, a->rd, t); 9270 } else { 9271 write_fp_sreg(s, a->rd, t); 9272 } 9273 } 9274 break; 9275 default: 9276 return false; 9277 } 9278 return true; 9279 } 9280 9281 static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a, 9282 const FPScalar1Int *fnormal, 9283 const FPScalar1Int *fah) 9284 { 9285 return do_fp1_scalar_int(s, a, s->fpcr_ah ? 
fah : fnormal, true); 9286 } 9287 9288 static const FPScalar1Int f_scalar_fmov = { 9289 tcg_gen_mov_i32, 9290 tcg_gen_mov_i32, 9291 tcg_gen_mov_i64, 9292 }; 9293 TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov, false) 9294 9295 static const FPScalar1Int f_scalar_fabs = { 9296 gen_vfp_absh, 9297 gen_vfp_abss, 9298 gen_vfp_absd, 9299 }; 9300 static const FPScalar1Int f_scalar_ah_fabs = { 9301 gen_vfp_ah_absh, 9302 gen_vfp_ah_abss, 9303 gen_vfp_ah_absd, 9304 }; 9305 TRANS(FABS_s, do_fp1_scalar_int_2fn, a, &f_scalar_fabs, &f_scalar_ah_fabs) 9306 9307 static const FPScalar1Int f_scalar_fneg = { 9308 gen_vfp_negh, 9309 gen_vfp_negs, 9310 gen_vfp_negd, 9311 }; 9312 static const FPScalar1Int f_scalar_ah_fneg = { 9313 gen_vfp_ah_negh, 9314 gen_vfp_ah_negs, 9315 gen_vfp_ah_negd, 9316 }; 9317 TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg) 9318 9319 typedef struct FPScalar1 { 9320 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr); 9321 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_ptr); 9322 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_ptr); 9323 } FPScalar1; 9324 9325 static bool do_fp1_scalar_with_fpsttype(DisasContext *s, arg_rr_e *a, 9326 const FPScalar1 *f, int rmode, 9327 ARMFPStatusFlavour fpsttype) 9328 { 9329 TCGv_i32 tcg_rmode = NULL; 9330 TCGv_ptr fpst; 9331 TCGv_i64 t64; 9332 TCGv_i32 t32; 9333 int check = fp_access_check_scalar_hsd(s, a->esz); 9334 9335 if (check <= 0) { 9336 return check == 0; 9337 } 9338 9339 fpst = fpstatus_ptr(fpsttype); 9340 if (rmode >= 0) { 9341 tcg_rmode = gen_set_rmode(rmode, fpst); 9342 } 9343 9344 switch (a->esz) { 9345 case MO_64: 9346 t64 = read_fp_dreg(s, a->rn); 9347 f->gen_d(t64, t64, fpst); 9348 write_fp_dreg_merging(s, a->rd, a->rd, t64); 9349 break; 9350 case MO_32: 9351 t32 = read_fp_sreg(s, a->rn); 9352 f->gen_s(t32, t32, fpst); 9353 write_fp_sreg_merging(s, a->rd, a->rd, t32); 9354 break; 9355 case MO_16: 9356 t32 = read_fp_hreg(s, a->rn); 9357 f->gen_h(t32, t32, fpst); 9358 write_fp_hreg_merging(s, a->rd, a->rd, t32); 9359 break; 9360 default: 9361 g_assert_not_reached(); 9362 } 9363 9364 if (rmode >= 0) { 9365 gen_restore_rmode(tcg_rmode, fpst); 9366 } 9367 return true; 9368 } 9369 9370 static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a, 9371 const FPScalar1 *f, int rmode) 9372 { 9373 return do_fp1_scalar_with_fpsttype(s, a, f, rmode, 9374 a->esz == MO_16 ? 
9375 FPST_A64_F16 : FPST_A64); 9376 } 9377 9378 static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a, 9379 const FPScalar1 *f, int rmode) 9380 { 9381 return do_fp1_scalar_with_fpsttype(s, a, f, rmode, select_ah_fpst(s, a->esz)); 9382 } 9383 9384 static const FPScalar1 f_scalar_fsqrt = { 9385 gen_helper_vfp_sqrth, 9386 gen_helper_vfp_sqrts, 9387 gen_helper_vfp_sqrtd, 9388 }; 9389 TRANS(FSQRT_s, do_fp1_scalar, a, &f_scalar_fsqrt, -1) 9390 9391 static const FPScalar1 f_scalar_frint = { 9392 gen_helper_advsimd_rinth, 9393 gen_helper_rints, 9394 gen_helper_rintd, 9395 }; 9396 TRANS(FRINTN_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEEVEN) 9397 TRANS(FRINTP_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_POSINF) 9398 TRANS(FRINTM_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_NEGINF) 9399 TRANS(FRINTZ_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_ZERO) 9400 TRANS(FRINTA_s, do_fp1_scalar, a, &f_scalar_frint, FPROUNDING_TIEAWAY) 9401 TRANS(FRINTI_s, do_fp1_scalar, a, &f_scalar_frint, -1) 9402 9403 static const FPScalar1 f_scalar_frintx = { 9404 gen_helper_advsimd_rinth_exact, 9405 gen_helper_rints_exact, 9406 gen_helper_rintd_exact, 9407 }; 9408 TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1) 9409 9410 static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a) 9411 { 9412 ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64; 9413 TCGv_i32 t32; 9414 int check; 9415 9416 if (!dc_isar_feature(aa64_bf16, s)) { 9417 return false; 9418 } 9419 9420 check = fp_access_check_scalar_hsd(s, a->esz); 9421 9422 if (check <= 0) { 9423 return check == 0; 9424 } 9425 9426 t32 = read_fp_sreg(s, a->rn); 9427 gen_helper_bfcvt(t32, t32, fpstatus_ptr(fpsttype)); 9428 write_fp_hreg_merging(s, a->rd, a->rd, t32); 9429 return true; 9430 } 9431 9432 static const FPScalar1 f_scalar_frint32 = { 9433 NULL, 9434 gen_helper_frint32_s, 9435 gen_helper_frint32_d, 9436 }; 9437 TRANS_FEAT(FRINT32Z_s, aa64_frint, do_fp1_scalar, a, 9438 &f_scalar_frint32, FPROUNDING_ZERO) 9439 TRANS_FEAT(FRINT32X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint32, -1) 9440 9441 static const FPScalar1 f_scalar_frint64 = { 9442 NULL, 9443 gen_helper_frint64_s, 9444 gen_helper_frint64_d, 9445 }; 9446 TRANS_FEAT(FRINT64Z_s, aa64_frint, do_fp1_scalar, a, 9447 &f_scalar_frint64, FPROUNDING_ZERO) 9448 TRANS_FEAT(FRINT64X_s, aa64_frint, do_fp1_scalar, a, &f_scalar_frint64, -1) 9449 9450 static const FPScalar1 f_scalar_frecpe = { 9451 gen_helper_recpe_f16, 9452 gen_helper_recpe_f32, 9453 gen_helper_recpe_f64, 9454 }; 9455 static const FPScalar1 f_scalar_frecpe_rpres = { 9456 gen_helper_recpe_f16, 9457 gen_helper_recpe_rpres_f32, 9458 gen_helper_recpe_f64, 9459 }; 9460 TRANS(FRECPE_s, do_fp1_scalar_ah, a, 9461 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 9462 &f_scalar_frecpe_rpres : &f_scalar_frecpe, -1) 9463 9464 static const FPScalar1 f_scalar_frecpx = { 9465 gen_helper_frecpx_f16, 9466 gen_helper_frecpx_f32, 9467 gen_helper_frecpx_f64, 9468 }; 9469 TRANS(FRECPX_s, do_fp1_scalar_ah, a, &f_scalar_frecpx, -1) 9470 9471 static const FPScalar1 f_scalar_frsqrte = { 9472 gen_helper_rsqrte_f16, 9473 gen_helper_rsqrte_f32, 9474 gen_helper_rsqrte_f64, 9475 }; 9476 static const FPScalar1 f_scalar_frsqrte_rpres = { 9477 gen_helper_rsqrte_f16, 9478 gen_helper_rsqrte_rpres_f32, 9479 gen_helper_rsqrte_f64, 9480 }; 9481 TRANS(FRSQRTE_s, do_fp1_scalar_ah, a, 9482 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? 
9483 &f_scalar_frsqrte_rpres : &f_scalar_frsqrte, -1) 9484 9485 static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a) 9486 { 9487 if (fp_access_check(s)) { 9488 TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn); 9489 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 9490 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9491 9492 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst); 9493 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); 9494 } 9495 return true; 9496 } 9497 9498 static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a) 9499 { 9500 if (fp_access_check(s)) { 9501 TCGv_i32 tmp = read_fp_sreg(s, a->rn); 9502 TCGv_i32 ahp = get_ahp_flag(); 9503 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9504 9505 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp); 9506 /* write_fp_hreg_merging is OK here because top half of result is zero */ 9507 write_fp_hreg_merging(s, a->rd, a->rd, tmp); 9508 } 9509 return true; 9510 } 9511 9512 static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a) 9513 { 9514 if (fp_access_check(s)) { 9515 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); 9516 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 9517 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9518 9519 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst); 9520 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); 9521 } 9522 return true; 9523 } 9524 9525 static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a) 9526 { 9527 if (fp_access_check(s)) { 9528 TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn); 9529 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 9530 TCGv_i32 ahp = get_ahp_flag(); 9531 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 9532 9533 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 9534 /* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */ 9535 write_fp_hreg_merging(s, a->rd, a->rd, tcg_rd); 9536 } 9537 return true; 9538 } 9539 9540 static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a) 9541 { 9542 if (fp_access_check(s)) { 9543 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); 9544 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 9545 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); 9546 TCGv_i32 tcg_ahp = get_ahp_flag(); 9547 9548 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 9549 write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd); 9550 } 9551 return true; 9552 } 9553 9554 static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a) 9555 { 9556 if (fp_access_check(s)) { 9557 TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn); 9558 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 9559 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16); 9560 TCGv_i32 tcg_ahp = get_ahp_flag(); 9561 9562 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 9563 write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd); 9564 } 9565 return true; 9566 } 9567 9568 static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift, 9569 TCGv_i64 tcg_int, bool is_signed) 9570 { 9571 TCGv_ptr tcg_fpstatus; 9572 TCGv_i32 tcg_shift, tcg_single; 9573 TCGv_i64 tcg_double; 9574 9575 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 9576 tcg_shift = tcg_constant_i32(shift); 9577 9578 switch (esz) { 9579 case MO_64: 9580 tcg_double = tcg_temp_new_i64(); 9581 if (is_signed) { 9582 gen_helper_vfp_sqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 9583 } else { 9584 gen_helper_vfp_uqtod(tcg_double, tcg_int, tcg_shift, tcg_fpstatus); 9585 } 9586 write_fp_dreg_merging(s, rd, rd, tcg_double); 9587 break; 9588 9589 case MO_32: 9590 tcg_single = tcg_temp_new_i32(); 9591 if (is_signed) { 9592 gen_helper_vfp_sqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9593 } else { 9594 gen_helper_vfp_uqtos(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9595 } 9596 write_fp_sreg_merging(s, rd, rd, tcg_single); 9597 break; 9598 9599 case MO_16: 9600 tcg_single = tcg_temp_new_i32(); 9601 if (is_signed) { 9602 gen_helper_vfp_sqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9603 } else { 9604 gen_helper_vfp_uqtoh(tcg_single, tcg_int, tcg_shift, tcg_fpstatus); 9605 } 9606 write_fp_hreg_merging(s, rd, rd, tcg_single); 9607 break; 9608 9609 default: 9610 g_assert_not_reached(); 9611 } 9612 return true; 9613 } 9614 9615 static bool do_cvtf_g(DisasContext *s, arg_fcvt *a, bool is_signed) 9616 { 9617 TCGv_i64 tcg_int; 9618 int check = fp_access_check_scalar_hsd(s, a->esz); 9619 9620 if (check <= 0) { 9621 return check == 0; 9622 } 9623 9624 if (a->sf) { 9625 tcg_int = cpu_reg(s, a->rn); 9626 } else { 9627 tcg_int = read_cpu_reg(s, a->rn, true); 9628 if (is_signed) { 9629 tcg_gen_ext32s_i64(tcg_int, tcg_int); 9630 } else { 9631 tcg_gen_ext32u_i64(tcg_int, tcg_int); 9632 } 9633 } 9634 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 9635 } 9636 9637 TRANS(SCVTF_g, do_cvtf_g, a, true) 9638 TRANS(UCVTF_g, do_cvtf_g, a, false) 9639 9640 /* 9641 * [US]CVTF (vector), scalar version. 9642 * Which sounds weird, but really just means input from fp register 9643 * instead of input from general register. Input and output element 9644 * size are always equal. 9645 */ 9646 static bool do_cvtf_f(DisasContext *s, arg_fcvt *a, bool is_signed) 9647 { 9648 TCGv_i64 tcg_int; 9649 int check = fp_access_check_scalar_hsd(s, a->esz); 9650 9651 if (check <= 0) { 9652 return check == 0; 9653 } 9654 9655 tcg_int = tcg_temp_new_i64(); 9656 read_vec_element(s, tcg_int, a->rn, 0, a->esz | (is_signed ? MO_SIGN : 0)); 9657 return do_cvtf_scalar(s, a->esz, a->rd, a->shift, tcg_int, is_signed); 9658 } 9659 9660 TRANS(SCVTF_f, do_cvtf_f, a, true) 9661 TRANS(UCVTF_f, do_cvtf_f, a, false) 9662 9663 static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz, 9664 TCGv_i64 tcg_out, int shift, int rn, 9665 ARMFPRounding rmode) 9666 { 9667 TCGv_ptr tcg_fpstatus; 9668 TCGv_i32 tcg_shift, tcg_rmode, tcg_single; 9669 9670 tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? 
FPST_A64_F16 : FPST_A64); 9671 tcg_shift = tcg_constant_i32(shift); 9672 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 9673 9674 switch (esz) { 9675 case MO_64: 9676 read_vec_element(s, tcg_out, rn, 0, MO_64); 9677 switch (out) { 9678 case MO_64 | MO_SIGN: 9679 gen_helper_vfp_tosqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9680 break; 9681 case MO_64: 9682 gen_helper_vfp_touqd(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9683 break; 9684 case MO_32 | MO_SIGN: 9685 gen_helper_vfp_tosld(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9686 break; 9687 case MO_32: 9688 gen_helper_vfp_tould(tcg_out, tcg_out, tcg_shift, tcg_fpstatus); 9689 break; 9690 default: 9691 g_assert_not_reached(); 9692 } 9693 break; 9694 9695 case MO_32: 9696 tcg_single = read_fp_sreg(s, rn); 9697 switch (out) { 9698 case MO_64 | MO_SIGN: 9699 gen_helper_vfp_tosqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9700 break; 9701 case MO_64: 9702 gen_helper_vfp_touqs(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9703 break; 9704 case MO_32 | MO_SIGN: 9705 gen_helper_vfp_tosls(tcg_single, tcg_single, 9706 tcg_shift, tcg_fpstatus); 9707 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9708 break; 9709 case MO_32: 9710 gen_helper_vfp_touls(tcg_single, tcg_single, 9711 tcg_shift, tcg_fpstatus); 9712 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9713 break; 9714 default: 9715 g_assert_not_reached(); 9716 } 9717 break; 9718 9719 case MO_16: 9720 tcg_single = read_fp_hreg(s, rn); 9721 switch (out) { 9722 case MO_64 | MO_SIGN: 9723 gen_helper_vfp_tosqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9724 break; 9725 case MO_64: 9726 gen_helper_vfp_touqh(tcg_out, tcg_single, tcg_shift, tcg_fpstatus); 9727 break; 9728 case MO_32 | MO_SIGN: 9729 gen_helper_vfp_toslh(tcg_single, tcg_single, 9730 tcg_shift, tcg_fpstatus); 9731 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9732 break; 9733 case MO_32: 9734 gen_helper_vfp_toulh(tcg_single, tcg_single, 9735 tcg_shift, tcg_fpstatus); 9736 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9737 break; 9738 case MO_16 | MO_SIGN: 9739 gen_helper_vfp_toshh(tcg_single, tcg_single, 9740 tcg_shift, tcg_fpstatus); 9741 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9742 break; 9743 case MO_16: 9744 gen_helper_vfp_touhh(tcg_single, tcg_single, 9745 tcg_shift, tcg_fpstatus); 9746 tcg_gen_extu_i32_i64(tcg_out, tcg_single); 9747 break; 9748 default: 9749 g_assert_not_reached(); 9750 } 9751 break; 9752 9753 default: 9754 g_assert_not_reached(); 9755 } 9756 9757 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 9758 } 9759 9760 static bool do_fcvt_g(DisasContext *s, arg_fcvt *a, 9761 ARMFPRounding rmode, bool is_signed) 9762 { 9763 TCGv_i64 tcg_int; 9764 int check = fp_access_check_scalar_hsd(s, a->esz); 9765 9766 if (check <= 0) { 9767 return check == 0; 9768 } 9769 9770 tcg_int = cpu_reg(s, a->rd); 9771 do_fcvt_scalar(s, (a->sf ? MO_64 : MO_32) | (is_signed ? 
MO_SIGN : 0), 9772 a->esz, tcg_int, a->shift, a->rn, rmode); 9773 9774 if (!a->sf) { 9775 tcg_gen_ext32u_i64(tcg_int, tcg_int); 9776 } 9777 return true; 9778 } 9779 9780 TRANS(FCVTNS_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, true) 9781 TRANS(FCVTNU_g, do_fcvt_g, a, FPROUNDING_TIEEVEN, false) 9782 TRANS(FCVTPS_g, do_fcvt_g, a, FPROUNDING_POSINF, true) 9783 TRANS(FCVTPU_g, do_fcvt_g, a, FPROUNDING_POSINF, false) 9784 TRANS(FCVTMS_g, do_fcvt_g, a, FPROUNDING_NEGINF, true) 9785 TRANS(FCVTMU_g, do_fcvt_g, a, FPROUNDING_NEGINF, false) 9786 TRANS(FCVTZS_g, do_fcvt_g, a, FPROUNDING_ZERO, true) 9787 TRANS(FCVTZU_g, do_fcvt_g, a, FPROUNDING_ZERO, false) 9788 TRANS(FCVTAS_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, true) 9789 TRANS(FCVTAU_g, do_fcvt_g, a, FPROUNDING_TIEAWAY, false) 9790 9791 /* 9792 * FCVT* (vector), scalar version. 9793 * Which sounds weird, but really just means output to fp register 9794 * instead of output to general register. Input and output element 9795 * size are always equal. 9796 */ 9797 static bool do_fcvt_f(DisasContext *s, arg_fcvt *a, 9798 ARMFPRounding rmode, bool is_signed) 9799 { 9800 TCGv_i64 tcg_int; 9801 int check = fp_access_check_scalar_hsd(s, a->esz); 9802 9803 if (check <= 0) { 9804 return check == 0; 9805 } 9806 9807 tcg_int = tcg_temp_new_i64(); 9808 do_fcvt_scalar(s, a->esz | (is_signed ? MO_SIGN : 0), 9809 a->esz, tcg_int, a->shift, a->rn, rmode); 9810 9811 if (!s->fpcr_nep) { 9812 clear_vec(s, a->rd); 9813 } 9814 write_vec_element(s, tcg_int, a->rd, 0, a->esz); 9815 return true; 9816 } 9817 9818 TRANS(FCVTNS_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, true) 9819 TRANS(FCVTNU_f, do_fcvt_f, a, FPROUNDING_TIEEVEN, false) 9820 TRANS(FCVTPS_f, do_fcvt_f, a, FPROUNDING_POSINF, true) 9821 TRANS(FCVTPU_f, do_fcvt_f, a, FPROUNDING_POSINF, false) 9822 TRANS(FCVTMS_f, do_fcvt_f, a, FPROUNDING_NEGINF, true) 9823 TRANS(FCVTMU_f, do_fcvt_f, a, FPROUNDING_NEGINF, false) 9824 TRANS(FCVTZS_f, do_fcvt_f, a, FPROUNDING_ZERO, true) 9825 TRANS(FCVTZU_f, do_fcvt_f, a, FPROUNDING_ZERO, false) 9826 TRANS(FCVTAS_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, true) 9827 TRANS(FCVTAU_f, do_fcvt_f, a, FPROUNDING_TIEAWAY, false) 9828 9829 static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a) 9830 { 9831 if (!dc_isar_feature(aa64_jscvt, s)) { 9832 return false; 9833 } 9834 if (fp_access_check(s)) { 9835 TCGv_i64 t = read_fp_dreg(s, a->rn); 9836 TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64); 9837 9838 gen_helper_fjcvtzs(t, t, fpstatus); 9839 9840 tcg_gen_ext32u_i64(cpu_reg(s, a->rd), t); 9841 tcg_gen_extrh_i64_i32(cpu_ZF, t); 9842 tcg_gen_movi_i32(cpu_CF, 0); 9843 tcg_gen_movi_i32(cpu_NF, 0); 9844 tcg_gen_movi_i32(cpu_VF, 0); 9845 } 9846 return true; 9847 } 9848 9849 static bool trans_FMOV_hx(DisasContext *s, arg_rr *a) 9850 { 9851 if (!dc_isar_feature(aa64_fp16, s)) { 9852 return false; 9853 } 9854 if (fp_access_check(s)) { 9855 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9856 TCGv_i64 tmp = tcg_temp_new_i64(); 9857 tcg_gen_ext16u_i64(tmp, tcg_rn); 9858 write_fp_dreg(s, a->rd, tmp); 9859 } 9860 return true; 9861 } 9862 9863 static bool trans_FMOV_sw(DisasContext *s, arg_rr *a) 9864 { 9865 if (fp_access_check(s)) { 9866 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9867 TCGv_i64 tmp = tcg_temp_new_i64(); 9868 tcg_gen_ext32u_i64(tmp, tcg_rn); 9869 write_fp_dreg(s, a->rd, tmp); 9870 } 9871 return true; 9872 } 9873 9874 static bool trans_FMOV_dx(DisasContext *s, arg_rr *a) 9875 { 9876 if (fp_access_check(s)) { 9877 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9878 write_fp_dreg(s, a->rd, tcg_rn); 9879 } 9880 return true; 9881 } 9882 9883 
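/*
 * FMOV between the general and FP/vector register files.  The suffix
 * on each trans_FMOV_* function is <destination><source>: 'h', 's'
 * and 'd' denote the 16/32/64-bit FP views, 'w' and 'x' the general
 * register side, and 'u' the upper 64 bits of the 128-bit vector
 * register, i.e. the FMOV Vd.D[1], Xn form handled just below and the
 * FMOV Xd, Vn.D[1] form handled by trans_FMOV_xu.
 */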
static bool trans_FMOV_ux(DisasContext *s, arg_rr *a) 9884 { 9885 if (fp_access_check(s)) { 9886 TCGv_i64 tcg_rn = cpu_reg(s, a->rn); 9887 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, a->rd)); 9888 clear_vec_high(s, true, a->rd); 9889 } 9890 return true; 9891 } 9892 9893 static bool trans_FMOV_xh(DisasContext *s, arg_rr *a) 9894 { 9895 if (!dc_isar_feature(aa64_fp16, s)) { 9896 return false; 9897 } 9898 if (fp_access_check(s)) { 9899 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9900 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_16)); 9901 } 9902 return true; 9903 } 9904 9905 static bool trans_FMOV_ws(DisasContext *s, arg_rr *a) 9906 { 9907 if (fp_access_check(s)) { 9908 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9909 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_32)); 9910 } 9911 return true; 9912 } 9913 9914 static bool trans_FMOV_xd(DisasContext *s, arg_rr *a) 9915 { 9916 if (fp_access_check(s)) { 9917 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9918 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, a->rn, MO_64)); 9919 } 9920 return true; 9921 } 9922 9923 static bool trans_FMOV_xu(DisasContext *s, arg_rr *a) 9924 { 9925 if (fp_access_check(s)) { 9926 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 9927 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, a->rn)); 9928 } 9929 return true; 9930 } 9931 9932 typedef struct ENVScalar1 { 9933 NeonGenOneOpEnvFn *gen_bhs[3]; 9934 NeonGenOne64OpEnvFn *gen_d; 9935 } ENVScalar1; 9936 9937 static bool do_env_scalar1(DisasContext *s, arg_rr_e *a, const ENVScalar1 *f) 9938 { 9939 if (!fp_access_check(s)) { 9940 return true; 9941 } 9942 if (a->esz == MO_64) { 9943 TCGv_i64 t = read_fp_dreg(s, a->rn); 9944 f->gen_d(t, tcg_env, t); 9945 write_fp_dreg(s, a->rd, t); 9946 } else { 9947 TCGv_i32 t = tcg_temp_new_i32(); 9948 9949 read_vec_element_i32(s, t, a->rn, 0, a->esz); 9950 f->gen_bhs[a->esz](t, tcg_env, t); 9951 write_fp_sreg(s, a->rd, t); 9952 } 9953 return true; 9954 } 9955 9956 static bool do_env_vector1(DisasContext *s, arg_qrr_e *a, const ENVScalar1 *f) 9957 { 9958 if (a->esz == MO_64 && !a->q) { 9959 return false; 9960 } 9961 if (!fp_access_check(s)) { 9962 return true; 9963 } 9964 if (a->esz == MO_64) { 9965 TCGv_i64 t = tcg_temp_new_i64(); 9966 9967 for (int i = 0; i < 2; ++i) { 9968 read_vec_element(s, t, a->rn, i, MO_64); 9969 f->gen_d(t, tcg_env, t); 9970 write_vec_element(s, t, a->rd, i, MO_64); 9971 } 9972 } else { 9973 TCGv_i32 t = tcg_temp_new_i32(); 9974 int n = (a->q ? 
16 : 8) >> a->esz; 9975 9976 for (int i = 0; i < n; ++i) { 9977 read_vec_element_i32(s, t, a->rn, i, a->esz); 9978 f->gen_bhs[a->esz](t, tcg_env, t); 9979 write_vec_element_i32(s, t, a->rd, i, a->esz); 9980 } 9981 } 9982 clear_vec_high(s, a->q, a->rd); 9983 return true; 9984 } 9985 9986 static const ENVScalar1 f_scalar_sqabs = { 9987 { gen_helper_neon_qabs_s8, 9988 gen_helper_neon_qabs_s16, 9989 gen_helper_neon_qabs_s32 }, 9990 gen_helper_neon_qabs_s64, 9991 }; 9992 TRANS(SQABS_s, do_env_scalar1, a, &f_scalar_sqabs) 9993 TRANS(SQABS_v, do_env_vector1, a, &f_scalar_sqabs) 9994 9995 static const ENVScalar1 f_scalar_sqneg = { 9996 { gen_helper_neon_qneg_s8, 9997 gen_helper_neon_qneg_s16, 9998 gen_helper_neon_qneg_s32 }, 9999 gen_helper_neon_qneg_s64, 10000 }; 10001 TRANS(SQNEG_s, do_env_scalar1, a, &f_scalar_sqneg) 10002 TRANS(SQNEG_v, do_env_vector1, a, &f_scalar_sqneg) 10003 10004 static bool do_scalar1_d(DisasContext *s, arg_rr *a, ArithOneOp *f) 10005 { 10006 if (fp_access_check(s)) { 10007 TCGv_i64 t = read_fp_dreg(s, a->rn); 10008 f(t, t); 10009 write_fp_dreg(s, a->rd, t); 10010 } 10011 return true; 10012 } 10013 10014 TRANS(ABS_s, do_scalar1_d, a, tcg_gen_abs_i64) 10015 TRANS(NEG_s, do_scalar1_d, a, tcg_gen_neg_i64) 10016 10017 static bool do_cmop0_d(DisasContext *s, arg_rr *a, TCGCond cond) 10018 { 10019 if (fp_access_check(s)) { 10020 TCGv_i64 t = read_fp_dreg(s, a->rn); 10021 tcg_gen_negsetcond_i64(cond, t, t, tcg_constant_i64(0)); 10022 write_fp_dreg(s, a->rd, t); 10023 } 10024 return true; 10025 } 10026 10027 TRANS(CMGT0_s, do_cmop0_d, a, TCG_COND_GT) 10028 TRANS(CMGE0_s, do_cmop0_d, a, TCG_COND_GE) 10029 TRANS(CMLE0_s, do_cmop0_d, a, TCG_COND_LE) 10030 TRANS(CMLT0_s, do_cmop0_d, a, TCG_COND_LT) 10031 TRANS(CMEQ0_s, do_cmop0_d, a, TCG_COND_EQ) 10032 10033 static bool do_2misc_narrow_scalar(DisasContext *s, arg_rr_e *a, 10034 ArithOneOp * const fn[3]) 10035 { 10036 if (a->esz == MO_64) { 10037 return false; 10038 } 10039 if (fp_access_check(s)) { 10040 TCGv_i64 t = tcg_temp_new_i64(); 10041 10042 read_vec_element(s, t, a->rn, 0, a->esz + 1); 10043 fn[a->esz](t, t); 10044 clear_vec(s, a->rd); 10045 write_vec_element(s, t, a->rd, 0, a->esz); 10046 } 10047 return true; 10048 } 10049 10050 #define WRAP_ENV(NAME) \ 10051 static void gen_##NAME(TCGv_i64 d, TCGv_i64 n) \ 10052 { gen_helper_##NAME(d, tcg_env, n); } 10053 10054 WRAP_ENV(neon_unarrow_sat8) 10055 WRAP_ENV(neon_unarrow_sat16) 10056 WRAP_ENV(neon_unarrow_sat32) 10057 10058 static ArithOneOp * const f_scalar_sqxtun[] = { 10059 gen_neon_unarrow_sat8, 10060 gen_neon_unarrow_sat16, 10061 gen_neon_unarrow_sat32, 10062 }; 10063 TRANS(SQXTUN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtun) 10064 10065 WRAP_ENV(neon_narrow_sat_s8) 10066 WRAP_ENV(neon_narrow_sat_s16) 10067 WRAP_ENV(neon_narrow_sat_s32) 10068 10069 static ArithOneOp * const f_scalar_sqxtn[] = { 10070 gen_neon_narrow_sat_s8, 10071 gen_neon_narrow_sat_s16, 10072 gen_neon_narrow_sat_s32, 10073 }; 10074 TRANS(SQXTN_s, do_2misc_narrow_scalar, a, f_scalar_sqxtn) 10075 10076 WRAP_ENV(neon_narrow_sat_u8) 10077 WRAP_ENV(neon_narrow_sat_u16) 10078 WRAP_ENV(neon_narrow_sat_u32) 10079 10080 static ArithOneOp * const f_scalar_uqxtn[] = { 10081 gen_neon_narrow_sat_u8, 10082 gen_neon_narrow_sat_u16, 10083 gen_neon_narrow_sat_u32, 10084 }; 10085 TRANS(UQXTN_s, do_2misc_narrow_scalar, a, f_scalar_uqxtn) 10086 10087 static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a) 10088 { 10089 if (fp_access_check(s)) { 10090 /* 10091 * 64 bit to 32 bit float conversion 10092 * with von Neumann 
rounding (round to odd) 10093 */ 10094 TCGv_i64 src = read_fp_dreg(s, a->rn); 10095 TCGv_i32 dst = tcg_temp_new_i32(); 10096 gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64)); 10097 write_fp_sreg_merging(s, a->rd, a->rd, dst); 10098 } 10099 return true; 10100 } 10101 10102 #undef WRAP_ENV 10103 10104 static bool do_gvec_fn2(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 10105 { 10106 if (!a->q && a->esz == MO_64) { 10107 return false; 10108 } 10109 if (fp_access_check(s)) { 10110 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 10111 } 10112 return true; 10113 } 10114 10115 TRANS(ABS_v, do_gvec_fn2, a, tcg_gen_gvec_abs) 10116 TRANS(NEG_v, do_gvec_fn2, a, tcg_gen_gvec_neg) 10117 TRANS(NOT_v, do_gvec_fn2, a, tcg_gen_gvec_not) 10118 TRANS(CNT_v, do_gvec_fn2, a, gen_gvec_cnt) 10119 TRANS(RBIT_v, do_gvec_fn2, a, gen_gvec_rbit) 10120 TRANS(CMGT0_v, do_gvec_fn2, a, gen_gvec_cgt0) 10121 TRANS(CMGE0_v, do_gvec_fn2, a, gen_gvec_cge0) 10122 TRANS(CMLT0_v, do_gvec_fn2, a, gen_gvec_clt0) 10123 TRANS(CMLE0_v, do_gvec_fn2, a, gen_gvec_cle0) 10124 TRANS(CMEQ0_v, do_gvec_fn2, a, gen_gvec_ceq0) 10125 TRANS(REV16_v, do_gvec_fn2, a, gen_gvec_rev16) 10126 TRANS(REV32_v, do_gvec_fn2, a, gen_gvec_rev32) 10127 TRANS(URECPE_v, do_gvec_fn2, a, gen_gvec_urecpe) 10128 TRANS(URSQRTE_v, do_gvec_fn2, a, gen_gvec_ursqrte) 10129 10130 static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 10131 { 10132 if (a->esz == MO_64) { 10133 return false; 10134 } 10135 if (fp_access_check(s)) { 10136 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 10137 } 10138 return true; 10139 } 10140 10141 TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls) 10142 TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz) 10143 TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64) 10144 TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp) 10145 TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp) 10146 TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp) 10147 TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp) 10148 10149 static bool do_2misc_narrow_vector(DisasContext *s, arg_qrr_e *a, 10150 ArithOneOp * const fn[3]) 10151 { 10152 if (a->esz == MO_64) { 10153 return false; 10154 } 10155 if (fp_access_check(s)) { 10156 TCGv_i64 t0 = tcg_temp_new_i64(); 10157 TCGv_i64 t1 = tcg_temp_new_i64(); 10158 10159 read_vec_element(s, t0, a->rn, 0, MO_64); 10160 read_vec_element(s, t1, a->rn, 1, MO_64); 10161 fn[a->esz](t0, t0); 10162 fn[a->esz](t1, t1); 10163 write_vec_element(s, t0, a->rd, a->q ? 2 : 0, MO_32); 10164 write_vec_element(s, t1, a->rd, a->q ? 
3 : 1, MO_32); 10165 clear_vec_high(s, a->q, a->rd); 10166 } 10167 return true; 10168 } 10169 10170 static ArithOneOp * const f_scalar_xtn[] = { 10171 gen_helper_neon_narrow_u8, 10172 gen_helper_neon_narrow_u16, 10173 tcg_gen_ext32u_i64, 10174 }; 10175 TRANS(XTN, do_2misc_narrow_vector, a, f_scalar_xtn) 10176 TRANS(SQXTUN_v, do_2misc_narrow_vector, a, f_scalar_sqxtun) 10177 TRANS(SQXTN_v, do_2misc_narrow_vector, a, f_scalar_sqxtn) 10178 TRANS(UQXTN_v, do_2misc_narrow_vector, a, f_scalar_uqxtn) 10179 10180 static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n) 10181 { 10182 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 10183 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 10184 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 10185 TCGv_i32 ahp = get_ahp_flag(); 10186 10187 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n); 10188 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 10189 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 10190 tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16); 10191 tcg_gen_extu_i32_i64(d, tcg_lo); 10192 } 10193 10194 static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n) 10195 { 10196 TCGv_i32 tmp = tcg_temp_new_i32(); 10197 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 10198 10199 gen_helper_vfp_fcvtsd(tmp, n, fpst); 10200 tcg_gen_extu_i32_i64(d, tmp); 10201 } 10202 10203 static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n) 10204 { 10205 /* 10206 * 64 bit to 32 bit float conversion 10207 * with von Neumann rounding (round to odd) 10208 */ 10209 TCGv_i32 tmp = tcg_temp_new_i32(); 10210 gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64)); 10211 tcg_gen_extu_i32_i64(d, tmp); 10212 } 10213 10214 static ArithOneOp * const f_vector_fcvtn[] = { 10215 NULL, 10216 gen_fcvtn_hs, 10217 gen_fcvtn_sd, 10218 }; 10219 static ArithOneOp * const f_scalar_fcvtxn[] = { 10220 NULL, 10221 NULL, 10222 gen_fcvtxn_sd, 10223 }; 10224 TRANS(FCVTN_v, do_2misc_narrow_vector, a, f_vector_fcvtn) 10225 TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn) 10226 10227 static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n) 10228 { 10229 TCGv_ptr fpst = fpstatus_ptr(FPST_A64); 10230 TCGv_i32 tmp = tcg_temp_new_i32(); 10231 gen_helper_bfcvt_pair(tmp, n, fpst); 10232 tcg_gen_extu_i32_i64(d, tmp); 10233 } 10234 10235 static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n) 10236 { 10237 TCGv_ptr fpst = fpstatus_ptr(FPST_AH); 10238 TCGv_i32 tmp = tcg_temp_new_i32(); 10239 gen_helper_bfcvt_pair(tmp, n, fpst); 10240 tcg_gen_extu_i32_i64(d, tmp); 10241 } 10242 10243 static ArithOneOp * const f_vector_bfcvtn[2][3] = { 10244 { 10245 NULL, 10246 gen_bfcvtn_hs, 10247 NULL, 10248 }, { 10249 NULL, 10250 gen_bfcvtn_ah_hs, 10251 NULL, 10252 } 10253 }; 10254 TRANS_FEAT(BFCVTN_v, aa64_bf16, do_2misc_narrow_vector, a, 10255 f_vector_bfcvtn[s->fpcr_ah]) 10256 10257 static bool trans_SHLL_v(DisasContext *s, arg_qrr_e *a) 10258 { 10259 static NeonGenWidenFn * const widenfns[3] = { 10260 gen_helper_neon_widen_u8, 10261 gen_helper_neon_widen_u16, 10262 tcg_gen_extu_i32_i64, 10263 }; 10264 NeonGenWidenFn *widenfn; 10265 TCGv_i64 tcg_res[2]; 10266 TCGv_i32 tcg_op; 10267 int part, pass; 10268 10269 if (a->esz == MO_64) { 10270 return false; 10271 } 10272 if (!fp_access_check(s)) { 10273 return true; 10274 } 10275 10276 tcg_op = tcg_temp_new_i32(); 10277 widenfn = widenfns[a->esz]; 10278 part = a->q ? 
2 : 0; 10279 10280 for (pass = 0; pass < 2; pass++) { 10281 read_vec_element_i32(s, tcg_op, a->rn, part + pass, MO_32); 10282 tcg_res[pass] = tcg_temp_new_i64(); 10283 widenfn(tcg_res[pass], tcg_op); 10284 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << a->esz); 10285 } 10286 10287 for (pass = 0; pass < 2; pass++) { 10288 write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64); 10289 } 10290 return true; 10291 } 10292 10293 static bool do_fabs_fneg_v(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn) 10294 { 10295 int check = fp_access_check_vector_hsd(s, a->q, a->esz); 10296 10297 if (check <= 0) { 10298 return check == 0; 10299 } 10300 10301 gen_gvec_fn2(s, a->q, a->rd, a->rn, fn, a->esz); 10302 return true; 10303 } 10304 10305 TRANS(FABS_v, do_fabs_fneg_v, a, gen_gvec_fabs) 10306 TRANS(FNEG_v, do_fabs_fneg_v, a, gen_gvec_fneg) 10307 10308 static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a, 10309 const FPScalar1 *f, int rmode) 10310 { 10311 TCGv_i32 tcg_rmode = NULL; 10312 TCGv_ptr fpst; 10313 int check = fp_access_check_vector_hsd(s, a->q, a->esz); 10314 10315 if (check <= 0) { 10316 return check == 0; 10317 } 10318 10319 fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64); 10320 if (rmode >= 0) { 10321 tcg_rmode = gen_set_rmode(rmode, fpst); 10322 } 10323 10324 if (a->esz == MO_64) { 10325 TCGv_i64 t64 = tcg_temp_new_i64(); 10326 10327 for (int pass = 0; pass < 2; ++pass) { 10328 read_vec_element(s, t64, a->rn, pass, MO_64); 10329 f->gen_d(t64, t64, fpst); 10330 write_vec_element(s, t64, a->rd, pass, MO_64); 10331 } 10332 } else { 10333 TCGv_i32 t32 = tcg_temp_new_i32(); 10334 void (*gen)(TCGv_i32, TCGv_i32, TCGv_ptr) 10335 = (a->esz == MO_16 ? f->gen_h : f->gen_s); 10336 10337 for (int pass = 0, n = (a->q ? 16 : 8) >> a->esz; pass < n; ++pass) { 10338 read_vec_element_i32(s, t32, a->rn, pass, a->esz); 10339 gen(t32, t32, fpst); 10340 write_vec_element_i32(s, t32, a->rd, pass, a->esz); 10341 } 10342 } 10343 clear_vec_high(s, a->q, a->rd); 10344 10345 if (rmode >= 0) { 10346 gen_restore_rmode(tcg_rmode, fpst); 10347 } 10348 return true; 10349 } 10350 10351 TRANS(FSQRT_v, do_fp1_vector, a, &f_scalar_fsqrt, -1) 10352 10353 TRANS(FRINTN_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEEVEN) 10354 TRANS(FRINTP_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_POSINF) 10355 TRANS(FRINTM_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_NEGINF) 10356 TRANS(FRINTZ_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_ZERO) 10357 TRANS(FRINTA_v, do_fp1_vector, a, &f_scalar_frint, FPROUNDING_TIEAWAY) 10358 TRANS(FRINTI_v, do_fp1_vector, a, &f_scalar_frint, -1) 10359 TRANS(FRINTX_v, do_fp1_vector, a, &f_scalar_frintx, -1) 10360 10361 TRANS_FEAT(FRINT32Z_v, aa64_frint, do_fp1_vector, a, 10362 &f_scalar_frint32, FPROUNDING_ZERO) 10363 TRANS_FEAT(FRINT32X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint32, -1) 10364 TRANS_FEAT(FRINT64Z_v, aa64_frint, do_fp1_vector, a, 10365 &f_scalar_frint64, FPROUNDING_ZERO) 10366 TRANS_FEAT(FRINT64X_v, aa64_frint, do_fp1_vector, a, &f_scalar_frint64, -1) 10367 10368 static bool do_gvec_op2_fpst_with_fpsttype(DisasContext *s, MemOp esz, 10369 bool is_q, int rd, int rn, int data, 10370 gen_helper_gvec_2_ptr * const fns[3], 10371 ARMFPStatusFlavour fpsttype) 10372 { 10373 int check = fp_access_check_vector_hsd(s, is_q, esz); 10374 TCGv_ptr fpst; 10375 10376 if (check <= 0) { 10377 return check == 0; 10378 } 10379 10380 fpst = fpstatus_ptr(fpsttype); 10381 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 10382 vec_full_reg_offset(s, rn), fpst, 10383 is_q ? 
16 : 8, vec_full_reg_size(s), 10384 data, fns[esz - 1]); 10385 return true; 10386 } 10387 10388 static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q, 10389 int rd, int rn, int data, 10390 gen_helper_gvec_2_ptr * const fns[3]) 10391 { 10392 return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns, 10393 esz == MO_16 ? FPST_A64_F16 : 10394 FPST_A64); 10395 } 10396 10397 static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q, 10398 int rd, int rn, int data, 10399 gen_helper_gvec_2_ptr * const fns[3]) 10400 { 10401 return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, 10402 fns, select_ah_fpst(s, esz)); 10403 } 10404 10405 static gen_helper_gvec_2_ptr * const f_scvtf_v[] = { 10406 gen_helper_gvec_vcvt_sh, 10407 gen_helper_gvec_vcvt_sf, 10408 gen_helper_gvec_vcvt_sd, 10409 }; 10410 TRANS(SCVTF_vi, do_gvec_op2_fpst, 10411 a->esz, a->q, a->rd, a->rn, 0, f_scvtf_v) 10412 TRANS(SCVTF_vf, do_gvec_op2_fpst, 10413 a->esz, a->q, a->rd, a->rn, a->shift, f_scvtf_v) 10414 10415 static gen_helper_gvec_2_ptr * const f_ucvtf_v[] = { 10416 gen_helper_gvec_vcvt_uh, 10417 gen_helper_gvec_vcvt_uf, 10418 gen_helper_gvec_vcvt_ud, 10419 }; 10420 TRANS(UCVTF_vi, do_gvec_op2_fpst, 10421 a->esz, a->q, a->rd, a->rn, 0, f_ucvtf_v) 10422 TRANS(UCVTF_vf, do_gvec_op2_fpst, 10423 a->esz, a->q, a->rd, a->rn, a->shift, f_ucvtf_v) 10424 10425 static gen_helper_gvec_2_ptr * const f_fcvtzs_vf[] = { 10426 gen_helper_gvec_vcvt_rz_hs, 10427 gen_helper_gvec_vcvt_rz_fs, 10428 gen_helper_gvec_vcvt_rz_ds, 10429 }; 10430 TRANS(FCVTZS_vf, do_gvec_op2_fpst, 10431 a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzs_vf) 10432 10433 static gen_helper_gvec_2_ptr * const f_fcvtzu_vf[] = { 10434 gen_helper_gvec_vcvt_rz_hu, 10435 gen_helper_gvec_vcvt_rz_fu, 10436 gen_helper_gvec_vcvt_rz_du, 10437 }; 10438 TRANS(FCVTZU_vf, do_gvec_op2_fpst, 10439 a->esz, a->q, a->rd, a->rn, a->shift, f_fcvtzu_vf) 10440 10441 static gen_helper_gvec_2_ptr * const f_fcvt_s_vi[] = { 10442 gen_helper_gvec_vcvt_rm_sh, 10443 gen_helper_gvec_vcvt_rm_ss, 10444 gen_helper_gvec_vcvt_rm_sd, 10445 }; 10446 10447 static gen_helper_gvec_2_ptr * const f_fcvt_u_vi[] = { 10448 gen_helper_gvec_vcvt_rm_uh, 10449 gen_helper_gvec_vcvt_rm_us, 10450 gen_helper_gvec_vcvt_rm_ud, 10451 }; 10452 10453 TRANS(FCVTNS_vi, do_gvec_op2_fpst, 10454 a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_s_vi) 10455 TRANS(FCVTNU_vi, do_gvec_op2_fpst, 10456 a->esz, a->q, a->rd, a->rn, float_round_nearest_even, f_fcvt_u_vi) 10457 TRANS(FCVTPS_vi, do_gvec_op2_fpst, 10458 a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_s_vi) 10459 TRANS(FCVTPU_vi, do_gvec_op2_fpst, 10460 a->esz, a->q, a->rd, a->rn, float_round_up, f_fcvt_u_vi) 10461 TRANS(FCVTMS_vi, do_gvec_op2_fpst, 10462 a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_s_vi) 10463 TRANS(FCVTMU_vi, do_gvec_op2_fpst, 10464 a->esz, a->q, a->rd, a->rn, float_round_down, f_fcvt_u_vi) 10465 TRANS(FCVTZS_vi, do_gvec_op2_fpst, 10466 a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_s_vi) 10467 TRANS(FCVTZU_vi, do_gvec_op2_fpst, 10468 a->esz, a->q, a->rd, a->rn, float_round_to_zero, f_fcvt_u_vi) 10469 TRANS(FCVTAS_vi, do_gvec_op2_fpst, 10470 a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_s_vi) 10471 TRANS(FCVTAU_vi, do_gvec_op2_fpst, 10472 a->esz, a->q, a->rd, a->rn, float_round_ties_away, f_fcvt_u_vi) 10473 10474 static gen_helper_gvec_2_ptr * const f_fceq0[] = { 10475 gen_helper_gvec_fceq0_h, 10476 gen_helper_gvec_fceq0_s, 10477 gen_helper_gvec_fceq0_d, 10478 }; 10479 
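/*
 * The helper tables used with do_gvec_op2_fpst() and
 * do_gvec_op2_ah_fpst() are indexed by esz - 1, so entry 0 is the
 * half-precision helper, entry 1 single precision and entry 2 double.
 */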
TRANS(FCMEQ0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fceq0) 10480 10481 static gen_helper_gvec_2_ptr * const f_fcgt0[] = { 10482 gen_helper_gvec_fcgt0_h, 10483 gen_helper_gvec_fcgt0_s, 10484 gen_helper_gvec_fcgt0_d, 10485 }; 10486 TRANS(FCMGT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcgt0) 10487 10488 static gen_helper_gvec_2_ptr * const f_fcge0[] = { 10489 gen_helper_gvec_fcge0_h, 10490 gen_helper_gvec_fcge0_s, 10491 gen_helper_gvec_fcge0_d, 10492 }; 10493 TRANS(FCMGE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcge0) 10494 10495 static gen_helper_gvec_2_ptr * const f_fclt0[] = { 10496 gen_helper_gvec_fclt0_h, 10497 gen_helper_gvec_fclt0_s, 10498 gen_helper_gvec_fclt0_d, 10499 }; 10500 TRANS(FCMLT0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fclt0) 10501 10502 static gen_helper_gvec_2_ptr * const f_fcle0[] = { 10503 gen_helper_gvec_fcle0_h, 10504 gen_helper_gvec_fcle0_s, 10505 gen_helper_gvec_fcle0_d, 10506 }; 10507 TRANS(FCMLE0_v, do_gvec_op2_fpst, a->esz, a->q, a->rd, a->rn, 0, f_fcle0) 10508 10509 static gen_helper_gvec_2_ptr * const f_frecpe[] = { 10510 gen_helper_gvec_frecpe_h, 10511 gen_helper_gvec_frecpe_s, 10512 gen_helper_gvec_frecpe_d, 10513 }; 10514 static gen_helper_gvec_2_ptr * const f_frecpe_rpres[] = { 10515 gen_helper_gvec_frecpe_h, 10516 gen_helper_gvec_frecpe_rpres_s, 10517 gen_helper_gvec_frecpe_d, 10518 }; 10519 TRANS(FRECPE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, 10520 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frecpe_rpres : f_frecpe) 10521 10522 static gen_helper_gvec_2_ptr * const f_frsqrte[] = { 10523 gen_helper_gvec_frsqrte_h, 10524 gen_helper_gvec_frsqrte_s, 10525 gen_helper_gvec_frsqrte_d, 10526 }; 10527 static gen_helper_gvec_2_ptr * const f_frsqrte_rpres[] = { 10528 gen_helper_gvec_frsqrte_h, 10529 gen_helper_gvec_frsqrte_rpres_s, 10530 gen_helper_gvec_frsqrte_d, 10531 }; 10532 TRANS(FRSQRTE_v, do_gvec_op2_ah_fpst, a->esz, a->q, a->rd, a->rn, 0, 10533 s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ? f_frsqrte_rpres : f_frsqrte) 10534 10535 static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a) 10536 { 10537 /* Handle 2-reg-misc ops which are widening (so each size element 10538 * in the source becomes a 2*size element in the destination. 10539 * The only instruction like this is FCVTL. 10540 */ 10541 int pass; 10542 TCGv_ptr fpst; 10543 10544 if (!fp_access_check(s)) { 10545 return true; 10546 } 10547 10548 if (a->esz == MO_64) { 10549 /* 32 -> 64 bit fp conversion */ 10550 TCGv_i64 tcg_res[2]; 10551 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10552 int srcelt = a->q ? 2 : 0; 10553 10554 fpst = fpstatus_ptr(FPST_A64); 10555 10556 for (pass = 0; pass < 2; pass++) { 10557 tcg_res[pass] = tcg_temp_new_i64(); 10558 read_vec_element_i32(s, tcg_op, a->rn, srcelt + pass, MO_32); 10559 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, fpst); 10560 } 10561 for (pass = 0; pass < 2; pass++) { 10562 write_vec_element(s, tcg_res[pass], a->rd, pass, MO_64); 10563 } 10564 } else { 10565 /* 16 -> 32 bit fp conversion */ 10566 int srcelt = a->q ? 
4 : 0; 10567 TCGv_i32 tcg_res[4]; 10568 TCGv_i32 ahp = get_ahp_flag(); 10569 10570 fpst = fpstatus_ptr(FPST_A64_F16); 10571 10572 for (pass = 0; pass < 4; pass++) { 10573 tcg_res[pass] = tcg_temp_new_i32(); 10574 read_vec_element_i32(s, tcg_res[pass], a->rn, srcelt + pass, MO_16); 10575 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], 10576 fpst, ahp); 10577 } 10578 for (pass = 0; pass < 4; pass++) { 10579 write_vec_element_i32(s, tcg_res[pass], a->rd, pass, MO_32); 10580 } 10581 } 10582 clear_vec_high(s, true, a->rd); 10583 return true; 10584 } 10585 10586 static bool trans_OK(DisasContext *s, arg_OK *a) 10587 { 10588 return true; 10589 } 10590 10591 static bool trans_FAIL(DisasContext *s, arg_OK *a) 10592 { 10593 s->is_nonstreaming = true; 10594 return true; 10595 } 10596 10597 /** 10598 * btype_destination_ok: 10599 * @insn: The instruction at the branch destination 10600 * @bt: SCTLR_ELx.BT 10601 * @btype: PSTATE.BTYPE, and is non-zero 10602 * 10603 * On a guarded page, there are a limited number of insns 10604 * that may be present at the branch target: 10605 * - branch target identifiers, 10606 * - paciasp, pacibsp, 10607 * - BRK insn 10608 * - HLT insn 10609 * Anything else causes a Branch Target Exception. 10610 * 10611 * Return true if the branch is compatible, false to raise BTITRAP. 10612 */ 10613 static bool btype_destination_ok(uint32_t insn, bool bt, int btype) 10614 { 10615 if ((insn & 0xfffff01fu) == 0xd503201fu) { 10616 /* HINT space */ 10617 switch (extract32(insn, 5, 7)) { 10618 case 0b011001: /* PACIASP */ 10619 case 0b011011: /* PACIBSP */ 10620 /* 10621 * If SCTLR_ELx.BT, then PACI*SP are not compatible 10622 * with btype == 3. Otherwise all btype are ok. 10623 */ 10624 return !bt || btype != 3; 10625 case 0b100000: /* BTI */ 10626 /* Not compatible with any btype. */ 10627 return false; 10628 case 0b100010: /* BTI c */ 10629 /* Not compatible with btype == 3 */ 10630 return btype != 3; 10631 case 0b100100: /* BTI j */ 10632 /* Not compatible with btype == 2 */ 10633 return btype != 2; 10634 case 0b100110: /* BTI jc */ 10635 /* Compatible with any btype. */ 10636 return true; 10637 } 10638 } else { 10639 switch (insn & 0xffe0001fu) { 10640 case 0xd4200000u: /* BRK */ 10641 case 0xd4400000u: /* HLT */ 10642 /* Give priority to the breakpoint exception. */ 10643 return true; 10644 } 10645 } 10646 return false; 10647 } 10648 10649 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, 10650 CPUState *cpu) 10651 { 10652 DisasContext *dc = container_of(dcbase, DisasContext, base); 10653 CPUARMState *env = cpu_env(cpu); 10654 ARMCPU *arm_cpu = env_archcpu(env); 10655 CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb); 10656 int bound, core_mmu_idx; 10657 10658 dc->isar = &arm_cpu->isar; 10659 dc->condjmp = 0; 10660 dc->pc_save = dc->base.pc_first; 10661 dc->aarch64 = true; 10662 dc->thumb = false; 10663 dc->sctlr_b = 0; 10664 dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? 
MO_BE : MO_LE; 10665 dc->condexec_mask = 0; 10666 dc->condexec_cond = 0; 10667 core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX); 10668 dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx); 10669 dc->tbii = EX_TBFLAG_A64(tb_flags, TBII); 10670 dc->tbid = EX_TBFLAG_A64(tb_flags, TBID); 10671 dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA); 10672 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); 10673 #if !defined(CONFIG_USER_ONLY) 10674 dc->user = (dc->current_el == 0); 10675 #endif 10676 dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); 10677 dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); 10678 dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL); 10679 dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE); 10680 dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC); 10681 dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET); 10682 dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL); 10683 dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL); 10684 dc->zt0_excp_el = EX_TBFLAG_A64(tb_flags, ZT0EXC_EL); 10685 dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16; 10686 dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16; 10687 dc->max_svl = arm_cpu->sme_max_vq * 16; 10688 dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE); 10689 dc->bt = EX_TBFLAG_A64(tb_flags, BT); 10690 dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE); 10691 dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV); 10692 dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA); 10693 dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0); 10694 dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE); 10695 dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE); 10696 dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM); 10697 dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA); 10698 dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING); 10699 dc->naa = EX_TBFLAG_A64(tb_flags, NAA); 10700 dc->e2h = EX_TBFLAG_A64(tb_flags, E2H); 10701 dc->nv = EX_TBFLAG_A64(tb_flags, NV); 10702 dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1); 10703 dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2); 10704 dc->nv2_mem_e20 = dc->nv2 && dc->e2h; 10705 dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); 10706 dc->fpcr_ah = EX_TBFLAG_A64(tb_flags, AH); 10707 dc->fpcr_nep = EX_TBFLAG_A64(tb_flags, NEP); 10708 dc->gcs_en = EX_TBFLAG_A64(tb_flags, GCS_EN); 10709 dc->gcs_rvcen = EX_TBFLAG_A64(tb_flags, GCS_RVCEN); 10710 dc->gcsstr_el = EX_TBFLAG_A64(tb_flags, GCSSTR_EL); 10711 dc->vec_len = 0; 10712 dc->vec_stride = 0; 10713 dc->cp_regs = arm_cpu->cp_regs; 10714 dc->features = env->features; 10715 dc->dcz_blocksize = arm_cpu->dcz_blocksize; 10716 dc->gm_blocksize = arm_cpu->gm_blocksize; 10717 10718 #ifdef CONFIG_USER_ONLY 10719 /* In sve_probe_page, we assume TBI is enabled. */ 10720 tcg_debug_assert(dc->tbid & 1); 10721 #endif 10722 10723 dc->lse2 = dc_isar_feature(aa64_lse2, dc); 10724 10725 /* Single step state. The code-generation logic here is: 10726 * SS_ACTIVE == 0: 10727 * generate code with no special handling for single-stepping (except 10728 * that anything that can make us go to SS_ACTIVE == 1 must end the TB; 10729 * this happens anyway because those changes are all system register or 10730 * PSTATE writes). 
10731 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending) 10732 * emit code for one insn 10733 * emit code to clear PSTATE.SS 10734 * emit code to generate software step exception for completed step 10735 * end TB (as usual for having generated an exception) 10736 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending) 10737 * emit code to generate a software step exception 10738 * end the TB 10739 */ 10740 dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE); 10741 dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS); 10742 dc->is_ldex = false; 10743 10744 /* Bound the number of insns to execute to those left on the page. */ 10745 bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4; 10746 10747 /* If architectural single step active, limit to 1. */ 10748 if (dc->ss_active) { 10749 bound = 1; 10750 } 10751 dc->base.max_insns = MIN(dc->base.max_insns, bound); 10752 } 10753 10754 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) 10755 { 10756 } 10757 10758 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) 10759 { 10760 DisasContext *dc = container_of(dcbase, DisasContext, base); 10761 target_ulong pc_arg = dc->base.pc_next; 10762 10763 if (tb_cflags(dcbase->tb) & CF_PCREL) { 10764 pc_arg &= ~TARGET_PAGE_MASK; 10765 } 10766 tcg_gen_insn_start(pc_arg, 0, 0); 10767 dc->insn_start_updated = false; 10768 } 10769 10770 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) 10771 { 10772 DisasContext *s = container_of(dcbase, DisasContext, base); 10773 CPUARMState *env = cpu_env(cpu); 10774 uint64_t pc = s->base.pc_next; 10775 uint32_t insn; 10776 10777 /* Singlestep exceptions have the highest priority. */ 10778 if (s->ss_active && !s->pstate_ss) { 10779 /* Singlestep state is Active-pending. 10780 * If we're in this state at the start of a TB then either 10781 * a) we just took an exception to an EL which is being debugged 10782 * and this is the first insn in the exception handler 10783 * b) debug exceptions were masked and we just unmasked them 10784 * without changing EL (eg by clearing PSTATE.D) 10785 * In either case we're going to take a swstep exception in the 10786 * "did not step an insn" case, and so the syndrome ISV and EX 10787 * bits should be zero. 10788 */ 10789 assert(s->base.num_insns == 1); 10790 gen_swstep_exception(s, 0, 0); 10791 s->base.is_jmp = DISAS_NORETURN; 10792 s->base.pc_next = pc + 4; 10793 return; 10794 } 10795 10796 if (pc & 3) { 10797 /* 10798 * PC alignment fault. This has priority over the instruction abort 10799 * that we would receive from a translation fault via arm_ldl_code. 10800 * This should only be possible after an indirect branch, at the 10801 * start of the TB. 10802 */ 10803 assert(s->base.num_insns == 1); 10804 gen_helper_exception_pc_alignment(tcg_env, tcg_constant_vaddr(pc)); 10805 s->base.is_jmp = DISAS_NORETURN; 10806 s->base.pc_next = QEMU_ALIGN_UP(pc, 4); 10807 return; 10808 } 10809 10810 s->pc_curr = pc; 10811 insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b); 10812 s->insn = insn; 10813 s->base.pc_next = pc + 4; 10814 10815 s->fp_access_checked = 0; 10816 s->sve_access_checked = 0; 10817 10818 if (s->pstate_il) { 10819 /* 10820 * Illegal execution state. This has priority over BTI 10821 * exceptions, but comes after instruction abort exceptions. 10822 */ 10823 gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate()); 10824 return; 10825 } 10826 10827 if (dc_isar_feature(aa64_bti, s)) { 10828 if (s->base.num_insns == 1) { 10829 /* First insn can have btype set to non-zero. 
*/ 10830 tcg_debug_assert(s->btype >= 0); 10831 10832 /* 10833 * Note that the Branch Target Exception has fairly high 10834 * priority -- below debugging exceptions but above most 10835 * everything else. This allows us to handle this now 10836 * instead of waiting until the insn is otherwise decoded. 10837 * 10838 * We can check all but the guarded page check here; 10839 * defer the latter to a helper. 10840 */ 10841 if (s->btype != 0 10842 && !btype_destination_ok(insn, s->bt, s->btype)) { 10843 gen_helper_guarded_page_check(tcg_env); 10844 } 10845 } else { 10846 /* Not the first insn: btype must be 0. */ 10847 tcg_debug_assert(s->btype == 0); 10848 } 10849 } 10850 10851 s->is_nonstreaming = false; 10852 if (s->sme_trap_nonstreaming) { 10853 disas_sme_fa64(s, insn); 10854 } 10855 10856 if (!disas_a64(s, insn) && 10857 !disas_sme(s, insn) && 10858 !disas_sve(s, insn)) { 10859 unallocated_encoding(s); 10860 } 10861 10862 /* 10863 * After execution of most insns, btype is reset to 0. 10864 * Note that we set btype == -1 when the insn sets btype. 10865 */ 10866 if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) { 10867 reset_btype(s); 10868 } 10869 } 10870 10871 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) 10872 { 10873 DisasContext *dc = container_of(dcbase, DisasContext, base); 10874 10875 if (unlikely(dc->ss_active)) { 10876 /* Note that this means single stepping WFI doesn't halt the CPU. 10877 * For conditional branch insns this is harmless unreachable code as 10878 * gen_goto_tb() has already handled emitting the debug exception 10879 * (and thus a tb-jump is not possible when singlestepping). 10880 */ 10881 switch (dc->base.is_jmp) { 10882 default: 10883 gen_a64_update_pc(dc, 4); 10884 /* fall through */ 10885 case DISAS_EXIT: 10886 case DISAS_JUMP: 10887 gen_step_complete_exception(dc); 10888 break; 10889 case DISAS_NORETURN: 10890 break; 10891 } 10892 } else { 10893 switch (dc->base.is_jmp) { 10894 case DISAS_NEXT: 10895 case DISAS_TOO_MANY: 10896 gen_goto_tb(dc, 1, 4); 10897 break; 10898 default: 10899 case DISAS_UPDATE_EXIT: 10900 gen_a64_update_pc(dc, 4); 10901 /* fall through */ 10902 case DISAS_EXIT: 10903 tcg_gen_exit_tb(NULL, 0); 10904 break; 10905 case DISAS_UPDATE_NOCHAIN: 10906 gen_a64_update_pc(dc, 4); 10907 /* fall through */ 10908 case DISAS_JUMP: 10909 tcg_gen_lookup_and_goto_ptr(); 10910 break; 10911 case DISAS_NORETURN: 10912 case DISAS_SWI: 10913 break; 10914 case DISAS_WFE: 10915 gen_a64_update_pc(dc, 4); 10916 gen_helper_wfe(tcg_env); 10917 break; 10918 case DISAS_YIELD: 10919 gen_a64_update_pc(dc, 4); 10920 gen_helper_yield(tcg_env); 10921 break; 10922 case DISAS_WFI: 10923 /* 10924 * This is a special case because we don't want to just halt 10925 * the CPU if trying to debug across a WFI. 10926 */ 10927 gen_a64_update_pc(dc, 4); 10928 gen_helper_wfi(tcg_env, tcg_constant_i32(4)); 10929 /* 10930 * The helper doesn't necessarily throw an exception, but we 10931 * must go back to the main loop to check for interrupts anyway. 10932 */ 10933 tcg_gen_exit_tb(NULL, 0); 10934 break; 10935 } 10936 } 10937 10938 emit_delayed_exceptions(dc); 10939 } 10940 10941 const TranslatorOps aarch64_translator_ops = { 10942 .init_disas_context = aarch64_tr_init_disas_context, 10943 .tb_start = aarch64_tr_tb_start, 10944 .insn_start = aarch64_tr_insn_start, 10945 .translate_insn = aarch64_tr_translate_insn, 10946 .tb_stop = aarch64_tr_tb_stop, 10947 }; 10948
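/*
 * A rough sketch of how this ops table is consumed (exact call sites
 * and signatures vary between QEMU versions): the ARM translate.c
 * entry point selects it for AArch64 TBs and hands it to the common
 * translator loop, e.g.
 *
 *     translator_loop(cpu, tb, max_insns, pc, host_pc,
 *                     &aarch64_translator_ops, &dc.base);
 *
 * which then calls init_disas_context and tb_start once, insn_start
 * and translate_insn per instruction, and finally tb_stop.
 */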