1 /* 2 * AArch64 translation 3 * 4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de> 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 #include "qemu/osdep.h" 20 21 #include "exec/exec-all.h" 22 #include "translate.h" 23 #include "translate-a64.h" 24 #include "qemu/log.h" 25 #include "arm_ldst.h" 26 #include "semihosting/semihost.h" 27 #include "cpregs.h" 28 29 static TCGv_i64 cpu_X[32]; 30 static TCGv_i64 cpu_pc; 31 32 /* Load/store exclusive handling */ 33 static TCGv_i64 cpu_exclusive_high; 34 35 static const char *regnames[] = { 36 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 37 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 38 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 39 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp" 40 }; 41 42 enum a64_shift_type { 43 A64_SHIFT_TYPE_LSL = 0, 44 A64_SHIFT_TYPE_LSR = 1, 45 A64_SHIFT_TYPE_ASR = 2, 46 A64_SHIFT_TYPE_ROR = 3 47 }; 48 49 /* 50 * Helpers for extracting complex instruction fields 51 */ 52 53 /* 54 * For load/store with an unsigned 12 bit immediate scaled by the element 55 * size. The input has the immediate field in bits [14:3] and the element 56 * size in [2:0]. 57 */ 58 static int uimm_scaled(DisasContext *s, int x) 59 { 60 unsigned imm = x >> 3; 61 unsigned scale = extract32(x, 0, 3); 62 return imm << scale; 63 } 64 65 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */ 66 static int scale_by_log2_tag_granule(DisasContext *s, int x) 67 { 68 return x << LOG2_TAG_GRANULE; 69 } 70 71 /* 72 * Include the generated decoders. 73 */ 74 75 #include "decode-sme-fa64.c.inc" 76 #include "decode-a64.c.inc" 77 78 /* Table based decoder typedefs - used when the relevant bits for decode 79 * are too awkwardly scattered across the instruction (eg SIMD). 80 */ 81 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn); 82 83 typedef struct AArch64DecodeTable { 84 uint32_t pattern; 85 uint32_t mask; 86 AArch64DecodeFn *disas_fn; 87 } AArch64DecodeTable; 88 89 /* initialize TCG globals. */ 90 void a64_translate_init(void) 91 { 92 int i; 93 94 cpu_pc = tcg_global_mem_new_i64(tcg_env, 95 offsetof(CPUARMState, pc), 96 "pc"); 97 for (i = 0; i < 32; i++) { 98 cpu_X[i] = tcg_global_mem_new_i64(tcg_env, 99 offsetof(CPUARMState, xregs[i]), 100 regnames[i]); 101 } 102 103 cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env, 104 offsetof(CPUARMState, exclusive_high), "exclusive_high"); 105 } 106 107 /* 108 * Return the core mmu_idx to use for A64 load/store insns which 109 * have a "unprivileged load/store" variant. Those insns access 110 * EL0 if executed from an EL which has control over EL0 (usually 111 * EL1) but behave like normal loads and stores if executed from 112 * elsewhere (eg EL3). 113 * 114 * @unpriv : true for the unprivileged encoding; false for the 115 * normal encoding (in which case we will return the same 116 * thing as get_mem_index(). 
117 */ 118 static int get_a64_user_mem_index(DisasContext *s, bool unpriv) 119 { 120 /* 121 * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL, 122 * which is the usual mmu_idx for this cpu state. 123 */ 124 ARMMMUIdx useridx = s->mmu_idx; 125 126 if (unpriv && s->unpriv) { 127 /* 128 * We have pre-computed the condition for AccType_UNPRIV. 129 * Therefore we should never get here with a mmu_idx for 130 * which we do not know the corresponding user mmu_idx. 131 */ 132 switch (useridx) { 133 case ARMMMUIdx_E10_1: 134 case ARMMMUIdx_E10_1_PAN: 135 useridx = ARMMMUIdx_E10_0; 136 break; 137 case ARMMMUIdx_E20_2: 138 case ARMMMUIdx_E20_2_PAN: 139 useridx = ARMMMUIdx_E20_0; 140 break; 141 default: 142 g_assert_not_reached(); 143 } 144 } 145 return arm_to_core_mmu_idx(useridx); 146 } 147 148 static void set_btype_raw(int val) 149 { 150 tcg_gen_st_i32(tcg_constant_i32(val), tcg_env, 151 offsetof(CPUARMState, btype)); 152 } 153 154 static void set_btype(DisasContext *s, int val) 155 { 156 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */ 157 tcg_debug_assert(val >= 1 && val <= 3); 158 set_btype_raw(val); 159 s->btype = -1; 160 } 161 162 static void reset_btype(DisasContext *s) 163 { 164 if (s->btype != 0) { 165 set_btype_raw(0); 166 s->btype = 0; 167 } 168 } 169 170 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff) 171 { 172 assert(s->pc_save != -1); 173 if (tb_cflags(s->base.tb) & CF_PCREL) { 174 tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff); 175 } else { 176 tcg_gen_movi_i64(dest, s->pc_curr + diff); 177 } 178 } 179 180 void gen_a64_update_pc(DisasContext *s, target_long diff) 181 { 182 gen_pc_plus_diff(s, cpu_pc, diff); 183 s->pc_save = s->pc_curr + diff; 184 } 185 186 /* 187 * Handle Top Byte Ignore (TBI) bits. 188 * 189 * If address tagging is enabled via the TCR TBI bits: 190 * + for EL2 and EL3 there is only one TBI bit, and if it is set 191 * then the address is zero-extended, clearing bits [63:56] 192 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0 193 * and TBI1 controls addresses with bit 55 == 1. 194 * If the appropriate TBI bit is set for the address then 195 * the address is sign-extended from bit 55 into bits [63:56] 196 * 197 * Here We have concatenated TBI{1,0} into tbi. 198 */ 199 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst, 200 TCGv_i64 src, int tbi) 201 { 202 if (tbi == 0) { 203 /* Load unmodified address */ 204 tcg_gen_mov_i64(dst, src); 205 } else if (!regime_has_2_ranges(s->mmu_idx)) { 206 /* Force tag byte to all zero */ 207 tcg_gen_extract_i64(dst, src, 0, 56); 208 } else { 209 /* Sign-extend from bit 55. */ 210 tcg_gen_sextract_i64(dst, src, 0, 56); 211 212 switch (tbi) { 213 case 1: 214 /* tbi0 but !tbi1: only use the extension if positive */ 215 tcg_gen_and_i64(dst, dst, src); 216 break; 217 case 2: 218 /* !tbi0 but tbi1: only use the extension if negative */ 219 tcg_gen_or_i64(dst, dst, src); 220 break; 221 case 3: 222 /* tbi0 and tbi1: always use the extension */ 223 break; 224 default: 225 g_assert_not_reached(); 226 } 227 } 228 } 229 230 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) 231 { 232 /* 233 * If address tagging is enabled for instructions via the TCR TBI bits, 234 * then loading an address into the PC will clear out any tag. 235 */ 236 gen_top_byte_ignore(s, cpu_pc, src, s->tbii); 237 s->pc_save = -1; 238 } 239 240 /* 241 * Handle MTE and/or TBI. 242 * 243 * For TBI, ideally, we would do nothing. 
Proper behaviour on fault is 244 * for the tag to be present in the FAR_ELx register. But for user-only 245 * mode we do not have a TLB with which to implement this, so we must 246 * remove the top byte now. 247 * 248 * Always return a fresh temporary that we can increment independently 249 * of the write-back address. 250 */ 251 252 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) 253 { 254 TCGv_i64 clean = tcg_temp_new_i64(); 255 #ifdef CONFIG_USER_ONLY 256 gen_top_byte_ignore(s, clean, addr, s->tbid); 257 #else 258 tcg_gen_mov_i64(clean, addr); 259 #endif 260 return clean; 261 } 262 263 /* Insert a zero tag into src, with the result at dst. */ 264 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src) 265 { 266 tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4)); 267 } 268 269 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, 270 MMUAccessType acc, int log2_size) 271 { 272 gen_helper_probe_access(tcg_env, ptr, 273 tcg_constant_i32(acc), 274 tcg_constant_i32(get_mem_index(s)), 275 tcg_constant_i32(1 << log2_size)); 276 } 277 278 /* 279 * For MTE, check a single logical or atomic access. This probes a single 280 * address, the exact one specified. The size and alignment of the access 281 * is not relevant to MTE, per se, but watchpoints do require the size, 282 * and we want to recognize those before making any other changes to state. 283 */ 284 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, 285 bool is_write, bool tag_checked, 286 MemOp memop, bool is_unpriv, 287 int core_idx) 288 { 289 if (tag_checked && s->mte_active[is_unpriv]) { 290 TCGv_i64 ret; 291 int desc = 0; 292 293 desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx); 294 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 295 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 296 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 297 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop)); 298 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1); 299 300 ret = tcg_temp_new_i64(); 301 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); 302 303 return ret; 304 } 305 return clean_data_tbi(s, addr); 306 } 307 308 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, 309 bool tag_checked, MemOp memop) 310 { 311 return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop, 312 false, get_mem_index(s)); 313 } 314 315 /* 316 * For MTE, check multiple logical sequential accesses. 317 */ 318 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, 319 bool tag_checked, int total_size, MemOp single_mop) 320 { 321 if (tag_checked && s->mte_active[0]) { 322 TCGv_i64 ret; 323 int desc = 0; 324 325 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 326 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 327 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 328 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 329 desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop)); 330 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1); 331 332 ret = tcg_temp_new_i64(); 333 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); 334 335 return ret; 336 } 337 return clean_data_tbi(s, addr); 338 } 339 340 /* 341 * Generate the special alignment check that applies to AccType_ATOMIC 342 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be 343 * naturally aligned, but it must not cross a 16-byte boundary. 344 * See AArch64.CheckAlignment(). 
345 */ 346 static void check_lse2_align(DisasContext *s, int rn, int imm, 347 bool is_write, MemOp mop) 348 { 349 TCGv_i32 tmp; 350 TCGv_i64 addr; 351 TCGLabel *over_label; 352 MMUAccessType type; 353 int mmu_idx; 354 355 tmp = tcg_temp_new_i32(); 356 tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn)); 357 tcg_gen_addi_i32(tmp, tmp, imm & 15); 358 tcg_gen_andi_i32(tmp, tmp, 15); 359 tcg_gen_addi_i32(tmp, tmp, memop_size(mop)); 360 361 over_label = gen_new_label(); 362 tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label); 363 364 addr = tcg_temp_new_i64(); 365 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm); 366 367 type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD, 368 mmu_idx = get_mem_index(s); 369 gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type), 370 tcg_constant_i32(mmu_idx)); 371 372 gen_set_label(over_label); 373 374 } 375 376 /* Handle the alignment check for AccType_ATOMIC instructions. */ 377 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop) 378 { 379 MemOp size = mop & MO_SIZE; 380 381 if (size == MO_8) { 382 return mop; 383 } 384 385 /* 386 * If size == MO_128, this is a LDXP, and the operation is single-copy 387 * atomic for each doubleword, not the entire quadword; it still must 388 * be quadword aligned. 389 */ 390 if (size == MO_128) { 391 return finalize_memop_atom(s, MO_128 | MO_ALIGN, 392 MO_ATOM_IFALIGN_PAIR); 393 } 394 if (dc_isar_feature(aa64_lse2, s)) { 395 check_lse2_align(s, rn, 0, true, mop); 396 } else { 397 mop |= MO_ALIGN; 398 } 399 return finalize_memop(s, mop); 400 } 401 402 /* Handle the alignment check for AccType_ORDERED instructions. */ 403 static MemOp check_ordered_align(DisasContext *s, int rn, int imm, 404 bool is_write, MemOp mop) 405 { 406 MemOp size = mop & MO_SIZE; 407 408 if (size == MO_8) { 409 return mop; 410 } 411 if (size == MO_128) { 412 return finalize_memop_atom(s, MO_128 | MO_ALIGN, 413 MO_ATOM_IFALIGN_PAIR); 414 } 415 if (!dc_isar_feature(aa64_lse2, s)) { 416 mop |= MO_ALIGN; 417 } else if (!s->naa) { 418 check_lse2_align(s, rn, imm, is_write, mop); 419 } 420 return finalize_memop(s, mop); 421 } 422 423 typedef struct DisasCompare64 { 424 TCGCond cond; 425 TCGv_i64 value; 426 } DisasCompare64; 427 428 static void a64_test_cc(DisasCompare64 *c64, int cc) 429 { 430 DisasCompare c32; 431 432 arm_test_cc(&c32, cc); 433 434 /* 435 * Sign-extend the 32-bit value so that the GE/LT comparisons work 436 * properly. The NE/EQ comparisons are also fine with this choice. 437 */ 438 c64->cond = c32.cond; 439 c64->value = tcg_temp_new_i64(); 440 tcg_gen_ext_i32_i64(c64->value, c32.value); 441 } 442 443 static void gen_rebuild_hflags(DisasContext *s) 444 { 445 gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el)); 446 } 447 448 static void gen_exception_internal(int excp) 449 { 450 assert(excp_is_internal(excp)); 451 gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp)); 452 } 453 454 static void gen_exception_internal_insn(DisasContext *s, int excp) 455 { 456 gen_a64_update_pc(s, 0); 457 gen_exception_internal(excp); 458 s->base.is_jmp = DISAS_NORETURN; 459 } 460 461 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome) 462 { 463 gen_a64_update_pc(s, 0); 464 gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome)); 465 s->base.is_jmp = DISAS_NORETURN; 466 } 467 468 static void gen_step_complete_exception(DisasContext *s) 469 { 470 /* We just completed step of an insn. Move from Active-not-pending 471 * to Active-pending, and then also take the swstep exception. 
472 * This corresponds to making the (IMPDEF) choice to prioritize 473 * swstep exceptions over asynchronous exceptions taken to an exception 474 * level where debug is disabled. This choice has the advantage that 475 * we do not need to maintain internal state corresponding to the 476 * ISV/EX syndrome bits between completion of the step and generation 477 * of the exception, and our syndrome information is always correct. 478 */ 479 gen_ss_advance(s); 480 gen_swstep_exception(s, 1, s->is_ldex); 481 s->base.is_jmp = DISAS_NORETURN; 482 } 483 484 static inline bool use_goto_tb(DisasContext *s, uint64_t dest) 485 { 486 if (s->ss_active) { 487 return false; 488 } 489 return translator_use_goto_tb(&s->base, dest); 490 } 491 492 static void gen_goto_tb(DisasContext *s, int n, int64_t diff) 493 { 494 if (use_goto_tb(s, s->pc_curr + diff)) { 495 /* 496 * For pcrel, the pc must always be up-to-date on entry to 497 * the linked TB, so that it can use simple additions for all 498 * further adjustments. For !pcrel, the linked TB is compiled 499 * to know its full virtual address, so we can delay the 500 * update to pc to the unlinked path. A long chain of links 501 * can thus avoid many updates to the PC. 502 */ 503 if (tb_cflags(s->base.tb) & CF_PCREL) { 504 gen_a64_update_pc(s, diff); 505 tcg_gen_goto_tb(n); 506 } else { 507 tcg_gen_goto_tb(n); 508 gen_a64_update_pc(s, diff); 509 } 510 tcg_gen_exit_tb(s->base.tb, n); 511 s->base.is_jmp = DISAS_NORETURN; 512 } else { 513 gen_a64_update_pc(s, diff); 514 if (s->ss_active) { 515 gen_step_complete_exception(s); 516 } else { 517 tcg_gen_lookup_and_goto_ptr(); 518 s->base.is_jmp = DISAS_NORETURN; 519 } 520 } 521 } 522 523 /* 524 * Register access functions 525 * 526 * These functions are used for directly accessing a register in where 527 * changes to the final register value are likely to be made. If you 528 * need to use a register for temporary calculation (e.g. index type 529 * operations) use the read_* form. 530 * 531 * B1.2.1 Register mappings 532 * 533 * In instruction register encoding 31 can refer to ZR (zero register) or 534 * the SP (stack pointer) depending on context. In QEMU's case we map SP 535 * to cpu_X[31] and ZR accesses to a temporary which can be discarded. 536 * This is the point of the _sp forms. 537 */ 538 TCGv_i64 cpu_reg(DisasContext *s, int reg) 539 { 540 if (reg == 31) { 541 TCGv_i64 t = tcg_temp_new_i64(); 542 tcg_gen_movi_i64(t, 0); 543 return t; 544 } else { 545 return cpu_X[reg]; 546 } 547 } 548 549 /* register access for when 31 == SP */ 550 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg) 551 { 552 return cpu_X[reg]; 553 } 554 555 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64 556 * representing the register contents. This TCGv is an auto-freed 557 * temporary so it need not be explicitly freed, and may be modified. 558 */ 559 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf) 560 { 561 TCGv_i64 v = tcg_temp_new_i64(); 562 if (reg != 31) { 563 if (sf) { 564 tcg_gen_mov_i64(v, cpu_X[reg]); 565 } else { 566 tcg_gen_ext32u_i64(v, cpu_X[reg]); 567 } 568 } else { 569 tcg_gen_movi_i64(v, 0); 570 } 571 return v; 572 } 573 574 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf) 575 { 576 TCGv_i64 v = tcg_temp_new_i64(); 577 if (sf) { 578 tcg_gen_mov_i64(v, cpu_X[reg]); 579 } else { 580 tcg_gen_ext32u_i64(v, cpu_X[reg]); 581 } 582 return v; 583 } 584 585 /* Return the offset into CPUARMState of a slice (from 586 * the least significant end) of FP register Qn (ie 587 * Dn, Sn, Hn or Bn). 
588 * (Note that this is not the same mapping as for A32; see cpu.h) 589 */ 590 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size) 591 { 592 return vec_reg_offset(s, regno, 0, size); 593 } 594 595 /* Offset of the high half of the 128 bit vector Qn */ 596 static inline int fp_reg_hi_offset(DisasContext *s, int regno) 597 { 598 return vec_reg_offset(s, regno, 1, MO_64); 599 } 600 601 /* Convenience accessors for reading and writing single and double 602 * FP registers. Writing clears the upper parts of the associated 603 * 128 bit vector register, as required by the architecture. 604 * Note that unlike the GP register accessors, the values returned 605 * by the read functions must be manually freed. 606 */ 607 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg) 608 { 609 TCGv_i64 v = tcg_temp_new_i64(); 610 611 tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64)); 612 return v; 613 } 614 615 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg) 616 { 617 TCGv_i32 v = tcg_temp_new_i32(); 618 619 tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); 620 return v; 621 } 622 623 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) 624 { 625 TCGv_i32 v = tcg_temp_new_i32(); 626 627 tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); 628 return v; 629 } 630 631 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64). 632 * If SVE is not enabled, then there are only 128 bits in the vector. 633 */ 634 static void clear_vec_high(DisasContext *s, bool is_q, int rd) 635 { 636 unsigned ofs = fp_reg_offset(s, rd, MO_64); 637 unsigned vsz = vec_full_reg_size(s); 638 639 /* Nop move, with side effect of clearing the tail. */ 640 tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz); 641 } 642 643 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v) 644 { 645 unsigned ofs = fp_reg_offset(s, reg, MO_64); 646 647 tcg_gen_st_i64(v, tcg_env, ofs); 648 clear_vec_high(s, false, reg); 649 } 650 651 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) 652 { 653 TCGv_i64 tmp = tcg_temp_new_i64(); 654 655 tcg_gen_extu_i32_i64(tmp, v); 656 write_fp_dreg(s, reg, tmp); 657 } 658 659 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ 660 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, 661 GVecGen2Fn *gvec_fn, int vece) 662 { 663 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 664 is_q ? 16 : 8, vec_full_reg_size(s)); 665 } 666 667 /* Expand a 2-operand + immediate AdvSIMD vector operation using 668 * an expander function. 669 */ 670 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn, 671 int64_t imm, GVecGen2iFn *gvec_fn, int vece) 672 { 673 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 674 imm, is_q ? 16 : 8, vec_full_reg_size(s)); 675 } 676 677 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */ 678 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm, 679 GVecGen3Fn *gvec_fn, int vece) 680 { 681 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 682 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s)); 683 } 684 685 /* Expand a 4-operand AdvSIMD vector operation using an expander function. 
*/ 686 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm, 687 int rx, GVecGen4Fn *gvec_fn, int vece) 688 { 689 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 690 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx), 691 is_q ? 16 : 8, vec_full_reg_size(s)); 692 } 693 694 /* Expand a 2-operand operation using an out-of-line helper. */ 695 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd, 696 int rn, int data, gen_helper_gvec_2 *fn) 697 { 698 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 699 vec_full_reg_offset(s, rn), 700 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 701 } 702 703 /* Expand a 3-operand operation using an out-of-line helper. */ 704 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, 705 int rn, int rm, int data, gen_helper_gvec_3 *fn) 706 { 707 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 708 vec_full_reg_offset(s, rn), 709 vec_full_reg_offset(s, rm), 710 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 711 } 712 713 /* Expand a 3-operand + fpstatus pointer + simd data value operation using 714 * an out-of-line helper. 715 */ 716 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, 717 int rm, bool is_fp16, int data, 718 gen_helper_gvec_3_ptr *fn) 719 { 720 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 721 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 722 vec_full_reg_offset(s, rn), 723 vec_full_reg_offset(s, rm), fpst, 724 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 725 } 726 727 /* Expand a 4-operand operation using an out-of-line helper. */ 728 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn, 729 int rm, int ra, int data, gen_helper_gvec_4 *fn) 730 { 731 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 732 vec_full_reg_offset(s, rn), 733 vec_full_reg_offset(s, rm), 734 vec_full_reg_offset(s, ra), 735 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 736 } 737 738 /* 739 * Expand a 4-operand operation using an out-of-line helper that takes 740 * a pointer to the CPU env. 741 */ 742 static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn, 743 int rm, int ra, int data, 744 gen_helper_gvec_4_ptr *fn) 745 { 746 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 747 vec_full_reg_offset(s, rn), 748 vec_full_reg_offset(s, rm), 749 vec_full_reg_offset(s, ra), 750 tcg_env, 751 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 752 } 753 754 /* 755 * Expand a 4-operand + fpstatus pointer + simd data value operation using 756 * an out-of-line helper. 757 */ 758 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, 759 int rm, int ra, bool is_fp16, int data, 760 gen_helper_gvec_4_ptr *fn) 761 { 762 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 763 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 764 vec_full_reg_offset(s, rn), 765 vec_full_reg_offset(s, rm), 766 vec_full_reg_offset(s, ra), fpst, 767 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 768 } 769 770 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier 771 * than the 32 bit equivalent. 772 */ 773 static inline void gen_set_NZ64(TCGv_i64 result) 774 { 775 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result); 776 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF); 777 } 778 779 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. 
*/ 780 static inline void gen_logic_CC(int sf, TCGv_i64 result) 781 { 782 if (sf) { 783 gen_set_NZ64(result); 784 } else { 785 tcg_gen_extrl_i64_i32(cpu_ZF, result); 786 tcg_gen_mov_i32(cpu_NF, cpu_ZF); 787 } 788 tcg_gen_movi_i32(cpu_CF, 0); 789 tcg_gen_movi_i32(cpu_VF, 0); 790 } 791 792 /* dest = T0 + T1; compute C, N, V and Z flags */ 793 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 794 { 795 TCGv_i64 result, flag, tmp; 796 result = tcg_temp_new_i64(); 797 flag = tcg_temp_new_i64(); 798 tmp = tcg_temp_new_i64(); 799 800 tcg_gen_movi_i64(tmp, 0); 801 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp); 802 803 tcg_gen_extrl_i64_i32(cpu_CF, flag); 804 805 gen_set_NZ64(result); 806 807 tcg_gen_xor_i64(flag, result, t0); 808 tcg_gen_xor_i64(tmp, t0, t1); 809 tcg_gen_andc_i64(flag, flag, tmp); 810 tcg_gen_extrh_i64_i32(cpu_VF, flag); 811 812 tcg_gen_mov_i64(dest, result); 813 } 814 815 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 816 { 817 TCGv_i32 t0_32 = tcg_temp_new_i32(); 818 TCGv_i32 t1_32 = tcg_temp_new_i32(); 819 TCGv_i32 tmp = tcg_temp_new_i32(); 820 821 tcg_gen_movi_i32(tmp, 0); 822 tcg_gen_extrl_i64_i32(t0_32, t0); 823 tcg_gen_extrl_i64_i32(t1_32, t1); 824 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp); 825 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 826 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 827 tcg_gen_xor_i32(tmp, t0_32, t1_32); 828 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 829 tcg_gen_extu_i32_i64(dest, cpu_NF); 830 } 831 832 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 833 { 834 if (sf) { 835 gen_add64_CC(dest, t0, t1); 836 } else { 837 gen_add32_CC(dest, t0, t1); 838 } 839 } 840 841 /* dest = T0 - T1; compute C, N, V and Z flags */ 842 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 843 { 844 /* 64 bit arithmetic */ 845 TCGv_i64 result, flag, tmp; 846 847 result = tcg_temp_new_i64(); 848 flag = tcg_temp_new_i64(); 849 tcg_gen_sub_i64(result, t0, t1); 850 851 gen_set_NZ64(result); 852 853 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1); 854 tcg_gen_extrl_i64_i32(cpu_CF, flag); 855 856 tcg_gen_xor_i64(flag, result, t0); 857 tmp = tcg_temp_new_i64(); 858 tcg_gen_xor_i64(tmp, t0, t1); 859 tcg_gen_and_i64(flag, flag, tmp); 860 tcg_gen_extrh_i64_i32(cpu_VF, flag); 861 tcg_gen_mov_i64(dest, result); 862 } 863 864 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 865 { 866 /* 32 bit arithmetic */ 867 TCGv_i32 t0_32 = tcg_temp_new_i32(); 868 TCGv_i32 t1_32 = tcg_temp_new_i32(); 869 TCGv_i32 tmp; 870 871 tcg_gen_extrl_i64_i32(t0_32, t0); 872 tcg_gen_extrl_i64_i32(t1_32, t1); 873 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32); 874 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 875 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32); 876 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 877 tmp = tcg_temp_new_i32(); 878 tcg_gen_xor_i32(tmp, t0_32, t1_32); 879 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp); 880 tcg_gen_extu_i32_i64(dest, cpu_NF); 881 } 882 883 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 884 { 885 if (sf) { 886 gen_sub64_CC(dest, t0, t1); 887 } else { 888 gen_sub32_CC(dest, t0, t1); 889 } 890 } 891 892 /* dest = T0 + T1 + CF; do not compute flags. 
*/ 893 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 894 { 895 TCGv_i64 flag = tcg_temp_new_i64(); 896 tcg_gen_extu_i32_i64(flag, cpu_CF); 897 tcg_gen_add_i64(dest, t0, t1); 898 tcg_gen_add_i64(dest, dest, flag); 899 900 if (!sf) { 901 tcg_gen_ext32u_i64(dest, dest); 902 } 903 } 904 905 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */ 906 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 907 { 908 if (sf) { 909 TCGv_i64 result = tcg_temp_new_i64(); 910 TCGv_i64 cf_64 = tcg_temp_new_i64(); 911 TCGv_i64 vf_64 = tcg_temp_new_i64(); 912 TCGv_i64 tmp = tcg_temp_new_i64(); 913 TCGv_i64 zero = tcg_constant_i64(0); 914 915 tcg_gen_extu_i32_i64(cf_64, cpu_CF); 916 tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero); 917 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero); 918 tcg_gen_extrl_i64_i32(cpu_CF, cf_64); 919 gen_set_NZ64(result); 920 921 tcg_gen_xor_i64(vf_64, result, t0); 922 tcg_gen_xor_i64(tmp, t0, t1); 923 tcg_gen_andc_i64(vf_64, vf_64, tmp); 924 tcg_gen_extrh_i64_i32(cpu_VF, vf_64); 925 926 tcg_gen_mov_i64(dest, result); 927 } else { 928 TCGv_i32 t0_32 = tcg_temp_new_i32(); 929 TCGv_i32 t1_32 = tcg_temp_new_i32(); 930 TCGv_i32 tmp = tcg_temp_new_i32(); 931 TCGv_i32 zero = tcg_constant_i32(0); 932 933 tcg_gen_extrl_i64_i32(t0_32, t0); 934 tcg_gen_extrl_i64_i32(t1_32, t1); 935 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero); 936 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero); 937 938 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 939 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 940 tcg_gen_xor_i32(tmp, t0_32, t1_32); 941 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 942 tcg_gen_extu_i32_i64(dest, cpu_NF); 943 } 944 } 945 946 /* 947 * Load/Store generators 948 */ 949 950 /* 951 * Store from GPR register to memory. 
952 */ 953 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, 954 TCGv_i64 tcg_addr, MemOp memop, int memidx, 955 bool iss_valid, 956 unsigned int iss_srt, 957 bool iss_sf, bool iss_ar) 958 { 959 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop); 960 961 if (iss_valid) { 962 uint32_t syn; 963 964 syn = syn_data_abort_with_iss(0, 965 (memop & MO_SIZE), 966 false, 967 iss_srt, 968 iss_sf, 969 iss_ar, 970 0, 0, 0, 0, 0, false); 971 disas_set_insn_syndrome(s, syn); 972 } 973 } 974 975 static void do_gpr_st(DisasContext *s, TCGv_i64 source, 976 TCGv_i64 tcg_addr, MemOp memop, 977 bool iss_valid, 978 unsigned int iss_srt, 979 bool iss_sf, bool iss_ar) 980 { 981 do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s), 982 iss_valid, iss_srt, iss_sf, iss_ar); 983 } 984 985 /* 986 * Load from memory to GPR register 987 */ 988 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 989 MemOp memop, bool extend, int memidx, 990 bool iss_valid, unsigned int iss_srt, 991 bool iss_sf, bool iss_ar) 992 { 993 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop); 994 995 if (extend && (memop & MO_SIGN)) { 996 g_assert((memop & MO_SIZE) <= MO_32); 997 tcg_gen_ext32u_i64(dest, dest); 998 } 999 1000 if (iss_valid) { 1001 uint32_t syn; 1002 1003 syn = syn_data_abort_with_iss(0, 1004 (memop & MO_SIZE), 1005 (memop & MO_SIGN) != 0, 1006 iss_srt, 1007 iss_sf, 1008 iss_ar, 1009 0, 0, 0, 0, 0, false); 1010 disas_set_insn_syndrome(s, syn); 1011 } 1012 } 1013 1014 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 1015 MemOp memop, bool extend, 1016 bool iss_valid, unsigned int iss_srt, 1017 bool iss_sf, bool iss_ar) 1018 { 1019 do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s), 1020 iss_valid, iss_srt, iss_sf, iss_ar); 1021 } 1022 1023 /* 1024 * Store from FP register to memory 1025 */ 1026 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop) 1027 { 1028 /* This writes the bottom N bits of a 128 bit wide vector to memory */ 1029 TCGv_i64 tmplo = tcg_temp_new_i64(); 1030 1031 tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64)); 1032 1033 if ((mop & MO_SIZE) < MO_128) { 1034 tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1035 } else { 1036 TCGv_i64 tmphi = tcg_temp_new_i64(); 1037 TCGv_i128 t16 = tcg_temp_new_i128(); 1038 1039 tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx)); 1040 tcg_gen_concat_i64_i128(t16, tmplo, tmphi); 1041 1042 tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop); 1043 } 1044 } 1045 1046 /* 1047 * Load from memory to FP register 1048 */ 1049 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop) 1050 { 1051 /* This always zero-extends and writes to a full 128 bit wide vector */ 1052 TCGv_i64 tmplo = tcg_temp_new_i64(); 1053 TCGv_i64 tmphi = NULL; 1054 1055 if ((mop & MO_SIZE) < MO_128) { 1056 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1057 } else { 1058 TCGv_i128 t16 = tcg_temp_new_i128(); 1059 1060 tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop); 1061 1062 tmphi = tcg_temp_new_i64(); 1063 tcg_gen_extr_i128_i64(tmplo, tmphi, t16); 1064 } 1065 1066 tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64)); 1067 1068 if (tmphi) { 1069 tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx)); 1070 } 1071 clear_vec_high(s, tmphi != NULL, destidx); 1072 } 1073 1074 /* 1075 * Vector load/store helpers. 
1076 * 1077 * The principal difference between this and a FP load is that we don't 1078 * zero extend as we are filling a partial chunk of the vector register. 1079 * These functions don't support 128 bit loads/stores, which would be 1080 * normal load/store operations. 1081 * 1082 * The _i32 versions are useful when operating on 32 bit quantities 1083 * (eg for floating point single or using Neon helper functions). 1084 */ 1085 1086 /* Get value of an element within a vector register */ 1087 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx, 1088 int element, MemOp memop) 1089 { 1090 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1091 switch ((unsigned)memop) { 1092 case MO_8: 1093 tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off); 1094 break; 1095 case MO_16: 1096 tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off); 1097 break; 1098 case MO_32: 1099 tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off); 1100 break; 1101 case MO_8|MO_SIGN: 1102 tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off); 1103 break; 1104 case MO_16|MO_SIGN: 1105 tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off); 1106 break; 1107 case MO_32|MO_SIGN: 1108 tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off); 1109 break; 1110 case MO_64: 1111 case MO_64|MO_SIGN: 1112 tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off); 1113 break; 1114 default: 1115 g_assert_not_reached(); 1116 } 1117 } 1118 1119 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx, 1120 int element, MemOp memop) 1121 { 1122 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1123 switch (memop) { 1124 case MO_8: 1125 tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off); 1126 break; 1127 case MO_16: 1128 tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off); 1129 break; 1130 case MO_8|MO_SIGN: 1131 tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off); 1132 break; 1133 case MO_16|MO_SIGN: 1134 tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off); 1135 break; 1136 case MO_32: 1137 case MO_32|MO_SIGN: 1138 tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off); 1139 break; 1140 default: 1141 g_assert_not_reached(); 1142 } 1143 } 1144 1145 /* Set value of an element within a vector register */ 1146 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx, 1147 int element, MemOp memop) 1148 { 1149 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1150 switch (memop) { 1151 case MO_8: 1152 tcg_gen_st8_i64(tcg_src, tcg_env, vect_off); 1153 break; 1154 case MO_16: 1155 tcg_gen_st16_i64(tcg_src, tcg_env, vect_off); 1156 break; 1157 case MO_32: 1158 tcg_gen_st32_i64(tcg_src, tcg_env, vect_off); 1159 break; 1160 case MO_64: 1161 tcg_gen_st_i64(tcg_src, tcg_env, vect_off); 1162 break; 1163 default: 1164 g_assert_not_reached(); 1165 } 1166 } 1167 1168 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, 1169 int destidx, int element, MemOp memop) 1170 { 1171 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1172 switch (memop) { 1173 case MO_8: 1174 tcg_gen_st8_i32(tcg_src, tcg_env, vect_off); 1175 break; 1176 case MO_16: 1177 tcg_gen_st16_i32(tcg_src, tcg_env, vect_off); 1178 break; 1179 case MO_32: 1180 tcg_gen_st_i32(tcg_src, tcg_env, vect_off); 1181 break; 1182 default: 1183 g_assert_not_reached(); 1184 } 1185 } 1186 1187 /* Store from vector register to memory */ 1188 static void do_vec_st(DisasContext *s, int srcidx, int element, 1189 TCGv_i64 tcg_addr, MemOp mop) 1190 { 1191 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1192 1193 read_vec_element(s, tcg_tmp, srcidx, element, 
mop & MO_SIZE); 1194 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1195 } 1196 1197 /* Load from memory to vector register */ 1198 static void do_vec_ld(DisasContext *s, int destidx, int element, 1199 TCGv_i64 tcg_addr, MemOp mop) 1200 { 1201 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1202 1203 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1204 write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE); 1205 } 1206 1207 /* Check that FP/Neon access is enabled. If it is, return 1208 * true. If not, emit code to generate an appropriate exception, 1209 * and return false; the caller should not emit any code for 1210 * the instruction. Note that this check must happen after all 1211 * unallocated-encoding checks (otherwise the syndrome information 1212 * for the resulting exception will be incorrect). 1213 */ 1214 static bool fp_access_check_only(DisasContext *s) 1215 { 1216 if (s->fp_excp_el) { 1217 assert(!s->fp_access_checked); 1218 s->fp_access_checked = true; 1219 1220 gen_exception_insn_el(s, 0, EXCP_UDEF, 1221 syn_fp_access_trap(1, 0xe, false, 0), 1222 s->fp_excp_el); 1223 return false; 1224 } 1225 s->fp_access_checked = true; 1226 return true; 1227 } 1228 1229 static bool fp_access_check(DisasContext *s) 1230 { 1231 if (!fp_access_check_only(s)) { 1232 return false; 1233 } 1234 if (s->sme_trap_nonstreaming && s->is_nonstreaming) { 1235 gen_exception_insn(s, 0, EXCP_UDEF, 1236 syn_smetrap(SME_ET_Streaming, false)); 1237 return false; 1238 } 1239 return true; 1240 } 1241 1242 /* 1243 * Check that SVE access is enabled. If it is, return true. 1244 * If not, emit code to generate an appropriate exception and return false. 1245 * This function corresponds to CheckSVEEnabled(). 1246 */ 1247 bool sve_access_check(DisasContext *s) 1248 { 1249 if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) { 1250 assert(dc_isar_feature(aa64_sme, s)); 1251 if (!sme_sm_enabled_check(s)) { 1252 goto fail_exit; 1253 } 1254 } else if (s->sve_excp_el) { 1255 gen_exception_insn_el(s, 0, EXCP_UDEF, 1256 syn_sve_access_trap(), s->sve_excp_el); 1257 goto fail_exit; 1258 } 1259 s->sve_access_checked = true; 1260 return fp_access_check(s); 1261 1262 fail_exit: 1263 /* Assert that we only raise one exception per instruction. */ 1264 assert(!s->sve_access_checked); 1265 s->sve_access_checked = true; 1266 return false; 1267 } 1268 1269 /* 1270 * Check that SME access is enabled, raise an exception if not. 1271 * Note that this function corresponds to CheckSMEAccess and is 1272 * only used directly for cpregs. 1273 */ 1274 static bool sme_access_check(DisasContext *s) 1275 { 1276 if (s->sme_excp_el) { 1277 gen_exception_insn_el(s, 0, EXCP_UDEF, 1278 syn_smetrap(SME_ET_AccessTrap, false), 1279 s->sme_excp_el); 1280 return false; 1281 } 1282 return true; 1283 } 1284 1285 /* This function corresponds to CheckSMEEnabled. */ 1286 bool sme_enabled_check(DisasContext *s) 1287 { 1288 /* 1289 * Note that unlike sve_excp_el, we have not constrained sme_excp_el 1290 * to be zero when fp_excp_el has priority. This is because we need 1291 * sme_excp_el by itself for cpregs access checks. 1292 */ 1293 if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) { 1294 s->fp_access_checked = true; 1295 return sme_access_check(s); 1296 } 1297 return fp_access_check_only(s); 1298 } 1299 1300 /* Common subroutine for CheckSMEAnd*Enabled. 
*/ 1301 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req) 1302 { 1303 if (!sme_enabled_check(s)) { 1304 return false; 1305 } 1306 if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) { 1307 gen_exception_insn(s, 0, EXCP_UDEF, 1308 syn_smetrap(SME_ET_NotStreaming, false)); 1309 return false; 1310 } 1311 if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) { 1312 gen_exception_insn(s, 0, EXCP_UDEF, 1313 syn_smetrap(SME_ET_InactiveZA, false)); 1314 return false; 1315 } 1316 return true; 1317 } 1318 1319 /* 1320 * Expanders for AdvSIMD translation functions. 1321 */ 1322 1323 static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data, 1324 gen_helper_gvec_2 *fn) 1325 { 1326 if (!a->q && a->esz == MO_64) { 1327 return false; 1328 } 1329 if (fp_access_check(s)) { 1330 gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn); 1331 } 1332 return true; 1333 } 1334 1335 static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data, 1336 gen_helper_gvec_3 *fn) 1337 { 1338 if (!a->q && a->esz == MO_64) { 1339 return false; 1340 } 1341 if (fp_access_check(s)) { 1342 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn); 1343 } 1344 return true; 1345 } 1346 1347 static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1348 { 1349 if (!a->q && a->esz == MO_64) { 1350 return false; 1351 } 1352 if (fp_access_check(s)) { 1353 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz); 1354 } 1355 return true; 1356 } 1357 1358 static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1359 { 1360 if (a->esz == MO_64) { 1361 return false; 1362 } 1363 if (fp_access_check(s)) { 1364 gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz); 1365 } 1366 return true; 1367 } 1368 1369 static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) 1370 { 1371 if (a->esz == MO_8) { 1372 return false; 1373 } 1374 return do_gvec_fn3_no64(s, a, fn); 1375 } 1376 1377 static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn) 1378 { 1379 if (!a->q && a->esz == MO_64) { 1380 return false; 1381 } 1382 if (fp_access_check(s)) { 1383 gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz); 1384 } 1385 return true; 1386 } 1387 1388 /* 1389 * This utility function is for doing register extension with an 1390 * optional shift. You will likely want to pass a temporary for the 1391 * destination register. See DecodeRegExtend() in the ARM ARM. 1392 */ 1393 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in, 1394 int option, unsigned int shift) 1395 { 1396 int extsize = extract32(option, 0, 2); 1397 bool is_signed = extract32(option, 2, 1); 1398 1399 tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0)); 1400 tcg_gen_shli_i64(tcg_out, tcg_out, shift); 1401 } 1402 1403 static inline void gen_check_sp_alignment(DisasContext *s) 1404 { 1405 /* The AArch64 architecture mandates that (if enabled via PSTATE 1406 * or SCTLR bits) there is a check that SP is 16-aligned on every 1407 * SP-relative load or store (with an exception generated if it is not). 1408 * In line with general QEMU practice regarding misaligned accesses, 1409 * we omit these checks for the sake of guest program performance. 1410 * This function is provided as a hook so we can more easily add these 1411 * checks in future (possibly as a "favour catching guest program bugs 1412 * over speed" user selectable option). 1413 */ 1414 } 1415 1416 /* 1417 * This provides a simple table based table lookup decoder. 
It is 1418 * intended to be used when the relevant bits for decode are too 1419 * awkwardly placed and switch/if based logic would be confusing and 1420 * deeply nested. Since it's a linear search through the table, tables 1421 * should be kept small. 1422 * 1423 * It returns the first handler where insn & mask == pattern, or 1424 * NULL if there is no match. 1425 * The table is terminated by an empty mask (i.e. 0) 1426 */ 1427 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table, 1428 uint32_t insn) 1429 { 1430 const AArch64DecodeTable *tptr = table; 1431 1432 while (tptr->mask) { 1433 if ((insn & tptr->mask) == tptr->pattern) { 1434 return tptr->disas_fn; 1435 } 1436 tptr++; 1437 } 1438 return NULL; 1439 } 1440 1441 /* 1442 * The instruction disassembly implemented here matches 1443 * the instruction encoding classifications in chapter C4 1444 * of the ARM Architecture Reference Manual (DDI0487B_a); 1445 * classification names and decode diagrams here should generally 1446 * match up with those in the manual. 1447 */ 1448 1449 static bool trans_B(DisasContext *s, arg_i *a) 1450 { 1451 reset_btype(s); 1452 gen_goto_tb(s, 0, a->imm); 1453 return true; 1454 } 1455 1456 static bool trans_BL(DisasContext *s, arg_i *a) 1457 { 1458 gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s)); 1459 reset_btype(s); 1460 gen_goto_tb(s, 0, a->imm); 1461 return true; 1462 } 1463 1464 1465 static bool trans_CBZ(DisasContext *s, arg_cbz *a) 1466 { 1467 DisasLabel match; 1468 TCGv_i64 tcg_cmp; 1469 1470 tcg_cmp = read_cpu_reg(s, a->rt, a->sf); 1471 reset_btype(s); 1472 1473 match = gen_disas_label(s); 1474 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1475 tcg_cmp, 0, match.label); 1476 gen_goto_tb(s, 0, 4); 1477 set_disas_label(s, match); 1478 gen_goto_tb(s, 1, a->imm); 1479 return true; 1480 } 1481 1482 static bool trans_TBZ(DisasContext *s, arg_tbz *a) 1483 { 1484 DisasLabel match; 1485 TCGv_i64 tcg_cmp; 1486 1487 tcg_cmp = tcg_temp_new_i64(); 1488 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos); 1489 1490 reset_btype(s); 1491 1492 match = gen_disas_label(s); 1493 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1494 tcg_cmp, 0, match.label); 1495 gen_goto_tb(s, 0, 4); 1496 set_disas_label(s, match); 1497 gen_goto_tb(s, 1, a->imm); 1498 return true; 1499 } 1500 1501 static bool trans_B_cond(DisasContext *s, arg_B_cond *a) 1502 { 1503 /* BC.cond is only present with FEAT_HBC */ 1504 if (a->c && !dc_isar_feature(aa64_hbc, s)) { 1505 return false; 1506 } 1507 reset_btype(s); 1508 if (a->cond < 0x0e) { 1509 /* genuinely conditional branches */ 1510 DisasLabel match = gen_disas_label(s); 1511 arm_gen_test_cc(a->cond, match.label); 1512 gen_goto_tb(s, 0, 4); 1513 set_disas_label(s, match); 1514 gen_goto_tb(s, 1, a->imm); 1515 } else { 1516 /* 0xe and 0xf are both "always" conditions */ 1517 gen_goto_tb(s, 0, a->imm); 1518 } 1519 return true; 1520 } 1521 1522 static void set_btype_for_br(DisasContext *s, int rn) 1523 { 1524 if (dc_isar_feature(aa64_bti, s)) { 1525 /* BR to {x16,x17} or !guard -> 1, else 3. */ 1526 if (rn == 16 || rn == 17) { 1527 set_btype(s, 1); 1528 } else { 1529 TCGv_i64 pc = tcg_temp_new_i64(); 1530 gen_pc_plus_diff(s, pc, 0); 1531 gen_helper_guarded_page_br(tcg_env, pc); 1532 s->btype = -1; 1533 } 1534 } 1535 } 1536 1537 static void set_btype_for_blr(DisasContext *s) 1538 { 1539 if (dc_isar_feature(aa64_bti, s)) { 1540 /* BLR sets BTYPE to 2, regardless of source guarded page. 
*/ 1541 set_btype(s, 2); 1542 } 1543 } 1544 1545 static bool trans_BR(DisasContext *s, arg_r *a) 1546 { 1547 set_btype_for_br(s, a->rn); 1548 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1549 s->base.is_jmp = DISAS_JUMP; 1550 return true; 1551 } 1552 1553 static bool trans_BLR(DisasContext *s, arg_r *a) 1554 { 1555 TCGv_i64 dst = cpu_reg(s, a->rn); 1556 TCGv_i64 lr = cpu_reg(s, 30); 1557 if (dst == lr) { 1558 TCGv_i64 tmp = tcg_temp_new_i64(); 1559 tcg_gen_mov_i64(tmp, dst); 1560 dst = tmp; 1561 } 1562 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1563 gen_a64_set_pc(s, dst); 1564 set_btype_for_blr(s); 1565 s->base.is_jmp = DISAS_JUMP; 1566 return true; 1567 } 1568 1569 static bool trans_RET(DisasContext *s, arg_r *a) 1570 { 1571 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1572 s->base.is_jmp = DISAS_JUMP; 1573 return true; 1574 } 1575 1576 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst, 1577 TCGv_i64 modifier, bool use_key_a) 1578 { 1579 TCGv_i64 truedst; 1580 /* 1581 * Return the branch target for a BRAA/RETA/etc, which is either 1582 * just the destination dst, or that value with the pauth check 1583 * done and the code removed from the high bits. 1584 */ 1585 if (!s->pauth_active) { 1586 return dst; 1587 } 1588 1589 truedst = tcg_temp_new_i64(); 1590 if (use_key_a) { 1591 gen_helper_autia_combined(truedst, tcg_env, dst, modifier); 1592 } else { 1593 gen_helper_autib_combined(truedst, tcg_env, dst, modifier); 1594 } 1595 return truedst; 1596 } 1597 1598 static bool trans_BRAZ(DisasContext *s, arg_braz *a) 1599 { 1600 TCGv_i64 dst; 1601 1602 if (!dc_isar_feature(aa64_pauth, s)) { 1603 return false; 1604 } 1605 1606 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1607 set_btype_for_br(s, a->rn); 1608 gen_a64_set_pc(s, dst); 1609 s->base.is_jmp = DISAS_JUMP; 1610 return true; 1611 } 1612 1613 static bool trans_BLRAZ(DisasContext *s, arg_braz *a) 1614 { 1615 TCGv_i64 dst, lr; 1616 1617 if (!dc_isar_feature(aa64_pauth, s)) { 1618 return false; 1619 } 1620 1621 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1622 lr = cpu_reg(s, 30); 1623 if (dst == lr) { 1624 TCGv_i64 tmp = tcg_temp_new_i64(); 1625 tcg_gen_mov_i64(tmp, dst); 1626 dst = tmp; 1627 } 1628 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1629 gen_a64_set_pc(s, dst); 1630 set_btype_for_blr(s); 1631 s->base.is_jmp = DISAS_JUMP; 1632 return true; 1633 } 1634 1635 static bool trans_RETA(DisasContext *s, arg_reta *a) 1636 { 1637 TCGv_i64 dst; 1638 1639 dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m); 1640 gen_a64_set_pc(s, dst); 1641 s->base.is_jmp = DISAS_JUMP; 1642 return true; 1643 } 1644 1645 static bool trans_BRA(DisasContext *s, arg_bra *a) 1646 { 1647 TCGv_i64 dst; 1648 1649 if (!dc_isar_feature(aa64_pauth, s)) { 1650 return false; 1651 } 1652 dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m); 1653 gen_a64_set_pc(s, dst); 1654 set_btype_for_br(s, a->rn); 1655 s->base.is_jmp = DISAS_JUMP; 1656 return true; 1657 } 1658 1659 static bool trans_BLRA(DisasContext *s, arg_bra *a) 1660 { 1661 TCGv_i64 dst, lr; 1662 1663 if (!dc_isar_feature(aa64_pauth, s)) { 1664 return false; 1665 } 1666 dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m); 1667 lr = cpu_reg(s, 30); 1668 if (dst == lr) { 1669 TCGv_i64 tmp = tcg_temp_new_i64(); 1670 tcg_gen_mov_i64(tmp, dst); 1671 dst = tmp; 1672 } 1673 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1674 gen_a64_set_pc(s, dst); 1675 set_btype_for_blr(s); 1676 s->base.is_jmp = DISAS_JUMP; 1677 
return true; 1678 } 1679 1680 static bool trans_ERET(DisasContext *s, arg_ERET *a) 1681 { 1682 TCGv_i64 dst; 1683 1684 if (s->current_el == 0) { 1685 return false; 1686 } 1687 if (s->trap_eret) { 1688 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2); 1689 return true; 1690 } 1691 dst = tcg_temp_new_i64(); 1692 tcg_gen_ld_i64(dst, tcg_env, 1693 offsetof(CPUARMState, elr_el[s->current_el])); 1694 1695 translator_io_start(&s->base); 1696 1697 gen_helper_exception_return(tcg_env, dst); 1698 /* Must exit loop to check un-masked IRQs */ 1699 s->base.is_jmp = DISAS_EXIT; 1700 return true; 1701 } 1702 1703 static bool trans_ERETA(DisasContext *s, arg_reta *a) 1704 { 1705 TCGv_i64 dst; 1706 1707 if (!dc_isar_feature(aa64_pauth, s)) { 1708 return false; 1709 } 1710 if (s->current_el == 0) { 1711 return false; 1712 } 1713 /* The FGT trap takes precedence over an auth trap. */ 1714 if (s->trap_eret) { 1715 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2); 1716 return true; 1717 } 1718 dst = tcg_temp_new_i64(); 1719 tcg_gen_ld_i64(dst, tcg_env, 1720 offsetof(CPUARMState, elr_el[s->current_el])); 1721 1722 dst = auth_branch_target(s, dst, cpu_X[31], !a->m); 1723 1724 translator_io_start(&s->base); 1725 1726 gen_helper_exception_return(tcg_env, dst); 1727 /* Must exit loop to check un-masked IRQs */ 1728 s->base.is_jmp = DISAS_EXIT; 1729 return true; 1730 } 1731 1732 static bool trans_NOP(DisasContext *s, arg_NOP *a) 1733 { 1734 return true; 1735 } 1736 1737 static bool trans_YIELD(DisasContext *s, arg_YIELD *a) 1738 { 1739 /* 1740 * When running in MTTCG we don't generate jumps to the yield and 1741 * WFE helpers as it won't affect the scheduling of other vCPUs. 1742 * If we wanted to more completely model WFE/SEV so we don't busy 1743 * spin unnecessarily we would need to do something more involved. 1744 */ 1745 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1746 s->base.is_jmp = DISAS_YIELD; 1747 } 1748 return true; 1749 } 1750 1751 static bool trans_WFI(DisasContext *s, arg_WFI *a) 1752 { 1753 s->base.is_jmp = DISAS_WFI; 1754 return true; 1755 } 1756 1757 static bool trans_WFE(DisasContext *s, arg_WFI *a) 1758 { 1759 /* 1760 * When running in MTTCG we don't generate jumps to the yield and 1761 * WFE helpers as it won't affect the scheduling of other vCPUs. 1762 * If we wanted to more completely model WFE/SEV so we don't busy 1763 * spin unnecessarily we would need to do something more involved. 1764 */ 1765 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1766 s->base.is_jmp = DISAS_WFE; 1767 } 1768 return true; 1769 } 1770 1771 static bool trans_WFIT(DisasContext *s, arg_WFIT *a) 1772 { 1773 if (!dc_isar_feature(aa64_wfxt, s)) { 1774 return false; 1775 } 1776 1777 /* 1778 * Because we need to pass the register value to the helper, 1779 * it's easier to emit the code now, unlike trans_WFI which 1780 * defers it to aarch64_tr_tb_stop(). That means we need to 1781 * check ss_active so that single-stepping a WFIT doesn't halt. 
1782 */ 1783 if (s->ss_active) { 1784 /* Act like a NOP under architectural singlestep */ 1785 return true; 1786 } 1787 1788 gen_a64_update_pc(s, 4); 1789 gen_helper_wfit(tcg_env, cpu_reg(s, a->rd)); 1790 /* Go back to the main loop to check for interrupts */ 1791 s->base.is_jmp = DISAS_EXIT; 1792 return true; 1793 } 1794 1795 static bool trans_WFET(DisasContext *s, arg_WFET *a) 1796 { 1797 if (!dc_isar_feature(aa64_wfxt, s)) { 1798 return false; 1799 } 1800 1801 /* 1802 * We rely here on our WFE implementation being a NOP, so we 1803 * don't need to do anything different to handle the WFET timeout 1804 * from what trans_WFE does. 1805 */ 1806 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1807 s->base.is_jmp = DISAS_WFE; 1808 } 1809 return true; 1810 } 1811 1812 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) 1813 { 1814 if (s->pauth_active) { 1815 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]); 1816 } 1817 return true; 1818 } 1819 1820 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) 1821 { 1822 if (s->pauth_active) { 1823 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1824 } 1825 return true; 1826 } 1827 1828 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) 1829 { 1830 if (s->pauth_active) { 1831 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1832 } 1833 return true; 1834 } 1835 1836 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) 1837 { 1838 if (s->pauth_active) { 1839 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1840 } 1841 return true; 1842 } 1843 1844 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) 1845 { 1846 if (s->pauth_active) { 1847 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1848 } 1849 return true; 1850 } 1851 1852 static bool trans_ESB(DisasContext *s, arg_ESB *a) 1853 { 1854 /* Without RAS, we must implement this as NOP. */ 1855 if (dc_isar_feature(aa64_ras, s)) { 1856 /* 1857 * QEMU does not have a source of physical SErrors, 1858 * so we are only concerned with virtual SErrors. 1859 * The pseudocode in the ARM for this case is 1860 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then 1861 * AArch64.vESBOperation(); 1862 * Most of the condition can be evaluated at translation time. 1863 * Test for EL2 present, and defer test for SEL2 to runtime. 
1864 */ 1865 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { 1866 gen_helper_vesb(tcg_env); 1867 } 1868 } 1869 return true; 1870 } 1871 1872 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) 1873 { 1874 if (s->pauth_active) { 1875 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1876 } 1877 return true; 1878 } 1879 1880 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) 1881 { 1882 if (s->pauth_active) { 1883 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1884 } 1885 return true; 1886 } 1887 1888 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) 1889 { 1890 if (s->pauth_active) { 1891 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1892 } 1893 return true; 1894 } 1895 1896 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) 1897 { 1898 if (s->pauth_active) { 1899 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1900 } 1901 return true; 1902 } 1903 1904 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) 1905 { 1906 if (s->pauth_active) { 1907 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1908 } 1909 return true; 1910 } 1911 1912 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) 1913 { 1914 if (s->pauth_active) { 1915 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1916 } 1917 return true; 1918 } 1919 1920 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) 1921 { 1922 if (s->pauth_active) { 1923 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1924 } 1925 return true; 1926 } 1927 1928 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) 1929 { 1930 if (s->pauth_active) { 1931 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1932 } 1933 return true; 1934 } 1935 1936 static bool trans_CLREX(DisasContext *s, arg_CLREX *a) 1937 { 1938 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 1939 return true; 1940 } 1941 1942 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) 1943 { 1944 /* We handle DSB and DMB the same way */ 1945 TCGBar bar; 1946 1947 switch (a->types) { 1948 case 1: /* MBReqTypes_Reads */ 1949 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; 1950 break; 1951 case 2: /* MBReqTypes_Writes */ 1952 bar = TCG_BAR_SC | TCG_MO_ST_ST; 1953 break; 1954 default: /* MBReqTypes_All */ 1955 bar = TCG_BAR_SC | TCG_MO_ALL; 1956 break; 1957 } 1958 tcg_gen_mb(bar); 1959 return true; 1960 } 1961 1962 static bool trans_ISB(DisasContext *s, arg_ISB *a) 1963 { 1964 /* 1965 * We need to break the TB after this insn to execute 1966 * self-modifying code correctly and also to take 1967 * any pending interrupts immediately. 1968 */ 1969 reset_btype(s); 1970 gen_goto_tb(s, 0, 4); 1971 return true; 1972 } 1973 1974 static bool trans_SB(DisasContext *s, arg_SB *a) 1975 { 1976 if (!dc_isar_feature(aa64_sb, s)) { 1977 return false; 1978 } 1979 /* 1980 * TODO: There is no speculation barrier opcode for TCG; 1981 * MB and end the TB instead. 
1982 */ 1983 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); 1984 gen_goto_tb(s, 0, 4); 1985 return true; 1986 } 1987 1988 static bool trans_CFINV(DisasContext *s, arg_CFINV *a) 1989 { 1990 if (!dc_isar_feature(aa64_condm_4, s)) { 1991 return false; 1992 } 1993 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); 1994 return true; 1995 } 1996 1997 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) 1998 { 1999 TCGv_i32 z; 2000 2001 if (!dc_isar_feature(aa64_condm_5, s)) { 2002 return false; 2003 } 2004 2005 z = tcg_temp_new_i32(); 2006 2007 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); 2008 2009 /* 2010 * (!C & !Z) << 31 2011 * (!(C | Z)) << 31 2012 * ~((C | Z) << 31) 2013 * ~-(C | Z) 2014 * (C | Z) - 1 2015 */ 2016 tcg_gen_or_i32(cpu_NF, cpu_CF, z); 2017 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1); 2018 2019 /* !(Z & C) */ 2020 tcg_gen_and_i32(cpu_ZF, z, cpu_CF); 2021 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1); 2022 2023 /* (!C & Z) << 31 -> -(Z & ~C) */ 2024 tcg_gen_andc_i32(cpu_VF, z, cpu_CF); 2025 tcg_gen_neg_i32(cpu_VF, cpu_VF); 2026 2027 /* C | Z */ 2028 tcg_gen_or_i32(cpu_CF, cpu_CF, z); 2029 2030 return true; 2031 } 2032 2033 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) 2034 { 2035 if (!dc_isar_feature(aa64_condm_5, s)) { 2036 return false; 2037 } 2038 2039 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */ 2040 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ 2041 2042 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ 2043 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF); 2044 2045 tcg_gen_movi_i32(cpu_NF, 0); 2046 tcg_gen_movi_i32(cpu_VF, 0); 2047 2048 return true; 2049 } 2050 2051 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) 2052 { 2053 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { 2054 return false; 2055 } 2056 if (a->imm & 1) { 2057 set_pstate_bits(PSTATE_UAO); 2058 } else { 2059 clear_pstate_bits(PSTATE_UAO); 2060 } 2061 gen_rebuild_hflags(s); 2062 s->base.is_jmp = DISAS_TOO_MANY; 2063 return true; 2064 } 2065 2066 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) 2067 { 2068 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { 2069 return false; 2070 } 2071 if (a->imm & 1) { 2072 set_pstate_bits(PSTATE_PAN); 2073 } else { 2074 clear_pstate_bits(PSTATE_PAN); 2075 } 2076 gen_rebuild_hflags(s); 2077 s->base.is_jmp = DISAS_TOO_MANY; 2078 return true; 2079 } 2080 2081 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) 2082 { 2083 if (s->current_el == 0) { 2084 return false; 2085 } 2086 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP)); 2087 s->base.is_jmp = DISAS_TOO_MANY; 2088 return true; 2089 } 2090 2091 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) 2092 { 2093 if (!dc_isar_feature(aa64_ssbs, s)) { 2094 return false; 2095 } 2096 if (a->imm & 1) { 2097 set_pstate_bits(PSTATE_SSBS); 2098 } else { 2099 clear_pstate_bits(PSTATE_SSBS); 2100 } 2101 /* Don't need to rebuild hflags since SSBS is a nop */ 2102 s->base.is_jmp = DISAS_TOO_MANY; 2103 return true; 2104 } 2105 2106 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) 2107 { 2108 if (!dc_isar_feature(aa64_dit, s)) { 2109 return false; 2110 } 2111 if (a->imm & 1) { 2112 set_pstate_bits(PSTATE_DIT); 2113 } else { 2114 clear_pstate_bits(PSTATE_DIT); 2115 } 2116 /* There's no need to rebuild hflags because DIT is a nop */ 2117 s->base.is_jmp = DISAS_TOO_MANY; 2118 return true; 2119 } 2120 2121 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a) 2122 { 2123 if (dc_isar_feature(aa64_mte, s)) { 2124 /* Full MTE is enabled -- set the TCO bit as directed. 
*/ 2125 if (a->imm & 1) { 2126 set_pstate_bits(PSTATE_TCO); 2127 } else { 2128 clear_pstate_bits(PSTATE_TCO); 2129 } 2130 gen_rebuild_hflags(s); 2131 /* Many factors, including TCO, go into MTE_ACTIVE. */ 2132 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2133 return true; 2134 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { 2135 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ 2136 return true; 2137 } else { 2138 /* Insn not present */ 2139 return false; 2140 } 2141 } 2142 2143 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) 2144 { 2145 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm)); 2146 s->base.is_jmp = DISAS_TOO_MANY; 2147 return true; 2148 } 2149 2150 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) 2151 { 2152 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm)); 2153 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2154 s->base.is_jmp = DISAS_UPDATE_EXIT; 2155 return true; 2156 } 2157 2158 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a) 2159 { 2160 if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) { 2161 return false; 2162 } 2163 2164 if (a->imm == 0) { 2165 clear_pstate_bits(PSTATE_ALLINT); 2166 } else if (s->current_el > 1) { 2167 set_pstate_bits(PSTATE_ALLINT); 2168 } else { 2169 gen_helper_msr_set_allint_el1(tcg_env); 2170 } 2171 2172 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2173 s->base.is_jmp = DISAS_UPDATE_EXIT; 2174 return true; 2175 } 2176 2177 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) 2178 { 2179 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { 2180 return false; 2181 } 2182 if (sme_access_check(s)) { 2183 int old = s->pstate_sm | (s->pstate_za << 1); 2184 int new = a->imm * 3; 2185 2186 if ((old ^ new) & a->mask) { 2187 /* At least one bit changes. */ 2188 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new), 2189 tcg_constant_i32(a->mask)); 2190 s->base.is_jmp = DISAS_TOO_MANY; 2191 } 2192 } 2193 return true; 2194 } 2195 2196 static void gen_get_nzcv(TCGv_i64 tcg_rt) 2197 { 2198 TCGv_i32 tmp = tcg_temp_new_i32(); 2199 TCGv_i32 nzcv = tcg_temp_new_i32(); 2200 2201 /* build bit 31, N */ 2202 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31)); 2203 /* build bit 30, Z */ 2204 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0); 2205 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1); 2206 /* build bit 29, C */ 2207 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1); 2208 /* build bit 28, V */ 2209 tcg_gen_shri_i32(tmp, cpu_VF, 31); 2210 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); 2211 /* generate result */ 2212 tcg_gen_extu_i32_i64(tcg_rt, nzcv); 2213 } 2214 2215 static void gen_set_nzcv(TCGv_i64 tcg_rt) 2216 { 2217 TCGv_i32 nzcv = tcg_temp_new_i32(); 2218 2219 /* take NZCV from R[t] */ 2220 tcg_gen_extrl_i64_i32(nzcv, tcg_rt); 2221 2222 /* bit 31, N */ 2223 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); 2224 /* bit 30, Z */ 2225 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30)); 2226 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0); 2227 /* bit 29, C */ 2228 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29)); 2229 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29); 2230 /* bit 28, V */ 2231 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); 2232 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); 2233 } 2234 2235 static void gen_sysreg_undef(DisasContext *s, bool isread, 2236 uint8_t op0, uint8_t op1, uint8_t op2, 2237 uint8_t crn, uint8_t crm, uint8_t rt) 2238 { 2239 /* 2240 * Generate code to emit an UNDEF with correct syndrome 2241 * information for a failed system register access. 
2242 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases, 2243 * but if FEAT_IDST is implemented then read accesses to registers 2244 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP 2245 * syndrome. 2246 */ 2247 uint32_t syndrome; 2248 2249 if (isread && dc_isar_feature(aa64_ids, s) && 2250 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { 2251 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2252 } else { 2253 syndrome = syn_uncategorized(); 2254 } 2255 gen_exception_insn(s, 0, EXCP_UDEF, syndrome); 2256 } 2257 2258 /* MRS - move from system register 2259 * MSR (register) - move to system register 2260 * SYS 2261 * SYSL 2262 * These are all essentially the same insn in 'read' and 'write' 2263 * versions, with varying op0 fields. 2264 */ 2265 static void handle_sys(DisasContext *s, bool isread, 2266 unsigned int op0, unsigned int op1, unsigned int op2, 2267 unsigned int crn, unsigned int crm, unsigned int rt) 2268 { 2269 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2270 crn, crm, op0, op1, op2); 2271 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 2272 bool need_exit_tb = false; 2273 bool nv_trap_to_el2 = false; 2274 bool nv_redirect_reg = false; 2275 bool skip_fp_access_checks = false; 2276 bool nv2_mem_redirect = false; 2277 TCGv_ptr tcg_ri = NULL; 2278 TCGv_i64 tcg_rt; 2279 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2280 2281 if (crn == 11 || crn == 15) { 2282 /* 2283 * Check for TIDCP trap, which must take precedence over 2284 * the UNDEF for "no such register" etc. 2285 */ 2286 switch (s->current_el) { 2287 case 0: 2288 if (dc_isar_feature(aa64_tidcp1, s)) { 2289 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome)); 2290 } 2291 break; 2292 case 1: 2293 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome)); 2294 break; 2295 } 2296 } 2297 2298 if (!ri) { 2299 /* Unknown register; this might be a guest error or a QEMU 2300 * unimplemented feature. 2301 */ 2302 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " 2303 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", 2304 isread ? "read" : "write", op0, op1, crn, crm, op2); 2305 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2306 return; 2307 } 2308 2309 if (s->nv2 && ri->nv2_redirect_offset) { 2310 /* 2311 * Some registers always redirect to memory; some only do so if 2312 * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in 2313 * pairs which share an offset; see the table in R_CSRPQ). 2314 */ 2315 if (ri->nv2_redirect_offset & NV2_REDIR_NV1) { 2316 nv2_mem_redirect = s->nv1; 2317 } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) { 2318 nv2_mem_redirect = !s->nv1; 2319 } else { 2320 nv2_mem_redirect = true; 2321 } 2322 } 2323 2324 /* Check access permissions */ 2325 if (!cp_access_ok(s->current_el, ri, isread)) { 2326 /* 2327 * FEAT_NV/NV2 handling does not do the usual FP access checks 2328 * for registers only accessible at EL2 (though it *does* do them 2329 * for registers accessible at EL1). 2330 */ 2331 skip_fp_access_checks = true; 2332 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) { 2333 /* 2334 * This is one of the few EL2 registers which should redirect 2335 * to the equivalent EL1 register. We do that after running 2336 * the EL2 register's accessfn. 2337 */ 2338 nv_redirect_reg = true; 2339 assert(!nv2_mem_redirect); 2340 } else if (nv2_mem_redirect) { 2341 /* 2342 * NV2 redirect-to-memory takes precedence over trap to EL2 or 2343 * UNDEF to EL1. 
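 * We therefore leave this branch empty here; the actual memory access is emitted further down, once the remaining access checks have been done.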
2344 */ 2345 } else if (s->nv && arm_cpreg_traps_in_nv(ri)) { 2346 /* 2347 * This register / instruction exists and is an EL2 register, so 2348 * we must trap to EL2 if accessed in nested virtualization EL1 2349 * instead of UNDEFing. We'll do that after the usual access checks. 2350 * (This makes a difference only for a couple of registers like 2351 * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority 2352 * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have 2353 * an accessfn which does nothing when called from EL1, because 2354 * the trap-to-EL3 controls which would apply to that register 2355 * at EL2 don't take priority over the FEAT_NV trap-to-EL2.) 2356 */ 2357 nv_trap_to_el2 = true; 2358 } else { 2359 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2360 return; 2361 } 2362 } 2363 2364 if (ri->accessfn || (ri->fgt && s->fgt_active)) { 2365 /* Emit code to perform further access permissions checks at 2366 * runtime; this may result in an exception. 2367 */ 2368 gen_a64_update_pc(s, 0); 2369 tcg_ri = tcg_temp_new_ptr(); 2370 gen_helper_access_check_cp_reg(tcg_ri, tcg_env, 2371 tcg_constant_i32(key), 2372 tcg_constant_i32(syndrome), 2373 tcg_constant_i32(isread)); 2374 } else if (ri->type & ARM_CP_RAISES_EXC) { 2375 /* 2376 * The readfn or writefn might raise an exception; 2377 * synchronize the CPU state in case it does. 2378 */ 2379 gen_a64_update_pc(s, 0); 2380 } 2381 2382 if (!skip_fp_access_checks) { 2383 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 2384 return; 2385 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 2386 return; 2387 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 2388 return; 2389 } 2390 } 2391 2392 if (nv_trap_to_el2) { 2393 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2394 return; 2395 } 2396 2397 if (nv_redirect_reg) { 2398 /* 2399 * FEAT_NV2 redirection of an EL2 register to an EL1 register. 2400 * Conveniently in all cases the encoding of the EL1 register is 2401 * identical to the EL2 register except that opc1 is 0. 2402 * Get the reginfo for the EL1 register to use for the actual access. 2403 * We don't use the EL1 register's access function, and 2404 * fine-grained-traps on EL1 also do not apply here. 2405 */ 2406 key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2407 crn, crm, op0, 0, op2); 2408 ri = get_arm_cp_reginfo(s->cp_regs, key); 2409 assert(ri); 2410 assert(cp_access_ok(s->current_el, ri, isread)); 2411 /* 2412 * We might not have done an update_pc earlier, so check we don't 2413 * need it. We could support this in future if necessary. 2414 */ 2415 assert(!(ri->type & ARM_CP_RAISES_EXC)); 2416 } 2417 2418 if (nv2_mem_redirect) { 2419 /* 2420 * This system register is being redirected into an EL2 memory access. 2421 * This means it is not an IO operation, doesn't change hflags, 2422 * and need not end the TB, because it has no side effects. 2423 * 2424 * The access is 64-bit single copy atomic, guaranteed aligned because 2425 * of the definition of VCNR_EL2. Its endianness depends on 2426 * SCTLR_EL2.EE, not on the data endianness of EL1. 2427 * It is done under either the EL2 translation regime or the EL2&0 2428 * translation regime, depending on HCR_EL2.E2H. It behaves as if 2429 * PSTATE.PAN is 0. 2430 */ 2431 TCGv_i64 ptr = tcg_temp_new_i64(); 2432 MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN; 2433 ARMMMUIdx armmemidx = s->nv2_mem_e20 ? 
ARMMMUIdx_E20_2 : ARMMMUIdx_E2; 2434 int memidx = arm_to_core_mmu_idx(armmemidx); 2435 uint32_t syn; 2436 2437 mop |= (s->nv2_mem_be ? MO_BE : MO_LE); 2438 2439 tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2)); 2440 tcg_gen_addi_i64(ptr, ptr, 2441 (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK)); 2442 tcg_rt = cpu_reg(s, rt); 2443 2444 syn = syn_data_abort_vncr(0, !isread, 0); 2445 disas_set_insn_syndrome(s, syn); 2446 if (isread) { 2447 tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop); 2448 } else { 2449 tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop); 2450 } 2451 return; 2452 } 2453 2454 /* Handle special cases first */ 2455 switch (ri->type & ARM_CP_SPECIAL_MASK) { 2456 case 0: 2457 break; 2458 case ARM_CP_NOP: 2459 return; 2460 case ARM_CP_NZCV: 2461 tcg_rt = cpu_reg(s, rt); 2462 if (isread) { 2463 gen_get_nzcv(tcg_rt); 2464 } else { 2465 gen_set_nzcv(tcg_rt); 2466 } 2467 return; 2468 case ARM_CP_CURRENTEL: 2469 { 2470 /* 2471 * Reads as current EL value from pstate, which is 2472 * guaranteed to be constant by the tb flags. 2473 * For nested virt we should report EL2. 2474 */ 2475 int el = s->nv ? 2 : s->current_el; 2476 tcg_rt = cpu_reg(s, rt); 2477 tcg_gen_movi_i64(tcg_rt, el << 2); 2478 return; 2479 } 2480 case ARM_CP_DC_ZVA: 2481 /* Writes clear the aligned block of memory which rt points into. */ 2482 if (s->mte_active[0]) { 2483 int desc = 0; 2484 2485 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 2486 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 2487 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 2488 2489 tcg_rt = tcg_temp_new_i64(); 2490 gen_helper_mte_check_zva(tcg_rt, tcg_env, 2491 tcg_constant_i32(desc), cpu_reg(s, rt)); 2492 } else { 2493 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 2494 } 2495 gen_helper_dc_zva(tcg_env, tcg_rt); 2496 return; 2497 case ARM_CP_DC_GVA: 2498 { 2499 TCGv_i64 clean_addr, tag; 2500 2501 /* 2502 * DC_GVA, like DC_ZVA, requires that we supply the original 2503 * pointer for an invalid page. Probe that address first. 2504 */ 2505 tcg_rt = cpu_reg(s, rt); 2506 clean_addr = clean_data_tbi(s, tcg_rt); 2507 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 2508 2509 if (s->ata[0]) { 2510 /* Extract the tag from the register to match STZGM. */ 2511 tag = tcg_temp_new_i64(); 2512 tcg_gen_shri_i64(tag, tcg_rt, 56); 2513 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2514 } 2515 } 2516 return; 2517 case ARM_CP_DC_GZVA: 2518 { 2519 TCGv_i64 clean_addr, tag; 2520 2521 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 2522 tcg_rt = cpu_reg(s, rt); 2523 clean_addr = clean_data_tbi(s, tcg_rt); 2524 gen_helper_dc_zva(tcg_env, clean_addr); 2525 2526 if (s->ata[0]) { 2527 /* Extract the tag from the register to match STZGM. 
*/ 2528 tag = tcg_temp_new_i64(); 2529 tcg_gen_shri_i64(tag, tcg_rt, 56); 2530 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2531 } 2532 } 2533 return; 2534 default: 2535 g_assert_not_reached(); 2536 } 2537 2538 if (ri->type & ARM_CP_IO) { 2539 /* I/O operations must end the TB here (whether read or write) */ 2540 need_exit_tb = translator_io_start(&s->base); 2541 } 2542 2543 tcg_rt = cpu_reg(s, rt); 2544 2545 if (isread) { 2546 if (ri->type & ARM_CP_CONST) { 2547 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2548 } else if (ri->readfn) { 2549 if (!tcg_ri) { 2550 tcg_ri = gen_lookup_cp_reg(key); 2551 } 2552 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri); 2553 } else { 2554 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset); 2555 } 2556 } else { 2557 if (ri->type & ARM_CP_CONST) { 2558 /* If not forbidden by access permissions, treat as WI */ 2559 return; 2560 } else if (ri->writefn) { 2561 if (!tcg_ri) { 2562 tcg_ri = gen_lookup_cp_reg(key); 2563 } 2564 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt); 2565 } else { 2566 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset); 2567 } 2568 } 2569 2570 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2571 /* 2572 * A write to any coprocessor register that ends a TB 2573 * must rebuild the hflags for the next TB. 2574 */ 2575 gen_rebuild_hflags(s); 2576 /* 2577 * We default to ending the TB on a coprocessor register write, 2578 * but allow this to be suppressed by the register definition 2579 * (usually only necessary to work around guest bugs). 2580 */ 2581 need_exit_tb = true; 2582 } 2583 if (need_exit_tb) { 2584 s->base.is_jmp = DISAS_UPDATE_EXIT; 2585 } 2586 } 2587 2588 static bool trans_SYS(DisasContext *s, arg_SYS *a) 2589 { 2590 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); 2591 return true; 2592 } 2593 2594 static bool trans_SVC(DisasContext *s, arg_i *a) 2595 { 2596 /* 2597 * For SVC, HVC and SMC we advance the single-step state 2598 * machine before taking the exception. This is architecturally 2599 * mandated, to ensure that single-stepping a system call 2600 * instruction works properly. 2601 */ 2602 uint32_t syndrome = syn_aa64_svc(a->imm); 2603 if (s->fgt_svc) { 2604 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2605 return true; 2606 } 2607 gen_ss_advance(s); 2608 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2609 return true; 2610 } 2611 2612 static bool trans_HVC(DisasContext *s, arg_i *a) 2613 { 2614 int target_el = s->current_el == 3 ? 3 : 2; 2615 2616 if (s->current_el == 0) { 2617 unallocated_encoding(s); 2618 return true; 2619 } 2620 /* 2621 * The pre HVC helper handles cases when HVC gets trapped 2622 * as an undefined insn by runtime configuration. 
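 * (for instance EL3/EL2 controls such as SCR_EL3.HCE, which can only be examined at runtime).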
2623 */ 2624 gen_a64_update_pc(s, 0); 2625 gen_helper_pre_hvc(tcg_env); 2626 /* Architecture requires ss advance before we do the actual work */ 2627 gen_ss_advance(s); 2628 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el); 2629 return true; 2630 } 2631 2632 static bool trans_SMC(DisasContext *s, arg_i *a) 2633 { 2634 if (s->current_el == 0) { 2635 unallocated_encoding(s); 2636 return true; 2637 } 2638 gen_a64_update_pc(s, 0); 2639 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm))); 2640 /* Architecture requires ss advance before we do the actual work */ 2641 gen_ss_advance(s); 2642 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); 2643 return true; 2644 } 2645 2646 static bool trans_BRK(DisasContext *s, arg_i *a) 2647 { 2648 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); 2649 return true; 2650 } 2651 2652 static bool trans_HLT(DisasContext *s, arg_i *a) 2653 { 2654 /* 2655 * HLT. This has two purposes. 2656 * Architecturally, it is an external halting debug instruction. 2657 * Since QEMU doesn't implement external debug, we treat it as 2658 * the architecture requires when halting debug is disabled: it will UNDEF. 2659 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2660 */ 2661 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { 2662 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2663 } else { 2664 unallocated_encoding(s); 2665 } 2666 return true; 2667 } 2668 2669 /* 2670 * Load/Store exclusive instructions are implemented by remembering 2671 * the value/address loaded, and seeing if these are the same 2672 * when the store is performed. This is not actually the architecturally 2673 * mandated semantics, but it works for typical guest code sequences 2674 * and avoids having to monitor regular stores. 2675 * 2676 * The store exclusive uses the atomic cmpxchg primitives to avoid 2677 * races in multi-threaded linux-user and when MTTCG softmmu is 2678 * enabled.
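 * One consequence of the cmpxchg approach: a store-exclusive can still succeed if another CPU wrote to the monitored location in the meantime but restored the original value (an ABA case the cmpxchg cannot detect), whereas real hardware would have cleared the monitor.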
2679 */ 2680 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, 2681 int size, bool is_pair) 2682 { 2683 int idx = get_mem_index(s); 2684 TCGv_i64 dirty_addr, clean_addr; 2685 MemOp memop = check_atomic_align(s, rn, size + is_pair); 2686 2687 s->is_ldex = true; 2688 dirty_addr = cpu_reg_sp(s, rn); 2689 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); 2690 2691 g_assert(size <= 3); 2692 if (is_pair) { 2693 g_assert(size >= 2); 2694 if (size == 2) { 2695 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2696 if (s->be_data == MO_LE) { 2697 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2698 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2699 } else { 2700 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2701 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2702 } 2703 } else { 2704 TCGv_i128 t16 = tcg_temp_new_i128(); 2705 2706 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); 2707 2708 if (s->be_data == MO_LE) { 2709 tcg_gen_extr_i128_i64(cpu_exclusive_val, 2710 cpu_exclusive_high, t16); 2711 } else { 2712 tcg_gen_extr_i128_i64(cpu_exclusive_high, 2713 cpu_exclusive_val, t16); 2714 } 2715 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2716 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2717 } 2718 } else { 2719 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2720 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2721 } 2722 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); 2723 } 2724 2725 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2726 int rn, int size, int is_pair) 2727 { 2728 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2729 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2730 * [addr] = {Rt}; 2731 * if (is_pair) { 2732 * [addr + datasize] = {Rt2}; 2733 * } 2734 * {Rd} = 0; 2735 * } else { 2736 * {Rd} = 1; 2737 * } 2738 * env->exclusive_addr = -1; 2739 */ 2740 TCGLabel *fail_label = gen_new_label(); 2741 TCGLabel *done_label = gen_new_label(); 2742 TCGv_i64 tmp, clean_addr; 2743 MemOp memop; 2744 2745 /* 2746 * FIXME: We are out of spec here. We have recorded only the address 2747 * from load_exclusive, not the entire range, and we assume that the 2748 * size of the access on both sides match. The architecture allows the 2749 * store to be smaller than the load, so long as the stored bytes are 2750 * within the range recorded by the load. 2751 */ 2752 2753 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */ 2754 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); 2755 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); 2756 2757 /* 2758 * The write, and any associated faults, only happen if the virtual 2759 * and physical addresses pass the exclusive monitor check. These 2760 * faults are exceedingly unlikely, because normally the guest uses 2761 * the exact same address register for the load_exclusive, and we 2762 * would have recognized these faults there. 2763 * 2764 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an 2765 * unaligned 4-byte write within the range of an aligned 8-byte load. 2766 * With LSE2, the store would need to cross a 16-byte boundary when the 2767 * load did not, which would mean the store is outside the range 2768 * recorded for the monitor, which would have failed a corrected monitor 2769 * check above. 
For now, we assume no size change and retain the 2770 * MO_ALIGN to let tcg know what we checked in the load_exclusive. 2771 * 2772 * It is possible to trigger an MTE fault, by performing the load with 2773 * a virtual address with a valid tag and performing the store with the 2774 * same virtual address and a different invalid tag. 2775 */ 2776 memop = size + is_pair; 2777 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { 2778 memop |= MO_ALIGN; 2779 } 2780 memop = finalize_memop(s, memop); 2781 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2782 2783 tmp = tcg_temp_new_i64(); 2784 if (is_pair) { 2785 if (size == 2) { 2786 if (s->be_data == MO_LE) { 2787 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2788 } else { 2789 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2790 } 2791 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2792 cpu_exclusive_val, tmp, 2793 get_mem_index(s), memop); 2794 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2795 } else { 2796 TCGv_i128 t16 = tcg_temp_new_i128(); 2797 TCGv_i128 c16 = tcg_temp_new_i128(); 2798 TCGv_i64 a, b; 2799 2800 if (s->be_data == MO_LE) { 2801 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2802 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2803 cpu_exclusive_high); 2804 } else { 2805 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 2806 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 2807 cpu_exclusive_val); 2808 } 2809 2810 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 2811 get_mem_index(s), memop); 2812 2813 a = tcg_temp_new_i64(); 2814 b = tcg_temp_new_i64(); 2815 if (s->be_data == MO_LE) { 2816 tcg_gen_extr_i128_i64(a, b, t16); 2817 } else { 2818 tcg_gen_extr_i128_i64(b, a, t16); 2819 } 2820 2821 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 2822 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 2823 tcg_gen_or_i64(tmp, a, b); 2824 2825 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 2826 } 2827 } else { 2828 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 2829 cpu_reg(s, rt), get_mem_index(s), memop); 2830 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2831 } 2832 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 2833 tcg_gen_br(done_label); 2834 2835 gen_set_label(fail_label); 2836 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 2837 gen_set_label(done_label); 2838 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2839 } 2840 2841 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 2842 int rn, int size) 2843 { 2844 TCGv_i64 tcg_rs = cpu_reg(s, rs); 2845 TCGv_i64 tcg_rt = cpu_reg(s, rt); 2846 int memidx = get_mem_index(s); 2847 TCGv_i64 clean_addr; 2848 MemOp memop; 2849 2850 if (rn == 31) { 2851 gen_check_sp_alignment(s); 2852 } 2853 memop = check_atomic_align(s, rn, size); 2854 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2855 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, 2856 memidx, memop); 2857 } 2858 2859 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 2860 int rn, int size) 2861 { 2862 TCGv_i64 s1 = cpu_reg(s, rs); 2863 TCGv_i64 s2 = cpu_reg(s, rs + 1); 2864 TCGv_i64 t1 = cpu_reg(s, rt); 2865 TCGv_i64 t2 = cpu_reg(s, rt + 1); 2866 TCGv_i64 clean_addr; 2867 int memidx = get_mem_index(s); 2868 MemOp memop; 2869 2870 if (rn == 31) { 2871 gen_check_sp_alignment(s); 2872 } 2873 2874 /* This is a single atomic access, despite the "pair". 
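 * For Ws/Wt pairs (size == 2) we issue one 64-bit cmpxchg of the concatenated words; for Xs/Xt pairs, one 128-bit cmpxchg.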
*/ 2875 memop = check_atomic_align(s, rn, size + 1); 2876 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2877 2878 if (size == 2) { 2879 TCGv_i64 cmp = tcg_temp_new_i64(); 2880 TCGv_i64 val = tcg_temp_new_i64(); 2881 2882 if (s->be_data == MO_LE) { 2883 tcg_gen_concat32_i64(val, t1, t2); 2884 tcg_gen_concat32_i64(cmp, s1, s2); 2885 } else { 2886 tcg_gen_concat32_i64(val, t2, t1); 2887 tcg_gen_concat32_i64(cmp, s2, s1); 2888 } 2889 2890 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); 2891 2892 if (s->be_data == MO_LE) { 2893 tcg_gen_extr32_i64(s1, s2, cmp); 2894 } else { 2895 tcg_gen_extr32_i64(s2, s1, cmp); 2896 } 2897 } else { 2898 TCGv_i128 cmp = tcg_temp_new_i128(); 2899 TCGv_i128 val = tcg_temp_new_i128(); 2900 2901 if (s->be_data == MO_LE) { 2902 tcg_gen_concat_i64_i128(val, t1, t2); 2903 tcg_gen_concat_i64_i128(cmp, s1, s2); 2904 } else { 2905 tcg_gen_concat_i64_i128(val, t2, t1); 2906 tcg_gen_concat_i64_i128(cmp, s2, s1); 2907 } 2908 2909 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); 2910 2911 if (s->be_data == MO_LE) { 2912 tcg_gen_extr_i128_i64(s1, s2, cmp); 2913 } else { 2914 tcg_gen_extr_i128_i64(s2, s1, cmp); 2915 } 2916 } 2917 } 2918 2919 /* 2920 * Compute the ISS.SF bit for syndrome information if an exception 2921 * is taken on a load or store. This indicates whether the instruction 2922 * is accessing a 32-bit or 64-bit register. This logic is derived 2923 * from the ARMv8 specs for LDR (Shared decode for all encodings). 2924 */ 2925 static bool ldst_iss_sf(int size, bool sign, bool ext) 2926 { 2927 2928 if (sign) { 2929 /* 2930 * Signed loads are 64 bit results if we are not going to 2931 * do a zero-extend from 32 to 64 after the load. 2932 * (For a store, sign and ext are always false.) 2933 */ 2934 return !ext; 2935 } else { 2936 /* Unsigned loads/stores work at the specified size */ 2937 return size == MO_64; 2938 } 2939 } 2940 2941 static bool trans_STXR(DisasContext *s, arg_stxr *a) 2942 { 2943 if (a->rn == 31) { 2944 gen_check_sp_alignment(s); 2945 } 2946 if (a->lasr) { 2947 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2948 } 2949 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); 2950 return true; 2951 } 2952 2953 static bool trans_LDXR(DisasContext *s, arg_stxr *a) 2954 { 2955 if (a->rn == 31) { 2956 gen_check_sp_alignment(s); 2957 } 2958 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); 2959 if (a->lasr) { 2960 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2961 } 2962 return true; 2963 } 2964 2965 static bool trans_STLR(DisasContext *s, arg_stlr *a) 2966 { 2967 TCGv_i64 clean_addr; 2968 MemOp memop; 2969 bool iss_sf = ldst_iss_sf(a->sz, false, false); 2970 2971 /* 2972 * StoreLORelease is the same as Store-Release for QEMU, but 2973 * needs the feature-test. 2974 */ 2975 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 2976 return false; 2977 } 2978 /* Generate ISS for non-exclusive accesses including LASR. 
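 * That is, we pass a valid ISS to do_gpr_st below so that a data abort reports the transfer register, with the syndrome AR bit taken from LASR.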
*/ 2979 if (a->rn == 31) { 2980 gen_check_sp_alignment(s); 2981 } 2982 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2983 memop = check_ordered_align(s, a->rn, 0, true, a->sz); 2984 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 2985 true, a->rn != 31, memop); 2986 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, 2987 iss_sf, a->lasr); 2988 return true; 2989 } 2990 2991 static bool trans_LDAR(DisasContext *s, arg_stlr *a) 2992 { 2993 TCGv_i64 clean_addr; 2994 MemOp memop; 2995 bool iss_sf = ldst_iss_sf(a->sz, false, false); 2996 2997 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 2998 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 2999 return false; 3000 } 3001 /* Generate ISS for non-exclusive accesses including LASR. */ 3002 if (a->rn == 31) { 3003 gen_check_sp_alignment(s); 3004 } 3005 memop = check_ordered_align(s, a->rn, 0, false, a->sz); 3006 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3007 false, a->rn != 31, memop); 3008 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, 3009 a->rt, iss_sf, a->lasr); 3010 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3011 return true; 3012 } 3013 3014 static bool trans_STXP(DisasContext *s, arg_stxr *a) 3015 { 3016 if (a->rn == 31) { 3017 gen_check_sp_alignment(s); 3018 } 3019 if (a->lasr) { 3020 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3021 } 3022 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); 3023 return true; 3024 } 3025 3026 static bool trans_LDXP(DisasContext *s, arg_stxr *a) 3027 { 3028 if (a->rn == 31) { 3029 gen_check_sp_alignment(s); 3030 } 3031 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); 3032 if (a->lasr) { 3033 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3034 } 3035 return true; 3036 } 3037 3038 static bool trans_CASP(DisasContext *s, arg_CASP *a) 3039 { 3040 if (!dc_isar_feature(aa64_atomics, s)) { 3041 return false; 3042 } 3043 if (((a->rt | a->rs) & 1) != 0) { 3044 return false; 3045 } 3046 3047 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); 3048 return true; 3049 } 3050 3051 static bool trans_CAS(DisasContext *s, arg_CAS *a) 3052 { 3053 if (!dc_isar_feature(aa64_atomics, s)) { 3054 return false; 3055 } 3056 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); 3057 return true; 3058 } 3059 3060 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) 3061 { 3062 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); 3063 TCGv_i64 tcg_rt = cpu_reg(s, a->rt); 3064 TCGv_i64 clean_addr = tcg_temp_new_i64(); 3065 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3066 3067 gen_pc_plus_diff(s, clean_addr, a->imm); 3068 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3069 false, true, a->rt, iss_sf, false); 3070 return true; 3071 } 3072 3073 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) 3074 { 3075 /* Load register (literal), vector version */ 3076 TCGv_i64 clean_addr; 3077 MemOp memop; 3078 3079 if (!fp_access_check(s)) { 3080 return true; 3081 } 3082 memop = finalize_memop_asimd(s, a->sz); 3083 clean_addr = tcg_temp_new_i64(); 3084 gen_pc_plus_diff(s, clean_addr, a->imm); 3085 do_fp_ld(s, a->rt, clean_addr, memop); 3086 return true; 3087 } 3088 3089 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, 3090 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3091 uint64_t offset, bool is_store, MemOp mop) 3092 { 3093 if (a->rn == 31) { 3094 gen_check_sp_alignment(s); 3095 } 3096 3097 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3098 if (!a->p) { 3099 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3100 } 3101 3102 *clean_addr = gen_mte_checkN(s, 
*dirty_addr, is_store, 3103 (a->w || a->rn != 31), 2 << a->sz, mop); 3104 } 3105 3106 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, 3107 TCGv_i64 dirty_addr, uint64_t offset) 3108 { 3109 if (a->w) { 3110 if (a->p) { 3111 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3112 } 3113 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3114 } 3115 } 3116 3117 static bool trans_STP(DisasContext *s, arg_ldstpair *a) 3118 { 3119 uint64_t offset = a->imm << a->sz; 3120 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3121 MemOp mop = finalize_memop(s, a->sz); 3122 3123 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3124 tcg_rt = cpu_reg(s, a->rt); 3125 tcg_rt2 = cpu_reg(s, a->rt2); 3126 /* 3127 * We built mop above for the single logical access -- rebuild it 3128 * now for the paired operation. 3129 * 3130 * With LSE2, non-sign-extending pairs are treated atomically if 3131 * aligned, and if unaligned one of the pair will be completely 3132 * within a 16-byte block and that element will be atomic. 3133 * Otherwise each element is separately atomic. 3134 * In all cases, issue one operation with the correct atomicity. 3135 */ 3136 mop = a->sz + 1; 3137 if (s->align_mem) { 3138 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3139 } 3140 mop = finalize_memop_pair(s, mop); 3141 if (a->sz == 2) { 3142 TCGv_i64 tmp = tcg_temp_new_i64(); 3143 3144 if (s->be_data == MO_LE) { 3145 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); 3146 } else { 3147 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); 3148 } 3149 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); 3150 } else { 3151 TCGv_i128 tmp = tcg_temp_new_i128(); 3152 3153 if (s->be_data == MO_LE) { 3154 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3155 } else { 3156 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3157 } 3158 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3159 } 3160 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3161 return true; 3162 } 3163 3164 static bool trans_LDP(DisasContext *s, arg_ldstpair *a) 3165 { 3166 uint64_t offset = a->imm << a->sz; 3167 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3168 MemOp mop = finalize_memop(s, a->sz); 3169 3170 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3171 tcg_rt = cpu_reg(s, a->rt); 3172 tcg_rt2 = cpu_reg(s, a->rt2); 3173 3174 /* 3175 * We built mop above for the single logical access -- rebuild it 3176 * now for the paired operation. 3177 * 3178 * With LSE2, non-sign-extending pairs are treated atomically if 3179 * aligned, and if unaligned one of the pair will be completely 3180 * within a 16-byte block and that element will be atomic. 3181 * Otherwise each element is separately atomic. 3182 * In all cases, issue one operation with the correct atomicity. 3183 * 3184 * This treats sign-extending loads like zero-extending loads, 3185 * since that reuses the most code below. 3186 */ 3187 mop = a->sz + 1; 3188 if (s->align_mem) { 3189 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3190 } 3191 mop = finalize_memop_pair(s, mop); 3192 if (a->sz == 2) { 3193 int o2 = s->be_data == MO_LE ? 
32 : 0; 3194 int o1 = o2 ^ 32; 3195 3196 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); 3197 if (a->sign) { 3198 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); 3199 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); 3200 } else { 3201 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); 3202 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); 3203 } 3204 } else { 3205 TCGv_i128 tmp = tcg_temp_new_i128(); 3206 3207 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); 3208 if (s->be_data == MO_LE) { 3209 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); 3210 } else { 3211 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); 3212 } 3213 } 3214 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3215 return true; 3216 } 3217 3218 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) 3219 { 3220 uint64_t offset = a->imm << a->sz; 3221 TCGv_i64 clean_addr, dirty_addr; 3222 MemOp mop; 3223 3224 if (!fp_access_check(s)) { 3225 return true; 3226 } 3227 3228 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3229 mop = finalize_memop_asimd(s, a->sz); 3230 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3231 do_fp_st(s, a->rt, clean_addr, mop); 3232 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3233 do_fp_st(s, a->rt2, clean_addr, mop); 3234 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3235 return true; 3236 } 3237 3238 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) 3239 { 3240 uint64_t offset = a->imm << a->sz; 3241 TCGv_i64 clean_addr, dirty_addr; 3242 MemOp mop; 3243 3244 if (!fp_access_check(s)) { 3245 return true; 3246 } 3247 3248 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3249 mop = finalize_memop_asimd(s, a->sz); 3250 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3251 do_fp_ld(s, a->rt, clean_addr, mop); 3252 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3253 do_fp_ld(s, a->rt2, clean_addr, mop); 3254 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3255 return true; 3256 } 3257 3258 static bool trans_STGP(DisasContext *s, arg_ldstpair *a) 3259 { 3260 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3261 uint64_t offset = a->imm << LOG2_TAG_GRANULE; 3262 MemOp mop; 3263 TCGv_i128 tmp; 3264 3265 /* STGP only comes in one size. */ 3266 tcg_debug_assert(a->sz == MO_64); 3267 3268 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3269 return false; 3270 } 3271 3272 if (a->rn == 31) { 3273 gen_check_sp_alignment(s); 3274 } 3275 3276 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3277 if (!a->p) { 3278 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3279 } 3280 3281 clean_addr = clean_data_tbi(s, dirty_addr); 3282 tcg_rt = cpu_reg(s, a->rt); 3283 tcg_rt2 = cpu_reg(s, a->rt2); 3284 3285 /* 3286 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, 3287 * and one tag operation. We implement it as one single aligned 16-byte 3288 * memory operation for convenience. Note that the alignment ensures 3289 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. 3290 */ 3291 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); 3292 3293 tmp = tcg_temp_new_i128(); 3294 if (s->be_data == MO_LE) { 3295 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3296 } else { 3297 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3298 } 3299 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3300 3301 /* Perform the tag store, if tag access enabled. 
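 * If tag access is not enabled at this EL, only the 16-byte data store above takes effect and the tag write is skipped.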
*/ 3302 if (s->ata[0]) { 3303 if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3304 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr); 3305 } else { 3306 gen_helper_stg(tcg_env, dirty_addr, dirty_addr); 3307 } 3308 } 3309 3310 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3311 return true; 3312 } 3313 3314 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, 3315 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3316 uint64_t offset, bool is_store, MemOp mop) 3317 { 3318 int memidx; 3319 3320 if (a->rn == 31) { 3321 gen_check_sp_alignment(s); 3322 } 3323 3324 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3325 if (!a->p) { 3326 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3327 } 3328 memidx = get_a64_user_mem_index(s, a->unpriv); 3329 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, 3330 a->w || a->rn != 31, 3331 mop, a->unpriv, memidx); 3332 } 3333 3334 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, 3335 TCGv_i64 dirty_addr, uint64_t offset) 3336 { 3337 if (a->w) { 3338 if (a->p) { 3339 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3340 } 3341 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3342 } 3343 } 3344 3345 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) 3346 { 3347 bool iss_sf, iss_valid = !a->w; 3348 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3349 int memidx = get_a64_user_mem_index(s, a->unpriv); 3350 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3351 3352 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3353 3354 tcg_rt = cpu_reg(s, a->rt); 3355 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3356 3357 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, 3358 iss_valid, a->rt, iss_sf, false); 3359 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3360 return true; 3361 } 3362 3363 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) 3364 { 3365 bool iss_sf, iss_valid = !a->w; 3366 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3367 int memidx = get_a64_user_mem_index(s, a->unpriv); 3368 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3369 3370 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3371 3372 tcg_rt = cpu_reg(s, a->rt); 3373 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3374 3375 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, 3376 a->ext, memidx, iss_valid, a->rt, iss_sf, false); 3377 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3378 return true; 3379 } 3380 3381 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) 3382 { 3383 TCGv_i64 clean_addr, dirty_addr; 3384 MemOp mop; 3385 3386 if (!fp_access_check(s)) { 3387 return true; 3388 } 3389 mop = finalize_memop_asimd(s, a->sz); 3390 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3391 do_fp_st(s, a->rt, clean_addr, mop); 3392 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3393 return true; 3394 } 3395 3396 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) 3397 { 3398 TCGv_i64 clean_addr, dirty_addr; 3399 MemOp mop; 3400 3401 if (!fp_access_check(s)) { 3402 return true; 3403 } 3404 mop = finalize_memop_asimd(s, a->sz); 3405 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3406 do_fp_ld(s, a->rt, clean_addr, mop); 3407 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3408 return true; 3409 } 3410 3411 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, 3412 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3413 bool is_store, MemOp memop) 3414 { 3415 TCGv_i64 tcg_rm; 3416 3417 if (a->rn == 31) { 3418 
gen_check_sp_alignment(s); 3419 } 3420 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3421 3422 tcg_rm = read_cpu_reg(s, a->rm, 1); 3423 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); 3424 3425 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); 3426 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); 3427 } 3428 3429 static bool trans_LDR(DisasContext *s, arg_ldst *a) 3430 { 3431 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3432 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3433 MemOp memop; 3434 3435 if (extract32(a->opt, 1, 1) == 0) { 3436 return false; 3437 } 3438 3439 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3440 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3441 tcg_rt = cpu_reg(s, a->rt); 3442 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3443 a->ext, true, a->rt, iss_sf, false); 3444 return true; 3445 } 3446 3447 static bool trans_STR(DisasContext *s, arg_ldst *a) 3448 { 3449 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3450 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3451 MemOp memop; 3452 3453 if (extract32(a->opt, 1, 1) == 0) { 3454 return false; 3455 } 3456 3457 memop = finalize_memop(s, a->sz); 3458 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3459 tcg_rt = cpu_reg(s, a->rt); 3460 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); 3461 return true; 3462 } 3463 3464 static bool trans_LDR_v(DisasContext *s, arg_ldst *a) 3465 { 3466 TCGv_i64 clean_addr, dirty_addr; 3467 MemOp memop; 3468 3469 if (extract32(a->opt, 1, 1) == 0) { 3470 return false; 3471 } 3472 3473 if (!fp_access_check(s)) { 3474 return true; 3475 } 3476 3477 memop = finalize_memop_asimd(s, a->sz); 3478 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3479 do_fp_ld(s, a->rt, clean_addr, memop); 3480 return true; 3481 } 3482 3483 static bool trans_STR_v(DisasContext *s, arg_ldst *a) 3484 { 3485 TCGv_i64 clean_addr, dirty_addr; 3486 MemOp memop; 3487 3488 if (extract32(a->opt, 1, 1) == 0) { 3489 return false; 3490 } 3491 3492 if (!fp_access_check(s)) { 3493 return true; 3494 } 3495 3496 memop = finalize_memop_asimd(s, a->sz); 3497 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3498 do_fp_st(s, a->rt, clean_addr, memop); 3499 return true; 3500 } 3501 3502 3503 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, 3504 int sign, bool invert) 3505 { 3506 MemOp mop = a->sz | sign; 3507 TCGv_i64 clean_addr, tcg_rs, tcg_rt; 3508 3509 if (a->rn == 31) { 3510 gen_check_sp_alignment(s); 3511 } 3512 mop = check_atomic_align(s, a->rn, mop); 3513 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3514 a->rn != 31, mop); 3515 tcg_rs = read_cpu_reg(s, a->rs, true); 3516 tcg_rt = cpu_reg(s, a->rt); 3517 if (invert) { 3518 tcg_gen_not_i64(tcg_rs, tcg_rs); 3519 } 3520 /* 3521 * The tcg atomic primitives are all full barriers. Therefore we 3522 * can ignore the Acquire and Release bits of this instruction. 
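 * Note also that for the signed min/max ops MO_SIGN only matters for the comparison inside the atomic op; the switch below re-zero-extends the old value returned in Rt to the access size.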
3523 */ 3524 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3525 3526 if (mop & MO_SIGN) { 3527 switch (a->sz) { 3528 case MO_8: 3529 tcg_gen_ext8u_i64(tcg_rt, tcg_rt); 3530 break; 3531 case MO_16: 3532 tcg_gen_ext16u_i64(tcg_rt, tcg_rt); 3533 break; 3534 case MO_32: 3535 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3536 break; 3537 case MO_64: 3538 break; 3539 default: 3540 g_assert_not_reached(); 3541 } 3542 } 3543 return true; 3544 } 3545 3546 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) 3547 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) 3548 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) 3549 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) 3550 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) 3551 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) 3552 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 3553 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) 3554 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) 3555 3556 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) 3557 { 3558 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3559 TCGv_i64 clean_addr; 3560 MemOp mop; 3561 3562 if (!dc_isar_feature(aa64_atomics, s) || 3563 !dc_isar_feature(aa64_rcpc_8_3, s)) { 3564 return false; 3565 } 3566 if (a->rn == 31) { 3567 gen_check_sp_alignment(s); 3568 } 3569 mop = check_ordered_align(s, a->rn, 0, false, a->sz); 3570 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3571 a->rn != 31, mop); 3572 /* 3573 * LDAPR* are a special case because they are a simple load, not a 3574 * fetch-and-do-something op. 3575 * The architectural consistency requirements here are weaker than 3576 * full load-acquire (we only need "load-acquire processor consistent"), 3577 * but we choose to implement them as full LDAQ. 3578 */ 3579 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, 3580 true, a->rt, iss_sf, true); 3581 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3582 return true; 3583 } 3584 3585 static bool trans_LDRA(DisasContext *s, arg_LDRA *a) 3586 { 3587 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3588 MemOp memop; 3589 3590 /* Load with pointer authentication */ 3591 if (!dc_isar_feature(aa64_pauth, s)) { 3592 return false; 3593 } 3594 3595 if (a->rn == 31) { 3596 gen_check_sp_alignment(s); 3597 } 3598 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3599 3600 if (s->pauth_active) { 3601 if (!a->m) { 3602 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr, 3603 tcg_constant_i64(0)); 3604 } else { 3605 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr, 3606 tcg_constant_i64(0)); 3607 } 3608 } 3609 3610 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3611 3612 memop = finalize_memop(s, MO_64); 3613 3614 /* Note that "clean" and "dirty" here refer to TBI not PAC. 
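 * That is, even the authenticated pointer may still carry a top-byte tag; gen_mte_check1 below produces the TBI-cleaned address and performs any MTE check.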
*/ 3615 clean_addr = gen_mte_check1(s, dirty_addr, false, 3616 a->w || a->rn != 31, memop); 3617 3618 tcg_rt = cpu_reg(s, a->rt); 3619 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3620 /* extend */ false, /* iss_valid */ !a->w, 3621 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); 3622 3623 if (a->w) { 3624 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3625 } 3626 return true; 3627 } 3628 3629 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3630 { 3631 TCGv_i64 clean_addr, dirty_addr; 3632 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); 3633 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3634 3635 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3636 return false; 3637 } 3638 3639 if (a->rn == 31) { 3640 gen_check_sp_alignment(s); 3641 } 3642 3643 mop = check_ordered_align(s, a->rn, a->imm, false, mop); 3644 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3645 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3646 clean_addr = clean_data_tbi(s, dirty_addr); 3647 3648 /* 3649 * Load-AcquirePC semantics; we implement as the slightly more 3650 * restrictive Load-Acquire. 3651 */ 3652 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, 3653 a->rt, iss_sf, true); 3654 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3655 return true; 3656 } 3657 3658 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3659 { 3660 TCGv_i64 clean_addr, dirty_addr; 3661 MemOp mop = a->sz; 3662 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3663 3664 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3665 return false; 3666 } 3667 3668 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3669 3670 if (a->rn == 31) { 3671 gen_check_sp_alignment(s); 3672 } 3673 3674 mop = check_ordered_align(s, a->rn, a->imm, true, mop); 3675 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3676 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3677 clean_addr = clean_data_tbi(s, dirty_addr); 3678 3679 /* Store-Release semantics */ 3680 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3681 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); 3682 return true; 3683 } 3684 3685 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) 3686 { 3687 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3688 MemOp endian, align, mop; 3689 3690 int total; /* total bytes */ 3691 int elements; /* elements per vector */ 3692 int r; 3693 int size = a->sz; 3694 3695 if (!a->p && a->rm != 0) { 3696 /* For non-postindexed accesses the Rm field must be 0 */ 3697 return false; 3698 } 3699 if (size == 3 && !a->q && a->selem != 1) { 3700 return false; 3701 } 3702 if (!fp_access_check(s)) { 3703 return true; 3704 } 3705 3706 if (a->rn == 31) { 3707 gen_check_sp_alignment(s); 3708 } 3709 3710 /* For our purposes, bytes are always little-endian. */ 3711 endian = s->be_data; 3712 if (size == 0) { 3713 endian = MO_LE; 3714 } 3715 3716 total = a->rpt * a->selem * (a->q ? 16 : 8); 3717 tcg_rn = cpu_reg_sp(s, a->rn); 3718 3719 /* 3720 * Issue the MTE check vs the logical repeat count, before we 3721 * promote consecutive little-endian elements below. 3722 */ 3723 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, 3724 finalize_memop_asimd(s, size)); 3725 3726 /* 3727 * Consecutive little-endian elements from a single register 3728 * can be promoted to a larger little-endian operation. 3729 */ 3730 align = MO_ALIGN; 3731 if (a->selem == 1 && endian == MO_LE) { 3732 align = pow2_align(size); 3733 size = 3; 3734 } 3735 if (!s->align_mem) { 3736 align = 0; 3737 } 3738 mop = endian | size | align; 3739 3740 elements = (a->q ? 
16 : 8) >> size; 3741 tcg_ebytes = tcg_constant_i64(1 << size); 3742 for (r = 0; r < a->rpt; r++) { 3743 int e; 3744 for (e = 0; e < elements; e++) { 3745 int xs; 3746 for (xs = 0; xs < a->selem; xs++) { 3747 int tt = (a->rt + r + xs) % 32; 3748 do_vec_ld(s, tt, e, clean_addr, mop); 3749 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3750 } 3751 } 3752 } 3753 3754 /* 3755 * For non-quad operations, setting a slice of the low 64 bits of 3756 * the register clears the high 64 bits (in the ARM ARM pseudocode 3757 * this is implicit in the fact that 'rval' is a 64 bit wide 3758 * variable). For quad operations, we might still need to zero 3759 * the high bits of SVE. 3760 */ 3761 for (r = 0; r < a->rpt * a->selem; r++) { 3762 int tt = (a->rt + r) % 32; 3763 clear_vec_high(s, a->q, tt); 3764 } 3765 3766 if (a->p) { 3767 if (a->rm == 31) { 3768 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3769 } else { 3770 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3771 } 3772 } 3773 return true; 3774 } 3775 3776 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) 3777 { 3778 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3779 MemOp endian, align, mop; 3780 3781 int total; /* total bytes */ 3782 int elements; /* elements per vector */ 3783 int r; 3784 int size = a->sz; 3785 3786 if (!a->p && a->rm != 0) { 3787 /* For non-postindexed accesses the Rm field must be 0 */ 3788 return false; 3789 } 3790 if (size == 3 && !a->q && a->selem != 1) { 3791 return false; 3792 } 3793 if (!fp_access_check(s)) { 3794 return true; 3795 } 3796 3797 if (a->rn == 31) { 3798 gen_check_sp_alignment(s); 3799 } 3800 3801 /* For our purposes, bytes are always little-endian. */ 3802 endian = s->be_data; 3803 if (size == 0) { 3804 endian = MO_LE; 3805 } 3806 3807 total = a->rpt * a->selem * (a->q ? 16 : 8); 3808 tcg_rn = cpu_reg_sp(s, a->rn); 3809 3810 /* 3811 * Issue the MTE check vs the logical repeat count, before we 3812 * promote consecutive little-endian elements below. 3813 */ 3814 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, 3815 finalize_memop_asimd(s, size)); 3816 3817 /* 3818 * Consecutive little-endian elements from a single register 3819 * can be promoted to a larger little-endian operation. 3820 */ 3821 align = MO_ALIGN; 3822 if (a->selem == 1 && endian == MO_LE) { 3823 align = pow2_align(size); 3824 size = 3; 3825 } 3826 if (!s->align_mem) { 3827 align = 0; 3828 } 3829 mop = endian | size | align; 3830 3831 elements = (a->q ? 
16 : 8) >> size; 3832 tcg_ebytes = tcg_constant_i64(1 << size); 3833 for (r = 0; r < a->rpt; r++) { 3834 int e; 3835 for (e = 0; e < elements; e++) { 3836 int xs; 3837 for (xs = 0; xs < a->selem; xs++) { 3838 int tt = (a->rt + r + xs) % 32; 3839 do_vec_st(s, tt, e, clean_addr, mop); 3840 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3841 } 3842 } 3843 } 3844 3845 if (a->p) { 3846 if (a->rm == 31) { 3847 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3848 } else { 3849 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3850 } 3851 } 3852 return true; 3853 } 3854 3855 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) 3856 { 3857 int xs, total, rt; 3858 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3859 MemOp mop; 3860 3861 if (!a->p && a->rm != 0) { 3862 return false; 3863 } 3864 if (!fp_access_check(s)) { 3865 return true; 3866 } 3867 3868 if (a->rn == 31) { 3869 gen_check_sp_alignment(s); 3870 } 3871 3872 total = a->selem << a->scale; 3873 tcg_rn = cpu_reg_sp(s, a->rn); 3874 3875 mop = finalize_memop_asimd(s, a->scale); 3876 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, 3877 total, mop); 3878 3879 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3880 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3881 do_vec_st(s, rt, a->index, clean_addr, mop); 3882 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3883 } 3884 3885 if (a->p) { 3886 if (a->rm == 31) { 3887 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3888 } else { 3889 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3890 } 3891 } 3892 return true; 3893 } 3894 3895 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) 3896 { 3897 int xs, total, rt; 3898 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3899 MemOp mop; 3900 3901 if (!a->p && a->rm != 0) { 3902 return false; 3903 } 3904 if (!fp_access_check(s)) { 3905 return true; 3906 } 3907 3908 if (a->rn == 31) { 3909 gen_check_sp_alignment(s); 3910 } 3911 3912 total = a->selem << a->scale; 3913 tcg_rn = cpu_reg_sp(s, a->rn); 3914 3915 mop = finalize_memop_asimd(s, a->scale); 3916 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3917 total, mop); 3918 3919 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3920 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3921 do_vec_ld(s, rt, a->index, clean_addr, mop); 3922 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3923 } 3924 3925 if (a->p) { 3926 if (a->rm == 31) { 3927 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3928 } else { 3929 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3930 } 3931 } 3932 return true; 3933 } 3934 3935 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) 3936 { 3937 int xs, total, rt; 3938 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3939 MemOp mop; 3940 3941 if (!a->p && a->rm != 0) { 3942 return false; 3943 } 3944 if (!fp_access_check(s)) { 3945 return true; 3946 } 3947 3948 if (a->rn == 31) { 3949 gen_check_sp_alignment(s); 3950 } 3951 3952 total = a->selem << a->scale; 3953 tcg_rn = cpu_reg_sp(s, a->rn); 3954 3955 mop = finalize_memop_asimd(s, a->scale); 3956 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3957 total, mop); 3958 3959 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3960 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3961 /* Load and replicate to all elements */ 3962 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 3963 3964 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 3965 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), 3966 (a->q + 
1) * 8, vec_full_reg_size(s), tcg_tmp); 3967 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3968 } 3969 3970 if (a->p) { 3971 if (a->rm == 31) { 3972 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3973 } else { 3974 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3975 } 3976 } 3977 return true; 3978 } 3979 3980 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) 3981 { 3982 TCGv_i64 addr, clean_addr, tcg_rt; 3983 int size = 4 << s->dcz_blocksize; 3984 3985 if (!dc_isar_feature(aa64_mte, s)) { 3986 return false; 3987 } 3988 if (s->current_el == 0) { 3989 return false; 3990 } 3991 3992 if (a->rn == 31) { 3993 gen_check_sp_alignment(s); 3994 } 3995 3996 addr = read_cpu_reg_sp(s, a->rn, true); 3997 tcg_gen_addi_i64(addr, addr, a->imm); 3998 tcg_rt = cpu_reg(s, a->rt); 3999 4000 if (s->ata[0]) { 4001 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt); 4002 } 4003 /* 4004 * The non-tags portion of STZGM is mostly like DC_ZVA, 4005 * except the alignment happens before the access. 4006 */ 4007 clean_addr = clean_data_tbi(s, addr); 4008 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4009 gen_helper_dc_zva(tcg_env, clean_addr); 4010 return true; 4011 } 4012 4013 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) 4014 { 4015 TCGv_i64 addr, clean_addr, tcg_rt; 4016 4017 if (!dc_isar_feature(aa64_mte, s)) { 4018 return false; 4019 } 4020 if (s->current_el == 0) { 4021 return false; 4022 } 4023 4024 if (a->rn == 31) { 4025 gen_check_sp_alignment(s); 4026 } 4027 4028 addr = read_cpu_reg_sp(s, a->rn, true); 4029 tcg_gen_addi_i64(addr, addr, a->imm); 4030 tcg_rt = cpu_reg(s, a->rt); 4031 4032 if (s->ata[0]) { 4033 gen_helper_stgm(tcg_env, addr, tcg_rt); 4034 } else { 4035 MMUAccessType acc = MMU_DATA_STORE; 4036 int size = 4 << s->gm_blocksize; 4037 4038 clean_addr = clean_data_tbi(s, addr); 4039 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4040 gen_probe_access(s, clean_addr, acc, size); 4041 } 4042 return true; 4043 } 4044 4045 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) 4046 { 4047 TCGv_i64 addr, clean_addr, tcg_rt; 4048 4049 if (!dc_isar_feature(aa64_mte, s)) { 4050 return false; 4051 } 4052 if (s->current_el == 0) { 4053 return false; 4054 } 4055 4056 if (a->rn == 31) { 4057 gen_check_sp_alignment(s); 4058 } 4059 4060 addr = read_cpu_reg_sp(s, a->rn, true); 4061 tcg_gen_addi_i64(addr, addr, a->imm); 4062 tcg_rt = cpu_reg(s, a->rt); 4063 4064 if (s->ata[0]) { 4065 gen_helper_ldgm(tcg_rt, tcg_env, addr); 4066 } else { 4067 MMUAccessType acc = MMU_DATA_LOAD; 4068 int size = 4 << s->gm_blocksize; 4069 4070 clean_addr = clean_data_tbi(s, addr); 4071 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4072 gen_probe_access(s, clean_addr, acc, size); 4073 /* The result tags are zeros. */ 4074 tcg_gen_movi_i64(tcg_rt, 0); 4075 } 4076 return true; 4077 } 4078 4079 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) 4080 { 4081 TCGv_i64 addr, clean_addr, tcg_rt; 4082 4083 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 4084 return false; 4085 } 4086 4087 if (a->rn == 31) { 4088 gen_check_sp_alignment(s); 4089 } 4090 4091 addr = read_cpu_reg_sp(s, a->rn, true); 4092 if (!a->p) { 4093 /* pre-index or signed offset */ 4094 tcg_gen_addi_i64(addr, addr, a->imm); 4095 } 4096 4097 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); 4098 tcg_rt = cpu_reg(s, a->rt); 4099 if (s->ata[0]) { 4100 gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt); 4101 } else { 4102 /* 4103 * Tag access disabled: we must check for aborts on the load 4104 * load from [rn+offset], and then insert a 0 tag into rt. 
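 * The one-byte probe below exists only to take any fault on the
 * granule-aligned address; rt then keeps its value apart from the
 * allocation tag field, which is simply cleared to zero.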
4105 */ 4106 clean_addr = clean_data_tbi(s, addr); 4107 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 4108 gen_address_with_allocation_tag0(tcg_rt, tcg_rt); 4109 } 4110 4111 if (a->w) { 4112 /* pre-index or post-index */ 4113 if (a->p) { 4114 /* post-index */ 4115 tcg_gen_addi_i64(addr, addr, a->imm); 4116 } 4117 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4118 } 4119 return true; 4120 } 4121 4122 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) 4123 { 4124 TCGv_i64 addr, tcg_rt; 4125 4126 if (a->rn == 31) { 4127 gen_check_sp_alignment(s); 4128 } 4129 4130 addr = read_cpu_reg_sp(s, a->rn, true); 4131 if (!a->p) { 4132 /* pre-index or signed offset */ 4133 tcg_gen_addi_i64(addr, addr, a->imm); 4134 } 4135 tcg_rt = cpu_reg_sp(s, a->rt); 4136 if (!s->ata[0]) { 4137 /* 4138 * For STG and ST2G, we need to check alignment and probe memory. 4139 * TODO: For STZG and STZ2G, we could rely on the stores below, 4140 * at least for system mode; user-only won't enforce alignment. 4141 */ 4142 if (is_pair) { 4143 gen_helper_st2g_stub(tcg_env, addr); 4144 } else { 4145 gen_helper_stg_stub(tcg_env, addr); 4146 } 4147 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 4148 if (is_pair) { 4149 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt); 4150 } else { 4151 gen_helper_stg_parallel(tcg_env, addr, tcg_rt); 4152 } 4153 } else { 4154 if (is_pair) { 4155 gen_helper_st2g(tcg_env, addr, tcg_rt); 4156 } else { 4157 gen_helper_stg(tcg_env, addr, tcg_rt); 4158 } 4159 } 4160 4161 if (is_zero) { 4162 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 4163 TCGv_i64 zero64 = tcg_constant_i64(0); 4164 TCGv_i128 zero128 = tcg_temp_new_i128(); 4165 int mem_index = get_mem_index(s); 4166 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN); 4167 4168 tcg_gen_concat_i64_i128(zero128, zero64, zero64); 4169 4170 /* This is 1 or 2 atomic 16-byte operations. */ 4171 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4172 if (is_pair) { 4173 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4174 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4175 } 4176 } 4177 4178 if (a->w) { 4179 /* pre-index or post-index */ 4180 if (a->p) { 4181 /* post-index */ 4182 tcg_gen_addi_i64(addr, addr, a->imm); 4183 } 4184 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4185 } 4186 return true; 4187 } 4188 4189 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) 4190 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) 4191 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) 4192 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) 4193 4194 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32); 4195 4196 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, 4197 bool is_setg, SetFn fn) 4198 { 4199 int memidx; 4200 uint32_t syndrome, desc = 0; 4201 4202 if (is_setg && !dc_isar_feature(aa64_mte, s)) { 4203 return false; 4204 } 4205 4206 /* 4207 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4208 * us to pull this check before the CheckMOPSEnabled() test 4209 * (which we do in the helper function) 4210 */ 4211 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4212 a->rd == 31 || a->rn == 31) { 4213 return false; 4214 } 4215 4216 memidx = get_a64_user_mem_index(s, a->unpriv); 4217 4218 /* 4219 * We pass option_a == true, matching our implementation; 4220 * we pass wrong_option == false: helper function may set that bit. 
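 * FEAT_MOPS permits two algorithms (option A and option B) for these
 * insns; recording the option in the syndrome lets a sequence that
 * faulted under one algorithm be detected if it is resumed under the
 * other, which is roughly what the wrong_option bit reports.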
4221 */ 4222 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv, 4223 is_epilogue, false, true, a->rd, a->rs, a->rn); 4224 4225 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) { 4226 /* We may need to do MTE tag checking, so assemble the descriptor */ 4227 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4228 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4229 desc = FIELD_DP32(desc, MTEDESC, WRITE, true); 4230 /* SIZEM1 and ALIGN we leave 0 (byte write) */ 4231 } 4232 /* The helper function always needs the memidx even with MTE disabled */ 4233 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx); 4234 4235 /* 4236 * The helper needs the register numbers, but since they're in 4237 * the syndrome anyway, we let it extract them from there rather 4238 * than passing in an extra three integer arguments. 4239 */ 4240 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc)); 4241 return true; 4242 } 4243 4244 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp) 4245 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm) 4246 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete) 4247 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp) 4248 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm) 4249 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge) 4250 4251 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32); 4252 4253 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) 4254 { 4255 int rmemidx, wmemidx; 4256 uint32_t syndrome, rdesc = 0, wdesc = 0; 4257 bool wunpriv = extract32(a->options, 0, 1); 4258 bool runpriv = extract32(a->options, 1, 1); 4259 4260 /* 4261 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4262 * us to pull this check before the CheckMOPSEnabled() test 4263 * (which we do in the helper function) 4264 */ 4265 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4266 a->rd == 31 || a->rs == 31 || a->rn == 31) { 4267 return false; 4268 } 4269 4270 rmemidx = get_a64_user_mem_index(s, runpriv); 4271 wmemidx = get_a64_user_mem_index(s, wunpriv); 4272 4273 /* 4274 * We pass option_a == true, matching our implementation; 4275 * we pass wrong_option == false: helper function may set that bit. 4276 */ 4277 syndrome = syn_mop(false, false, a->options, is_epilogue, 4278 false, true, a->rd, a->rs, a->rn); 4279 4280 /* If we need to do MTE tag checking, assemble the descriptors */ 4281 if (s->mte_active[runpriv]) { 4282 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid); 4283 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma); 4284 } 4285 if (s->mte_active[wunpriv]) { 4286 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid); 4287 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma); 4288 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true); 4289 } 4290 /* The helper function needs these parts of the descriptor regardless */ 4291 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx); 4292 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx); 4293 4294 /* 4295 * The helper needs the register numbers, but since they're in 4296 * the syndrome anyway, we let it extract them from there rather 4297 * than passing in an extra three integer arguments. 
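 * The syndrome also records the options field, from which the
 * unprivileged read/write flags above were extracted.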
4298 */ 4299 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc), 4300 tcg_constant_i32(rdesc)); 4301 return true; 4302 } 4303 4304 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp) 4305 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym) 4306 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye) 4307 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp) 4308 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm) 4309 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe) 4310 4311 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); 4312 4313 static bool gen_rri(DisasContext *s, arg_rri_sf *a, 4314 bool rd_sp, bool rn_sp, ArithTwoOp *fn) 4315 { 4316 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn); 4317 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd); 4318 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm); 4319 4320 fn(tcg_rd, tcg_rn, tcg_imm); 4321 if (!a->sf) { 4322 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4323 } 4324 return true; 4325 } 4326 4327 /* 4328 * PC-rel. addressing 4329 */ 4330 4331 static bool trans_ADR(DisasContext *s, arg_ri *a) 4332 { 4333 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm); 4334 return true; 4335 } 4336 4337 static bool trans_ADRP(DisasContext *s, arg_ri *a) 4338 { 4339 int64_t offset = (int64_t)a->imm << 12; 4340 4341 /* The page offset is ok for CF_PCREL. */ 4342 offset -= s->pc_curr & 0xfff; 4343 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset); 4344 return true; 4345 } 4346 4347 /* 4348 * Add/subtract (immediate) 4349 */ 4350 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64) 4351 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64) 4352 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) 4353 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) 4354 4355 /* 4356 * Add/subtract (immediate, with tags) 4357 */ 4358 4359 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, 4360 bool sub_op) 4361 { 4362 TCGv_i64 tcg_rn, tcg_rd; 4363 int imm; 4364 4365 imm = a->uimm6 << LOG2_TAG_GRANULE; 4366 if (sub_op) { 4367 imm = -imm; 4368 } 4369 4370 tcg_rn = cpu_reg_sp(s, a->rn); 4371 tcg_rd = cpu_reg_sp(s, a->rd); 4372 4373 if (s->ata[0]) { 4374 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn, 4375 tcg_constant_i32(imm), 4376 tcg_constant_i32(a->uimm4)); 4377 } else { 4378 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 4379 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 4380 } 4381 return true; 4382 } 4383 4384 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false) 4385 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true) 4386 4387 /* The input should be a value in the bottom e bits (with higher 4388 * bits zero); returns that value replicated into every element 4389 * of size e in a 64 bit integer. 4390 */ 4391 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 4392 { 4393 assert(e != 0); 4394 while (e < 64) { 4395 mask |= mask << e; 4396 e *= 2; 4397 } 4398 return mask; 4399 } 4400 4401 /* 4402 * Logical (immediate) 4403 */ 4404 4405 /* 4406 * Simplified variant of pseudocode DecodeBitMasks() for the case where we 4407 * only require the wmask. Returns false if the imms/immr/immn are a reserved 4408 * value (ie should cause a guest UNDEF exception), and true if they are 4409 * valid, in which case the decoded bit pattern is written to result. 
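 *
 * For example, immn == 0, imms == 0b000111, immr == 0 selects 32-bit
 * elements each containing a run of 8 ones with no rotation, so the
 * decoded pattern is 0x000000ff000000ff.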
4410 */ 4411 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, 4412 unsigned int imms, unsigned int immr) 4413 { 4414 uint64_t mask; 4415 unsigned e, levels, s, r; 4416 int len; 4417 4418 assert(immn < 2 && imms < 64 && immr < 64); 4419 4420 /* The bit patterns we create here are 64 bit patterns which 4421 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or 4422 * 64 bits each. Each element contains the same value: a run 4423 * of between 1 and e-1 non-zero bits, rotated within the 4424 * element by between 0 and e-1 bits. 4425 * 4426 * The element size and run length are encoded into immn (1 bit) 4427 * and imms (6 bits) as follows: 4428 * 64 bit elements: immn = 1, imms = <length of run - 1> 4429 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1> 4430 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1> 4431 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1> 4432 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1> 4433 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1> 4434 * Notice that immn = 0, imms = 11111x is the only combination 4435 * not covered by one of the above options; this is reserved. 4436 * Further, <length of run - 1> all-ones is a reserved pattern. 4437 * 4438 * In all cases the rotation is by immr % e (and immr is 6 bits). 4439 */ 4440 4441 /* First determine the element size */ 4442 len = 31 - clz32((immn << 6) | (~imms & 0x3f)); 4443 if (len < 1) { 4444 /* This is the immn == 0, imms == 0x11111x case */ 4445 return false; 4446 } 4447 e = 1 << len; 4448 4449 levels = e - 1; 4450 s = imms & levels; 4451 r = immr & levels; 4452 4453 if (s == levels) { 4454 /* <length of run - 1> mustn't be all-ones. */ 4455 return false; 4456 } 4457 4458 /* Create the value of one element: s+1 set bits rotated 4459 * by r within the element (which is e bits wide)... 4460 */ 4461 mask = MAKE_64BIT_MASK(0, s + 1); 4462 if (r) { 4463 mask = (mask >> r) | (mask << (e - r)); 4464 mask &= MAKE_64BIT_MASK(0, e); 4465 } 4466 /* ...then replicate the element over the whole 64 bit value */ 4467 mask = bitfield_replicate(mask, e); 4468 *result = mask; 4469 return true; 4470 } 4471 4472 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc, 4473 void (*fn)(TCGv_i64, TCGv_i64, int64_t)) 4474 { 4475 TCGv_i64 tcg_rd, tcg_rn; 4476 uint64_t imm; 4477 4478 /* Some immediate field values are reserved. */ 4479 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 4480 extract32(a->dbm, 0, 6), 4481 extract32(a->dbm, 6, 6))) { 4482 return false; 4483 } 4484 if (!a->sf) { 4485 imm &= 0xffffffffull; 4486 } 4487 4488 tcg_rd = set_cc ? 
cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd); 4489 tcg_rn = cpu_reg(s, a->rn); 4490 4491 fn(tcg_rd, tcg_rn, imm); 4492 if (set_cc) { 4493 gen_logic_CC(a->sf, tcg_rd); 4494 } 4495 if (!a->sf) { 4496 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4497 } 4498 return true; 4499 } 4500 4501 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64) 4502 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64) 4503 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64) 4504 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64) 4505 4506 /* 4507 * Move wide (immediate) 4508 */ 4509 4510 static bool trans_MOVZ(DisasContext *s, arg_movw *a) 4511 { 4512 int pos = a->hw << 4; 4513 tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos); 4514 return true; 4515 } 4516 4517 static bool trans_MOVN(DisasContext *s, arg_movw *a) 4518 { 4519 int pos = a->hw << 4; 4520 uint64_t imm = a->imm; 4521 4522 imm = ~(imm << pos); 4523 if (!a->sf) { 4524 imm = (uint32_t)imm; 4525 } 4526 tcg_gen_movi_i64(cpu_reg(s, a->rd), imm); 4527 return true; 4528 } 4529 4530 static bool trans_MOVK(DisasContext *s, arg_movw *a) 4531 { 4532 int pos = a->hw << 4; 4533 TCGv_i64 tcg_rd, tcg_im; 4534 4535 tcg_rd = cpu_reg(s, a->rd); 4536 tcg_im = tcg_constant_i64(a->imm); 4537 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16); 4538 if (!a->sf) { 4539 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4540 } 4541 return true; 4542 } 4543 4544 /* 4545 * Bitfield 4546 */ 4547 4548 static bool trans_SBFM(DisasContext *s, arg_SBFM *a) 4549 { 4550 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4551 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4552 unsigned int bitsize = a->sf ? 64 : 32; 4553 unsigned int ri = a->immr; 4554 unsigned int si = a->imms; 4555 unsigned int pos, len; 4556 4557 if (si >= ri) { 4558 /* Wd<s-r:0> = Wn<s:r> */ 4559 len = (si - ri) + 1; 4560 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len); 4561 if (!a->sf) { 4562 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4563 } 4564 } else { 4565 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4566 len = si + 1; 4567 pos = (bitsize - ri) & (bitsize - 1); 4568 4569 if (len < ri) { 4570 /* 4571 * Sign extend the destination field from len to fill the 4572 * balance of the word. Let the deposit below insert all 4573 * of those sign bits. 4574 */ 4575 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len); 4576 len = ri; 4577 } 4578 4579 /* 4580 * We start with zero, and we haven't modified any bits outside 4581 * bitsize, therefore no final zero-extension is unneeded for !sf. 4582 */ 4583 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4584 } 4585 return true; 4586 } 4587 4588 static bool trans_UBFM(DisasContext *s, arg_UBFM *a) 4589 { 4590 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4591 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4592 unsigned int bitsize = a->sf ? 64 : 32; 4593 unsigned int ri = a->immr; 4594 unsigned int si = a->imms; 4595 unsigned int pos, len; 4596 4597 tcg_rd = cpu_reg(s, a->rd); 4598 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4599 4600 if (si >= ri) { 4601 /* Wd<s-r:0> = Wn<s:r> */ 4602 len = (si - ri) + 1; 4603 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); 4604 } else { 4605 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4606 len = si + 1; 4607 pos = (bitsize - ri) & (bitsize - 1); 4608 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4609 } 4610 return true; 4611 } 4612 4613 static bool trans_BFM(DisasContext *s, arg_BFM *a) 4614 { 4615 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4616 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4617 unsigned int bitsize = a->sf ? 
64 : 32; 4618 unsigned int ri = a->immr; 4619 unsigned int si = a->imms; 4620 unsigned int pos, len; 4621 4622 tcg_rd = cpu_reg(s, a->rd); 4623 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4624 4625 if (si >= ri) { 4626 /* Wd<s-r:0> = Wn<s:r> */ 4627 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); 4628 len = (si - ri) + 1; 4629 pos = 0; 4630 } else { 4631 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4632 len = si + 1; 4633 pos = (bitsize - ri) & (bitsize - 1); 4634 } 4635 4636 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 4637 if (!a->sf) { 4638 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4639 } 4640 return true; 4641 } 4642 4643 static bool trans_EXTR(DisasContext *s, arg_extract *a) 4644 { 4645 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 4646 4647 tcg_rd = cpu_reg(s, a->rd); 4648 4649 if (unlikely(a->imm == 0)) { 4650 /* 4651 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 4652 * so an extract from bit 0 is a special case. 4653 */ 4654 if (a->sf) { 4655 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm)); 4656 } else { 4657 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm)); 4658 } 4659 } else { 4660 tcg_rm = cpu_reg(s, a->rm); 4661 tcg_rn = cpu_reg(s, a->rn); 4662 4663 if (a->sf) { 4664 /* Specialization to ROR happens in EXTRACT2. */ 4665 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm); 4666 } else { 4667 TCGv_i32 t0 = tcg_temp_new_i32(); 4668 4669 tcg_gen_extrl_i64_i32(t0, tcg_rm); 4670 if (a->rm == a->rn) { 4671 tcg_gen_rotri_i32(t0, t0, a->imm); 4672 } else { 4673 TCGv_i32 t1 = tcg_temp_new_i32(); 4674 tcg_gen_extrl_i64_i32(t1, tcg_rn); 4675 tcg_gen_extract2_i32(t0, t0, t1, a->imm); 4676 } 4677 tcg_gen_extu_i32_i64(tcg_rd, t0); 4678 } 4679 } 4680 return true; 4681 } 4682 4683 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a) 4684 { 4685 if (fp_access_check(s)) { 4686 int len = (a->len + 1) * 16; 4687 4688 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd), 4689 vec_full_reg_offset(s, a->rm), tcg_env, 4690 a->q ? 16 : 8, vec_full_reg_size(s), 4691 (len << 6) | (a->tbx << 5) | a->rn, 4692 gen_helper_simd_tblx); 4693 } 4694 return true; 4695 } 4696 4697 typedef int simd_permute_idx_fn(int i, int part, int elements); 4698 4699 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a, 4700 simd_permute_idx_fn *fn, int part) 4701 { 4702 MemOp esz = a->esz; 4703 int datasize = a->q ? 16 : 8; 4704 int elements = datasize >> esz; 4705 TCGv_i64 tcg_res[2], tcg_ele; 4706 4707 if (esz == MO_64 && !a->q) { 4708 return false; 4709 } 4710 if (!fp_access_check(s)) { 4711 return true; 4712 } 4713 4714 tcg_res[0] = tcg_temp_new_i64(); 4715 tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL; 4716 tcg_ele = tcg_temp_new_i64(); 4717 4718 for (int i = 0; i < elements; i++) { 4719 int o, w, idx; 4720 4721 idx = fn(i, part, elements); 4722 read_vec_element(s, tcg_ele, (idx & elements ? 
a->rm : a->rn), 4723 idx & (elements - 1), esz); 4724 4725 w = (i << (esz + 3)) / 64; 4726 o = (i << (esz + 3)) % 64; 4727 if (o == 0) { 4728 tcg_gen_mov_i64(tcg_res[w], tcg_ele); 4729 } else { 4730 tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz); 4731 } 4732 } 4733 4734 for (int i = a->q; i >= 0; --i) { 4735 write_vec_element(s, tcg_res[i], a->rd, i, MO_64); 4736 } 4737 clear_vec_high(s, a->q, a->rd); 4738 return true; 4739 } 4740 4741 static int permute_load_uzp(int i, int part, int elements) 4742 { 4743 return 2 * i + part; 4744 } 4745 4746 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0) 4747 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1) 4748 4749 static int permute_load_trn(int i, int part, int elements) 4750 { 4751 return (i & 1) * elements + (i & ~1) + part; 4752 } 4753 4754 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0) 4755 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1) 4756 4757 static int permute_load_zip(int i, int part, int elements) 4758 { 4759 return (i & 1) * elements + ((part * elements + i) >> 1); 4760 } 4761 4762 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0) 4763 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1) 4764 4765 /* 4766 * Cryptographic AES, SHA, SHA512 4767 */ 4768 4769 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese) 4770 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd) 4771 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc) 4772 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc) 4773 4774 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c) 4775 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p) 4776 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m) 4777 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0) 4778 4779 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h) 4780 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2) 4781 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1) 4782 4783 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h) 4784 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1) 4785 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0) 4786 4787 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h) 4788 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2) 4789 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1) 4790 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1) 4791 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1) 4792 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2) 4793 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey) 4794 4795 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0) 4796 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e) 4797 4798 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3) 4799 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax) 4800 4801 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a) 4802 { 4803 if (!dc_isar_feature(aa64_sm3, s)) { 4804 return false; 4805 } 4806 if (fp_access_check(s)) { 4807 
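/*
 * This computes ROL32(ROL32(op1, 12) + op2 + op3, 7) on element 3 of
 * each source; the left-rotates are written as right-rotates by 20
 * and by 25 below.
 */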
TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 4808 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 4809 TCGv_i32 tcg_op3 = tcg_temp_new_i32(); 4810 TCGv_i32 tcg_res = tcg_temp_new_i32(); 4811 unsigned vsz, dofs; 4812 4813 read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32); 4814 read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32); 4815 read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32); 4816 4817 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); 4818 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); 4819 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); 4820 tcg_gen_rotri_i32(tcg_res, tcg_res, 25); 4821 4822 /* Clear the whole register first, then store bits [127:96]. */ 4823 vsz = vec_full_reg_size(s); 4824 dofs = vec_full_reg_offset(s, a->rd); 4825 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 4826 write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32); 4827 } 4828 return true; 4829 } 4830 4831 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn) 4832 { 4833 if (fp_access_check(s)) { 4834 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn); 4835 } 4836 return true; 4837 } 4838 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a) 4839 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b) 4840 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a) 4841 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b) 4842 4843 static bool trans_XAR(DisasContext *s, arg_XAR *a) 4844 { 4845 if (!dc_isar_feature(aa64_sha3, s)) { 4846 return false; 4847 } 4848 if (fp_access_check(s)) { 4849 gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd), 4850 vec_full_reg_offset(s, a->rn), 4851 vec_full_reg_offset(s, a->rm), a->imm, 16, 4852 vec_full_reg_size(s)); 4853 } 4854 return true; 4855 } 4856 4857 /* 4858 * Advanced SIMD copy 4859 */ 4860 4861 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx) 4862 { 4863 unsigned esz = ctz32(imm); 4864 if (esz <= MO_64) { 4865 *pesz = esz; 4866 *pidx = imm >> (esz + 1); 4867 return true; 4868 } 4869 return false; 4870 } 4871 4872 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a) 4873 { 4874 MemOp esz; 4875 unsigned idx; 4876 4877 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4878 return false; 4879 } 4880 if (fp_access_check(s)) { 4881 /* 4882 * This instruction just extracts the specified element and 4883 * zero-extends it into the bottom of the destination register. 4884 */ 4885 TCGv_i64 tmp = tcg_temp_new_i64(); 4886 read_vec_element(s, tmp, a->rn, idx, esz); 4887 write_fp_dreg(s, a->rd, tmp); 4888 } 4889 return true; 4890 } 4891 4892 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a) 4893 { 4894 MemOp esz; 4895 unsigned idx; 4896 4897 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4898 return false; 4899 } 4900 if (esz == MO_64 && !a->q) { 4901 return false; 4902 } 4903 if (fp_access_check(s)) { 4904 tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd), 4905 vec_reg_offset(s, a->rn, idx, esz), 4906 a->q ? 16 : 8, vec_full_reg_size(s)); 4907 } 4908 return true; 4909 } 4910 4911 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a) 4912 { 4913 MemOp esz; 4914 unsigned idx; 4915 4916 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4917 return false; 4918 } 4919 if (esz == MO_64 && !a->q) { 4920 return false; 4921 } 4922 if (fp_access_check(s)) { 4923 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 4924 a->q ? 
16 : 8, vec_full_reg_size(s), 4925 cpu_reg(s, a->rn)); 4926 } 4927 return true; 4928 } 4929 4930 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed) 4931 { 4932 MemOp esz; 4933 unsigned idx; 4934 4935 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4936 return false; 4937 } 4938 if (is_signed) { 4939 if (esz == MO_64 || (esz == MO_32 && !a->q)) { 4940 return false; 4941 } 4942 } else { 4943 if (esz == MO_64 ? !a->q : a->q) { 4944 return false; 4945 } 4946 } 4947 if (fp_access_check(s)) { 4948 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4949 read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed); 4950 if (is_signed && !a->q) { 4951 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4952 } 4953 } 4954 return true; 4955 } 4956 4957 TRANS(SMOV, do_smov_umov, a, MO_SIGN) 4958 TRANS(UMOV, do_smov_umov, a, 0) 4959 4960 static bool trans_INS_general(DisasContext *s, arg_INS_general *a) 4961 { 4962 MemOp esz; 4963 unsigned idx; 4964 4965 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4966 return false; 4967 } 4968 if (fp_access_check(s)) { 4969 write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz); 4970 clear_vec_high(s, true, a->rd); 4971 } 4972 return true; 4973 } 4974 4975 static bool trans_INS_element(DisasContext *s, arg_INS_element *a) 4976 { 4977 MemOp esz; 4978 unsigned didx, sidx; 4979 4980 if (!decode_esz_idx(a->di, &esz, &didx)) { 4981 return false; 4982 } 4983 sidx = a->si >> esz; 4984 if (fp_access_check(s)) { 4985 TCGv_i64 tmp = tcg_temp_new_i64(); 4986 4987 read_vec_element(s, tmp, a->rn, sidx, esz); 4988 write_vec_element(s, tmp, a->rd, didx, esz); 4989 4990 /* INS is considered a 128-bit write for SVE. */ 4991 clear_vec_high(s, true, a->rd); 4992 } 4993 return true; 4994 } 4995 4996 /* 4997 * Advanced SIMD three same 4998 */ 4999 5000 typedef struct FPScalar { 5001 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5002 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5003 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); 5004 } FPScalar; 5005 5006 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) 5007 { 5008 switch (a->esz) { 5009 case MO_64: 5010 if (fp_access_check(s)) { 5011 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5012 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5013 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 5014 write_fp_dreg(s, a->rd, t0); 5015 } 5016 break; 5017 case MO_32: 5018 if (fp_access_check(s)) { 5019 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5020 TCGv_i32 t1 = read_fp_sreg(s, a->rm); 5021 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 5022 write_fp_sreg(s, a->rd, t0); 5023 } 5024 break; 5025 case MO_16: 5026 if (!dc_isar_feature(aa64_fp16, s)) { 5027 return false; 5028 } 5029 if (fp_access_check(s)) { 5030 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5031 TCGv_i32 t1 = read_fp_hreg(s, a->rm); 5032 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); 5033 write_fp_sreg(s, a->rd, t0); 5034 } 5035 break; 5036 default: 5037 return false; 5038 } 5039 return true; 5040 } 5041 5042 static const FPScalar f_scalar_fadd = { 5043 gen_helper_vfp_addh, 5044 gen_helper_vfp_adds, 5045 gen_helper_vfp_addd, 5046 }; 5047 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd) 5048 5049 static const FPScalar f_scalar_fsub = { 5050 gen_helper_vfp_subh, 5051 gen_helper_vfp_subs, 5052 gen_helper_vfp_subd, 5053 }; 5054 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub) 5055 5056 static const FPScalar f_scalar_fdiv = { 5057 gen_helper_vfp_divh, 5058 gen_helper_vfp_divs, 5059 gen_helper_vfp_divd, 5060 }; 5061 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv) 5062 5063 static const 
FPScalar f_scalar_fmul = { 5064 gen_helper_vfp_mulh, 5065 gen_helper_vfp_muls, 5066 gen_helper_vfp_muld, 5067 }; 5068 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul) 5069 5070 static const FPScalar f_scalar_fmax = { 5071 gen_helper_advsimd_maxh, 5072 gen_helper_vfp_maxs, 5073 gen_helper_vfp_maxd, 5074 }; 5075 TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax) 5076 5077 static const FPScalar f_scalar_fmin = { 5078 gen_helper_advsimd_minh, 5079 gen_helper_vfp_mins, 5080 gen_helper_vfp_mind, 5081 }; 5082 TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin) 5083 5084 static const FPScalar f_scalar_fmaxnm = { 5085 gen_helper_advsimd_maxnumh, 5086 gen_helper_vfp_maxnums, 5087 gen_helper_vfp_maxnumd, 5088 }; 5089 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm) 5090 5091 static const FPScalar f_scalar_fminnm = { 5092 gen_helper_advsimd_minnumh, 5093 gen_helper_vfp_minnums, 5094 gen_helper_vfp_minnumd, 5095 }; 5096 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm) 5097 5098 static const FPScalar f_scalar_fmulx = { 5099 gen_helper_advsimd_mulxh, 5100 gen_helper_vfp_mulxs, 5101 gen_helper_vfp_mulxd, 5102 }; 5103 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx) 5104 5105 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5106 { 5107 gen_helper_vfp_mulh(d, n, m, s); 5108 gen_vfp_negh(d, d); 5109 } 5110 5111 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5112 { 5113 gen_helper_vfp_muls(d, n, m, s); 5114 gen_vfp_negs(d, d); 5115 } 5116 5117 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5118 { 5119 gen_helper_vfp_muld(d, n, m, s); 5120 gen_vfp_negd(d, d); 5121 } 5122 5123 static const FPScalar f_scalar_fnmul = { 5124 gen_fnmul_h, 5125 gen_fnmul_s, 5126 gen_fnmul_d, 5127 }; 5128 TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul) 5129 5130 static const FPScalar f_scalar_fcmeq = { 5131 gen_helper_advsimd_ceq_f16, 5132 gen_helper_neon_ceq_f32, 5133 gen_helper_neon_ceq_f64, 5134 }; 5135 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq) 5136 5137 static const FPScalar f_scalar_fcmge = { 5138 gen_helper_advsimd_cge_f16, 5139 gen_helper_neon_cge_f32, 5140 gen_helper_neon_cge_f64, 5141 }; 5142 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge) 5143 5144 static const FPScalar f_scalar_fcmgt = { 5145 gen_helper_advsimd_cgt_f16, 5146 gen_helper_neon_cgt_f32, 5147 gen_helper_neon_cgt_f64, 5148 }; 5149 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt) 5150 5151 static const FPScalar f_scalar_facge = { 5152 gen_helper_advsimd_acge_f16, 5153 gen_helper_neon_acge_f32, 5154 gen_helper_neon_acge_f64, 5155 }; 5156 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge) 5157 5158 static const FPScalar f_scalar_facgt = { 5159 gen_helper_advsimd_acgt_f16, 5160 gen_helper_neon_acgt_f32, 5161 gen_helper_neon_acgt_f64, 5162 }; 5163 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt) 5164 5165 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5166 { 5167 gen_helper_vfp_subh(d, n, m, s); 5168 gen_vfp_absh(d, d); 5169 } 5170 5171 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5172 { 5173 gen_helper_vfp_subs(d, n, m, s); 5174 gen_vfp_abss(d, d); 5175 } 5176 5177 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5178 { 5179 gen_helper_vfp_subd(d, n, m, s); 5180 gen_vfp_absd(d, d); 5181 } 5182 5183 static const FPScalar f_scalar_fabd = { 5184 gen_fabd_h, 5185 gen_fabd_s, 5186 gen_fabd_d, 5187 }; 5188 TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) 5189 5190 static const FPScalar f_scalar_frecps = { 5191 
gen_helper_recpsf_f16, 5192 gen_helper_recpsf_f32, 5193 gen_helper_recpsf_f64, 5194 }; 5195 TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps) 5196 5197 static const FPScalar f_scalar_frsqrts = { 5198 gen_helper_rsqrtsf_f16, 5199 gen_helper_rsqrtsf_f32, 5200 gen_helper_rsqrtsf_f64, 5201 }; 5202 TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) 5203 5204 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a, 5205 MemOp sgn_n, MemOp sgn_m, 5206 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp), 5207 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 5208 { 5209 TCGv_i64 t0, t1, t2, qc; 5210 MemOp esz = a->esz; 5211 5212 if (!fp_access_check(s)) { 5213 return true; 5214 } 5215 5216 t0 = tcg_temp_new_i64(); 5217 t1 = tcg_temp_new_i64(); 5218 t2 = tcg_temp_new_i64(); 5219 qc = tcg_temp_new_i64(); 5220 read_vec_element(s, t1, a->rn, 0, esz | sgn_n); 5221 read_vec_element(s, t2, a->rm, 0, esz | sgn_m); 5222 tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5223 5224 if (esz == MO_64) { 5225 gen_d(t0, qc, t1, t2); 5226 } else { 5227 gen_bhs(t0, qc, t1, t2, esz); 5228 tcg_gen_ext_i64(t0, t0, esz); 5229 } 5230 5231 write_fp_dreg(s, a->rd, t0); 5232 tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5233 return true; 5234 } 5235 5236 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d) 5237 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d) 5238 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d) 5239 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d) 5240 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d) 5241 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d) 5242 5243 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a, 5244 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64)) 5245 { 5246 if (fp_access_check(s)) { 5247 TCGv_i64 t0 = tcg_temp_new_i64(); 5248 TCGv_i64 t1 = tcg_temp_new_i64(); 5249 5250 read_vec_element(s, t0, a->rn, 0, MO_64); 5251 read_vec_element(s, t1, a->rm, 0, MO_64); 5252 fn(t0, t0, t1); 5253 write_fp_dreg(s, a->rd, t0); 5254 } 5255 return true; 5256 } 5257 5258 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64) 5259 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64) 5260 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64) 5261 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64) 5262 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64) 5263 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64) 5264 5265 typedef struct ENVScalar2 { 5266 NeonGenTwoOpEnvFn *gen_bhs[3]; 5267 NeonGenTwo64OpEnvFn *gen_d; 5268 } ENVScalar2; 5269 5270 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f) 5271 { 5272 if (!fp_access_check(s)) { 5273 return true; 5274 } 5275 if (a->esz == MO_64) { 5276 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5277 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5278 f->gen_d(t0, tcg_env, t0, t1); 5279 write_fp_dreg(s, a->rd, t0); 5280 } else { 5281 TCGv_i32 t0 = tcg_temp_new_i32(); 5282 TCGv_i32 t1 = tcg_temp_new_i32(); 5283 5284 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5285 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5286 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 5287 write_fp_sreg(s, a->rd, t0); 5288 } 5289 return true; 5290 } 5291 5292 static const ENVScalar2 f_scalar_sqshl = { 5293 { gen_helper_neon_qshl_s8, 5294 gen_helper_neon_qshl_s16, 5295 gen_helper_neon_qshl_s32 }, 5296 gen_helper_neon_qshl_s64, 5297 }; 5298 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl) 5299 5300 
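/*
 * As with f_scalar_sqshl above, each ENVScalar2 table provides the
 * byte/half/word helpers in gen_bhs[] (indexed by MemOp size) and the
 * 64-bit helper in gen_d; do_env_scalar2 picks between them on esz.
 */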
static const ENVScalar2 f_scalar_uqshl = { 5301 { gen_helper_neon_qshl_u8, 5302 gen_helper_neon_qshl_u16, 5303 gen_helper_neon_qshl_u32 }, 5304 gen_helper_neon_qshl_u64, 5305 }; 5306 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl) 5307 5308 static const ENVScalar2 f_scalar_sqrshl = { 5309 { gen_helper_neon_qrshl_s8, 5310 gen_helper_neon_qrshl_s16, 5311 gen_helper_neon_qrshl_s32 }, 5312 gen_helper_neon_qrshl_s64, 5313 }; 5314 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl) 5315 5316 static const ENVScalar2 f_scalar_uqrshl = { 5317 { gen_helper_neon_qrshl_u8, 5318 gen_helper_neon_qrshl_u16, 5319 gen_helper_neon_qrshl_u32 }, 5320 gen_helper_neon_qrshl_u64, 5321 }; 5322 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl) 5323 5324 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a, 5325 const ENVScalar2 *f) 5326 { 5327 if (a->esz == MO_16 || a->esz == MO_32) { 5328 return do_env_scalar2(s, a, f); 5329 } 5330 return false; 5331 } 5332 5333 static const ENVScalar2 f_scalar_sqdmulh = { 5334 { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 } 5335 }; 5336 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh) 5337 5338 static const ENVScalar2 f_scalar_sqrdmulh = { 5339 { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 } 5340 }; 5341 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh) 5342 5343 typedef struct ENVScalar3 { 5344 NeonGenThreeOpEnvFn *gen_hs[2]; 5345 } ENVScalar3; 5346 5347 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a, 5348 const ENVScalar3 *f) 5349 { 5350 TCGv_i32 t0, t1, t2; 5351 5352 if (a->esz != MO_16 && a->esz != MO_32) { 5353 return false; 5354 } 5355 if (!fp_access_check(s)) { 5356 return true; 5357 } 5358 5359 t0 = tcg_temp_new_i32(); 5360 t1 = tcg_temp_new_i32(); 5361 t2 = tcg_temp_new_i32(); 5362 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5363 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5364 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 5365 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 5366 write_fp_sreg(s, a->rd, t0); 5367 return true; 5368 } 5369 5370 static const ENVScalar3 f_scalar_sqrdmlah = { 5371 { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 } 5372 }; 5373 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah) 5374 5375 static const ENVScalar3 f_scalar_sqrdmlsh = { 5376 { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 } 5377 }; 5378 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh) 5379 5380 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond) 5381 { 5382 if (fp_access_check(s)) { 5383 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5384 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5385 tcg_gen_negsetcond_i64(cond, t0, t0, t1); 5386 write_fp_dreg(s, a->rd, t0); 5387 } 5388 return true; 5389 } 5390 5391 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT) 5392 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU) 5393 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE) 5394 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) 5395 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) 5396 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) 5397 5398 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, 5399 gen_helper_gvec_3_ptr * const fns[3]) 5400 { 5401 MemOp esz = a->esz; 5402 5403 switch (esz) { 5404 case MO_64: 5405 if (!a->q) { 5406 return false; 5407 } 5408 break; 5409 case MO_32: 5410 break; 5411 case MO_16: 5412 if (!dc_isar_feature(aa64_fp16, s)) { 5413 return false; 5414 } 5415 break; 5416 default: 5417 return false; 5418 } 5419 if (fp_access_check(s)) { 5420 
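/*
 * fns[] holds the half/single/double helpers in that order, hence
 * the esz - 1 index below; the esz == MO_16 flag requests the
 * half-precision float_status for the operation.
 */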
gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 5421 esz == MO_16, data, fns[esz - 1]); 5422 } 5423 return true; 5424 } 5425 5426 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { 5427 gen_helper_gvec_fadd_h, 5428 gen_helper_gvec_fadd_s, 5429 gen_helper_gvec_fadd_d, 5430 }; 5431 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd) 5432 5433 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = { 5434 gen_helper_gvec_fsub_h, 5435 gen_helper_gvec_fsub_s, 5436 gen_helper_gvec_fsub_d, 5437 }; 5438 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub) 5439 5440 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = { 5441 gen_helper_gvec_fdiv_h, 5442 gen_helper_gvec_fdiv_s, 5443 gen_helper_gvec_fdiv_d, 5444 }; 5445 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv) 5446 5447 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = { 5448 gen_helper_gvec_fmul_h, 5449 gen_helper_gvec_fmul_s, 5450 gen_helper_gvec_fmul_d, 5451 }; 5452 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul) 5453 5454 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { 5455 gen_helper_gvec_fmax_h, 5456 gen_helper_gvec_fmax_s, 5457 gen_helper_gvec_fmax_d, 5458 }; 5459 TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax) 5460 5461 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { 5462 gen_helper_gvec_fmin_h, 5463 gen_helper_gvec_fmin_s, 5464 gen_helper_gvec_fmin_d, 5465 }; 5466 TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin) 5467 5468 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { 5469 gen_helper_gvec_fmaxnum_h, 5470 gen_helper_gvec_fmaxnum_s, 5471 gen_helper_gvec_fmaxnum_d, 5472 }; 5473 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm) 5474 5475 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = { 5476 gen_helper_gvec_fminnum_h, 5477 gen_helper_gvec_fminnum_s, 5478 gen_helper_gvec_fminnum_d, 5479 }; 5480 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm) 5481 5482 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = { 5483 gen_helper_gvec_fmulx_h, 5484 gen_helper_gvec_fmulx_s, 5485 gen_helper_gvec_fmulx_d, 5486 }; 5487 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx) 5488 5489 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = { 5490 gen_helper_gvec_vfma_h, 5491 gen_helper_gvec_vfma_s, 5492 gen_helper_gvec_vfma_d, 5493 }; 5494 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla) 5495 5496 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { 5497 gen_helper_gvec_vfms_h, 5498 gen_helper_gvec_vfms_s, 5499 gen_helper_gvec_vfms_d, 5500 }; 5501 TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls) 5502 5503 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { 5504 gen_helper_gvec_fceq_h, 5505 gen_helper_gvec_fceq_s, 5506 gen_helper_gvec_fceq_d, 5507 }; 5508 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq) 5509 5510 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = { 5511 gen_helper_gvec_fcge_h, 5512 gen_helper_gvec_fcge_s, 5513 gen_helper_gvec_fcge_d, 5514 }; 5515 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge) 5516 5517 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = { 5518 gen_helper_gvec_fcgt_h, 5519 gen_helper_gvec_fcgt_s, 5520 gen_helper_gvec_fcgt_d, 5521 }; 5522 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt) 5523 5524 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = { 5525 gen_helper_gvec_facge_h, 5526 gen_helper_gvec_facge_s, 5527 gen_helper_gvec_facge_d, 5528 }; 5529 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge) 5530 5531 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = { 5532 gen_helper_gvec_facgt_h, 5533 
gen_helper_gvec_facgt_s, 5534 gen_helper_gvec_facgt_d, 5535 }; 5536 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt) 5537 5538 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { 5539 gen_helper_gvec_fabd_h, 5540 gen_helper_gvec_fabd_s, 5541 gen_helper_gvec_fabd_d, 5542 }; 5543 TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd) 5544 5545 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { 5546 gen_helper_gvec_recps_h, 5547 gen_helper_gvec_recps_s, 5548 gen_helper_gvec_recps_d, 5549 }; 5550 TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps) 5551 5552 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { 5553 gen_helper_gvec_rsqrts_h, 5554 gen_helper_gvec_rsqrts_s, 5555 gen_helper_gvec_rsqrts_d, 5556 }; 5557 TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts) 5558 5559 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { 5560 gen_helper_gvec_faddp_h, 5561 gen_helper_gvec_faddp_s, 5562 gen_helper_gvec_faddp_d, 5563 }; 5564 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp) 5565 5566 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { 5567 gen_helper_gvec_fmaxp_h, 5568 gen_helper_gvec_fmaxp_s, 5569 gen_helper_gvec_fmaxp_d, 5570 }; 5571 TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp) 5572 5573 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { 5574 gen_helper_gvec_fminp_h, 5575 gen_helper_gvec_fminp_s, 5576 gen_helper_gvec_fminp_d, 5577 }; 5578 TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp) 5579 5580 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { 5581 gen_helper_gvec_fmaxnump_h, 5582 gen_helper_gvec_fmaxnump_s, 5583 gen_helper_gvec_fmaxnump_d, 5584 }; 5585 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp) 5586 5587 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = { 5588 gen_helper_gvec_fminnump_h, 5589 gen_helper_gvec_fminnump_s, 5590 gen_helper_gvec_fminnump_d, 5591 }; 5592 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp) 5593 5594 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) 5595 { 5596 if (fp_access_check(s)) { 5597 int data = (is_2 << 1) | is_s; 5598 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 5599 vec_full_reg_offset(s, a->rn), 5600 vec_full_reg_offset(s, a->rm), tcg_env, 5601 a->q ? 
16 : 8, vec_full_reg_size(s), 5602 data, gen_helper_gvec_fmlal_a64); 5603 } 5604 return true; 5605 } 5606 5607 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false) 5608 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false) 5609 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true) 5610 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true) 5611 5612 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp) 5613 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp) 5614 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp) 5615 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp) 5616 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp) 5617 5618 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and) 5619 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc) 5620 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or) 5621 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc) 5622 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor) 5623 5624 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c) 5625 { 5626 if (fp_access_check(s)) { 5627 gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0); 5628 } 5629 return true; 5630 } 5631 5632 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm) 5633 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd) 5634 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn) 5635 5636 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc) 5637 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc) 5638 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc) 5639 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc) 5640 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc) 5641 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc) 5642 5643 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl) 5644 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl) 5645 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl) 5646 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl) 5647 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl) 5648 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl) 5649 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl) 5650 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl) 5651 5652 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add) 5653 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub) 5654 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd) 5655 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd) 5656 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub) 5657 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub) 5658 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd) 5659 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd) 5660 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax) 5661 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax) 5662 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin) 5663 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin) 5664 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba) 5665 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba) 5666 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd) 5667 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd) 5668 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul) 5669 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b) 5670 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla) 5671 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls) 5672 5673 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond) 5674 { 5675 if (a->esz == MO_64 && !a->q) { 5676 return false; 5677 } 5678 if (fp_access_check(s)) { 5679 tcg_gen_gvec_cmp(cond, a->esz, 5680 vec_full_reg_offset(s, a->rd), 5681 vec_full_reg_offset(s, a->rn), 5682 vec_full_reg_offset(s, a->rm), 5683 a->q ? 
16 : 8, vec_full_reg_size(s)); 5684 } 5685 return true; 5686 } 5687 5688 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT) 5689 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU) 5690 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE) 5691 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU) 5692 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ) 5693 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst) 5694 5695 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc) 5696 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc) 5697 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc) 5698 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc) 5699 5700 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a, 5701 gen_helper_gvec_4 *fn) 5702 { 5703 if (fp_access_check(s)) { 5704 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 5705 } 5706 return true; 5707 } 5708 5709 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a, 5710 gen_helper_gvec_4_ptr *fn) 5711 { 5712 if (fp_access_check(s)) { 5713 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 5714 } 5715 return true; 5716 } 5717 5718 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b) 5719 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b) 5720 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b) 5721 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot) 5722 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla) 5723 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b) 5724 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b) 5725 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b) 5726 5727 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) 5728 { 5729 if (!dc_isar_feature(aa64_bf16, s)) { 5730 return false; 5731 } 5732 if (fp_access_check(s)) { 5733 /* Q bit selects BFMLALB vs BFMLALT. */ 5734 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q, 5735 gen_helper_gvec_bfmlal); 5736 } 5737 return true; 5738 } 5739 5740 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { 5741 gen_helper_gvec_fcaddh, 5742 gen_helper_gvec_fcadds, 5743 gen_helper_gvec_fcaddd, 5744 }; 5745 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd) 5746 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd) 5747 5748 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) 5749 { 5750 gen_helper_gvec_4_ptr *fn; 5751 5752 if (!dc_isar_feature(aa64_fcma, s)) { 5753 return false; 5754 } 5755 switch (a->esz) { 5756 case MO_64: 5757 if (!a->q) { 5758 return false; 5759 } 5760 fn = gen_helper_gvec_fcmlad; 5761 break; 5762 case MO_32: 5763 fn = gen_helper_gvec_fcmlas; 5764 break; 5765 case MO_16: 5766 if (!dc_isar_feature(aa64_fp16, s)) { 5767 return false; 5768 } 5769 fn = gen_helper_gvec_fcmlah; 5770 break; 5771 default: 5772 return false; 5773 } 5774 if (fp_access_check(s)) { 5775 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 5776 a->esz == MO_16, a->rot, fn); 5777 } 5778 return true; 5779 } 5780 5781 /* 5782 * Widening vector x vector/indexed. 5783 * 5784 * These read from the top or bottom half of a 128-bit vector. 5785 * After widening, optionally accumulate with a 128-bit vector. 5786 * Implement these inline, as the number of elements are limited 5787 * and the related SVE and SME operations on larger vectors use 5788 * even/odd elements instead of top/bottom half. 
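 *
 * For example, SMLAL Vd.4S, Vn.4H, Vm.4H widens the low four 16-bit
 * elements of Vn and Vm, multiplies each pair, and accumulates the
 * 32-bit products into Vd; the "2" forms (SMLAL2 etc) use the upper
 * four elements instead.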
5789 * 5790 * If idx >= 0, operand 2 is indexed, otherwise vector. 5791 * If acc, operand 0 is loaded with rd. 5792 */ 5793 5794 /* For low half, iterating up. */ 5795 static bool do_3op_widening(DisasContext *s, MemOp memop, int top, 5796 int rd, int rn, int rm, int idx, 5797 NeonGenTwo64OpFn *fn, bool acc) 5798 { 5799 TCGv_i64 tcg_op0 = tcg_temp_new_i64(); 5800 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 5801 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 5802 MemOp esz = memop & MO_SIZE; 5803 int half = 8 >> esz; 5804 int top_swap, top_half; 5805 5806 /* There are no 64x64->128 bit operations. */ 5807 if (esz >= MO_64) { 5808 return false; 5809 } 5810 if (!fp_access_check(s)) { 5811 return true; 5812 } 5813 5814 if (idx >= 0) { 5815 read_vec_element(s, tcg_op2, rm, idx, memop); 5816 } 5817 5818 /* 5819 * For top half inputs, iterate forward; backward for bottom half. 5820 * This means the store to the destination will not occur until 5821 * overlapping inputs are consumed. 5822 * Use top_swap to conditionally invert the forward iteration index. 5823 */ 5824 top_swap = top ? 0 : half - 1; 5825 top_half = top ? half : 0; 5826 5827 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 5828 int elt = elt_fwd ^ top_swap; 5829 5830 read_vec_element(s, tcg_op1, rn, elt + top_half, memop); 5831 if (idx < 0) { 5832 read_vec_element(s, tcg_op2, rm, elt + top_half, memop); 5833 } 5834 if (acc) { 5835 read_vec_element(s, tcg_op0, rd, elt, memop + 1); 5836 } 5837 fn(tcg_op0, tcg_op1, tcg_op2); 5838 write_vec_element(s, tcg_op0, rd, elt, esz + 1); 5839 } 5840 clear_vec_high(s, 1, rd); 5841 return true; 5842 } 5843 5844 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5845 { 5846 TCGv_i64 t = tcg_temp_new_i64(); 5847 tcg_gen_mul_i64(t, n, m); 5848 tcg_gen_add_i64(d, d, t); 5849 } 5850 5851 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5852 { 5853 TCGv_i64 t = tcg_temp_new_i64(); 5854 tcg_gen_mul_i64(t, n, m); 5855 tcg_gen_sub_i64(d, d, t); 5856 } 5857 5858 TRANS(SMULL_v, do_3op_widening, 5859 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5860 tcg_gen_mul_i64, false) 5861 TRANS(UMULL_v, do_3op_widening, 5862 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5863 tcg_gen_mul_i64, false) 5864 TRANS(SMLAL_v, do_3op_widening, 5865 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5866 gen_muladd_i64, true) 5867 TRANS(UMLAL_v, do_3op_widening, 5868 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5869 gen_muladd_i64, true) 5870 TRANS(SMLSL_v, do_3op_widening, 5871 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5872 gen_mulsub_i64, true) 5873 TRANS(UMLSL_v, do_3op_widening, 5874 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5875 gen_mulsub_i64, true) 5876 5877 TRANS(SMULL_vi, do_3op_widening, 5878 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5879 tcg_gen_mul_i64, false) 5880 TRANS(UMULL_vi, do_3op_widening, 5881 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 5882 tcg_gen_mul_i64, false) 5883 TRANS(SMLAL_vi, do_3op_widening, 5884 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5885 gen_muladd_i64, true) 5886 TRANS(UMLAL_vi, do_3op_widening, 5887 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 5888 gen_muladd_i64, true) 5889 TRANS(SMLSL_vi, do_3op_widening, 5890 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5891 gen_mulsub_i64, true) 5892 TRANS(UMLSL_vi, do_3op_widening, 5893 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 5894 gen_mulsub_i64, true) 5895 5896 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5897 { 5898 TCGv_i64 t1 = tcg_temp_new_i64(); 5899 TCGv_i64 t2 = tcg_temp_new_i64(); 5900
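/* Signed absolute difference without a branch: compute both n - m and m - n, then use movcond to keep whichever result is non-negative. */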
5901 tcg_gen_sub_i64(t1, n, m); 5902 tcg_gen_sub_i64(t2, m, n); 5903 tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2); 5904 } 5905 5906 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5907 { 5908 TCGv_i64 t1 = tcg_temp_new_i64(); 5909 TCGv_i64 t2 = tcg_temp_new_i64(); 5910 5911 tcg_gen_sub_i64(t1, n, m); 5912 tcg_gen_sub_i64(t2, m, n); 5913 tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2); 5914 } 5915 5916 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5917 { 5918 TCGv_i64 t = tcg_temp_new_i64(); 5919 gen_sabd_i64(t, n, m); 5920 tcg_gen_add_i64(d, d, t); 5921 } 5922 5923 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5924 { 5925 TCGv_i64 t = tcg_temp_new_i64(); 5926 gen_uabd_i64(t, n, m); 5927 tcg_gen_add_i64(d, d, t); 5928 } 5929 5930 TRANS(SADDL_v, do_3op_widening, 5931 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5932 tcg_gen_add_i64, false) 5933 TRANS(UADDL_v, do_3op_widening, 5934 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5935 tcg_gen_add_i64, false) 5936 TRANS(SSUBL_v, do_3op_widening, 5937 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5938 tcg_gen_sub_i64, false) 5939 TRANS(USUBL_v, do_3op_widening, 5940 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5941 tcg_gen_sub_i64, false) 5942 TRANS(SABDL_v, do_3op_widening, 5943 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5944 gen_sabd_i64, false) 5945 TRANS(UABDL_v, do_3op_widening, 5946 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5947 gen_uabd_i64, false) 5948 TRANS(SABAL_v, do_3op_widening, 5949 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5950 gen_saba_i64, true) 5951 TRANS(UABAL_v, do_3op_widening, 5952 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5953 gen_uaba_i64, true) 5954 5955 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5956 { 5957 tcg_gen_mul_i64(d, n, m); 5958 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d); 5959 } 5960 5961 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5962 { 5963 tcg_gen_mul_i64(d, n, m); 5964 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d); 5965 } 5966 5967 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5968 { 5969 TCGv_i64 t = tcg_temp_new_i64(); 5970 5971 tcg_gen_mul_i64(t, n, m); 5972 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 5973 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 5974 } 5975 5976 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5977 { 5978 TCGv_i64 t = tcg_temp_new_i64(); 5979 5980 tcg_gen_mul_i64(t, n, m); 5981 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 5982 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 5983 } 5984 5985 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5986 { 5987 TCGv_i64 t = tcg_temp_new_i64(); 5988 5989 tcg_gen_mul_i64(t, n, m); 5990 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 5991 tcg_gen_neg_i64(t, t); 5992 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 5993 } 5994 5995 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5996 { 5997 TCGv_i64 t = tcg_temp_new_i64(); 5998 5999 tcg_gen_mul_i64(t, n, m); 6000 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 6001 tcg_gen_neg_i64(t, t); 6002 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 6003 } 6004 6005 TRANS(SQDMULL_v, do_3op_widening, 6006 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6007 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6008 TRANS(SQDMLAL_v, do_3op_widening, 6009 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6010 a->esz == MO_16 ? 
gen_sqdmlal_h : gen_sqdmlal_s, true) 6011 TRANS(SQDMLSL_v, do_3op_widening, 6012 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6013 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6014 6015 TRANS(SQDMULL_vi, do_3op_widening, 6016 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6017 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6018 TRANS(SQDMLAL_vi, do_3op_widening, 6019 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6020 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6021 TRANS(SQDMLSL_vi, do_3op_widening, 6022 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6023 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6024 6025 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a, 6026 MemOp sign, bool sub) 6027 { 6028 TCGv_i64 tcg_op0, tcg_op1; 6029 MemOp esz = a->esz; 6030 int half = 8 >> esz; 6031 bool top = a->q; 6032 int top_swap = top ? 0 : half - 1; 6033 int top_half = top ? half : 0; 6034 6035 /* There are no 64x64->128 bit operations. */ 6036 if (esz >= MO_64) { 6037 return false; 6038 } 6039 if (!fp_access_check(s)) { 6040 return true; 6041 } 6042 tcg_op0 = tcg_temp_new_i64(); 6043 tcg_op1 = tcg_temp_new_i64(); 6044 6045 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6046 int elt = elt_fwd ^ top_swap; 6047 6048 read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign); 6049 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); 6050 if (sub) { 6051 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); 6052 } else { 6053 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); 6054 } 6055 write_vec_element(s, tcg_op0, a->rd, elt, esz + 1); 6056 } 6057 clear_vec_high(s, 1, a->rd); 6058 return true; 6059 } 6060 6061 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false) 6062 TRANS(UADDW, do_addsub_wide, a, 0, false) 6063 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true) 6064 TRANS(USUBW, do_addsub_wide, a, 0, true) 6065 6066 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a, 6067 bool sub, bool round) 6068 { 6069 TCGv_i64 tcg_op0, tcg_op1; 6070 MemOp esz = a->esz; 6071 int half = 8 >> esz; 6072 bool top = a->q; 6073 int ebits = 8 << esz; 6074 uint64_t rbit = 1ull << (ebits - 1); 6075 int top_swap, top_half; 6076 6077 /* There are no 128x128->64 bit operations. */ 6078 if (esz >= MO_64) { 6079 return false; 6080 } 6081 if (!fp_access_check(s)) { 6082 return true; 6083 } 6084 tcg_op0 = tcg_temp_new_i64(); 6085 tcg_op1 = tcg_temp_new_i64(); 6086 6087 /* 6088 * For top half inputs, iterate backward; forward for bottom half. 6089 * This means the store to the destination will not occur until 6090 * overlapping inputs are consumed. 6091 */ 6092 top_swap = top ? half - 1 : 0; 6093 top_half = top ?
half : 0; 6094 6095 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6096 int elt = elt_fwd ^ top_swap; 6097 6098 read_vec_element(s, tcg_op1, a->rm, elt, esz + 1); 6099 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); 6100 if (sub) { 6101 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); 6102 } else { 6103 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); 6104 } 6105 if (round) { 6106 tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit); 6107 } 6108 tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits); 6109 write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz); 6110 } 6111 clear_vec_high(s, top, a->rd); 6112 return true; 6113 } 6114 6115 TRANS(ADDHN, do_addsub_highnarrow, a, false, false) 6116 TRANS(SUBHN, do_addsub_highnarrow, a, true, false) 6117 TRANS(RADDHN, do_addsub_highnarrow, a, false, true) 6118 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true) 6119 6120 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn) 6121 { 6122 if (fp_access_check(s)) { 6123 /* The Q field specifies lo/hi half input for these insns. */ 6124 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn); 6125 } 6126 return true; 6127 } 6128 6129 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h) 6130 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q) 6131 6132 /* 6133 * Advanced SIMD scalar/vector x indexed element 6134 */ 6135 6136 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) 6137 { 6138 switch (a->esz) { 6139 case MO_64: 6140 if (fp_access_check(s)) { 6141 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 6142 TCGv_i64 t1 = tcg_temp_new_i64(); 6143 6144 read_vec_element(s, t1, a->rm, a->idx, MO_64); 6145 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 6146 write_fp_dreg(s, a->rd, t0); 6147 } 6148 break; 6149 case MO_32: 6150 if (fp_access_check(s)) { 6151 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 6152 TCGv_i32 t1 = tcg_temp_new_i32(); 6153 6154 read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); 6155 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 6156 write_fp_sreg(s, a->rd, t0); 6157 } 6158 break; 6159 case MO_16: 6160 if (!dc_isar_feature(aa64_fp16, s)) { 6161 return false; 6162 } 6163 if (fp_access_check(s)) { 6164 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 6165 TCGv_i32 t1 = tcg_temp_new_i32(); 6166 6167 read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); 6168 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); 6169 write_fp_sreg(s, a->rd, t0); 6170 } 6171 break; 6172 default: 6173 g_assert_not_reached(); 6174 } 6175 return true; 6176 } 6177 6178 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul) 6179 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx) 6180 6181 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) 6182 { 6183 switch (a->esz) { 6184 case MO_64: 6185 if (fp_access_check(s)) { 6186 TCGv_i64 t0 = read_fp_dreg(s, a->rd); 6187 TCGv_i64 t1 = read_fp_dreg(s, a->rn); 6188 TCGv_i64 t2 = tcg_temp_new_i64(); 6189 6190 read_vec_element(s, t2, a->rm, a->idx, MO_64); 6191 if (neg) { 6192 gen_vfp_negd(t1, t1); 6193 } 6194 gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR)); 6195 write_fp_dreg(s, a->rd, t0); 6196 } 6197 break; 6198 case MO_32: 6199 if (fp_access_check(s)) { 6200 TCGv_i32 t0 = read_fp_sreg(s, a->rd); 6201 TCGv_i32 t1 = read_fp_sreg(s, a->rn); 6202 TCGv_i32 t2 = tcg_temp_new_i32(); 6203 6204 read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); 6205 if (neg) { 6206 gen_vfp_negs(t1, t1); 6207 } 6208 gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR)); 6209 write_fp_sreg(s, a->rd, t0); 6210 } 6211 break; 6212 case 
MO_16: 6213 if (!dc_isar_feature(aa64_fp16, s)) { 6214 return false; 6215 } 6216 if (fp_access_check(s)) { 6217 TCGv_i32 t0 = read_fp_hreg(s, a->rd); 6218 TCGv_i32 t1 = read_fp_hreg(s, a->rn); 6219 TCGv_i32 t2 = tcg_temp_new_i32(); 6220 6221 read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); 6222 if (neg) { 6223 gen_vfp_negh(t1, t1); 6224 } 6225 gen_helper_advsimd_muladdh(t0, t1, t2, t0, 6226 fpstatus_ptr(FPST_FPCR_F16)); 6227 write_fp_sreg(s, a->rd, t0); 6228 } 6229 break; 6230 default: 6231 g_assert_not_reached(); 6232 } 6233 return true; 6234 } 6235 6236 TRANS(FMLA_si, do_fmla_scalar_idx, a, false) 6237 TRANS(FMLS_si, do_fmla_scalar_idx, a, true) 6238 6239 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a, 6240 const ENVScalar2 *f) 6241 { 6242 if (a->esz < MO_16 || a->esz > MO_32) { 6243 return false; 6244 } 6245 if (fp_access_check(s)) { 6246 TCGv_i32 t0 = tcg_temp_new_i32(); 6247 TCGv_i32 t1 = tcg_temp_new_i32(); 6248 6249 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6250 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6251 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 6252 write_fp_sreg(s, a->rd, t0); 6253 } 6254 return true; 6255 } 6256 6257 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh) 6258 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh) 6259 6260 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a, 6261 const ENVScalar3 *f) 6262 { 6263 if (a->esz < MO_16 || a->esz > MO_32) { 6264 return false; 6265 } 6266 if (fp_access_check(s)) { 6267 TCGv_i32 t0 = tcg_temp_new_i32(); 6268 TCGv_i32 t1 = tcg_temp_new_i32(); 6269 TCGv_i32 t2 = tcg_temp_new_i32(); 6270 6271 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6272 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6273 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 6274 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 6275 write_fp_sreg(s, a->rd, t0); 6276 } 6277 return true; 6278 } 6279 6280 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah) 6281 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh) 6282 6283 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a, 6284 NeonGenTwo64OpFn *fn, bool acc) 6285 { 6286 if (fp_access_check(s)) { 6287 TCGv_i64 t0 = tcg_temp_new_i64(); 6288 TCGv_i64 t1 = tcg_temp_new_i64(); 6289 TCGv_i64 t2 = tcg_temp_new_i64(); 6290 unsigned vsz, dofs; 6291 6292 if (acc) { 6293 read_vec_element(s, t0, a->rd, 0, a->esz + 1); 6294 } 6295 read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN); 6296 read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN); 6297 fn(t0, t1, t2); 6298 6299 /* Clear the whole register first, then store scalar. */ 6300 vsz = vec_full_reg_size(s); 6301 dofs = vec_full_reg_offset(s, a->rd); 6302 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 6303 write_vec_element(s, t0, a->rd, 0, a->esz + 1); 6304 } 6305 return true; 6306 } 6307 6308 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a, 6309 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6310 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a, 6311 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6312 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a, 6313 a->esz == MO_16 ? 
gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6314 6315 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6316 gen_helper_gvec_3_ptr * const fns[3]) 6317 { 6318 MemOp esz = a->esz; 6319 6320 switch (esz) { 6321 case MO_64: 6322 if (!a->q) { 6323 return false; 6324 } 6325 break; 6326 case MO_32: 6327 break; 6328 case MO_16: 6329 if (!dc_isar_feature(aa64_fp16, s)) { 6330 return false; 6331 } 6332 break; 6333 default: 6334 g_assert_not_reached(); 6335 } 6336 if (fp_access_check(s)) { 6337 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 6338 esz == MO_16, a->idx, fns[esz - 1]); 6339 } 6340 return true; 6341 } 6342 6343 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = { 6344 gen_helper_gvec_fmul_idx_h, 6345 gen_helper_gvec_fmul_idx_s, 6346 gen_helper_gvec_fmul_idx_d, 6347 }; 6348 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul) 6349 6350 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = { 6351 gen_helper_gvec_fmulx_idx_h, 6352 gen_helper_gvec_fmulx_idx_s, 6353 gen_helper_gvec_fmulx_idx_d, 6354 }; 6355 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) 6356 6357 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) 6358 { 6359 static gen_helper_gvec_4_ptr * const fns[3] = { 6360 gen_helper_gvec_fmla_idx_h, 6361 gen_helper_gvec_fmla_idx_s, 6362 gen_helper_gvec_fmla_idx_d, 6363 }; 6364 MemOp esz = a->esz; 6365 6366 switch (esz) { 6367 case MO_64: 6368 if (!a->q) { 6369 return false; 6370 } 6371 break; 6372 case MO_32: 6373 break; 6374 case MO_16: 6375 if (!dc_isar_feature(aa64_fp16, s)) { 6376 return false; 6377 } 6378 break; 6379 default: 6380 g_assert_not_reached(); 6381 } 6382 if (fp_access_check(s)) { 6383 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6384 esz == MO_16, (a->idx << 1) | neg, 6385 fns[esz - 1]); 6386 } 6387 return true; 6388 } 6389 6390 TRANS(FMLA_vi, do_fmla_vector_idx, a, false) 6391 TRANS(FMLS_vi, do_fmla_vector_idx, a, true) 6392 6393 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2) 6394 { 6395 if (fp_access_check(s)) { 6396 int data = (a->idx << 2) | (is_2 << 1) | is_s; 6397 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 6398 vec_full_reg_offset(s, a->rn), 6399 vec_full_reg_offset(s, a->rm), tcg_env, 6400 a->q ? 
16 : 8, vec_full_reg_size(s), 6401 data, gen_helper_gvec_fmlal_idx_a64); 6402 } 6403 return true; 6404 } 6405 6406 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false) 6407 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false) 6408 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true) 6409 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true) 6410 6411 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6412 gen_helper_gvec_3 * const fns[2]) 6413 { 6414 assert(a->esz == MO_16 || a->esz == MO_32); 6415 if (fp_access_check(s)) { 6416 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]); 6417 } 6418 return true; 6419 } 6420 6421 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = { 6422 gen_helper_gvec_mul_idx_h, 6423 gen_helper_gvec_mul_idx_s, 6424 }; 6425 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul) 6426 6427 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub) 6428 { 6429 static gen_helper_gvec_4 * const fns[2][2] = { 6430 { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h }, 6431 { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s }, 6432 }; 6433 6434 assert(a->esz == MO_16 || a->esz == MO_32); 6435 if (fp_access_check(s)) { 6436 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 6437 a->idx, fns[a->esz - 1][sub]); 6438 } 6439 return true; 6440 } 6441 6442 TRANS(MLA_vi, do_mla_vector_idx, a, false) 6443 TRANS(MLS_vi, do_mla_vector_idx, a, true) 6444 6445 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a, 6446 gen_helper_gvec_4 * const fns[2]) 6447 { 6448 assert(a->esz == MO_16 || a->esz == MO_32); 6449 if (fp_access_check(s)) { 6450 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), 6451 vec_full_reg_offset(s, a->rn), 6452 vec_full_reg_offset(s, a->rm), 6453 offsetof(CPUARMState, vfp.qc), 6454 a->q ? 
16 : 8, vec_full_reg_size(s), 6455 a->idx, fns[a->esz - 1]); 6456 } 6457 return true; 6458 } 6459 6460 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = { 6461 gen_helper_neon_sqdmulh_idx_h, 6462 gen_helper_neon_sqdmulh_idx_s, 6463 }; 6464 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh) 6465 6466 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = { 6467 gen_helper_neon_sqrdmulh_idx_h, 6468 gen_helper_neon_sqrdmulh_idx_s, 6469 }; 6470 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh) 6471 6472 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = { 6473 gen_helper_neon_sqrdmlah_idx_h, 6474 gen_helper_neon_sqrdmlah_idx_s, 6475 }; 6476 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6477 f_vector_idx_sqrdmlah) 6478 6479 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = { 6480 gen_helper_neon_sqrdmlsh_idx_h, 6481 gen_helper_neon_sqrdmlsh_idx_s, 6482 }; 6483 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6484 f_vector_idx_sqrdmlsh) 6485 6486 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a, 6487 gen_helper_gvec_4 *fn) 6488 { 6489 if (fp_access_check(s)) { 6490 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6491 } 6492 return true; 6493 } 6494 6495 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a, 6496 gen_helper_gvec_4_ptr *fn) 6497 { 6498 if (fp_access_check(s)) { 6499 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6500 } 6501 return true; 6502 } 6503 6504 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b) 6505 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b) 6506 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6507 gen_helper_gvec_sudot_idx_b) 6508 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6509 gen_helper_gvec_usdot_idx_b) 6510 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a, 6511 gen_helper_gvec_bfdot_idx) 6512 6513 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) 6514 { 6515 if (!dc_isar_feature(aa64_bf16, s)) { 6516 return false; 6517 } 6518 if (fp_access_check(s)) { 6519 /* Q bit selects BFMLALB vs BFMLALT. 
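* Both the element index and the Q bit are packed into the data argument, (a->idx << 1) | a->q, for the helper to apply.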
*/ 6520 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0, 6521 (a->idx << 1) | a->q, 6522 gen_helper_gvec_bfmlal_idx); 6523 } 6524 return true; 6525 } 6526 6527 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) 6528 { 6529 gen_helper_gvec_4_ptr *fn; 6530 6531 if (!dc_isar_feature(aa64_fcma, s)) { 6532 return false; 6533 } 6534 switch (a->esz) { 6535 case MO_16: 6536 if (!dc_isar_feature(aa64_fp16, s)) { 6537 return false; 6538 } 6539 fn = gen_helper_gvec_fcmlah_idx; 6540 break; 6541 case MO_32: 6542 fn = gen_helper_gvec_fcmlas_idx; 6543 break; 6544 default: 6545 g_assert_not_reached(); 6546 } 6547 if (fp_access_check(s)) { 6548 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6549 a->esz == MO_16, (a->idx << 2) | a->rot, fn); 6550 } 6551 return true; 6552 } 6553 6554 /* 6555 * Advanced SIMD scalar pairwise 6556 */ 6557 6558 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) 6559 { 6560 switch (a->esz) { 6561 case MO_64: 6562 if (fp_access_check(s)) { 6563 TCGv_i64 t0 = tcg_temp_new_i64(); 6564 TCGv_i64 t1 = tcg_temp_new_i64(); 6565 6566 read_vec_element(s, t0, a->rn, 0, MO_64); 6567 read_vec_element(s, t1, a->rn, 1, MO_64); 6568 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 6569 write_fp_dreg(s, a->rd, t0); 6570 } 6571 break; 6572 case MO_32: 6573 if (fp_access_check(s)) { 6574 TCGv_i32 t0 = tcg_temp_new_i32(); 6575 TCGv_i32 t1 = tcg_temp_new_i32(); 6576 6577 read_vec_element_i32(s, t0, a->rn, 0, MO_32); 6578 read_vec_element_i32(s, t1, a->rn, 1, MO_32); 6579 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 6580 write_fp_sreg(s, a->rd, t0); 6581 } 6582 break; 6583 case MO_16: 6584 if (!dc_isar_feature(aa64_fp16, s)) { 6585 return false; 6586 } 6587 if (fp_access_check(s)) { 6588 TCGv_i32 t0 = tcg_temp_new_i32(); 6589 TCGv_i32 t1 = tcg_temp_new_i32(); 6590 6591 read_vec_element_i32(s, t0, a->rn, 0, MO_16); 6592 read_vec_element_i32(s, t1, a->rn, 1, MO_16); 6593 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); 6594 write_fp_sreg(s, a->rd, t0); 6595 } 6596 break; 6597 default: 6598 g_assert_not_reached(); 6599 } 6600 return true; 6601 } 6602 6603 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) 6604 TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax) 6605 TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin) 6606 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) 6607 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) 6608 6609 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a) 6610 { 6611 if (fp_access_check(s)) { 6612 TCGv_i64 t0 = tcg_temp_new_i64(); 6613 TCGv_i64 t1 = tcg_temp_new_i64(); 6614 6615 read_vec_element(s, t0, a->rn, 0, MO_64); 6616 read_vec_element(s, t1, a->rn, 1, MO_64); 6617 tcg_gen_add_i64(t0, t0, t1); 6618 write_fp_dreg(s, a->rd, t0); 6619 } 6620 return true; 6621 } 6622 6623 /* 6624 * Floating-point conditional select 6625 */ 6626 6627 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a) 6628 { 6629 TCGv_i64 t_true, t_false; 6630 DisasCompare64 c; 6631 6632 switch (a->esz) { 6633 case MO_32: 6634 case MO_64: 6635 break; 6636 case MO_16: 6637 if (!dc_isar_feature(aa64_fp16, s)) { 6638 return false; 6639 } 6640 break; 6641 default: 6642 return false; 6643 } 6644 6645 if (!fp_access_check(s)) { 6646 return true; 6647 } 6648 6649 /* Zero extend sreg & hreg inputs to 64 bits now. 
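* Reading with MO_16/MO_32 via read_vec_element zero-extends into the 64-bit temps, so the movcond result below already has zeroed high bits.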
*/ 6650 t_true = tcg_temp_new_i64(); 6651 t_false = tcg_temp_new_i64(); 6652 read_vec_element(s, t_true, a->rn, 0, a->esz); 6653 read_vec_element(s, t_false, a->rm, 0, a->esz); 6654 6655 a64_test_cc(&c, a->cond); 6656 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 6657 t_true, t_false); 6658 6659 /* 6660 * Note that sregs & hregs write back zeros to the high bits, 6661 * and we've already done the zero-extension. 6662 */ 6663 write_fp_dreg(s, a->rd, t_true); 6664 return true; 6665 } 6666 6667 /* 6668 * Advanced SIMD Extract 6669 */ 6670 6671 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a) 6672 { 6673 if (fp_access_check(s)) { 6674 TCGv_i64 lo = read_fp_dreg(s, a->rn); 6675 if (a->imm != 0) { 6676 TCGv_i64 hi = read_fp_dreg(s, a->rm); 6677 tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8); 6678 } 6679 write_fp_dreg(s, a->rd, lo); 6680 } 6681 return true; 6682 } 6683 6684 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a) 6685 { 6686 TCGv_i64 lo, hi; 6687 int pos = (a->imm & 7) * 8; 6688 int elt = a->imm >> 3; 6689 6690 if (!fp_access_check(s)) { 6691 return true; 6692 } 6693 6694 lo = tcg_temp_new_i64(); 6695 hi = tcg_temp_new_i64(); 6696 6697 read_vec_element(s, lo, a->rn, elt, MO_64); 6698 elt++; 6699 read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64); 6700 elt++; 6701 6702 if (pos != 0) { 6703 TCGv_i64 hh = tcg_temp_new_i64(); 6704 tcg_gen_extract2_i64(lo, lo, hi, pos); 6705 read_vec_element(s, hh, a->rm, elt & 1, MO_64); 6706 tcg_gen_extract2_i64(hi, hi, hh, pos); 6707 } 6708 6709 write_vec_element(s, lo, a->rd, 0, MO_64); 6710 write_vec_element(s, hi, a->rd, 1, MO_64); 6711 clear_vec_high(s, true, a->rd); 6712 return true; 6713 } 6714 6715 /* 6716 * Floating-point data-processing (3 source) 6717 */ 6718 6719 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) 6720 { 6721 TCGv_ptr fpst; 6722 6723 /* 6724 * These are fused multiply-add. Note that doing the negations here 6725 * as separate steps is correct: an input NaN should come out with 6726 * its sign bit flipped if it is a negated-input. 
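* For example, FNMADD is implemented as fma(-Rn, Rm, -Ra) rather than as -fma(Rn, Rm, Ra).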
6727 */ 6728 switch (a->esz) { 6729 case MO_64: 6730 if (fp_access_check(s)) { 6731 TCGv_i64 tn = read_fp_dreg(s, a->rn); 6732 TCGv_i64 tm = read_fp_dreg(s, a->rm); 6733 TCGv_i64 ta = read_fp_dreg(s, a->ra); 6734 6735 if (neg_a) { 6736 gen_vfp_negd(ta, ta); 6737 } 6738 if (neg_n) { 6739 gen_vfp_negd(tn, tn); 6740 } 6741 fpst = fpstatus_ptr(FPST_FPCR); 6742 gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); 6743 write_fp_dreg(s, a->rd, ta); 6744 } 6745 break; 6746 6747 case MO_32: 6748 if (fp_access_check(s)) { 6749 TCGv_i32 tn = read_fp_sreg(s, a->rn); 6750 TCGv_i32 tm = read_fp_sreg(s, a->rm); 6751 TCGv_i32 ta = read_fp_sreg(s, a->ra); 6752 6753 if (neg_a) { 6754 gen_vfp_negs(ta, ta); 6755 } 6756 if (neg_n) { 6757 gen_vfp_negs(tn, tn); 6758 } 6759 fpst = fpstatus_ptr(FPST_FPCR); 6760 gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); 6761 write_fp_sreg(s, a->rd, ta); 6762 } 6763 break; 6764 6765 case MO_16: 6766 if (!dc_isar_feature(aa64_fp16, s)) { 6767 return false; 6768 } 6769 if (fp_access_check(s)) { 6770 TCGv_i32 tn = read_fp_hreg(s, a->rn); 6771 TCGv_i32 tm = read_fp_hreg(s, a->rm); 6772 TCGv_i32 ta = read_fp_hreg(s, a->ra); 6773 6774 if (neg_a) { 6775 gen_vfp_negh(ta, ta); 6776 } 6777 if (neg_n) { 6778 gen_vfp_negh(tn, tn); 6779 } 6780 fpst = fpstatus_ptr(FPST_FPCR_F16); 6781 gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); 6782 write_fp_sreg(s, a->rd, ta); 6783 } 6784 break; 6785 6786 default: 6787 return false; 6788 } 6789 return true; 6790 } 6791 6792 TRANS(FMADD, do_fmadd, a, false, false) 6793 TRANS(FNMADD, do_fmadd, a, true, true) 6794 TRANS(FMSUB, do_fmadd, a, false, true) 6795 TRANS(FNMSUB, do_fmadd, a, true, false) 6796 6797 /* 6798 * Advanced SIMD Across Lanes 6799 */ 6800 6801 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen, 6802 MemOp src_sign, NeonGenTwo64OpFn *fn) 6803 { 6804 TCGv_i64 tcg_res, tcg_elt; 6805 MemOp src_mop = a->esz | src_sign; 6806 int elements = (a->q ? 16 : 8) >> a->esz; 6807 6808 /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */ 6809 if (elements < 4) { 6810 return false; 6811 } 6812 if (!fp_access_check(s)) { 6813 return true; 6814 } 6815 6816 tcg_res = tcg_temp_new_i64(); 6817 tcg_elt = tcg_temp_new_i64(); 6818 6819 read_vec_element(s, tcg_res, a->rn, 0, src_mop); 6820 for (int i = 1; i < elements; i++) { 6821 read_vec_element(s, tcg_elt, a->rn, i, src_mop); 6822 fn(tcg_res, tcg_res, tcg_elt); 6823 } 6824 6825 tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen); 6826 write_fp_dreg(s, a->rd, tcg_res); 6827 return true; 6828 } 6829 6830 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64) 6831 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64) 6832 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64) 6833 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64) 6834 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64) 6835 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64) 6836 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64) 6837 6838 /* 6839 * do_fp_reduction helper 6840 * 6841 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 6842 * important for correct NaN propagation that we do these 6843 * operations in exactly the order specified by the pseudocode. 6844 * 6845 * This is a recursive function. 
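* For example, four elements reduce as fn(fn(e[0], e[1]), fn(e[2], e[3])) rather than as a linear left-to-right fold.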
6846 */ 6847 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, 6848 int ebase, int ecount, TCGv_ptr fpst, 6849 NeonGenTwoSingleOpFn *fn) 6850 { 6851 if (ecount == 1) { 6852 TCGv_i32 tcg_elem = tcg_temp_new_i32(); 6853 read_vec_element_i32(s, tcg_elem, rn, ebase, esz); 6854 return tcg_elem; 6855 } else { 6856 int half = ecount >> 1; 6857 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 6858 6859 tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn); 6860 tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn); 6861 tcg_res = tcg_temp_new_i32(); 6862 6863 fn(tcg_res, tcg_lo, tcg_hi, fpst); 6864 return tcg_res; 6865 } 6866 } 6867 6868 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, 6869 NeonGenTwoSingleOpFn *fn) 6870 { 6871 if (fp_access_check(s)) { 6872 MemOp esz = a->esz; 6873 int elts = (a->q ? 16 : 8) >> esz; 6874 TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 6875 TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn); 6876 write_fp_sreg(s, a->rd, res); 6877 } 6878 return true; 6879 } 6880 6881 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxnumh) 6882 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minnumh) 6883 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxh) 6884 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minh) 6885 6886 TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums) 6887 TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums) 6888 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs) 6889 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins) 6890 6891 /* 6892 * Floating-point Immediate 6893 */ 6894 6895 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a) 6896 { 6897 switch (a->esz) { 6898 case MO_32: 6899 case MO_64: 6900 break; 6901 case MO_16: 6902 if (!dc_isar_feature(aa64_fp16, s)) { 6903 return false; 6904 } 6905 break; 6906 default: 6907 return false; 6908 } 6909 if (fp_access_check(s)) { 6910 uint64_t imm = vfp_expand_imm(a->esz, a->imm); 6911 write_fp_dreg(s, a->rd, tcg_constant_i64(imm)); 6912 } 6913 return true; 6914 } 6915 6916 /* 6917 * Advanced SIMD Modified Immediate 6918 */ 6919 6920 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a) 6921 { 6922 if (!dc_isar_feature(aa64_fp16, s)) { 6923 return false; 6924 } 6925 if (fp_access_check(s)) { 6926 tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd), 6927 a->q ? 16 : 8, vec_full_reg_size(s), 6928 vfp_expand_imm(MO_16, a->abcdefgh)); 6929 } 6930 return true; 6931 } 6932 6933 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs, 6934 int64_t c, uint32_t oprsz, uint32_t maxsz) 6935 { 6936 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c); 6937 } 6938 6939 static bool trans_Vimm(DisasContext *s, arg_Vimm *a) 6940 { 6941 GVecGen2iFn *fn; 6942 6943 /* Handle decode of cmode/op here between ORR/BIC/MOVI */ 6944 if ((a->cmode & 1) && a->cmode < 12) { 6945 /* For op=1, the imm will be inverted, so BIC becomes AND. */ 6946 fn = a->op ? 
tcg_gen_gvec_andi : tcg_gen_gvec_ori; 6947 } else { 6948 /* There is one unallocated cmode/op combination in this space */ 6949 if (a->cmode == 15 && a->op == 1 && a->q == 0) { 6950 return false; 6951 } 6952 fn = gen_movi; 6953 } 6954 6955 if (fp_access_check(s)) { 6956 uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op); 6957 gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64); 6958 } 6959 return true; 6960 } 6961 6962 /* 6963 * Advanced SIMD Shift by Immediate 6964 */ 6965 6966 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn) 6967 { 6968 if (fp_access_check(s)) { 6969 gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz); 6970 } 6971 return true; 6972 } 6973 6974 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr) 6975 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr) 6976 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra) 6977 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra) 6978 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr) 6979 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr) 6980 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra) 6981 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra) 6982 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri) 6983 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli) 6984 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli); 6985 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli) 6986 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli) 6987 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui) 6988 6989 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u) 6990 { 6991 TCGv_i64 tcg_rn, tcg_rd; 6992 int esz = a->esz; 6993 int esize; 6994 6995 if (!fp_access_check(s)) { 6996 return true; 6997 } 6998 6999 /* 7000 * For the LL variants the store is larger than the load, 7001 * so if rd == rn we would overwrite parts of our input. 7002 * So load everything right now and use shifts in the main loop. 
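* For example, SSHLL Vd.8H, Vn.8B, #shift with Vd == Vn would otherwise clobber source bytes as the widened halfwords are stored.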
7003 */ 7004 tcg_rd = tcg_temp_new_i64(); 7005 tcg_rn = tcg_temp_new_i64(); 7006 read_vec_element(s, tcg_rn, a->rn, a->q, MO_64); 7007 7008 esize = 8 << esz; 7009 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7010 if (is_u) { 7011 tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize); 7012 } else { 7013 tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize); 7014 } 7015 tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm); 7016 write_vec_element(s, tcg_rd, a->rd, i, esz + 1); 7017 } 7018 clear_vec_high(s, true, a->rd); 7019 return true; 7020 } 7021 7022 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false) 7023 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true) 7024 7025 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7026 { 7027 assert(shift >= 0 && shift <= 64); 7028 tcg_gen_sari_i64(dst, src, MIN(shift, 63)); 7029 } 7030 7031 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7032 { 7033 assert(shift >= 0 && shift <= 64); 7034 if (shift == 64) { 7035 tcg_gen_movi_i64(dst, 0); 7036 } else { 7037 tcg_gen_shri_i64(dst, src, shift); 7038 } 7039 } 7040 7041 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7042 { 7043 gen_sshr_d(src, src, shift); 7044 tcg_gen_add_i64(dst, dst, src); 7045 } 7046 7047 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7048 { 7049 gen_ushr_d(src, src, shift); 7050 tcg_gen_add_i64(dst, dst, src); 7051 } 7052 7053 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7054 { 7055 assert(shift >= 0 && shift <= 32); 7056 if (shift) { 7057 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7058 tcg_gen_add_i64(dst, src, rnd); 7059 tcg_gen_sari_i64(dst, dst, shift); 7060 } else { 7061 tcg_gen_mov_i64(dst, src); 7062 } 7063 } 7064 7065 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7066 { 7067 assert(shift >= 0 && shift <= 32); 7068 if (shift) { 7069 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7070 tcg_gen_add_i64(dst, src, rnd); 7071 tcg_gen_shri_i64(dst, dst, shift); 7072 } else { 7073 tcg_gen_mov_i64(dst, src); 7074 } 7075 } 7076 7077 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7078 { 7079 assert(shift >= 0 && shift <= 64); 7080 if (shift == 0) { 7081 tcg_gen_mov_i64(dst, src); 7082 } else if (shift == 64) { 7083 /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */ 7084 tcg_gen_movi_i64(dst, 0); 7085 } else { 7086 TCGv_i64 rnd = tcg_temp_new_i64(); 7087 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7088 tcg_gen_sari_i64(dst, src, shift); 7089 tcg_gen_add_i64(dst, dst, rnd); 7090 } 7091 } 7092 7093 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7094 { 7095 assert(shift >= 0 && shift <= 64); 7096 if (shift == 0) { 7097 tcg_gen_mov_i64(dst, src); 7098 } else if (shift == 64) { 7099 /* Rounding will propagate bit 63 into bit 64. */ 7100 tcg_gen_shri_i64(dst, src, 63); 7101 } else { 7102 TCGv_i64 rnd = tcg_temp_new_i64(); 7103 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7104 tcg_gen_shri_i64(dst, src, shift); 7105 tcg_gen_add_i64(dst, dst, rnd); 7106 } 7107 } 7108 7109 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7110 { 7111 gen_srshr_d(src, src, shift); 7112 tcg_gen_add_i64(dst, dst, src); 7113 } 7114 7115 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7116 { 7117 gen_urshr_d(src, src, shift); 7118 tcg_gen_add_i64(dst, dst, src); 7119 } 7120 7121 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7122 { 7123 /* If shift is 64, dst is unchanged. 
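* Otherwise the shifted source is deposited into the low 64 - shift bits of dst, leaving the top shift bits of dst intact.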
*/ 7124 if (shift != 64) { 7125 tcg_gen_shri_i64(src, src, shift); 7126 tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift); 7127 } 7128 } 7129 7130 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7131 { 7132 tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift); 7133 } 7134 7135 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a, 7136 WideShiftImmFn * const fns[3], MemOp sign) 7137 { 7138 TCGv_i64 tcg_rn, tcg_rd; 7139 int esz = a->esz; 7140 int esize; 7141 WideShiftImmFn *fn; 7142 7143 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7144 7145 if (!fp_access_check(s)) { 7146 return true; 7147 } 7148 7149 tcg_rn = tcg_temp_new_i64(); 7150 tcg_rd = tcg_temp_new_i64(); 7151 tcg_gen_movi_i64(tcg_rd, 0); 7152 7153 fn = fns[esz]; 7154 esize = 8 << esz; 7155 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7156 read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign); 7157 fn(tcg_rn, tcg_rn, a->imm); 7158 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize); 7159 } 7160 7161 write_vec_element(s, tcg_rd, a->rd, a->q, MO_64); 7162 clear_vec_high(s, a->q, a->rd); 7163 return true; 7164 } 7165 7166 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7167 { 7168 tcg_gen_sari_i64(d, s, i); 7169 tcg_gen_ext16u_i64(d, d); 7170 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7171 } 7172 7173 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7174 { 7175 tcg_gen_sari_i64(d, s, i); 7176 tcg_gen_ext32u_i64(d, d); 7177 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7178 } 7179 7180 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7181 { 7182 gen_sshr_d(d, s, i); 7183 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7184 } 7185 7186 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7187 { 7188 tcg_gen_shri_i64(d, s, i); 7189 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7190 } 7191 7192 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7193 { 7194 tcg_gen_shri_i64(d, s, i); 7195 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7196 } 7197 7198 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7199 { 7200 gen_ushr_d(d, s, i); 7201 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7202 } 7203 7204 static void gen_sqshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7205 { 7206 tcg_gen_sari_i64(d, s, i); 7207 tcg_gen_ext16u_i64(d, d); 7208 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7209 } 7210 7211 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7212 { 7213 tcg_gen_sari_i64(d, s, i); 7214 tcg_gen_ext32u_i64(d, d); 7215 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7216 } 7217 7218 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7219 { 7220 gen_sshr_d(d, s, i); 7221 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7222 } 7223 7224 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7225 { 7226 gen_srshr_bhs(d, s, i); 7227 tcg_gen_ext16u_i64(d, d); 7228 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7229 } 7230 7231 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7232 { 7233 gen_srshr_bhs(d, s, i); 7234 tcg_gen_ext32u_i64(d, d); 7235 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7236 } 7237 7238 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7239 { 7240 gen_srshr_d(d, s, i); 7241 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7242 } 7243 7244 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7245 { 7246 gen_urshr_bhs(d, s, i); 7247 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7248 } 7249 7250 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7251 
{ 7252 gen_urshr_bhs(d, s, i); 7253 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7254 } 7255 7256 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7257 { 7258 gen_urshr_d(d, s, i); 7259 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7260 } 7261 7262 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7263 { 7264 gen_srshr_bhs(d, s, i); 7265 tcg_gen_ext16u_i64(d, d); 7266 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7267 } 7268 7269 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7270 { 7271 gen_srshr_bhs(d, s, i); 7272 tcg_gen_ext32u_i64(d, d); 7273 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7274 } 7275 7276 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7277 { 7278 gen_srshr_d(d, s, i); 7279 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7280 } 7281 7282 static WideShiftImmFn * const shrn_fns[] = { 7283 tcg_gen_shri_i64, 7284 tcg_gen_shri_i64, 7285 gen_ushr_d, 7286 }; 7287 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0) 7288 7289 static WideShiftImmFn * const rshrn_fns[] = { 7290 gen_urshr_bhs, 7291 gen_urshr_bhs, 7292 gen_urshr_d, 7293 }; 7294 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0) 7295 7296 static WideShiftImmFn * const sqshrn_fns[] = { 7297 gen_sqshrn_b, 7298 gen_sqshrn_h, 7299 gen_sqshrn_s, 7300 }; 7301 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN) 7302 7303 static WideShiftImmFn * const uqshrn_fns[] = { 7304 gen_uqshrn_b, 7305 gen_uqshrn_h, 7306 gen_uqshrn_s, 7307 }; 7308 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0) 7309 7310 static WideShiftImmFn * const sqshrun_fns[] = { 7311 gen_sqshrun_b, 7312 gen_sqshrun_h, 7313 gen_sqshrun_s, 7314 }; 7315 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, sqshrun_fns, MO_SIGN) 7316 7317 static WideShiftImmFn * const sqrshrn_fns[] = { 7318 gen_sqrshrn_b, 7319 gen_sqrshrn_h, 7320 gen_sqrshrn_s, 7321 }; 7322 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN) 7323 7324 static WideShiftImmFn * const uqrshrn_fns[] = { 7325 gen_uqrshrn_b, 7326 gen_uqrshrn_h, 7327 gen_uqrshrn_s, 7328 }; 7329 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0) 7330 7331 static WideShiftImmFn * const sqrshrun_fns[] = { 7332 gen_sqrshrun_b, 7333 gen_sqrshrun_h, 7334 gen_sqrshrun_s, 7335 }; 7336 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN) 7337 7338 /* 7339 * Advanced SIMD Scalar Shift by Immediate 7340 */ 7341 7342 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a, 7343 WideShiftImmFn *fn, bool accumulate, 7344 MemOp sign) 7345 { 7346 if (fp_access_check(s)) { 7347 TCGv_i64 rd = tcg_temp_new_i64(); 7348 TCGv_i64 rn = tcg_temp_new_i64(); 7349 7350 read_vec_element(s, rn, a->rn, 0, a->esz | sign); 7351 if (accumulate) { 7352 read_vec_element(s, rd, a->rd, 0, a->esz | sign); 7353 } 7354 fn(rd, rn, a->imm); 7355 write_fp_dreg(s, a->rd, rd); 7356 } 7357 return true; 7358 } 7359 7360 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0) 7361 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0) 7362 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0) 7363 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0) 7364 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0) 7365 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0) 7366 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0) 7367 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0) 7368 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0) 7369 7370 TRANS(SHL_s, do_scalar_shift_imm, a, 
tcg_gen_shli_i64, false, 0) 7371 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0) 7372 7373 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i, 7374 NeonGenTwoOpEnvFn *fn) 7375 { 7376 TCGv_i32 t = tcg_temp_new_i32(); 7377 tcg_gen_extrl_i64_i32(t, s); 7378 fn(t, tcg_env, t, tcg_constant_i32(i)); 7379 tcg_gen_extu_i32_i64(d, t); 7380 } 7381 7382 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7383 { 7384 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8); 7385 } 7386 7387 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7388 { 7389 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16); 7390 } 7391 7392 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7393 { 7394 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32); 7395 } 7396 7397 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7398 { 7399 gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i)); 7400 } 7401 7402 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7403 { 7404 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8); 7405 } 7406 7407 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7408 { 7409 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16); 7410 } 7411 7412 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7413 { 7414 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32); 7415 } 7416 7417 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7418 { 7419 gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i)); 7420 } 7421 7422 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7423 { 7424 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8); 7425 } 7426 7427 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7428 { 7429 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16); 7430 } 7431 7432 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7433 { 7434 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32); 7435 } 7436 7437 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7438 { 7439 gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i)); 7440 } 7441 7442 static WideShiftImmFn * const f_scalar_sqshli[] = { 7443 gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d 7444 }; 7445 7446 static WideShiftImmFn * const f_scalar_uqshli[] = { 7447 gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d 7448 }; 7449 7450 static WideShiftImmFn * const f_scalar_sqshlui[] = { 7451 gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d 7452 }; 7453 7454 /* Note that the helpers sign-extend their inputs, so don't do it here. 
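* That is why 0 rather than MO_SIGN is passed as the sign argument below.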
*/ 7455 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0) 7456 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0) 7457 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0) 7458 7459 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a, 7460 WideShiftImmFn * const fns[3], 7461 MemOp sign, bool zext) 7462 { 7463 MemOp esz = a->esz; 7464 7465 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7466 7467 if (fp_access_check(s)) { 7468 TCGv_i64 rd = tcg_temp_new_i64(); 7469 TCGv_i64 rn = tcg_temp_new_i64(); 7470 7471 read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign); 7472 fns[esz](rd, rn, a->imm); 7473 if (zext) { 7474 tcg_gen_ext_i64(rd, rd, esz); 7475 } 7476 write_fp_dreg(s, a->rd, rd); 7477 } 7478 return true; 7479 } 7480 7481 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true) 7482 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true) 7483 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false) 7484 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false) 7485 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false) 7486 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false) 7487 7488 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 7489 * Note that it is the caller's responsibility to ensure that the 7490 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 7491 * mandated semantics for out of range shifts. 7492 */ 7493 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 7494 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 7495 { 7496 switch (shift_type) { 7497 case A64_SHIFT_TYPE_LSL: 7498 tcg_gen_shl_i64(dst, src, shift_amount); 7499 break; 7500 case A64_SHIFT_TYPE_LSR: 7501 tcg_gen_shr_i64(dst, src, shift_amount); 7502 break; 7503 case A64_SHIFT_TYPE_ASR: 7504 if (!sf) { 7505 tcg_gen_ext32s_i64(dst, src); 7506 } 7507 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); 7508 break; 7509 case A64_SHIFT_TYPE_ROR: 7510 if (sf) { 7511 tcg_gen_rotr_i64(dst, src, shift_amount); 7512 } else { 7513 TCGv_i32 t0, t1; 7514 t0 = tcg_temp_new_i32(); 7515 t1 = tcg_temp_new_i32(); 7516 tcg_gen_extrl_i64_i32(t0, src); 7517 tcg_gen_extrl_i64_i32(t1, shift_amount); 7518 tcg_gen_rotr_i32(t0, t0, t1); 7519 tcg_gen_extu_i32_i64(dst, t0); 7520 } 7521 break; 7522 default: 7523 assert(FALSE); /* all shift types should be handled */ 7524 break; 7525 } 7526 7527 if (!sf) { /* zero extend final result */ 7528 tcg_gen_ext32u_i64(dst, dst); 7529 } 7530 } 7531 7532 /* Shift a TCGv src by immediate, put result in dst. 7533 * The shift amount must be in range (this should always be true as the 7534 * relevant instructions will UNDEF on bad shift immediates). 7535 */ 7536 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, 7537 enum a64_shift_type shift_type, unsigned int shift_i) 7538 { 7539 assert(shift_i < (sf ? 
64 : 32)); 7540 7541 if (shift_i == 0) { 7542 tcg_gen_mov_i64(dst, src); 7543 } else { 7544 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); 7545 } 7546 } 7547 7548 /* Logical (shifted register) 7549 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 7550 * +----+-----+-----------+-------+---+------+--------+------+------+ 7551 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd | 7552 * +----+-----+-----------+-------+---+------+--------+------+------+ 7553 */ 7554 static void disas_logic_reg(DisasContext *s, uint32_t insn) 7555 { 7556 TCGv_i64 tcg_rd, tcg_rn, tcg_rm; 7557 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd; 7558 7559 sf = extract32(insn, 31, 1); 7560 opc = extract32(insn, 29, 2); 7561 shift_type = extract32(insn, 22, 2); 7562 invert = extract32(insn, 21, 1); 7563 rm = extract32(insn, 16, 5); 7564 shift_amount = extract32(insn, 10, 6); 7565 rn = extract32(insn, 5, 5); 7566 rd = extract32(insn, 0, 5); 7567 7568 if (!sf && (shift_amount & (1 << 5))) { 7569 unallocated_encoding(s); 7570 return; 7571 } 7572 7573 tcg_rd = cpu_reg(s, rd); 7574 7575 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) { 7576 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for 7577 * register-register MOV and MVN, so it is worth special casing. 7578 */ 7579 tcg_rm = cpu_reg(s, rm); 7580 if (invert) { 7581 tcg_gen_not_i64(tcg_rd, tcg_rm); 7582 if (!sf) { 7583 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7584 } 7585 } else { 7586 if (sf) { 7587 tcg_gen_mov_i64(tcg_rd, tcg_rm); 7588 } else { 7589 tcg_gen_ext32u_i64(tcg_rd, tcg_rm); 7590 } 7591 } 7592 return; 7593 } 7594 7595 tcg_rm = read_cpu_reg(s, rm, sf); 7596 7597 if (shift_amount) { 7598 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount); 7599 } 7600 7601 tcg_rn = cpu_reg(s, rn); 7602 7603 switch (opc | (invert << 2)) { 7604 case 0: /* AND */ 7605 case 3: /* ANDS */ 7606 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm); 7607 break; 7608 case 1: /* ORR */ 7609 tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm); 7610 break; 7611 case 2: /* EOR */ 7612 tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm); 7613 break; 7614 case 4: /* BIC */ 7615 case 7: /* BICS */ 7616 tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm); 7617 break; 7618 case 5: /* ORN */ 7619 tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm); 7620 break; 7621 case 6: /* EON */ 7622 tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm); 7623 break; 7624 default: 7625 assert(FALSE); 7626 break; 7627 } 7628 7629 if (!sf) { 7630 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7631 } 7632 7633 if (opc == 3) { 7634 gen_logic_CC(sf, tcg_rd); 7635 } 7636 } 7637 7638 /* 7639 * Add/subtract (extended register) 7640 * 7641 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0| 7642 * +--+--+--+-----------+-----+--+-------+------+------+----+----+ 7643 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd | 7644 * +--+--+--+-----------+-----+--+-------+------+------+----+----+ 7645 * 7646 * sf: 0 -> 32bit, 1 -> 64bit 7647 * op: 0 -> add , 1 -> sub 7648 * S: 1 -> set flags 7649 * opt: 00 7650 * option: extension type (see DecodeRegExtend) 7651 * imm3: optional shift to Rm 7652 * 7653 * Rd = Rn + LSL(extend(Rm), amount) 7654 */ 7655 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn) 7656 { 7657 int rd = extract32(insn, 0, 5); 7658 int rn = extract32(insn, 5, 5); 7659 int imm3 = extract32(insn, 10, 3); 7660 int option = extract32(insn, 13, 3); 7661 int rm = extract32(insn, 16, 5); 7662 int opt = extract32(insn, 22, 2); 7663 bool setflags = extract32(insn, 29, 1); 7664 bool sub_op = 
extract32(insn, 30, 1); 7665 bool sf = extract32(insn, 31, 1); 7666 7667 TCGv_i64 tcg_rm, tcg_rn; /* temps */ 7668 TCGv_i64 tcg_rd; 7669 TCGv_i64 tcg_result; 7670 7671 if (imm3 > 4 || opt != 0) { 7672 unallocated_encoding(s); 7673 return; 7674 } 7675 7676 /* non-flag setting ops may use SP */ 7677 if (!setflags) { 7678 tcg_rd = cpu_reg_sp(s, rd); 7679 } else { 7680 tcg_rd = cpu_reg(s, rd); 7681 } 7682 tcg_rn = read_cpu_reg_sp(s, rn, sf); 7683 7684 tcg_rm = read_cpu_reg(s, rm, sf); 7685 ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3); 7686 7687 tcg_result = tcg_temp_new_i64(); 7688 7689 if (!setflags) { 7690 if (sub_op) { 7691 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 7692 } else { 7693 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 7694 } 7695 } else { 7696 if (sub_op) { 7697 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 7698 } else { 7699 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 7700 } 7701 } 7702 7703 if (sf) { 7704 tcg_gen_mov_i64(tcg_rd, tcg_result); 7705 } else { 7706 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 7707 } 7708 } 7709 7710 /* 7711 * Add/subtract (shifted register) 7712 * 7713 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 7714 * +--+--+--+-----------+-----+--+-------+---------+------+------+ 7715 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd | 7716 * +--+--+--+-----------+-----+--+-------+---------+------+------+ 7717 * 7718 * sf: 0 -> 32bit, 1 -> 64bit 7719 * op: 0 -> add , 1 -> sub 7720 * S: 1 -> set flags 7721 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED 7722 * imm6: Shift amount to apply to Rm before the add/sub 7723 */ 7724 static void disas_add_sub_reg(DisasContext *s, uint32_t insn) 7725 { 7726 int rd = extract32(insn, 0, 5); 7727 int rn = extract32(insn, 5, 5); 7728 int imm6 = extract32(insn, 10, 6); 7729 int rm = extract32(insn, 16, 5); 7730 int shift_type = extract32(insn, 22, 2); 7731 bool setflags = extract32(insn, 29, 1); 7732 bool sub_op = extract32(insn, 30, 1); 7733 bool sf = extract32(insn, 31, 1); 7734 7735 TCGv_i64 tcg_rd = cpu_reg(s, rd); 7736 TCGv_i64 tcg_rn, tcg_rm; 7737 TCGv_i64 tcg_result; 7738 7739 if ((shift_type == 3) || (!sf && (imm6 > 31))) { 7740 unallocated_encoding(s); 7741 return; 7742 } 7743 7744 tcg_rn = read_cpu_reg(s, rn, sf); 7745 tcg_rm = read_cpu_reg(s, rm, sf); 7746 7747 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6); 7748 7749 tcg_result = tcg_temp_new_i64(); 7750 7751 if (!setflags) { 7752 if (sub_op) { 7753 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 7754 } else { 7755 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 7756 } 7757 } else { 7758 if (sub_op) { 7759 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 7760 } else { 7761 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 7762 } 7763 } 7764 7765 if (sf) { 7766 tcg_gen_mov_i64(tcg_rd, tcg_result); 7767 } else { 7768 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 7769 } 7770 } 7771 7772 /* Data-processing (3 source) 7773 * 7774 * 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0 7775 * +--+------+-----------+------+------+----+------+------+------+ 7776 * |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd | 7777 * +--+------+-----------+------+------+----+------+------+------+ 7778 */ 7779 static void disas_data_proc_3src(DisasContext *s, uint32_t insn) 7780 { 7781 int rd = extract32(insn, 0, 5); 7782 int rn = extract32(insn, 5, 5); 7783 int ra = extract32(insn, 10, 5); 7784 int rm = extract32(insn, 16, 5); 7785 int op_id = (extract32(insn, 29, 3) << 4) | 7786 (extract32(insn, 21, 3) << 1) | 7787 extract32(insn, 15, 1); 7788 bool sf = extract32(insn, 31, 1); 7789 bool is_sub = 
extract32(op_id, 0, 1); 7790 bool is_high = extract32(op_id, 2, 1); 7791 bool is_signed = false; 7792 TCGv_i64 tcg_op1; 7793 TCGv_i64 tcg_op2; 7794 TCGv_i64 tcg_tmp; 7795 7796 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */ 7797 switch (op_id) { 7798 case 0x42: /* SMADDL */ 7799 case 0x43: /* SMSUBL */ 7800 case 0x44: /* SMULH */ 7801 is_signed = true; 7802 break; 7803 case 0x0: /* MADD (32bit) */ 7804 case 0x1: /* MSUB (32bit) */ 7805 case 0x40: /* MADD (64bit) */ 7806 case 0x41: /* MSUB (64bit) */ 7807 case 0x4a: /* UMADDL */ 7808 case 0x4b: /* UMSUBL */ 7809 case 0x4c: /* UMULH */ 7810 break; 7811 default: 7812 unallocated_encoding(s); 7813 return; 7814 } 7815 7816 if (is_high) { 7817 TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */ 7818 TCGv_i64 tcg_rd = cpu_reg(s, rd); 7819 TCGv_i64 tcg_rn = cpu_reg(s, rn); 7820 TCGv_i64 tcg_rm = cpu_reg(s, rm); 7821 7822 if (is_signed) { 7823 tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 7824 } else { 7825 tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 7826 } 7827 return; 7828 } 7829 7830 tcg_op1 = tcg_temp_new_i64(); 7831 tcg_op2 = tcg_temp_new_i64(); 7832 tcg_tmp = tcg_temp_new_i64(); 7833 7834 if (op_id < 0x42) { 7835 tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn)); 7836 tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm)); 7837 } else { 7838 if (is_signed) { 7839 tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn)); 7840 tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm)); 7841 } else { 7842 tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn)); 7843 tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm)); 7844 } 7845 } 7846 7847 if (ra == 31 && !is_sub) { 7848 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 7849 tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2); 7850 } else { 7851 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 7852 if (is_sub) { 7853 tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 7854 } else { 7855 tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 7856 } 7857 } 7858 7859 if (!sf) { 7860 tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd)); 7861 } 7862 } 7863 7864 /* Add/subtract (with carry) 7865 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0 7866 * +--+--+--+------------------------+------+-------------+------+-----+ 7867 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd | 7868 * +--+--+--+------------------------+------+-------------+------+-----+ 7869 */ 7870 7871 static void disas_adc_sbc(DisasContext *s, uint32_t insn) 7872 { 7873 unsigned int sf, op, setflags, rm, rn, rd; 7874 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 7875 7876 sf = extract32(insn, 31, 1); 7877 op = extract32(insn, 30, 1); 7878 setflags = extract32(insn, 29, 1); 7879 rm = extract32(insn, 16, 5); 7880 rn = extract32(insn, 5, 5); 7881 rd = extract32(insn, 0, 5); 7882 7883 tcg_rd = cpu_reg(s, rd); 7884 tcg_rn = cpu_reg(s, rn); 7885 7886 if (op) { 7887 tcg_y = tcg_temp_new_i64(); 7888 tcg_gen_not_i64(tcg_y, cpu_reg(s, rm)); 7889 } else { 7890 tcg_y = cpu_reg(s, rm); 7891 } 7892 7893 if (setflags) { 7894 gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y); 7895 } else { 7896 gen_adc(sf, tcg_rd, tcg_rn, tcg_y); 7897 } 7898 } 7899 7900 /* 7901 * Rotate right into flags 7902 * 31 30 29 21 15 10 5 4 0 7903 * +--+--+--+-----------------+--------+-----------+------+--+------+ 7904 * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask | 7905 * +--+--+--+-----------------+--------+-----------+------+--+------+ 7906 */ 7907 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn) 7908 { 7909 int mask = extract32(insn, 
0, 4); 7910 int o2 = extract32(insn, 4, 1); 7911 int rn = extract32(insn, 5, 5); 7912 int imm6 = extract32(insn, 15, 6); 7913 int sf_op_s = extract32(insn, 29, 3); 7914 TCGv_i64 tcg_rn; 7915 TCGv_i32 nzcv; 7916 7917 if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) { 7918 unallocated_encoding(s); 7919 return; 7920 } 7921 7922 tcg_rn = read_cpu_reg(s, rn, 1); 7923 tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6); 7924 7925 nzcv = tcg_temp_new_i32(); 7926 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 7927 7928 if (mask & 8) { /* N */ 7929 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 7930 } 7931 if (mask & 4) { /* Z */ 7932 tcg_gen_not_i32(cpu_ZF, nzcv); 7933 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 7934 } 7935 if (mask & 2) { /* C */ 7936 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 7937 } 7938 if (mask & 1) { /* V */ 7939 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 7940 } 7941 } 7942 7943 /* 7944 * Evaluate into flags 7945 * 31 30 29 21 15 14 10 5 4 0 7946 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 7947 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 | Rn |o3| mask | 7948 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 7949 */ 7950 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn) 7951 { 7952 int o3_mask = extract32(insn, 0, 5); 7953 int rn = extract32(insn, 5, 5); 7954 int o2 = extract32(insn, 15, 6); 7955 int sz = extract32(insn, 14, 1); 7956 int sf_op_s = extract32(insn, 29, 3); 7957 TCGv_i32 tmp; 7958 int shift; 7959 7960 if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd || 7961 !dc_isar_feature(aa64_condm_4, s)) { 7962 unallocated_encoding(s); 7963 return; 7964 } 7965 shift = sz ? 16 : 24; /* SETF16 or SETF8 */ 7966 7967 tmp = tcg_temp_new_i32(); 7968 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 7969 tcg_gen_shli_i32(cpu_NF, tmp, shift); 7970 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 7971 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 7972 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 7973 } 7974 7975 /* Conditional compare (immediate / register) 7976 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 7977 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 7978 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv | 7979 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 7980 * [1] y [0] [0] 7981 */ 7982 static void disas_cc(DisasContext *s, uint32_t insn) 7983 { 7984 unsigned int sf, op, y, cond, rn, nzcv, is_imm; 7985 TCGv_i32 tcg_t0, tcg_t1, tcg_t2; 7986 TCGv_i64 tcg_tmp, tcg_y, tcg_rn; 7987 DisasCompare c; 7988 7989 if (!extract32(insn, 29, 1)) { 7990 unallocated_encoding(s); 7991 return; 7992 } 7993 if (insn & (1 << 10 | 1 << 4)) { 7994 unallocated_encoding(s); 7995 return; 7996 } 7997 sf = extract32(insn, 31, 1); 7998 op = extract32(insn, 30, 1); 7999 is_imm = extract32(insn, 11, 1); 8000 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */ 8001 cond = extract32(insn, 12, 4); 8002 rn = extract32(insn, 5, 5); 8003 nzcv = extract32(insn, 0, 4); 8004 8005 /* Set T0 = !COND. */ 8006 tcg_t0 = tcg_temp_new_i32(); 8007 arm_test_cc(&c, cond); 8008 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 8009 8010 /* Load the arguments for the new comparison. */ 8011 if (is_imm) { 8012 tcg_y = tcg_temp_new_i64(); 8013 tcg_gen_movi_i64(tcg_y, y); 8014 } else { 8015 tcg_y = cpu_reg(s, y); 8016 } 8017 tcg_rn = cpu_reg(s, rn); 8018 8019 /* Set the flags for the new comparison. 
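* CCMN (op == 0) sets them as for ADDS and CCMP (op == 1) as for SUBS.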
*/ 8020 tcg_tmp = tcg_temp_new_i64(); 8021 if (op) { 8022 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y); 8023 } else { 8024 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y); 8025 } 8026 8027 /* If COND was false, force the flags to #nzcv. Compute two masks 8028 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 8029 * For tcg hosts that support ANDC, we can make do with just T1. 8030 * In either case, allow the tcg optimizer to delete any unused mask. 8031 */ 8032 tcg_t1 = tcg_temp_new_i32(); 8033 tcg_t2 = tcg_temp_new_i32(); 8034 tcg_gen_neg_i32(tcg_t1, tcg_t0); 8035 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 8036 8037 if (nzcv & 8) { /* N */ 8038 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 8039 } else { 8040 if (TCG_TARGET_HAS_andc_i32) { 8041 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 8042 } else { 8043 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 8044 } 8045 } 8046 if (nzcv & 4) { /* Z */ 8047 if (TCG_TARGET_HAS_andc_i32) { 8048 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 8049 } else { 8050 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 8051 } 8052 } else { 8053 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 8054 } 8055 if (nzcv & 2) { /* C */ 8056 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 8057 } else { 8058 if (TCG_TARGET_HAS_andc_i32) { 8059 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 8060 } else { 8061 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 8062 } 8063 } 8064 if (nzcv & 1) { /* V */ 8065 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 8066 } else { 8067 if (TCG_TARGET_HAS_andc_i32) { 8068 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 8069 } else { 8070 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 8071 } 8072 } 8073 } 8074 8075 /* Conditional select 8076 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0 8077 * +----+----+---+-----------------+------+------+-----+------+------+ 8078 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd | 8079 * +----+----+---+-----------------+------+------+-----+------+------+ 8080 */ 8081 static void disas_cond_select(DisasContext *s, uint32_t insn) 8082 { 8083 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd; 8084 TCGv_i64 tcg_rd, zero; 8085 DisasCompare64 c; 8086 8087 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) { 8088 /* S == 1 or op2<1> == 1 */ 8089 unallocated_encoding(s); 8090 return; 8091 } 8092 sf = extract32(insn, 31, 1); 8093 else_inv = extract32(insn, 30, 1); 8094 rm = extract32(insn, 16, 5); 8095 cond = extract32(insn, 12, 4); 8096 else_inc = extract32(insn, 10, 1); 8097 rn = extract32(insn, 5, 5); 8098 rd = extract32(insn, 0, 5); 8099 8100 tcg_rd = cpu_reg(s, rd); 8101 8102 a64_test_cc(&c, cond); 8103 zero = tcg_constant_i64(0); 8104 8105 if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) { 8106 /* CSET & CSETM. 
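* CSET Rd, cond is an alias of CSINC Rd, ZR, ZR, invert(cond) and CSETM of
* CSINV Rd, ZR, ZR, invert(cond), so we test the inverted condition and
* materialise 1 or -1 directly.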
*/ 8107 if (else_inv) { 8108 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), 8109 tcg_rd, c.value, zero); 8110 } else { 8111 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), 8112 tcg_rd, c.value, zero); 8113 } 8114 } else { 8115 TCGv_i64 t_true = cpu_reg(s, rn); 8116 TCGv_i64 t_false = read_cpu_reg(s, rm, 1); 8117 if (else_inv && else_inc) { 8118 tcg_gen_neg_i64(t_false, t_false); 8119 } else if (else_inv) { 8120 tcg_gen_not_i64(t_false, t_false); 8121 } else if (else_inc) { 8122 tcg_gen_addi_i64(t_false, t_false, 1); 8123 } 8124 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 8125 } 8126 8127 if (!sf) { 8128 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8129 } 8130 } 8131 8132 static void handle_clz(DisasContext *s, unsigned int sf, 8133 unsigned int rn, unsigned int rd) 8134 { 8135 TCGv_i64 tcg_rd, tcg_rn; 8136 tcg_rd = cpu_reg(s, rd); 8137 tcg_rn = cpu_reg(s, rn); 8138 8139 if (sf) { 8140 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 8141 } else { 8142 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 8143 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 8144 tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32); 8145 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 8146 } 8147 } 8148 8149 static void handle_cls(DisasContext *s, unsigned int sf, 8150 unsigned int rn, unsigned int rd) 8151 { 8152 TCGv_i64 tcg_rd, tcg_rn; 8153 tcg_rd = cpu_reg(s, rd); 8154 tcg_rn = cpu_reg(s, rn); 8155 8156 if (sf) { 8157 tcg_gen_clrsb_i64(tcg_rd, tcg_rn); 8158 } else { 8159 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 8160 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 8161 tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32); 8162 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 8163 } 8164 } 8165 8166 static void handle_rbit(DisasContext *s, unsigned int sf, 8167 unsigned int rn, unsigned int rd) 8168 { 8169 TCGv_i64 tcg_rd, tcg_rn; 8170 tcg_rd = cpu_reg(s, rd); 8171 tcg_rn = cpu_reg(s, rn); 8172 8173 if (sf) { 8174 gen_helper_rbit64(tcg_rd, tcg_rn); 8175 } else { 8176 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 8177 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 8178 gen_helper_rbit(tcg_tmp32, tcg_tmp32); 8179 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 8180 } 8181 } 8182 8183 /* REV with sf==1, opcode==3 ("REV64") */ 8184 static void handle_rev64(DisasContext *s, unsigned int sf, 8185 unsigned int rn, unsigned int rd) 8186 { 8187 if (!sf) { 8188 unallocated_encoding(s); 8189 return; 8190 } 8191 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn)); 8192 } 8193 8194 /* REV with sf==0, opcode==2 8195 * REV32 (sf==1, opcode==2) 8196 */ 8197 static void handle_rev32(DisasContext *s, unsigned int sf, 8198 unsigned int rn, unsigned int rd) 8199 { 8200 TCGv_i64 tcg_rd = cpu_reg(s, rd); 8201 TCGv_i64 tcg_rn = cpu_reg(s, rn); 8202 8203 if (sf) { 8204 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 8205 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 8206 } else { 8207 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 8208 } 8209 } 8210 8211 /* REV16 (opcode==1) */ 8212 static void handle_rev16(DisasContext *s, unsigned int sf, 8213 unsigned int rn, unsigned int rd) 8214 { 8215 TCGv_i64 tcg_rd = cpu_reg(s, rd); 8216 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8217 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 8218 TCGv_i64 mask = tcg_constant_i64(sf ? 
0x00ff00ff00ff00ffull : 0x00ff00ff); 8219 8220 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 8221 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 8222 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 8223 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 8224 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 8225 } 8226 8227 /* Data-processing (1 source) 8228 * 31 30 29 28 21 20 16 15 10 9 5 4 0 8229 * +----+---+---+-----------------+---------+--------+------+------+ 8230 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd | 8231 * +----+---+---+-----------------+---------+--------+------+------+ 8232 */ 8233 static void disas_data_proc_1src(DisasContext *s, uint32_t insn) 8234 { 8235 unsigned int sf, opcode, opcode2, rn, rd; 8236 TCGv_i64 tcg_rd; 8237 8238 if (extract32(insn, 29, 1)) { 8239 unallocated_encoding(s); 8240 return; 8241 } 8242 8243 sf = extract32(insn, 31, 1); 8244 opcode = extract32(insn, 10, 6); 8245 opcode2 = extract32(insn, 16, 5); 8246 rn = extract32(insn, 5, 5); 8247 rd = extract32(insn, 0, 5); 8248 8249 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7)) 8250 8251 switch (MAP(sf, opcode2, opcode)) { 8252 case MAP(0, 0x00, 0x00): /* RBIT */ 8253 case MAP(1, 0x00, 0x00): 8254 handle_rbit(s, sf, rn, rd); 8255 break; 8256 case MAP(0, 0x00, 0x01): /* REV16 */ 8257 case MAP(1, 0x00, 0x01): 8258 handle_rev16(s, sf, rn, rd); 8259 break; 8260 case MAP(0, 0x00, 0x02): /* REV/REV32 */ 8261 case MAP(1, 0x00, 0x02): 8262 handle_rev32(s, sf, rn, rd); 8263 break; 8264 case MAP(1, 0x00, 0x03): /* REV64 */ 8265 handle_rev64(s, sf, rn, rd); 8266 break; 8267 case MAP(0, 0x00, 0x04): /* CLZ */ 8268 case MAP(1, 0x00, 0x04): 8269 handle_clz(s, sf, rn, rd); 8270 break; 8271 case MAP(0, 0x00, 0x05): /* CLS */ 8272 case MAP(1, 0x00, 0x05): 8273 handle_cls(s, sf, rn, rd); 8274 break; 8275 case MAP(1, 0x01, 0x00): /* PACIA */ 8276 if (s->pauth_active) { 8277 tcg_rd = cpu_reg(s, rd); 8278 gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 8279 } else if (!dc_isar_feature(aa64_pauth, s)) { 8280 goto do_unallocated; 8281 } 8282 break; 8283 case MAP(1, 0x01, 0x01): /* PACIB */ 8284 if (s->pauth_active) { 8285 tcg_rd = cpu_reg(s, rd); 8286 gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 8287 } else if (!dc_isar_feature(aa64_pauth, s)) { 8288 goto do_unallocated; 8289 } 8290 break; 8291 case MAP(1, 0x01, 0x02): /* PACDA */ 8292 if (s->pauth_active) { 8293 tcg_rd = cpu_reg(s, rd); 8294 gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 8295 } else if (!dc_isar_feature(aa64_pauth, s)) { 8296 goto do_unallocated; 8297 } 8298 break; 8299 case MAP(1, 0x01, 0x03): /* PACDB */ 8300 if (s->pauth_active) { 8301 tcg_rd = cpu_reg(s, rd); 8302 gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 8303 } else if (!dc_isar_feature(aa64_pauth, s)) { 8304 goto do_unallocated; 8305 } 8306 break; 8307 case MAP(1, 0x01, 0x04): /* AUTIA */ 8308 if (s->pauth_active) { 8309 tcg_rd = cpu_reg(s, rd); 8310 gen_helper_autia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 8311 } else if (!dc_isar_feature(aa64_pauth, s)) { 8312 goto do_unallocated; 8313 } 8314 break; 8315 case MAP(1, 0x01, 0x05): /* AUTIB */ 8316 if (s->pauth_active) { 8317 tcg_rd = cpu_reg(s, rd); 8318 gen_helper_autib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 8319 } else if (!dc_isar_feature(aa64_pauth, s)) { 8320 goto do_unallocated; 8321 } 8322 break; 8323 case MAP(1, 0x01, 0x06): /* AUTDA */ 8324 if (s->pauth_active) { 8325 tcg_rd = cpu_reg(s, rd); 8326 gen_helper_autda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 8327 } else if 
(!dc_isar_feature(aa64_pauth, s)) { 8328 goto do_unallocated; 8329 } 8330 break; 8331 case MAP(1, 0x01, 0x07): /* AUTDB */ 8332 if (s->pauth_active) { 8333 tcg_rd = cpu_reg(s, rd); 8334 gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 8335 } else if (!dc_isar_feature(aa64_pauth, s)) { 8336 goto do_unallocated; 8337 } 8338 break; 8339 case MAP(1, 0x01, 0x08): /* PACIZA */ 8340 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8341 goto do_unallocated; 8342 } else if (s->pauth_active) { 8343 tcg_rd = cpu_reg(s, rd); 8344 gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 8345 } 8346 break; 8347 case MAP(1, 0x01, 0x09): /* PACIZB */ 8348 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8349 goto do_unallocated; 8350 } else if (s->pauth_active) { 8351 tcg_rd = cpu_reg(s, rd); 8352 gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 8353 } 8354 break; 8355 case MAP(1, 0x01, 0x0a): /* PACDZA */ 8356 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8357 goto do_unallocated; 8358 } else if (s->pauth_active) { 8359 tcg_rd = cpu_reg(s, rd); 8360 gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 8361 } 8362 break; 8363 case MAP(1, 0x01, 0x0b): /* PACDZB */ 8364 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8365 goto do_unallocated; 8366 } else if (s->pauth_active) { 8367 tcg_rd = cpu_reg(s, rd); 8368 gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 8369 } 8370 break; 8371 case MAP(1, 0x01, 0x0c): /* AUTIZA */ 8372 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8373 goto do_unallocated; 8374 } else if (s->pauth_active) { 8375 tcg_rd = cpu_reg(s, rd); 8376 gen_helper_autia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 8377 } 8378 break; 8379 case MAP(1, 0x01, 0x0d): /* AUTIZB */ 8380 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8381 goto do_unallocated; 8382 } else if (s->pauth_active) { 8383 tcg_rd = cpu_reg(s, rd); 8384 gen_helper_autib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 8385 } 8386 break; 8387 case MAP(1, 0x01, 0x0e): /* AUTDZA */ 8388 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8389 goto do_unallocated; 8390 } else if (s->pauth_active) { 8391 tcg_rd = cpu_reg(s, rd); 8392 gen_helper_autda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 8393 } 8394 break; 8395 case MAP(1, 0x01, 0x0f): /* AUTDZB */ 8396 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8397 goto do_unallocated; 8398 } else if (s->pauth_active) { 8399 tcg_rd = cpu_reg(s, rd); 8400 gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 8401 } 8402 break; 8403 case MAP(1, 0x01, 0x10): /* XPACI */ 8404 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8405 goto do_unallocated; 8406 } else if (s->pauth_active) { 8407 tcg_rd = cpu_reg(s, rd); 8408 gen_helper_xpaci(tcg_rd, tcg_env, tcg_rd); 8409 } 8410 break; 8411 case MAP(1, 0x01, 0x11): /* XPACD */ 8412 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8413 goto do_unallocated; 8414 } else if (s->pauth_active) { 8415 tcg_rd = cpu_reg(s, rd); 8416 gen_helper_xpacd(tcg_rd, tcg_env, tcg_rd); 8417 } 8418 break; 8419 default: 8420 do_unallocated: 8421 unallocated_encoding(s); 8422 break; 8423 } 8424 8425 #undef MAP 8426 } 8427 8428 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf, 8429 unsigned int rm, unsigned int rn, unsigned int rd) 8430 { 8431 TCGv_i64 tcg_n, tcg_m, tcg_rd; 8432 tcg_rd = cpu_reg(s, rd); 8433 8434 if (!sf && is_signed) { 8435 tcg_n = tcg_temp_new_i64(); 8436 tcg_m = tcg_temp_new_i64(); 8437 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn)); 
8438 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm)); 8439 } else { 8440 tcg_n = read_cpu_reg(s, rn, sf); 8441 tcg_m = read_cpu_reg(s, rm, sf); 8442 } 8443 8444 if (is_signed) { 8445 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 8446 } else { 8447 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 8448 } 8449 8450 if (!sf) { /* zero extend final result */ 8451 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8452 } 8453 } 8454 8455 /* LSLV, LSRV, ASRV, RORV */ 8456 static void handle_shift_reg(DisasContext *s, 8457 enum a64_shift_type shift_type, unsigned int sf, 8458 unsigned int rm, unsigned int rn, unsigned int rd) 8459 { 8460 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 8461 TCGv_i64 tcg_rd = cpu_reg(s, rd); 8462 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 8463 8464 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31); 8465 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift); 8466 } 8467 8468 /* CRC32[BHWX], CRC32C[BHWX] */ 8469 static void handle_crc32(DisasContext *s, 8470 unsigned int sf, unsigned int sz, bool crc32c, 8471 unsigned int rm, unsigned int rn, unsigned int rd) 8472 { 8473 TCGv_i64 tcg_acc, tcg_val; 8474 TCGv_i32 tcg_bytes; 8475 8476 if (!dc_isar_feature(aa64_crc32, s) 8477 || (sf == 1 && sz != 3) 8478 || (sf == 0 && sz == 3)) { 8479 unallocated_encoding(s); 8480 return; 8481 } 8482 8483 if (sz == 3) { 8484 tcg_val = cpu_reg(s, rm); 8485 } else { 8486 uint64_t mask; 8487 switch (sz) { 8488 case 0: 8489 mask = 0xFF; 8490 break; 8491 case 1: 8492 mask = 0xFFFF; 8493 break; 8494 case 2: 8495 mask = 0xFFFFFFFF; 8496 break; 8497 default: 8498 g_assert_not_reached(); 8499 } 8500 tcg_val = tcg_temp_new_i64(); 8501 tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask); 8502 } 8503 8504 tcg_acc = cpu_reg(s, rn); 8505 tcg_bytes = tcg_constant_i32(1 << sz); 8506 8507 if (crc32c) { 8508 gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 8509 } else { 8510 gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 8511 } 8512 } 8513 8514 /* Data-processing (2 source) 8515 * 31 30 29 28 21 20 16 15 10 9 5 4 0 8516 * +----+---+---+-----------------+------+--------+------+------+ 8517 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd | 8518 * +----+---+---+-----------------+------+--------+------+------+ 8519 */ 8520 static void disas_data_proc_2src(DisasContext *s, uint32_t insn) 8521 { 8522 unsigned int sf, rm, opcode, rn, rd, setflag; 8523 sf = extract32(insn, 31, 1); 8524 setflag = extract32(insn, 29, 1); 8525 rm = extract32(insn, 16, 5); 8526 opcode = extract32(insn, 10, 6); 8527 rn = extract32(insn, 5, 5); 8528 rd = extract32(insn, 0, 5); 8529 8530 if (setflag && opcode != 0) { 8531 unallocated_encoding(s); 8532 return; 8533 } 8534 8535 switch (opcode) { 8536 case 0: /* SUBP(S) */ 8537 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 8538 goto do_unallocated; 8539 } else { 8540 TCGv_i64 tcg_n, tcg_m, tcg_d; 8541 8542 tcg_n = read_cpu_reg_sp(s, rn, true); 8543 tcg_m = read_cpu_reg_sp(s, rm, true); 8544 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); 8545 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 8546 tcg_d = cpu_reg(s, rd); 8547 8548 if (setflag) { 8549 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 8550 } else { 8551 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 8552 } 8553 } 8554 break; 8555 case 2: /* UDIV */ 8556 handle_div(s, false, sf, rm, rn, rd); 8557 break; 8558 case 3: /* SDIV */ 8559 handle_div(s, true, sf, rm, rn, rd); 8560 break; 8561 case 4: /* IRG */ 8562 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 8563 goto do_unallocated; 8564 } 8565 if (s->ata[0]) { 8566 
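/* Tag generation enabled: let the helper choose the new allocation tag. */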
gen_helper_irg(cpu_reg_sp(s, rd), tcg_env, 8567 cpu_reg_sp(s, rn), cpu_reg(s, rm)); 8568 } else { 8569 gen_address_with_allocation_tag0(cpu_reg_sp(s, rd), 8570 cpu_reg_sp(s, rn)); 8571 } 8572 break; 8573 case 5: /* GMI */ 8574 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 8575 goto do_unallocated; 8576 } else { 8577 TCGv_i64 t = tcg_temp_new_i64(); 8578 8579 tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4); 8580 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 8581 tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t); 8582 } 8583 break; 8584 case 8: /* LSLV */ 8585 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); 8586 break; 8587 case 9: /* LSRV */ 8588 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd); 8589 break; 8590 case 10: /* ASRV */ 8591 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd); 8592 break; 8593 case 11: /* RORV */ 8594 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd); 8595 break; 8596 case 12: /* PACGA */ 8597 if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) { 8598 goto do_unallocated; 8599 } 8600 gen_helper_pacga(cpu_reg(s, rd), tcg_env, 8601 cpu_reg(s, rn), cpu_reg_sp(s, rm)); 8602 break; 8603 case 16: 8604 case 17: 8605 case 18: 8606 case 19: 8607 case 20: 8608 case 21: 8609 case 22: 8610 case 23: /* CRC32 */ 8611 { 8612 int sz = extract32(opcode, 0, 2); 8613 bool crc32c = extract32(opcode, 2, 1); 8614 handle_crc32(s, sf, sz, crc32c, rm, rn, rd); 8615 break; 8616 } 8617 default: 8618 do_unallocated: 8619 unallocated_encoding(s); 8620 break; 8621 } 8622 } 8623 8624 /* 8625 * Data processing - register 8626 * 31 30 29 28 25 21 20 16 10 0 8627 * +--+---+--+---+-------+-----+-------+-------+---------+ 8628 * | |op0| |op1| 1 0 1 | op2 | | op3 | | 8629 * +--+---+--+---+-------+-----+-------+-------+---------+ 8630 */ 8631 static void disas_data_proc_reg(DisasContext *s, uint32_t insn) 8632 { 8633 int op0 = extract32(insn, 30, 1); 8634 int op1 = extract32(insn, 28, 1); 8635 int op2 = extract32(insn, 21, 4); 8636 int op3 = extract32(insn, 10, 6); 8637 8638 if (!op1) { 8639 if (op2 & 8) { 8640 if (op2 & 1) { 8641 /* Add/sub (extended register) */ 8642 disas_add_sub_ext_reg(s, insn); 8643 } else { 8644 /* Add/sub (shifted register) */ 8645 disas_add_sub_reg(s, insn); 8646 } 8647 } else { 8648 /* Logical (shifted register) */ 8649 disas_logic_reg(s, insn); 8650 } 8651 return; 8652 } 8653 8654 switch (op2) { 8655 case 0x0: 8656 switch (op3) { 8657 case 0x00: /* Add/subtract (with carry) */ 8658 disas_adc_sbc(s, insn); 8659 break; 8660 8661 case 0x01: /* Rotate right into flags */ 8662 case 0x21: 8663 disas_rotate_right_into_flags(s, insn); 8664 break; 8665 8666 case 0x02: /* Evaluate into flags */ 8667 case 0x12: 8668 case 0x22: 8669 case 0x32: 8670 disas_evaluate_into_flags(s, insn); 8671 break; 8672 8673 default: 8674 goto do_unallocated; 8675 } 8676 break; 8677 8678 case 0x2: /* Conditional compare */ 8679 disas_cc(s, insn); /* both imm and reg forms */ 8680 break; 8681 8682 case 0x4: /* Conditional select */ 8683 disas_cond_select(s, insn); 8684 break; 8685 8686 case 0x6: /* Data-processing */ 8687 if (op0) { /* (1 source) */ 8688 disas_data_proc_1src(s, insn); 8689 } else { /* (2 source) */ 8690 disas_data_proc_2src(s, insn); 8691 } 8692 break; 8693 case 0x8 ... 
0xf: /* (3 source) */ 8694 disas_data_proc_3src(s, insn); 8695 break; 8696 8697 default: 8698 do_unallocated: 8699 unallocated_encoding(s); 8700 break; 8701 } 8702 } 8703 8704 static void handle_fp_compare(DisasContext *s, int size, 8705 unsigned int rn, unsigned int rm, 8706 bool cmp_with_zero, bool signal_all_nans) 8707 { 8708 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 8709 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 8710 8711 if (size == MO_64) { 8712 TCGv_i64 tcg_vn, tcg_vm; 8713 8714 tcg_vn = read_fp_dreg(s, rn); 8715 if (cmp_with_zero) { 8716 tcg_vm = tcg_constant_i64(0); 8717 } else { 8718 tcg_vm = read_fp_dreg(s, rm); 8719 } 8720 if (signal_all_nans) { 8721 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 8722 } else { 8723 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 8724 } 8725 } else { 8726 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 8727 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 8728 8729 read_vec_element_i32(s, tcg_vn, rn, 0, size); 8730 if (cmp_with_zero) { 8731 tcg_gen_movi_i32(tcg_vm, 0); 8732 } else { 8733 read_vec_element_i32(s, tcg_vm, rm, 0, size); 8734 } 8735 8736 switch (size) { 8737 case MO_32: 8738 if (signal_all_nans) { 8739 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 8740 } else { 8741 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 8742 } 8743 break; 8744 case MO_16: 8745 if (signal_all_nans) { 8746 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 8747 } else { 8748 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 8749 } 8750 break; 8751 default: 8752 g_assert_not_reached(); 8753 } 8754 } 8755 8756 gen_set_nzcv(tcg_flags); 8757 } 8758 8759 /* Floating point compare 8760 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0 8761 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 8762 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 | 8763 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 8764 */ 8765 static void disas_fp_compare(DisasContext *s, uint32_t insn) 8766 { 8767 unsigned int mos, type, rm, op, rn, opc, op2r; 8768 int size; 8769 8770 mos = extract32(insn, 29, 3); 8771 type = extract32(insn, 22, 2); 8772 rm = extract32(insn, 16, 5); 8773 op = extract32(insn, 14, 2); 8774 rn = extract32(insn, 5, 5); 8775 opc = extract32(insn, 3, 2); 8776 op2r = extract32(insn, 0, 3); 8777 8778 if (mos || op || op2r) { 8779 unallocated_encoding(s); 8780 return; 8781 } 8782 8783 switch (type) { 8784 case 0: 8785 size = MO_32; 8786 break; 8787 case 1: 8788 size = MO_64; 8789 break; 8790 case 3: 8791 size = MO_16; 8792 if (dc_isar_feature(aa64_fp16, s)) { 8793 break; 8794 } 8795 /* fallthru */ 8796 default: 8797 unallocated_encoding(s); 8798 return; 8799 } 8800 8801 if (!fp_access_check(s)) { 8802 return; 8803 } 8804 8805 handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2); 8806 } 8807 8808 /* Floating point conditional compare 8809 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 8810 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 8811 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv | 8812 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 8813 */ 8814 static void disas_fp_ccomp(DisasContext *s, uint32_t insn) 8815 { 8816 unsigned int mos, type, rm, cond, rn, op, nzcv; 8817 TCGLabel *label_continue = NULL; 8818 int size; 8819 8820 mos = extract32(insn, 29, 3); 8821 type = extract32(insn, 22, 2); 8822 rm = extract32(insn, 16, 5); 8823 cond 
= extract32(insn, 12, 4); 8824 rn = extract32(insn, 5, 5); 8825 op = extract32(insn, 4, 1); 8826 nzcv = extract32(insn, 0, 4); 8827 8828 if (mos) { 8829 unallocated_encoding(s); 8830 return; 8831 } 8832 8833 switch (type) { 8834 case 0: 8835 size = MO_32; 8836 break; 8837 case 1: 8838 size = MO_64; 8839 break; 8840 case 3: 8841 size = MO_16; 8842 if (dc_isar_feature(aa64_fp16, s)) { 8843 break; 8844 } 8845 /* fallthru */ 8846 default: 8847 unallocated_encoding(s); 8848 return; 8849 } 8850 8851 if (!fp_access_check(s)) { 8852 return; 8853 } 8854 8855 if (cond < 0x0e) { /* not always */ 8856 TCGLabel *label_match = gen_new_label(); 8857 label_continue = gen_new_label(); 8858 arm_gen_test_cc(cond, label_match); 8859 /* nomatch: */ 8860 gen_set_nzcv(tcg_constant_i64(nzcv << 28)); 8861 tcg_gen_br(label_continue); 8862 gen_set_label(label_match); 8863 } 8864 8865 handle_fp_compare(s, size, rn, rm, false, op); 8866 8867 if (cond < 0x0e) { 8868 gen_set_label(label_continue); 8869 } 8870 } 8871 8872 /* Floating-point data-processing (1 source) - half precision */ 8873 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) 8874 { 8875 TCGv_ptr fpst = NULL; 8876 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 8877 TCGv_i32 tcg_res = tcg_temp_new_i32(); 8878 8879 switch (opcode) { 8880 case 0x0: /* FMOV */ 8881 tcg_gen_mov_i32(tcg_res, tcg_op); 8882 break; 8883 case 0x1: /* FABS */ 8884 gen_vfp_absh(tcg_res, tcg_op); 8885 break; 8886 case 0x2: /* FNEG */ 8887 gen_vfp_negh(tcg_res, tcg_op); 8888 break; 8889 case 0x3: /* FSQRT */ 8890 fpst = fpstatus_ptr(FPST_FPCR_F16); 8891 gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); 8892 break; 8893 case 0x8: /* FRINTN */ 8894 case 0x9: /* FRINTP */ 8895 case 0xa: /* FRINTM */ 8896 case 0xb: /* FRINTZ */ 8897 case 0xc: /* FRINTA */ 8898 { 8899 TCGv_i32 tcg_rmode; 8900 8901 fpst = fpstatus_ptr(FPST_FPCR_F16); 8902 tcg_rmode = gen_set_rmode(opcode & 7, fpst); 8903 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 8904 gen_restore_rmode(tcg_rmode, fpst); 8905 break; 8906 } 8907 case 0xe: /* FRINTX */ 8908 fpst = fpstatus_ptr(FPST_FPCR_F16); 8909 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst); 8910 break; 8911 case 0xf: /* FRINTI */ 8912 fpst = fpstatus_ptr(FPST_FPCR_F16); 8913 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 8914 break; 8915 default: 8916 g_assert_not_reached(); 8917 } 8918 8919 write_fp_sreg(s, rd, tcg_res); 8920 } 8921 8922 /* Floating-point data-processing (1 source) - single precision */ 8923 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) 8924 { 8925 void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr); 8926 TCGv_i32 tcg_op, tcg_res; 8927 TCGv_ptr fpst; 8928 int rmode = -1; 8929 8930 tcg_op = read_fp_sreg(s, rn); 8931 tcg_res = tcg_temp_new_i32(); 8932 8933 switch (opcode) { 8934 case 0x0: /* FMOV */ 8935 tcg_gen_mov_i32(tcg_res, tcg_op); 8936 goto done; 8937 case 0x1: /* FABS */ 8938 gen_vfp_abss(tcg_res, tcg_op); 8939 goto done; 8940 case 0x2: /* FNEG */ 8941 gen_vfp_negs(tcg_res, tcg_op); 8942 goto done; 8943 case 0x3: /* FSQRT */ 8944 gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); 8945 goto done; 8946 case 0x6: /* BFCVT */ 8947 gen_fpst = gen_helper_bfcvt; 8948 break; 8949 case 0x8: /* FRINTN */ 8950 case 0x9: /* FRINTP */ 8951 case 0xa: /* FRINTM */ 8952 case 0xb: /* FRINTZ */ 8953 case 0xc: /* FRINTA */ 8954 rmode = opcode & 7; 8955 gen_fpst = gen_helper_rints; 8956 break; 8957 case 0xe: /* FRINTX */ 8958 gen_fpst = gen_helper_rints_exact; 8959 break; 8960 case 0xf: /* FRINTI */ 8961 gen_fpst = 
gen_helper_rints; 8962 break; 8963 case 0x10: /* FRINT32Z */ 8964 rmode = FPROUNDING_ZERO; 8965 gen_fpst = gen_helper_frint32_s; 8966 break; 8967 case 0x11: /* FRINT32X */ 8968 gen_fpst = gen_helper_frint32_s; 8969 break; 8970 case 0x12: /* FRINT64Z */ 8971 rmode = FPROUNDING_ZERO; 8972 gen_fpst = gen_helper_frint64_s; 8973 break; 8974 case 0x13: /* FRINT64X */ 8975 gen_fpst = gen_helper_frint64_s; 8976 break; 8977 default: 8978 g_assert_not_reached(); 8979 } 8980 8981 fpst = fpstatus_ptr(FPST_FPCR); 8982 if (rmode >= 0) { 8983 TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); 8984 gen_fpst(tcg_res, tcg_op, fpst); 8985 gen_restore_rmode(tcg_rmode, fpst); 8986 } else { 8987 gen_fpst(tcg_res, tcg_op, fpst); 8988 } 8989 8990 done: 8991 write_fp_sreg(s, rd, tcg_res); 8992 } 8993 8994 /* Floating-point data-processing (1 source) - double precision */ 8995 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) 8996 { 8997 void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr); 8998 TCGv_i64 tcg_op, tcg_res; 8999 TCGv_ptr fpst; 9000 int rmode = -1; 9001 9002 switch (opcode) { 9003 case 0x0: /* FMOV */ 9004 gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0); 9005 return; 9006 } 9007 9008 tcg_op = read_fp_dreg(s, rn); 9009 tcg_res = tcg_temp_new_i64(); 9010 9011 switch (opcode) { 9012 case 0x1: /* FABS */ 9013 gen_vfp_absd(tcg_res, tcg_op); 9014 goto done; 9015 case 0x2: /* FNEG */ 9016 gen_vfp_negd(tcg_res, tcg_op); 9017 goto done; 9018 case 0x3: /* FSQRT */ 9019 gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env); 9020 goto done; 9021 case 0x8: /* FRINTN */ 9022 case 0x9: /* FRINTP */ 9023 case 0xa: /* FRINTM */ 9024 case 0xb: /* FRINTZ */ 9025 case 0xc: /* FRINTA */ 9026 rmode = opcode & 7; 9027 gen_fpst = gen_helper_rintd; 9028 break; 9029 case 0xe: /* FRINTX */ 9030 gen_fpst = gen_helper_rintd_exact; 9031 break; 9032 case 0xf: /* FRINTI */ 9033 gen_fpst = gen_helper_rintd; 9034 break; 9035 case 0x10: /* FRINT32Z */ 9036 rmode = FPROUNDING_ZERO; 9037 gen_fpst = gen_helper_frint32_d; 9038 break; 9039 case 0x11: /* FRINT32X */ 9040 gen_fpst = gen_helper_frint32_d; 9041 break; 9042 case 0x12: /* FRINT64Z */ 9043 rmode = FPROUNDING_ZERO; 9044 gen_fpst = gen_helper_frint64_d; 9045 break; 9046 case 0x13: /* FRINT64X */ 9047 gen_fpst = gen_helper_frint64_d; 9048 break; 9049 default: 9050 g_assert_not_reached(); 9051 } 9052 9053 fpst = fpstatus_ptr(FPST_FPCR); 9054 if (rmode >= 0) { 9055 TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); 9056 gen_fpst(tcg_res, tcg_op, fpst); 9057 gen_restore_rmode(tcg_rmode, fpst); 9058 } else { 9059 gen_fpst(tcg_res, tcg_op, fpst); 9060 } 9061 9062 done: 9063 write_fp_dreg(s, rd, tcg_res); 9064 } 9065 9066 static void handle_fp_fcvt(DisasContext *s, int opcode, 9067 int rd, int rn, int dtype, int ntype) 9068 { 9069 switch (ntype) { 9070 case 0x0: 9071 { 9072 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 9073 if (dtype == 1) { 9074 /* Single to double */ 9075 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 9076 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env); 9077 write_fp_dreg(s, rd, tcg_rd); 9078 } else { 9079 /* Single to half */ 9080 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 9081 TCGv_i32 ahp = get_ahp_flag(); 9082 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9083 9084 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp); 9085 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 9086 write_fp_sreg(s, rd, tcg_rd); 9087 } 9088 break; 9089 } 9090 case 0x1: 9091 { 9092 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 9093 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 9094 if (dtype == 
0) { 9095 /* Double to single */ 9096 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env); 9097 } else { 9098 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9099 TCGv_i32 ahp = get_ahp_flag(); 9100 /* Double to half */ 9101 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 9102 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 9103 } 9104 write_fp_sreg(s, rd, tcg_rd); 9105 break; 9106 } 9107 case 0x3: 9108 { 9109 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 9110 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR); 9111 TCGv_i32 tcg_ahp = get_ahp_flag(); 9112 tcg_gen_ext16u_i32(tcg_rn, tcg_rn); 9113 if (dtype == 0) { 9114 /* Half to single */ 9115 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 9116 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 9117 write_fp_sreg(s, rd, tcg_rd); 9118 } else { 9119 /* Half to double */ 9120 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 9121 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 9122 write_fp_dreg(s, rd, tcg_rd); 9123 } 9124 break; 9125 } 9126 default: 9127 g_assert_not_reached(); 9128 } 9129 } 9130 9131 /* Floating point data-processing (1 source) 9132 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0 9133 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 9134 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd | 9135 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 9136 */ 9137 static void disas_fp_1src(DisasContext *s, uint32_t insn) 9138 { 9139 int mos = extract32(insn, 29, 3); 9140 int type = extract32(insn, 22, 2); 9141 int opcode = extract32(insn, 15, 6); 9142 int rn = extract32(insn, 5, 5); 9143 int rd = extract32(insn, 0, 5); 9144 9145 if (mos) { 9146 goto do_unallocated; 9147 } 9148 9149 switch (opcode) { 9150 case 0x4: case 0x5: case 0x7: 9151 { 9152 /* FCVT between half, single and double precision */ 9153 int dtype = extract32(opcode, 0, 2); 9154 if (type == 2 || dtype == type) { 9155 goto do_unallocated; 9156 } 9157 if (!fp_access_check(s)) { 9158 return; 9159 } 9160 9161 handle_fp_fcvt(s, opcode, rd, rn, dtype, type); 9162 break; 9163 } 9164 9165 case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */ 9166 if (type > 1 || !dc_isar_feature(aa64_frint, s)) { 9167 goto do_unallocated; 9168 } 9169 /* fall through */ 9170 case 0x0 ... 0x3: 9171 case 0x8 ... 0xc: 9172 case 0xe ... 0xf: 9173 /* 32-to-32 and 64-to-64 ops */ 9174 switch (type) { 9175 case 0: 9176 if (!fp_access_check(s)) { 9177 return; 9178 } 9179 handle_fp_1src_single(s, opcode, rd, rn); 9180 break; 9181 case 1: 9182 if (!fp_access_check(s)) { 9183 return; 9184 } 9185 handle_fp_1src_double(s, opcode, rd, rn); 9186 break; 9187 case 3: 9188 if (!dc_isar_feature(aa64_fp16, s)) { 9189 goto do_unallocated; 9190 } 9191 9192 if (!fp_access_check(s)) { 9193 return; 9194 } 9195 handle_fp_1src_half(s, opcode, rd, rn); 9196 break; 9197 default: 9198 goto do_unallocated; 9199 } 9200 break; 9201 9202 case 0x6: 9203 switch (type) { 9204 case 1: /* BFCVT */ 9205 if (!dc_isar_feature(aa64_bf16, s)) { 9206 goto do_unallocated; 9207 } 9208 if (!fp_access_check(s)) { 9209 return; 9210 } 9211 handle_fp_1src_single(s, opcode, rd, rn); 9212 break; 9213 default: 9214 goto do_unallocated; 9215 } 9216 break; 9217 9218 default: 9219 do_unallocated: 9220 unallocated_encoding(s); 9221 break; 9222 } 9223 } 9224 9225 /* Handle floating point <=> fixed point conversions. 
Note that we can 9226 * also deal with fp <=> integer conversions as a special case (scale == 64) 9227 * OPTME: consider handling that special case specially or at least skipping 9228 * the call to scalbn in the helpers for zero shifts. 9229 */ 9230 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, 9231 bool itof, int rmode, int scale, int sf, int type) 9232 { 9233 bool is_signed = !(opcode & 1); 9234 TCGv_ptr tcg_fpstatus; 9235 TCGv_i32 tcg_shift, tcg_single; 9236 TCGv_i64 tcg_double; 9237 9238 tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR); 9239 9240 tcg_shift = tcg_constant_i32(64 - scale); 9241 9242 if (itof) { 9243 TCGv_i64 tcg_int = cpu_reg(s, rn); 9244 if (!sf) { 9245 TCGv_i64 tcg_extend = tcg_temp_new_i64(); 9246 9247 if (is_signed) { 9248 tcg_gen_ext32s_i64(tcg_extend, tcg_int); 9249 } else { 9250 tcg_gen_ext32u_i64(tcg_extend, tcg_int); 9251 } 9252 9253 tcg_int = tcg_extend; 9254 } 9255 9256 switch (type) { 9257 case 1: /* float64 */ 9258 tcg_double = tcg_temp_new_i64(); 9259 if (is_signed) { 9260 gen_helper_vfp_sqtod(tcg_double, tcg_int, 9261 tcg_shift, tcg_fpstatus); 9262 } else { 9263 gen_helper_vfp_uqtod(tcg_double, tcg_int, 9264 tcg_shift, tcg_fpstatus); 9265 } 9266 write_fp_dreg(s, rd, tcg_double); 9267 break; 9268 9269 case 0: /* float32 */ 9270 tcg_single = tcg_temp_new_i32(); 9271 if (is_signed) { 9272 gen_helper_vfp_sqtos(tcg_single, tcg_int, 9273 tcg_shift, tcg_fpstatus); 9274 } else { 9275 gen_helper_vfp_uqtos(tcg_single, tcg_int, 9276 tcg_shift, tcg_fpstatus); 9277 } 9278 write_fp_sreg(s, rd, tcg_single); 9279 break; 9280 9281 case 3: /* float16 */ 9282 tcg_single = tcg_temp_new_i32(); 9283 if (is_signed) { 9284 gen_helper_vfp_sqtoh(tcg_single, tcg_int, 9285 tcg_shift, tcg_fpstatus); 9286 } else { 9287 gen_helper_vfp_uqtoh(tcg_single, tcg_int, 9288 tcg_shift, tcg_fpstatus); 9289 } 9290 write_fp_sreg(s, rd, tcg_single); 9291 break; 9292 9293 default: 9294 g_assert_not_reached(); 9295 } 9296 } else { 9297 TCGv_i64 tcg_int = cpu_reg(s, rd); 9298 TCGv_i32 tcg_rmode; 9299 9300 if (extract32(opcode, 2, 1)) { 9301 /* There are too many rounding modes to all fit into rmode, 9302 * so FCVTA[US] is a special case. 
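* FCVTA[US] always rounds to nearest with ties away from zero, which is
* why rmode is overridden here.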
9303 */ 9304 rmode = FPROUNDING_TIEAWAY; 9305 } 9306 9307 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 9308 9309 switch (type) { 9310 case 1: /* float64 */ 9311 tcg_double = read_fp_dreg(s, rn); 9312 if (is_signed) { 9313 if (!sf) { 9314 gen_helper_vfp_tosld(tcg_int, tcg_double, 9315 tcg_shift, tcg_fpstatus); 9316 } else { 9317 gen_helper_vfp_tosqd(tcg_int, tcg_double, 9318 tcg_shift, tcg_fpstatus); 9319 } 9320 } else { 9321 if (!sf) { 9322 gen_helper_vfp_tould(tcg_int, tcg_double, 9323 tcg_shift, tcg_fpstatus); 9324 } else { 9325 gen_helper_vfp_touqd(tcg_int, tcg_double, 9326 tcg_shift, tcg_fpstatus); 9327 } 9328 } 9329 if (!sf) { 9330 tcg_gen_ext32u_i64(tcg_int, tcg_int); 9331 } 9332 break; 9333 9334 case 0: /* float32 */ 9335 tcg_single = read_fp_sreg(s, rn); 9336 if (sf) { 9337 if (is_signed) { 9338 gen_helper_vfp_tosqs(tcg_int, tcg_single, 9339 tcg_shift, tcg_fpstatus); 9340 } else { 9341 gen_helper_vfp_touqs(tcg_int, tcg_single, 9342 tcg_shift, tcg_fpstatus); 9343 } 9344 } else { 9345 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 9346 if (is_signed) { 9347 gen_helper_vfp_tosls(tcg_dest, tcg_single, 9348 tcg_shift, tcg_fpstatus); 9349 } else { 9350 gen_helper_vfp_touls(tcg_dest, tcg_single, 9351 tcg_shift, tcg_fpstatus); 9352 } 9353 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 9354 } 9355 break; 9356 9357 case 3: /* float16 */ 9358 tcg_single = read_fp_sreg(s, rn); 9359 if (sf) { 9360 if (is_signed) { 9361 gen_helper_vfp_tosqh(tcg_int, tcg_single, 9362 tcg_shift, tcg_fpstatus); 9363 } else { 9364 gen_helper_vfp_touqh(tcg_int, tcg_single, 9365 tcg_shift, tcg_fpstatus); 9366 } 9367 } else { 9368 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 9369 if (is_signed) { 9370 gen_helper_vfp_toslh(tcg_dest, tcg_single, 9371 tcg_shift, tcg_fpstatus); 9372 } else { 9373 gen_helper_vfp_toulh(tcg_dest, tcg_single, 9374 tcg_shift, tcg_fpstatus); 9375 } 9376 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 9377 } 9378 break; 9379 9380 default: 9381 g_assert_not_reached(); 9382 } 9383 9384 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 9385 } 9386 } 9387 9388 /* Floating point <-> fixed point conversions 9389 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 9390 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 9391 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd | 9392 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 9393 */ 9394 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn) 9395 { 9396 int rd = extract32(insn, 0, 5); 9397 int rn = extract32(insn, 5, 5); 9398 int scale = extract32(insn, 10, 6); 9399 int opcode = extract32(insn, 16, 3); 9400 int rmode = extract32(insn, 19, 2); 9401 int type = extract32(insn, 22, 2); 9402 bool sbit = extract32(insn, 29, 1); 9403 bool sf = extract32(insn, 31, 1); 9404 bool itof; 9405 9406 if (sbit || (!sf && scale < 32)) { 9407 unallocated_encoding(s); 9408 return; 9409 } 9410 9411 switch (type) { 9412 case 0: /* float32 */ 9413 case 1: /* float64 */ 9414 break; 9415 case 3: /* float16 */ 9416 if (dc_isar_feature(aa64_fp16, s)) { 9417 break; 9418 } 9419 /* fallthru */ 9420 default: 9421 unallocated_encoding(s); 9422 return; 9423 } 9424 9425 switch ((rmode << 3) | opcode) { 9426 case 0x2: /* SCVTF */ 9427 case 0x3: /* UCVTF */ 9428 itof = true; 9429 break; 9430 case 0x18: /* FCVTZS */ 9431 case 0x19: /* FCVTZU */ 9432 itof = false; 9433 break; 9434 default: 9435 unallocated_encoding(s); 9436 return; 9437 } 9438 9439 if (!fp_access_check(s)) { 9440 return; 9441 } 9442 9443 handle_fpfpcvt(s, rd, 
rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type); 9444 } 9445 9446 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) 9447 { 9448 /* FMOV: gpr to or from float, double, or top half of quad fp reg, 9449 * without conversion. 9450 */ 9451 9452 if (itof) { 9453 TCGv_i64 tcg_rn = cpu_reg(s, rn); 9454 TCGv_i64 tmp; 9455 9456 switch (type) { 9457 case 0: 9458 /* 32 bit */ 9459 tmp = tcg_temp_new_i64(); 9460 tcg_gen_ext32u_i64(tmp, tcg_rn); 9461 write_fp_dreg(s, rd, tmp); 9462 break; 9463 case 1: 9464 /* 64 bit */ 9465 write_fp_dreg(s, rd, tcg_rn); 9466 break; 9467 case 2: 9468 /* 64 bit to top half. */ 9469 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, rd)); 9470 clear_vec_high(s, true, rd); 9471 break; 9472 case 3: 9473 /* 16 bit */ 9474 tmp = tcg_temp_new_i64(); 9475 tcg_gen_ext16u_i64(tmp, tcg_rn); 9476 write_fp_dreg(s, rd, tmp); 9477 break; 9478 default: 9479 g_assert_not_reached(); 9480 } 9481 } else { 9482 TCGv_i64 tcg_rd = cpu_reg(s, rd); 9483 9484 switch (type) { 9485 case 0: 9486 /* 32 bit */ 9487 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_32)); 9488 break; 9489 case 1: 9490 /* 64 bit */ 9491 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_64)); 9492 break; 9493 case 2: 9494 /* 64 bits from top half */ 9495 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, rn)); 9496 break; 9497 case 3: 9498 /* 16 bit */ 9499 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_16)); 9500 break; 9501 default: 9502 g_assert_not_reached(); 9503 } 9504 } 9505 } 9506 9507 static void handle_fjcvtzs(DisasContext *s, int rd, int rn) 9508 { 9509 TCGv_i64 t = read_fp_dreg(s, rn); 9510 TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR); 9511 9512 gen_helper_fjcvtzs(t, t, fpstatus); 9513 9514 tcg_gen_ext32u_i64(cpu_reg(s, rd), t); 9515 tcg_gen_extrh_i64_i32(cpu_ZF, t); 9516 tcg_gen_movi_i32(cpu_CF, 0); 9517 tcg_gen_movi_i32(cpu_NF, 0); 9518 tcg_gen_movi_i32(cpu_VF, 0); 9519 } 9520 9521 /* Floating point <-> integer conversions 9522 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 9523 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 9524 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd | 9525 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 9526 */ 9527 static void disas_fp_int_conv(DisasContext *s, uint32_t insn) 9528 { 9529 int rd = extract32(insn, 0, 5); 9530 int rn = extract32(insn, 5, 5); 9531 int opcode = extract32(insn, 16, 3); 9532 int rmode = extract32(insn, 19, 2); 9533 int type = extract32(insn, 22, 2); 9534 bool sbit = extract32(insn, 29, 1); 9535 bool sf = extract32(insn, 31, 1); 9536 bool itof = false; 9537 9538 if (sbit) { 9539 goto do_unallocated; 9540 } 9541 9542 switch (opcode) { 9543 case 2: /* SCVTF */ 9544 case 3: /* UCVTF */ 9545 itof = true; 9546 /* fallthru */ 9547 case 4: /* FCVTAS */ 9548 case 5: /* FCVTAU */ 9549 if (rmode != 0) { 9550 goto do_unallocated; 9551 } 9552 /* fallthru */ 9553 case 0: /* FCVT[NPMZ]S */ 9554 case 1: /* FCVT[NPMZ]U */ 9555 switch (type) { 9556 case 0: /* float32 */ 9557 case 1: /* float64 */ 9558 break; 9559 case 3: /* float16 */ 9560 if (!dc_isar_feature(aa64_fp16, s)) { 9561 goto do_unallocated; 9562 } 9563 break; 9564 default: 9565 goto do_unallocated; 9566 } 9567 if (!fp_access_check(s)) { 9568 return; 9569 } 9570 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type); 9571 break; 9572 9573 default: 9574 switch (sf << 7 | type << 5 | rmode << 3 | opcode) { 9575 case 0b01100110: /* FMOV half <-> 32-bit int 
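(requires FEAT_FP16)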
*/ 9576 case 0b01100111: 9577 case 0b11100110: /* FMOV half <-> 64-bit int */ 9578 case 0b11100111: 9579 if (!dc_isar_feature(aa64_fp16, s)) { 9580 goto do_unallocated; 9581 } 9582 /* fallthru */ 9583 case 0b00000110: /* FMOV 32-bit */ 9584 case 0b00000111: 9585 case 0b10100110: /* FMOV 64-bit */ 9586 case 0b10100111: 9587 case 0b11001110: /* FMOV top half of 128-bit */ 9588 case 0b11001111: 9589 if (!fp_access_check(s)) { 9590 return; 9591 } 9592 itof = opcode & 1; 9593 handle_fmov(s, rd, rn, type, itof); 9594 break; 9595 9596 case 0b00111110: /* FJCVTZS */ 9597 if (!dc_isar_feature(aa64_jscvt, s)) { 9598 goto do_unallocated; 9599 } else if (fp_access_check(s)) { 9600 handle_fjcvtzs(s, rd, rn); 9601 } 9602 break; 9603 9604 default: 9605 do_unallocated: 9606 unallocated_encoding(s); 9607 return; 9608 } 9609 break; 9610 } 9611 } 9612 9613 /* FP-specific subcases of table C3-6 (SIMD and FP data processing) 9614 * 31 30 29 28 25 24 0 9615 * +---+---+---+---------+-----------------------------+ 9616 * | | 0 | | 1 1 1 1 | | 9617 * +---+---+---+---------+-----------------------------+ 9618 */ 9619 static void disas_data_proc_fp(DisasContext *s, uint32_t insn) 9620 { 9621 if (extract32(insn, 24, 1)) { 9622 unallocated_encoding(s); /* in decodetree */ 9623 } else if (extract32(insn, 21, 1) == 0) { 9624 /* Floating point to fixed point conversions */ 9625 disas_fp_fixed_conv(s, insn); 9626 } else { 9627 switch (extract32(insn, 10, 2)) { 9628 case 1: 9629 /* Floating point conditional compare */ 9630 disas_fp_ccomp(s, insn); 9631 break; 9632 case 2: 9633 /* Floating point data-processing (2 source) */ 9634 unallocated_encoding(s); /* in decodetree */ 9635 break; 9636 case 3: 9637 /* Floating point conditional select */ 9638 unallocated_encoding(s); /* in decodetree */ 9639 break; 9640 case 0: 9641 switch (ctz32(extract32(insn, 12, 4))) { 9642 case 0: /* [15:12] == xxx1 */ 9643 /* Floating point immediate */ 9644 unallocated_encoding(s); /* in decodetree */ 9645 break; 9646 case 1: /* [15:12] == xx10 */ 9647 /* Floating point compare */ 9648 disas_fp_compare(s, insn); 9649 break; 9650 case 2: /* [15:12] == x100 */ 9651 /* Floating point data-processing (1 source) */ 9652 disas_fp_1src(s, insn); 9653 break; 9654 case 3: /* [15:12] == 1000 */ 9655 unallocated_encoding(s); 9656 break; 9657 default: /* [15:12] == 0000 */ 9658 /* Floating point <-> integer conversions */ 9659 disas_fp_int_conv(s, insn); 9660 break; 9661 } 9662 break; 9663 } 9664 } 9665 } 9666 9667 /* Common vector code for handling integer to FP conversion */ 9668 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, 9669 int elements, int is_signed, 9670 int fracbits, int size) 9671 { 9672 TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 9673 TCGv_i32 tcg_shift = NULL; 9674 9675 MemOp mop = size | (is_signed ? 
MO_SIGN : 0); 9676 int pass; 9677 9678 if (fracbits || size == MO_64) { 9679 tcg_shift = tcg_constant_i32(fracbits); 9680 } 9681 9682 if (size == MO_64) { 9683 TCGv_i64 tcg_int64 = tcg_temp_new_i64(); 9684 TCGv_i64 tcg_double = tcg_temp_new_i64(); 9685 9686 for (pass = 0; pass < elements; pass++) { 9687 read_vec_element(s, tcg_int64, rn, pass, mop); 9688 9689 if (is_signed) { 9690 gen_helper_vfp_sqtod(tcg_double, tcg_int64, 9691 tcg_shift, tcg_fpst); 9692 } else { 9693 gen_helper_vfp_uqtod(tcg_double, tcg_int64, 9694 tcg_shift, tcg_fpst); 9695 } 9696 if (elements == 1) { 9697 write_fp_dreg(s, rd, tcg_double); 9698 } else { 9699 write_vec_element(s, tcg_double, rd, pass, MO_64); 9700 } 9701 } 9702 } else { 9703 TCGv_i32 tcg_int32 = tcg_temp_new_i32(); 9704 TCGv_i32 tcg_float = tcg_temp_new_i32(); 9705 9706 for (pass = 0; pass < elements; pass++) { 9707 read_vec_element_i32(s, tcg_int32, rn, pass, mop); 9708 9709 switch (size) { 9710 case MO_32: 9711 if (fracbits) { 9712 if (is_signed) { 9713 gen_helper_vfp_sltos(tcg_float, tcg_int32, 9714 tcg_shift, tcg_fpst); 9715 } else { 9716 gen_helper_vfp_ultos(tcg_float, tcg_int32, 9717 tcg_shift, tcg_fpst); 9718 } 9719 } else { 9720 if (is_signed) { 9721 gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst); 9722 } else { 9723 gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst); 9724 } 9725 } 9726 break; 9727 case MO_16: 9728 if (fracbits) { 9729 if (is_signed) { 9730 gen_helper_vfp_sltoh(tcg_float, tcg_int32, 9731 tcg_shift, tcg_fpst); 9732 } else { 9733 gen_helper_vfp_ultoh(tcg_float, tcg_int32, 9734 tcg_shift, tcg_fpst); 9735 } 9736 } else { 9737 if (is_signed) { 9738 gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst); 9739 } else { 9740 gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst); 9741 } 9742 } 9743 break; 9744 default: 9745 g_assert_not_reached(); 9746 } 9747 9748 if (elements == 1) { 9749 write_fp_sreg(s, rd, tcg_float); 9750 } else { 9751 write_vec_element_i32(s, tcg_float, rd, pass, size); 9752 } 9753 } 9754 } 9755 9756 clear_vec_high(s, elements << size == 16, rd); 9757 } 9758 9759 /* UCVTF/SCVTF - Integer to FP conversion */ 9760 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, 9761 bool is_q, bool is_u, 9762 int immh, int immb, int opcode, 9763 int rn, int rd) 9764 { 9765 int size, elements, fracbits; 9766 int immhb = immh << 3 | immb; 9767 9768 if (immh & 8) { 9769 size = MO_64; 9770 if (!is_scalar && !is_q) { 9771 unallocated_encoding(s); 9772 return; 9773 } 9774 } else if (immh & 4) { 9775 size = MO_32; 9776 } else if (immh & 2) { 9777 size = MO_16; 9778 if (!dc_isar_feature(aa64_fp16, s)) { 9779 unallocated_encoding(s); 9780 return; 9781 } 9782 } else { 9783 /* immh == 0 would be a failure of the decode logic */ 9784 g_assert(immh == 1); 9785 unallocated_encoding(s); 9786 return; 9787 } 9788 9789 if (is_scalar) { 9790 elements = 1; 9791 } else { 9792 elements = (8 << is_q) >> size; 9793 } 9794 fracbits = (16 << size) - immhb; 9795 9796 if (!fp_access_check(s)) { 9797 return; 9798 } 9799 9800 handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size); 9801 } 9802 9803 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */ 9804 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, 9805 bool is_q, bool is_u, 9806 int immh, int immb, int rn, int rd) 9807 { 9808 int immhb = immh << 3 | immb; 9809 int pass, size, fracbits; 9810 TCGv_ptr tcg_fpstatus; 9811 TCGv_i32 tcg_rmode, tcg_shift; 9812 9813 if (immh & 0x8) { 9814 size = MO_64; 9815 if (!is_scalar && !is_q) { 9816 
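/* Vector forms with 64-bit elements exist only as the Q (128-bit) variant. */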
unallocated_encoding(s); 9817 return; 9818 } 9819 } else if (immh & 0x4) { 9820 size = MO_32; 9821 } else if (immh & 0x2) { 9822 size = MO_16; 9823 if (!dc_isar_feature(aa64_fp16, s)) { 9824 unallocated_encoding(s); 9825 return; 9826 } 9827 } else { 9828 /* Should have split out AdvSIMD modified immediate earlier. */ 9829 assert(immh == 1); 9830 unallocated_encoding(s); 9831 return; 9832 } 9833 9834 if (!fp_access_check(s)) { 9835 return; 9836 } 9837 9838 assert(!(is_scalar && is_q)); 9839 9840 tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 9841 tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus); 9842 fracbits = (16 << size) - immhb; 9843 tcg_shift = tcg_constant_i32(fracbits); 9844 9845 if (size == MO_64) { 9846 int maxpass = is_scalar ? 1 : 2; 9847 9848 for (pass = 0; pass < maxpass; pass++) { 9849 TCGv_i64 tcg_op = tcg_temp_new_i64(); 9850 9851 read_vec_element(s, tcg_op, rn, pass, MO_64); 9852 if (is_u) { 9853 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 9854 } else { 9855 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 9856 } 9857 write_vec_element(s, tcg_op, rd, pass, MO_64); 9858 } 9859 clear_vec_high(s, is_q, rd); 9860 } else { 9861 void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 9862 int maxpass = is_scalar ? 1 : ((8 << is_q) >> size); 9863 9864 switch (size) { 9865 case MO_16: 9866 if (is_u) { 9867 fn = gen_helper_vfp_touhh; 9868 } else { 9869 fn = gen_helper_vfp_toshh; 9870 } 9871 break; 9872 case MO_32: 9873 if (is_u) { 9874 fn = gen_helper_vfp_touls; 9875 } else { 9876 fn = gen_helper_vfp_tosls; 9877 } 9878 break; 9879 default: 9880 g_assert_not_reached(); 9881 } 9882 9883 for (pass = 0; pass < maxpass; pass++) { 9884 TCGv_i32 tcg_op = tcg_temp_new_i32(); 9885 9886 read_vec_element_i32(s, tcg_op, rn, pass, size); 9887 fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 9888 if (is_scalar) { 9889 if (size == MO_16 && !is_u) { 9890 tcg_gen_ext16u_i32(tcg_op, tcg_op); 9891 } 9892 write_fp_sreg(s, rd, tcg_op); 9893 } else { 9894 write_vec_element_i32(s, tcg_op, rd, pass, size); 9895 } 9896 } 9897 if (!is_scalar) { 9898 clear_vec_high(s, is_q, rd); 9899 } 9900 } 9901 9902 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 9903 } 9904 9905 /* AdvSIMD scalar shift by immediate 9906 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 9907 * +-----+---+-------------+------+------+--------+---+------+------+ 9908 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | 9909 * +-----+---+-------------+------+------+--------+---+------+------+ 9910 * 9911 * This is the scalar version so it works on a fixed sized registers 9912 */ 9913 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) 9914 { 9915 int rd = extract32(insn, 0, 5); 9916 int rn = extract32(insn, 5, 5); 9917 int opcode = extract32(insn, 11, 5); 9918 int immb = extract32(insn, 16, 3); 9919 int immh = extract32(insn, 19, 4); 9920 bool is_u = extract32(insn, 29, 1); 9921 9922 if (immh == 0) { 9923 unallocated_encoding(s); 9924 return; 9925 } 9926 9927 switch (opcode) { 9928 case 0x1c: /* SCVTF, UCVTF */ 9929 handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb, 9930 opcode, rn, rd); 9931 break; 9932 case 0x1f: /* FCVTZS, FCVTZU */ 9933 handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd); 9934 break; 9935 default: 9936 case 0x00: /* SSHR / USHR */ 9937 case 0x02: /* SSRA / USRA */ 9938 case 0x04: /* SRSHR / URSHR */ 9939 case 0x06: /* SRSRA / URSRA */ 9940 case 0x08: /* SRI */ 9941 case 0x0a: /* SHL / SLI */ 9942 case 0x0c: /* 
SQSHLU */ 9943 case 0x0e: /* SQSHL, UQSHL */ 9944 case 0x10: /* SQSHRUN */ 9945 case 0x11: /* SQRSHRUN */ 9946 case 0x12: /* SQSHRN, UQSHRN */ 9947 case 0x13: /* SQRSHRN, UQRSHRN */ 9948 unallocated_encoding(s); 9949 break; 9950 } 9951 } 9952 9953 static void handle_2misc_64(DisasContext *s, int opcode, bool u, 9954 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, 9955 TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus) 9956 { 9957 /* Handle 64->64 opcodes which are shared between the scalar and 9958 * vector 2-reg-misc groups. We cover every integer opcode where size == 3 9959 * is valid in either group and also the double-precision fp ops. 9960 * The caller only need provide tcg_rmode and tcg_fpstatus if the op 9961 * requires them. 9962 */ 9963 TCGCond cond; 9964 9965 switch (opcode) { 9966 case 0x4: /* CLS, CLZ */ 9967 if (u) { 9968 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 9969 } else { 9970 tcg_gen_clrsb_i64(tcg_rd, tcg_rn); 9971 } 9972 break; 9973 case 0x5: /* NOT */ 9974 /* This opcode is shared with CNT and RBIT but we have earlier 9975 * enforced that size == 3 if and only if this is the NOT insn. 9976 */ 9977 tcg_gen_not_i64(tcg_rd, tcg_rn); 9978 break; 9979 case 0x7: /* SQABS, SQNEG */ 9980 if (u) { 9981 gen_helper_neon_qneg_s64(tcg_rd, tcg_env, tcg_rn); 9982 } else { 9983 gen_helper_neon_qabs_s64(tcg_rd, tcg_env, tcg_rn); 9984 } 9985 break; 9986 case 0xa: /* CMLT */ 9987 cond = TCG_COND_LT; 9988 do_cmop: 9989 /* 64 bit integer comparison against zero, result is test ? -1 : 0. */ 9990 tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0)); 9991 break; 9992 case 0x8: /* CMGT, CMGE */ 9993 cond = u ? TCG_COND_GE : TCG_COND_GT; 9994 goto do_cmop; 9995 case 0x9: /* CMEQ, CMLE */ 9996 cond = u ? TCG_COND_LE : TCG_COND_EQ; 9997 goto do_cmop; 9998 case 0xb: /* ABS, NEG */ 9999 if (u) { 10000 tcg_gen_neg_i64(tcg_rd, tcg_rn); 10001 } else { 10002 tcg_gen_abs_i64(tcg_rd, tcg_rn); 10003 } 10004 break; 10005 case 0x2f: /* FABS */ 10006 gen_vfp_absd(tcg_rd, tcg_rn); 10007 break; 10008 case 0x6f: /* FNEG */ 10009 gen_vfp_negd(tcg_rd, tcg_rn); 10010 break; 10011 case 0x7f: /* FSQRT */ 10012 gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env); 10013 break; 10014 case 0x1a: /* FCVTNS */ 10015 case 0x1b: /* FCVTMS */ 10016 case 0x1c: /* FCVTAS */ 10017 case 0x3a: /* FCVTPS */ 10018 case 0x3b: /* FCVTZS */ 10019 gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 10020 break; 10021 case 0x5a: /* FCVTNU */ 10022 case 0x5b: /* FCVTMU */ 10023 case 0x5c: /* FCVTAU */ 10024 case 0x7a: /* FCVTPU */ 10025 case 0x7b: /* FCVTZU */ 10026 gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 10027 break; 10028 case 0x18: /* FRINTN */ 10029 case 0x19: /* FRINTM */ 10030 case 0x38: /* FRINTP */ 10031 case 0x39: /* FRINTZ */ 10032 case 0x58: /* FRINTA */ 10033 case 0x79: /* FRINTI */ 10034 gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus); 10035 break; 10036 case 0x59: /* FRINTX */ 10037 gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus); 10038 break; 10039 case 0x1e: /* FRINT32Z */ 10040 case 0x5e: /* FRINT32X */ 10041 gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus); 10042 break; 10043 case 0x1f: /* FRINT64Z */ 10044 case 0x5f: /* FRINT64X */ 10045 gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus); 10046 break; 10047 default: 10048 g_assert_not_reached(); 10049 } 10050 } 10051 10052 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, 10053 bool is_scalar, bool is_u, bool is_q, 10054 int size, int rn, int rd) 10055 { 10056 bool is_double = (size == MO_64); 10057 TCGv_ptr fpst; 
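    /*
     * FP compare against zero.  There are no dedicated LT/LE helpers;
     * FCMLT and FCMLE are implemented by swapping the operands of the
     * corresponding GT/GE comparison, so "op < 0" becomes "0 > op".
     */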
10058 10059 if (!fp_access_check(s)) { 10060 return; 10061 } 10062 10063 fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 10064 10065 if (is_double) { 10066 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10067 TCGv_i64 tcg_zero = tcg_constant_i64(0); 10068 TCGv_i64 tcg_res = tcg_temp_new_i64(); 10069 NeonGenTwoDoubleOpFn *genfn; 10070 bool swap = false; 10071 int pass; 10072 10073 switch (opcode) { 10074 case 0x2e: /* FCMLT (zero) */ 10075 swap = true; 10076 /* fallthrough */ 10077 case 0x2c: /* FCMGT (zero) */ 10078 genfn = gen_helper_neon_cgt_f64; 10079 break; 10080 case 0x2d: /* FCMEQ (zero) */ 10081 genfn = gen_helper_neon_ceq_f64; 10082 break; 10083 case 0x6d: /* FCMLE (zero) */ 10084 swap = true; 10085 /* fall through */ 10086 case 0x6c: /* FCMGE (zero) */ 10087 genfn = gen_helper_neon_cge_f64; 10088 break; 10089 default: 10090 g_assert_not_reached(); 10091 } 10092 10093 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 10094 read_vec_element(s, tcg_op, rn, pass, MO_64); 10095 if (swap) { 10096 genfn(tcg_res, tcg_zero, tcg_op, fpst); 10097 } else { 10098 genfn(tcg_res, tcg_op, tcg_zero, fpst); 10099 } 10100 write_vec_element(s, tcg_res, rd, pass, MO_64); 10101 } 10102 10103 clear_vec_high(s, !is_scalar, rd); 10104 } else { 10105 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10106 TCGv_i32 tcg_zero = tcg_constant_i32(0); 10107 TCGv_i32 tcg_res = tcg_temp_new_i32(); 10108 NeonGenTwoSingleOpFn *genfn; 10109 bool swap = false; 10110 int pass, maxpasses; 10111 10112 if (size == MO_16) { 10113 switch (opcode) { 10114 case 0x2e: /* FCMLT (zero) */ 10115 swap = true; 10116 /* fall through */ 10117 case 0x2c: /* FCMGT (zero) */ 10118 genfn = gen_helper_advsimd_cgt_f16; 10119 break; 10120 case 0x2d: /* FCMEQ (zero) */ 10121 genfn = gen_helper_advsimd_ceq_f16; 10122 break; 10123 case 0x6d: /* FCMLE (zero) */ 10124 swap = true; 10125 /* fall through */ 10126 case 0x6c: /* FCMGE (zero) */ 10127 genfn = gen_helper_advsimd_cge_f16; 10128 break; 10129 default: 10130 g_assert_not_reached(); 10131 } 10132 } else { 10133 switch (opcode) { 10134 case 0x2e: /* FCMLT (zero) */ 10135 swap = true; 10136 /* fall through */ 10137 case 0x2c: /* FCMGT (zero) */ 10138 genfn = gen_helper_neon_cgt_f32; 10139 break; 10140 case 0x2d: /* FCMEQ (zero) */ 10141 genfn = gen_helper_neon_ceq_f32; 10142 break; 10143 case 0x6d: /* FCMLE (zero) */ 10144 swap = true; 10145 /* fall through */ 10146 case 0x6c: /* FCMGE (zero) */ 10147 genfn = gen_helper_neon_cge_f32; 10148 break; 10149 default: 10150 g_assert_not_reached(); 10151 } 10152 } 10153 10154 if (is_scalar) { 10155 maxpasses = 1; 10156 } else { 10157 int vector_size = 8 << is_q; 10158 maxpasses = vector_size >> size; 10159 } 10160 10161 for (pass = 0; pass < maxpasses; pass++) { 10162 read_vec_element_i32(s, tcg_op, rn, pass, size); 10163 if (swap) { 10164 genfn(tcg_res, tcg_zero, tcg_op, fpst); 10165 } else { 10166 genfn(tcg_res, tcg_op, tcg_zero, fpst); 10167 } 10168 if (is_scalar) { 10169 write_fp_sreg(s, rd, tcg_res); 10170 } else { 10171 write_vec_element_i32(s, tcg_res, rd, pass, size); 10172 } 10173 } 10174 10175 if (!is_scalar) { 10176 clear_vec_high(s, is_q, rd); 10177 } 10178 } 10179 } 10180 10181 static void handle_2misc_reciprocal(DisasContext *s, int opcode, 10182 bool is_scalar, bool is_u, bool is_q, 10183 int size, int rn, int rd) 10184 { 10185 bool is_double = (size == 3); 10186 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10187 10188 if (is_double) { 10189 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10190 TCGv_i64 tcg_res = tcg_temp_new_i64(); 10191 int pass; 
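        /* Double precision: one pass for the scalar form, two for the vector form. */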
10192 10193 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 10194 read_vec_element(s, tcg_op, rn, pass, MO_64); 10195 switch (opcode) { 10196 case 0x3d: /* FRECPE */ 10197 gen_helper_recpe_f64(tcg_res, tcg_op, fpst); 10198 break; 10199 case 0x3f: /* FRECPX */ 10200 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst); 10201 break; 10202 case 0x7d: /* FRSQRTE */ 10203 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst); 10204 break; 10205 default: 10206 g_assert_not_reached(); 10207 } 10208 write_vec_element(s, tcg_res, rd, pass, MO_64); 10209 } 10210 clear_vec_high(s, !is_scalar, rd); 10211 } else { 10212 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10213 TCGv_i32 tcg_res = tcg_temp_new_i32(); 10214 int pass, maxpasses; 10215 10216 if (is_scalar) { 10217 maxpasses = 1; 10218 } else { 10219 maxpasses = is_q ? 4 : 2; 10220 } 10221 10222 for (pass = 0; pass < maxpasses; pass++) { 10223 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 10224 10225 switch (opcode) { 10226 case 0x3c: /* URECPE */ 10227 gen_helper_recpe_u32(tcg_res, tcg_op); 10228 break; 10229 case 0x3d: /* FRECPE */ 10230 gen_helper_recpe_f32(tcg_res, tcg_op, fpst); 10231 break; 10232 case 0x3f: /* FRECPX */ 10233 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst); 10234 break; 10235 case 0x7d: /* FRSQRTE */ 10236 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst); 10237 break; 10238 default: 10239 g_assert_not_reached(); 10240 } 10241 10242 if (is_scalar) { 10243 write_fp_sreg(s, rd, tcg_res); 10244 } else { 10245 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 10246 } 10247 } 10248 if (!is_scalar) { 10249 clear_vec_high(s, is_q, rd); 10250 } 10251 } 10252 } 10253 10254 static void handle_2misc_narrow(DisasContext *s, bool scalar, 10255 int opcode, bool u, bool is_q, 10256 int size, int rn, int rd) 10257 { 10258 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element 10259 * in the source becomes a size element in the destination). 10260 */ 10261 int pass; 10262 TCGv_i64 tcg_res[2]; 10263 int destelt = is_q ? 2 : 0; 10264 int passes = scalar ? 
1 : 2; 10265 10266 if (scalar) { 10267 tcg_res[1] = tcg_constant_i64(0); 10268 } 10269 10270 for (pass = 0; pass < passes; pass++) { 10271 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10272 NeonGenOne64OpFn *genfn = NULL; 10273 NeonGenOne64OpEnvFn *genenvfn = NULL; 10274 10275 if (scalar) { 10276 read_vec_element(s, tcg_op, rn, pass, size + 1); 10277 } else { 10278 read_vec_element(s, tcg_op, rn, pass, MO_64); 10279 } 10280 tcg_res[pass] = tcg_temp_new_i64(); 10281 10282 switch (opcode) { 10283 case 0x12: /* XTN, SQXTUN */ 10284 { 10285 static NeonGenOne64OpFn * const xtnfns[3] = { 10286 gen_helper_neon_narrow_u8, 10287 gen_helper_neon_narrow_u16, 10288 tcg_gen_ext32u_i64, 10289 }; 10290 static NeonGenOne64OpEnvFn * const sqxtunfns[3] = { 10291 gen_helper_neon_unarrow_sat8, 10292 gen_helper_neon_unarrow_sat16, 10293 gen_helper_neon_unarrow_sat32, 10294 }; 10295 if (u) { 10296 genenvfn = sqxtunfns[size]; 10297 } else { 10298 genfn = xtnfns[size]; 10299 } 10300 break; 10301 } 10302 case 0x14: /* SQXTN, UQXTN */ 10303 { 10304 static NeonGenOne64OpEnvFn * const fns[3][2] = { 10305 { gen_helper_neon_narrow_sat_s8, 10306 gen_helper_neon_narrow_sat_u8 }, 10307 { gen_helper_neon_narrow_sat_s16, 10308 gen_helper_neon_narrow_sat_u16 }, 10309 { gen_helper_neon_narrow_sat_s32, 10310 gen_helper_neon_narrow_sat_u32 }, 10311 }; 10312 genenvfn = fns[size][u]; 10313 break; 10314 } 10315 case 0x16: /* FCVTN, FCVTN2 */ 10316 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ 10317 if (size == 2) { 10318 TCGv_i32 tmp = tcg_temp_new_i32(); 10319 gen_helper_vfp_fcvtsd(tmp, tcg_op, tcg_env); 10320 tcg_gen_extu_i32_i64(tcg_res[pass], tmp); 10321 } else { 10322 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 10323 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 10324 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10325 TCGv_i32 ahp = get_ahp_flag(); 10326 10327 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op); 10328 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 10329 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 10330 tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16); 10331 tcg_gen_extu_i32_i64(tcg_res[pass], tcg_lo); 10332 } 10333 break; 10334 case 0x36: /* BFCVTN, BFCVTN2 */ 10335 { 10336 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10337 TCGv_i32 tmp = tcg_temp_new_i32(); 10338 gen_helper_bfcvt_pair(tmp, tcg_op, fpst); 10339 tcg_gen_extu_i32_i64(tcg_res[pass], tmp); 10340 } 10341 break; 10342 case 0x56: /* FCVTXN, FCVTXN2 */ 10343 { 10344 /* 10345 * 64 bit to 32 bit float conversion 10346 * with von Neumann rounding (round to odd) 10347 */ 10348 TCGv_i32 tmp = tcg_temp_new_i32(); 10349 assert(size == 2); 10350 gen_helper_fcvtx_f64_to_f32(tmp, tcg_op, tcg_env); 10351 tcg_gen_extu_i32_i64(tcg_res[pass], tmp); 10352 } 10353 break; 10354 default: 10355 g_assert_not_reached(); 10356 } 10357 10358 if (genfn) { 10359 genfn(tcg_res[pass], tcg_op); 10360 } else if (genenvfn) { 10361 genenvfn(tcg_res[pass], tcg_env, tcg_op); 10362 } 10363 } 10364 10365 for (pass = 0; pass < 2; pass++) { 10366 write_vec_element(s, tcg_res[pass], rd, destelt + pass, MO_32); 10367 } 10368 clear_vec_high(s, is_q, rd); 10369 } 10370 10371 /* AdvSIMD scalar two reg misc 10372 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 10373 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 10374 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 10375 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 10376 */ 10377 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) 
10378 { 10379 int rd = extract32(insn, 0, 5); 10380 int rn = extract32(insn, 5, 5); 10381 int opcode = extract32(insn, 12, 5); 10382 int size = extract32(insn, 22, 2); 10383 bool u = extract32(insn, 29, 1); 10384 bool is_fcvt = false; 10385 int rmode; 10386 TCGv_i32 tcg_rmode; 10387 TCGv_ptr tcg_fpstatus; 10388 10389 switch (opcode) { 10390 case 0x7: /* SQABS / SQNEG */ 10391 break; 10392 case 0xa: /* CMLT */ 10393 if (u) { 10394 unallocated_encoding(s); 10395 return; 10396 } 10397 /* fall through */ 10398 case 0x8: /* CMGT, CMGE */ 10399 case 0x9: /* CMEQ, CMLE */ 10400 case 0xb: /* ABS, NEG */ 10401 if (size != 3) { 10402 unallocated_encoding(s); 10403 return; 10404 } 10405 break; 10406 case 0x12: /* SQXTUN */ 10407 if (!u) { 10408 unallocated_encoding(s); 10409 return; 10410 } 10411 /* fall through */ 10412 case 0x14: /* SQXTN, UQXTN */ 10413 if (size == 3) { 10414 unallocated_encoding(s); 10415 return; 10416 } 10417 if (!fp_access_check(s)) { 10418 return; 10419 } 10420 handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd); 10421 return; 10422 case 0xc ... 0xf: 10423 case 0x16 ... 0x1d: 10424 case 0x1f: 10425 /* Floating point: U, size[1] and opcode indicate operation; 10426 * size[0] indicates single or double precision. 10427 */ 10428 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 10429 size = extract32(size, 0, 1) ? 3 : 2; 10430 switch (opcode) { 10431 case 0x2c: /* FCMGT (zero) */ 10432 case 0x2d: /* FCMEQ (zero) */ 10433 case 0x2e: /* FCMLT (zero) */ 10434 case 0x6c: /* FCMGE (zero) */ 10435 case 0x6d: /* FCMLE (zero) */ 10436 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd); 10437 return; 10438 case 0x1d: /* SCVTF */ 10439 case 0x5d: /* UCVTF */ 10440 { 10441 bool is_signed = (opcode == 0x1d); 10442 if (!fp_access_check(s)) { 10443 return; 10444 } 10445 handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size); 10446 return; 10447 } 10448 case 0x3d: /* FRECPE */ 10449 case 0x3f: /* FRECPX */ 10450 case 0x7d: /* FRSQRTE */ 10451 if (!fp_access_check(s)) { 10452 return; 10453 } 10454 handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd); 10455 return; 10456 case 0x1a: /* FCVTNS */ 10457 case 0x1b: /* FCVTMS */ 10458 case 0x3a: /* FCVTPS */ 10459 case 0x3b: /* FCVTZS */ 10460 case 0x5a: /* FCVTNU */ 10461 case 0x5b: /* FCVTMU */ 10462 case 0x7a: /* FCVTPU */ 10463 case 0x7b: /* FCVTZU */ 10464 is_fcvt = true; 10465 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 10466 break; 10467 case 0x1c: /* FCVTAS */ 10468 case 0x5c: /* FCVTAU */ 10469 /* TIEAWAY doesn't fit in the usual rounding mode encoding */ 10470 is_fcvt = true; 10471 rmode = FPROUNDING_TIEAWAY; 10472 break; 10473 case 0x56: /* FCVTXN, FCVTXN2 */ 10474 if (size == 2) { 10475 unallocated_encoding(s); 10476 return; 10477 } 10478 if (!fp_access_check(s)) { 10479 return; 10480 } 10481 handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd); 10482 return; 10483 default: 10484 unallocated_encoding(s); 10485 return; 10486 } 10487 break; 10488 default: 10489 case 0x3: /* USQADD / SUQADD */ 10490 unallocated_encoding(s); 10491 return; 10492 } 10493 10494 if (!fp_access_check(s)) { 10495 return; 10496 } 10497 10498 if (is_fcvt) { 10499 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 10500 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 10501 } else { 10502 tcg_fpstatus = NULL; 10503 tcg_rmode = NULL; 10504 } 10505 10506 if (size == 3) { 10507 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 10508 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 10509 10510 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, 
tcg_rmode, tcg_fpstatus); 10511 write_fp_dreg(s, rd, tcg_rd); 10512 } else { 10513 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 10514 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 10515 10516 read_vec_element_i32(s, tcg_rn, rn, 0, size); 10517 10518 switch (opcode) { 10519 case 0x7: /* SQABS, SQNEG */ 10520 { 10521 NeonGenOneOpEnvFn *genfn; 10522 static NeonGenOneOpEnvFn * const fns[3][2] = { 10523 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 10524 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 10525 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, 10526 }; 10527 genfn = fns[size][u]; 10528 genfn(tcg_rd, tcg_env, tcg_rn); 10529 break; 10530 } 10531 case 0x1a: /* FCVTNS */ 10532 case 0x1b: /* FCVTMS */ 10533 case 0x1c: /* FCVTAS */ 10534 case 0x3a: /* FCVTPS */ 10535 case 0x3b: /* FCVTZS */ 10536 gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0), 10537 tcg_fpstatus); 10538 break; 10539 case 0x5a: /* FCVTNU */ 10540 case 0x5b: /* FCVTMU */ 10541 case 0x5c: /* FCVTAU */ 10542 case 0x7a: /* FCVTPU */ 10543 case 0x7b: /* FCVTZU */ 10544 gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0), 10545 tcg_fpstatus); 10546 break; 10547 default: 10548 g_assert_not_reached(); 10549 } 10550 10551 write_fp_sreg(s, rd, tcg_rd); 10552 } 10553 10554 if (is_fcvt) { 10555 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 10556 } 10557 } 10558 10559 /* AdvSIMD shift by immediate 10560 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 10561 * +---+---+---+-------------+------+------+--------+---+------+------+ 10562 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | 10563 * +---+---+---+-------------+------+------+--------+---+------+------+ 10564 */ 10565 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) 10566 { 10567 int rd = extract32(insn, 0, 5); 10568 int rn = extract32(insn, 5, 5); 10569 int opcode = extract32(insn, 11, 5); 10570 int immb = extract32(insn, 16, 3); 10571 int immh = extract32(insn, 19, 4); 10572 bool is_u = extract32(insn, 29, 1); 10573 bool is_q = extract32(insn, 30, 1); 10574 10575 if (immh == 0) { 10576 unallocated_encoding(s); 10577 return; 10578 } 10579 10580 switch (opcode) { 10581 case 0x1c: /* SCVTF / UCVTF */ 10582 handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, 10583 opcode, rn, rd); 10584 break; 10585 case 0x1f: /* FCVTZS/ FCVTZU */ 10586 handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd); 10587 return; 10588 default: 10589 case 0x00: /* SSHR / USHR */ 10590 case 0x02: /* SSRA / USRA (accumulate) */ 10591 case 0x04: /* SRSHR / URSHR (rounding) */ 10592 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 10593 case 0x08: /* SRI */ 10594 case 0x0a: /* SHL / SLI */ 10595 case 0x0c: /* SQSHLU */ 10596 case 0x0e: /* SQSHL, UQSHL */ 10597 case 0x10: /* SHRN / SQSHRUN */ 10598 case 0x11: /* RSHRN / SQRSHRUN */ 10599 case 0x12: /* SQSHRN / UQSHRN */ 10600 case 0x13: /* SQRSHRN / UQRSHRN */ 10601 case 0x14: /* SSHLL / USHLL */ 10602 unallocated_encoding(s); 10603 return; 10604 } 10605 } 10606 10607 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, 10608 int size, int rn, int rd) 10609 { 10610 /* Handle 2-reg-misc ops which are widening (so each size element 10611 * in the source becomes a 2*size element in the destination. 10612 * The only instruction like this is FCVTL. 10613 */ 10614 int pass; 10615 10616 if (size == 3) { 10617 /* 32 -> 64 bit fp conversion */ 10618 TCGv_i64 tcg_res[2]; 10619 int srcelt = is_q ? 
2 : 0; 10620 10621 for (pass = 0; pass < 2; pass++) { 10622 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10623 tcg_res[pass] = tcg_temp_new_i64(); 10624 10625 read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32); 10626 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env); 10627 } 10628 for (pass = 0; pass < 2; pass++) { 10629 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 10630 } 10631 } else { 10632 /* 16 -> 32 bit fp conversion */ 10633 int srcelt = is_q ? 4 : 0; 10634 TCGv_i32 tcg_res[4]; 10635 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10636 TCGv_i32 ahp = get_ahp_flag(); 10637 10638 for (pass = 0; pass < 4; pass++) { 10639 tcg_res[pass] = tcg_temp_new_i32(); 10640 10641 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); 10642 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], 10643 fpst, ahp); 10644 } 10645 for (pass = 0; pass < 4; pass++) { 10646 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); 10647 } 10648 } 10649 } 10650 10651 static void handle_rev(DisasContext *s, int opcode, bool u, 10652 bool is_q, int size, int rn, int rd) 10653 { 10654 int op = (opcode << 1) | u; 10655 int opsz = op + size; 10656 int grp_size = 3 - opsz; 10657 int dsize = is_q ? 128 : 64; 10658 int i; 10659 10660 if (opsz >= 3) { 10661 unallocated_encoding(s); 10662 return; 10663 } 10664 10665 if (!fp_access_check(s)) { 10666 return; 10667 } 10668 10669 if (size == 0) { 10670 /* Special case bytes, use bswap op on each group of elements */ 10671 int groups = dsize / (8 << grp_size); 10672 10673 for (i = 0; i < groups; i++) { 10674 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 10675 10676 read_vec_element(s, tcg_tmp, rn, i, grp_size); 10677 switch (grp_size) { 10678 case MO_16: 10679 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 10680 break; 10681 case MO_32: 10682 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 10683 break; 10684 case MO_64: 10685 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp); 10686 break; 10687 default: 10688 g_assert_not_reached(); 10689 } 10690 write_vec_element(s, tcg_tmp, rd, i, grp_size); 10691 } 10692 clear_vec_high(s, is_q, rd); 10693 } else { 10694 int revmask = (1 << grp_size) - 1; 10695 int esize = 8 << size; 10696 int elements = dsize / esize; 10697 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 10698 TCGv_i64 tcg_rd[2]; 10699 10700 for (i = 0; i < 2; i++) { 10701 tcg_rd[i] = tcg_temp_new_i64(); 10702 tcg_gen_movi_i64(tcg_rd[i], 0); 10703 } 10704 10705 for (i = 0; i < elements; i++) { 10706 int e_rev = (i & 0xf) ^ revmask; 10707 int w = (e_rev * esize) / 64; 10708 int o = (e_rev * esize) % 64; 10709 10710 read_vec_element(s, tcg_rn, rn, i, size); 10711 tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize); 10712 } 10713 10714 for (i = 0; i < 2; i++) { 10715 write_vec_element(s, tcg_rd[i], rd, i, MO_64); 10716 } 10717 clear_vec_high(s, true, rd); 10718 } 10719 } 10720 10721 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, 10722 bool is_q, int size, int rn, int rd) 10723 { 10724 /* Implement the pairwise operations from 2-misc: 10725 * SADDLP, UADDLP, SADALP, UADALP. 10726 * These all add pairs of elements in the input to produce a 10727 * double-width result element in the output (possibly accumulating). 10728 */ 10729 bool accum = (opcode == 0x6); 10730 int maxpass = is_q ? 2 : 1; 10731 int pass; 10732 TCGv_i64 tcg_res[2]; 10733 10734 if (size == 2) { 10735 /* 32 + 32 -> 64 op */ 10736 MemOp memop = size + (u ? 
0 : MO_SIGN); 10737 10738 for (pass = 0; pass < maxpass; pass++) { 10739 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10740 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 10741 10742 tcg_res[pass] = tcg_temp_new_i64(); 10743 10744 read_vec_element(s, tcg_op1, rn, pass * 2, memop); 10745 read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop); 10746 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); 10747 if (accum) { 10748 read_vec_element(s, tcg_op1, rd, pass, MO_64); 10749 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1); 10750 } 10751 } 10752 } else { 10753 for (pass = 0; pass < maxpass; pass++) { 10754 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10755 NeonGenOne64OpFn *genfn; 10756 static NeonGenOne64OpFn * const fns[2][2] = { 10757 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 }, 10758 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 }, 10759 }; 10760 10761 genfn = fns[size][u]; 10762 10763 tcg_res[pass] = tcg_temp_new_i64(); 10764 10765 read_vec_element(s, tcg_op, rn, pass, MO_64); 10766 genfn(tcg_res[pass], tcg_op); 10767 10768 if (accum) { 10769 read_vec_element(s, tcg_op, rd, pass, MO_64); 10770 if (size == 0) { 10771 gen_helper_neon_addl_u16(tcg_res[pass], 10772 tcg_res[pass], tcg_op); 10773 } else { 10774 gen_helper_neon_addl_u32(tcg_res[pass], 10775 tcg_res[pass], tcg_op); 10776 } 10777 } 10778 } 10779 } 10780 if (!is_q) { 10781 tcg_res[1] = tcg_constant_i64(0); 10782 } 10783 for (pass = 0; pass < 2; pass++) { 10784 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 10785 } 10786 } 10787 10788 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) 10789 { 10790 /* Implement SHLL and SHLL2 */ 10791 int pass; 10792 int part = is_q ? 2 : 0; 10793 TCGv_i64 tcg_res[2]; 10794 10795 for (pass = 0; pass < 2; pass++) { 10796 static NeonGenWidenFn * const widenfns[3] = { 10797 gen_helper_neon_widen_u8, 10798 gen_helper_neon_widen_u16, 10799 tcg_gen_extu_i32_i64, 10800 }; 10801 NeonGenWidenFn *widenfn = widenfns[size]; 10802 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10803 10804 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32); 10805 tcg_res[pass] = tcg_temp_new_i64(); 10806 widenfn(tcg_res[pass], tcg_op); 10807 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size); 10808 } 10809 10810 for (pass = 0; pass < 2; pass++) { 10811 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 10812 } 10813 } 10814 10815 /* AdvSIMD two reg misc 10816 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 10817 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 10818 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 10819 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 10820 */ 10821 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) 10822 { 10823 int size = extract32(insn, 22, 2); 10824 int opcode = extract32(insn, 12, 5); 10825 bool u = extract32(insn, 29, 1); 10826 bool is_q = extract32(insn, 30, 1); 10827 int rn = extract32(insn, 5, 5); 10828 int rd = extract32(insn, 0, 5); 10829 bool need_fpstatus = false; 10830 int rmode = -1; 10831 TCGv_i32 tcg_rmode; 10832 TCGv_ptr tcg_fpstatus; 10833 10834 switch (opcode) { 10835 case 0x0: /* REV64, REV32 */ 10836 case 0x1: /* REV16 */ 10837 handle_rev(s, opcode, u, is_q, size, rn, rd); 10838 return; 10839 case 0x5: /* CNT, NOT, RBIT */ 10840 if (u && size == 0) { 10841 /* NOT */ 10842 break; 10843 } else if (u && size == 1) { 10844 /* RBIT */ 10845 break; 10846 } else if (!u && size == 0) { 10847 /* CNT */ 10848 break; 10849 } 10850 
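        /* Any other u/size combination for this opcode group is unallocated. */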
unallocated_encoding(s); 10851 return; 10852 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */ 10853 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */ 10854 if (size == 3) { 10855 unallocated_encoding(s); 10856 return; 10857 } 10858 if (!fp_access_check(s)) { 10859 return; 10860 } 10861 10862 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd); 10863 return; 10864 case 0x4: /* CLS, CLZ */ 10865 if (size == 3) { 10866 unallocated_encoding(s); 10867 return; 10868 } 10869 break; 10870 case 0x2: /* SADDLP, UADDLP */ 10871 case 0x6: /* SADALP, UADALP */ 10872 if (size == 3) { 10873 unallocated_encoding(s); 10874 return; 10875 } 10876 if (!fp_access_check(s)) { 10877 return; 10878 } 10879 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd); 10880 return; 10881 case 0x13: /* SHLL, SHLL2 */ 10882 if (u == 0 || size == 3) { 10883 unallocated_encoding(s); 10884 return; 10885 } 10886 if (!fp_access_check(s)) { 10887 return; 10888 } 10889 handle_shll(s, is_q, size, rn, rd); 10890 return; 10891 case 0xa: /* CMLT */ 10892 if (u == 1) { 10893 unallocated_encoding(s); 10894 return; 10895 } 10896 /* fall through */ 10897 case 0x8: /* CMGT, CMGE */ 10898 case 0x9: /* CMEQ, CMLE */ 10899 case 0xb: /* ABS, NEG */ 10900 if (size == 3 && !is_q) { 10901 unallocated_encoding(s); 10902 return; 10903 } 10904 break; 10905 case 0x7: /* SQABS, SQNEG */ 10906 if (size == 3 && !is_q) { 10907 unallocated_encoding(s); 10908 return; 10909 } 10910 break; 10911 case 0xc ... 0xf: 10912 case 0x16 ... 0x1f: 10913 { 10914 /* Floating point: U, size[1] and opcode indicate operation; 10915 * size[0] indicates single or double precision. 10916 */ 10917 int is_double = extract32(size, 0, 1); 10918 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 10919 size = is_double ? 3 : 2; 10920 switch (opcode) { 10921 case 0x2f: /* FABS */ 10922 case 0x6f: /* FNEG */ 10923 if (size == 3 && !is_q) { 10924 unallocated_encoding(s); 10925 return; 10926 } 10927 break; 10928 case 0x1d: /* SCVTF */ 10929 case 0x5d: /* UCVTF */ 10930 { 10931 bool is_signed = (opcode == 0x1d) ? true : false; 10932 int elements = is_double ? 2 : is_q ? 
4 : 2; 10933 if (is_double && !is_q) { 10934 unallocated_encoding(s); 10935 return; 10936 } 10937 if (!fp_access_check(s)) { 10938 return; 10939 } 10940 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size); 10941 return; 10942 } 10943 case 0x2c: /* FCMGT (zero) */ 10944 case 0x2d: /* FCMEQ (zero) */ 10945 case 0x2e: /* FCMLT (zero) */ 10946 case 0x6c: /* FCMGE (zero) */ 10947 case 0x6d: /* FCMLE (zero) */ 10948 if (size == 3 && !is_q) { 10949 unallocated_encoding(s); 10950 return; 10951 } 10952 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd); 10953 return; 10954 case 0x7f: /* FSQRT */ 10955 if (size == 3 && !is_q) { 10956 unallocated_encoding(s); 10957 return; 10958 } 10959 break; 10960 case 0x1a: /* FCVTNS */ 10961 case 0x1b: /* FCVTMS */ 10962 case 0x3a: /* FCVTPS */ 10963 case 0x3b: /* FCVTZS */ 10964 case 0x5a: /* FCVTNU */ 10965 case 0x5b: /* FCVTMU */ 10966 case 0x7a: /* FCVTPU */ 10967 case 0x7b: /* FCVTZU */ 10968 need_fpstatus = true; 10969 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 10970 if (size == 3 && !is_q) { 10971 unallocated_encoding(s); 10972 return; 10973 } 10974 break; 10975 case 0x5c: /* FCVTAU */ 10976 case 0x1c: /* FCVTAS */ 10977 need_fpstatus = true; 10978 rmode = FPROUNDING_TIEAWAY; 10979 if (size == 3 && !is_q) { 10980 unallocated_encoding(s); 10981 return; 10982 } 10983 break; 10984 case 0x3c: /* URECPE */ 10985 if (size == 3) { 10986 unallocated_encoding(s); 10987 return; 10988 } 10989 /* fall through */ 10990 case 0x3d: /* FRECPE */ 10991 case 0x7d: /* FRSQRTE */ 10992 if (size == 3 && !is_q) { 10993 unallocated_encoding(s); 10994 return; 10995 } 10996 if (!fp_access_check(s)) { 10997 return; 10998 } 10999 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd); 11000 return; 11001 case 0x56: /* FCVTXN, FCVTXN2 */ 11002 if (size == 2) { 11003 unallocated_encoding(s); 11004 return; 11005 } 11006 /* fall through */ 11007 case 0x16: /* FCVTN, FCVTN2 */ 11008 /* handle_2misc_narrow does a 2*size -> size operation, but these 11009 * instructions encode the source size rather than dest size. 
11010 */ 11011 if (!fp_access_check(s)) { 11012 return; 11013 } 11014 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 11015 return; 11016 case 0x36: /* BFCVTN, BFCVTN2 */ 11017 if (!dc_isar_feature(aa64_bf16, s) || size != 2) { 11018 unallocated_encoding(s); 11019 return; 11020 } 11021 if (!fp_access_check(s)) { 11022 return; 11023 } 11024 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 11025 return; 11026 case 0x17: /* FCVTL, FCVTL2 */ 11027 if (!fp_access_check(s)) { 11028 return; 11029 } 11030 handle_2misc_widening(s, opcode, is_q, size, rn, rd); 11031 return; 11032 case 0x18: /* FRINTN */ 11033 case 0x19: /* FRINTM */ 11034 case 0x38: /* FRINTP */ 11035 case 0x39: /* FRINTZ */ 11036 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 11037 /* fall through */ 11038 case 0x59: /* FRINTX */ 11039 case 0x79: /* FRINTI */ 11040 need_fpstatus = true; 11041 if (size == 3 && !is_q) { 11042 unallocated_encoding(s); 11043 return; 11044 } 11045 break; 11046 case 0x58: /* FRINTA */ 11047 rmode = FPROUNDING_TIEAWAY; 11048 need_fpstatus = true; 11049 if (size == 3 && !is_q) { 11050 unallocated_encoding(s); 11051 return; 11052 } 11053 break; 11054 case 0x7c: /* URSQRTE */ 11055 if (size == 3) { 11056 unallocated_encoding(s); 11057 return; 11058 } 11059 break; 11060 case 0x1e: /* FRINT32Z */ 11061 case 0x1f: /* FRINT64Z */ 11062 rmode = FPROUNDING_ZERO; 11063 /* fall through */ 11064 case 0x5e: /* FRINT32X */ 11065 case 0x5f: /* FRINT64X */ 11066 need_fpstatus = true; 11067 if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) { 11068 unallocated_encoding(s); 11069 return; 11070 } 11071 break; 11072 default: 11073 unallocated_encoding(s); 11074 return; 11075 } 11076 break; 11077 } 11078 default: 11079 case 0x3: /* SUQADD, USQADD */ 11080 unallocated_encoding(s); 11081 return; 11082 } 11083 11084 if (!fp_access_check(s)) { 11085 return; 11086 } 11087 11088 if (need_fpstatus || rmode >= 0) { 11089 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 11090 } else { 11091 tcg_fpstatus = NULL; 11092 } 11093 if (rmode >= 0) { 11094 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 11095 } else { 11096 tcg_rmode = NULL; 11097 } 11098 11099 switch (opcode) { 11100 case 0x5: 11101 if (u && size == 0) { /* NOT */ 11102 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0); 11103 return; 11104 } 11105 break; 11106 case 0x8: /* CMGT, CMGE */ 11107 if (u) { 11108 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size); 11109 } else { 11110 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size); 11111 } 11112 return; 11113 case 0x9: /* CMEQ, CMLE */ 11114 if (u) { 11115 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size); 11116 } else { 11117 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size); 11118 } 11119 return; 11120 case 0xa: /* CMLT */ 11121 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size); 11122 return; 11123 case 0xb: 11124 if (u) { /* ABS, NEG */ 11125 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size); 11126 } else { 11127 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size); 11128 } 11129 return; 11130 } 11131 11132 if (size == 3) { 11133 /* All 64-bit element operations can be shared with scalar 2misc */ 11134 int pass; 11135 11136 /* Coverity claims (size == 3 && !is_q) has been eliminated 11137 * from all paths leading to here. 
11138 */ 11139 tcg_debug_assert(is_q); 11140 for (pass = 0; pass < 2; pass++) { 11141 TCGv_i64 tcg_op = tcg_temp_new_i64(); 11142 TCGv_i64 tcg_res = tcg_temp_new_i64(); 11143 11144 read_vec_element(s, tcg_op, rn, pass, MO_64); 11145 11146 handle_2misc_64(s, opcode, u, tcg_res, tcg_op, 11147 tcg_rmode, tcg_fpstatus); 11148 11149 write_vec_element(s, tcg_res, rd, pass, MO_64); 11150 } 11151 } else { 11152 int pass; 11153 11154 for (pass = 0; pass < (is_q ? 4 : 2); pass++) { 11155 TCGv_i32 tcg_op = tcg_temp_new_i32(); 11156 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11157 11158 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 11159 11160 if (size == 2) { 11161 /* Special cases for 32 bit elements */ 11162 switch (opcode) { 11163 case 0x4: /* CLS */ 11164 if (u) { 11165 tcg_gen_clzi_i32(tcg_res, tcg_op, 32); 11166 } else { 11167 tcg_gen_clrsb_i32(tcg_res, tcg_op); 11168 } 11169 break; 11170 case 0x7: /* SQABS, SQNEG */ 11171 if (u) { 11172 gen_helper_neon_qneg_s32(tcg_res, tcg_env, tcg_op); 11173 } else { 11174 gen_helper_neon_qabs_s32(tcg_res, tcg_env, tcg_op); 11175 } 11176 break; 11177 case 0x2f: /* FABS */ 11178 gen_vfp_abss(tcg_res, tcg_op); 11179 break; 11180 case 0x6f: /* FNEG */ 11181 gen_vfp_negs(tcg_res, tcg_op); 11182 break; 11183 case 0x7f: /* FSQRT */ 11184 gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); 11185 break; 11186 case 0x1a: /* FCVTNS */ 11187 case 0x1b: /* FCVTMS */ 11188 case 0x1c: /* FCVTAS */ 11189 case 0x3a: /* FCVTPS */ 11190 case 0x3b: /* FCVTZS */ 11191 gen_helper_vfp_tosls(tcg_res, tcg_op, 11192 tcg_constant_i32(0), tcg_fpstatus); 11193 break; 11194 case 0x5a: /* FCVTNU */ 11195 case 0x5b: /* FCVTMU */ 11196 case 0x5c: /* FCVTAU */ 11197 case 0x7a: /* FCVTPU */ 11198 case 0x7b: /* FCVTZU */ 11199 gen_helper_vfp_touls(tcg_res, tcg_op, 11200 tcg_constant_i32(0), tcg_fpstatus); 11201 break; 11202 case 0x18: /* FRINTN */ 11203 case 0x19: /* FRINTM */ 11204 case 0x38: /* FRINTP */ 11205 case 0x39: /* FRINTZ */ 11206 case 0x58: /* FRINTA */ 11207 case 0x79: /* FRINTI */ 11208 gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus); 11209 break; 11210 case 0x59: /* FRINTX */ 11211 gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus); 11212 break; 11213 case 0x7c: /* URSQRTE */ 11214 gen_helper_rsqrte_u32(tcg_res, tcg_op); 11215 break; 11216 case 0x1e: /* FRINT32Z */ 11217 case 0x5e: /* FRINT32X */ 11218 gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus); 11219 break; 11220 case 0x1f: /* FRINT64Z */ 11221 case 0x5f: /* FRINT64X */ 11222 gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus); 11223 break; 11224 default: 11225 g_assert_not_reached(); 11226 } 11227 } else { 11228 /* Use helpers for 8 and 16 bit elements */ 11229 switch (opcode) { 11230 case 0x5: /* CNT, RBIT */ 11231 /* For these two insns size is part of the opcode specifier 11232 * (handled earlier); they always operate on byte elements. 
11233 */ 11234 if (u) { 11235 gen_helper_neon_rbit_u8(tcg_res, tcg_op); 11236 } else { 11237 gen_helper_neon_cnt_u8(tcg_res, tcg_op); 11238 } 11239 break; 11240 case 0x7: /* SQABS, SQNEG */ 11241 { 11242 NeonGenOneOpEnvFn *genfn; 11243 static NeonGenOneOpEnvFn * const fns[2][2] = { 11244 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 11245 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 11246 }; 11247 genfn = fns[size][u]; 11248 genfn(tcg_res, tcg_env, tcg_op); 11249 break; 11250 } 11251 case 0x4: /* CLS, CLZ */ 11252 if (u) { 11253 if (size == 0) { 11254 gen_helper_neon_clz_u8(tcg_res, tcg_op); 11255 } else { 11256 gen_helper_neon_clz_u16(tcg_res, tcg_op); 11257 } 11258 } else { 11259 if (size == 0) { 11260 gen_helper_neon_cls_s8(tcg_res, tcg_op); 11261 } else { 11262 gen_helper_neon_cls_s16(tcg_res, tcg_op); 11263 } 11264 } 11265 break; 11266 default: 11267 g_assert_not_reached(); 11268 } 11269 } 11270 11271 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 11272 } 11273 } 11274 clear_vec_high(s, is_q, rd); 11275 11276 if (tcg_rmode) { 11277 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 11278 } 11279 } 11280 11281 /* AdvSIMD [scalar] two register miscellaneous (FP16) 11282 * 11283 * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0 11284 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 11285 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd | 11286 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 11287 * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00 11288 * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800 11289 * 11290 * This actually covers two groups where scalar access is governed by 11291 * bit 28. A bunch of the instructions (float to integral) only exist 11292 * in the vector form and are un-allocated for the scalar decode. Also 11293 * in the scalar decode Q is always 1. 11294 */ 11295 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) 11296 { 11297 int fpop, opcode, a, u; 11298 int rn, rd; 11299 bool is_q; 11300 bool is_scalar; 11301 bool only_in_vector = false; 11302 11303 int pass; 11304 TCGv_i32 tcg_rmode = NULL; 11305 TCGv_ptr tcg_fpstatus = NULL; 11306 bool need_fpst = true; 11307 int rmode = -1; 11308 11309 if (!dc_isar_feature(aa64_fp16, s)) { 11310 unallocated_encoding(s); 11311 return; 11312 } 11313 11314 rd = extract32(insn, 0, 5); 11315 rn = extract32(insn, 5, 5); 11316 11317 a = extract32(insn, 23, 1); 11318 u = extract32(insn, 29, 1); 11319 is_scalar = extract32(insn, 28, 1); 11320 is_q = extract32(insn, 30, 1); 11321 11322 opcode = extract32(insn, 12, 5); 11323 fpop = deposit32(opcode, 5, 1, a); 11324 fpop = deposit32(fpop, 6, 1, u); 11325 11326 switch (fpop) { 11327 case 0x1d: /* SCVTF */ 11328 case 0x5d: /* UCVTF */ 11329 { 11330 int elements; 11331 11332 if (is_scalar) { 11333 elements = 1; 11334 } else { 11335 elements = (is_q ? 
8 : 4); 11336 } 11337 11338 if (!fp_access_check(s)) { 11339 return; 11340 } 11341 handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16); 11342 return; 11343 } 11344 break; 11345 case 0x2c: /* FCMGT (zero) */ 11346 case 0x2d: /* FCMEQ (zero) */ 11347 case 0x2e: /* FCMLT (zero) */ 11348 case 0x6c: /* FCMGE (zero) */ 11349 case 0x6d: /* FCMLE (zero) */ 11350 handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd); 11351 return; 11352 case 0x3d: /* FRECPE */ 11353 case 0x3f: /* FRECPX */ 11354 break; 11355 case 0x18: /* FRINTN */ 11356 only_in_vector = true; 11357 rmode = FPROUNDING_TIEEVEN; 11358 break; 11359 case 0x19: /* FRINTM */ 11360 only_in_vector = true; 11361 rmode = FPROUNDING_NEGINF; 11362 break; 11363 case 0x38: /* FRINTP */ 11364 only_in_vector = true; 11365 rmode = FPROUNDING_POSINF; 11366 break; 11367 case 0x39: /* FRINTZ */ 11368 only_in_vector = true; 11369 rmode = FPROUNDING_ZERO; 11370 break; 11371 case 0x58: /* FRINTA */ 11372 only_in_vector = true; 11373 rmode = FPROUNDING_TIEAWAY; 11374 break; 11375 case 0x59: /* FRINTX */ 11376 case 0x79: /* FRINTI */ 11377 only_in_vector = true; 11378 /* current rounding mode */ 11379 break; 11380 case 0x1a: /* FCVTNS */ 11381 rmode = FPROUNDING_TIEEVEN; 11382 break; 11383 case 0x1b: /* FCVTMS */ 11384 rmode = FPROUNDING_NEGINF; 11385 break; 11386 case 0x1c: /* FCVTAS */ 11387 rmode = FPROUNDING_TIEAWAY; 11388 break; 11389 case 0x3a: /* FCVTPS */ 11390 rmode = FPROUNDING_POSINF; 11391 break; 11392 case 0x3b: /* FCVTZS */ 11393 rmode = FPROUNDING_ZERO; 11394 break; 11395 case 0x5a: /* FCVTNU */ 11396 rmode = FPROUNDING_TIEEVEN; 11397 break; 11398 case 0x5b: /* FCVTMU */ 11399 rmode = FPROUNDING_NEGINF; 11400 break; 11401 case 0x5c: /* FCVTAU */ 11402 rmode = FPROUNDING_TIEAWAY; 11403 break; 11404 case 0x7a: /* FCVTPU */ 11405 rmode = FPROUNDING_POSINF; 11406 break; 11407 case 0x7b: /* FCVTZU */ 11408 rmode = FPROUNDING_ZERO; 11409 break; 11410 case 0x2f: /* FABS */ 11411 case 0x6f: /* FNEG */ 11412 need_fpst = false; 11413 break; 11414 case 0x7d: /* FRSQRTE */ 11415 case 0x7f: /* FSQRT (vector) */ 11416 break; 11417 default: 11418 unallocated_encoding(s); 11419 return; 11420 } 11421 11422 11423 /* Check additional constraints for the scalar encoding */ 11424 if (is_scalar) { 11425 if (!is_q) { 11426 unallocated_encoding(s); 11427 return; 11428 } 11429 /* FRINTxx is only in the vector form */ 11430 if (only_in_vector) { 11431 unallocated_encoding(s); 11432 return; 11433 } 11434 } 11435 11436 if (!fp_access_check(s)) { 11437 return; 11438 } 11439 11440 if (rmode >= 0 || need_fpst) { 11441 tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16); 11442 } 11443 11444 if (rmode >= 0) { 11445 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 11446 } 11447 11448 if (is_scalar) { 11449 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 11450 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11451 11452 switch (fpop) { 11453 case 0x1a: /* FCVTNS */ 11454 case 0x1b: /* FCVTMS */ 11455 case 0x1c: /* FCVTAS */ 11456 case 0x3a: /* FCVTPS */ 11457 case 0x3b: /* FCVTZS */ 11458 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 11459 break; 11460 case 0x3d: /* FRECPE */ 11461 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 11462 break; 11463 case 0x3f: /* FRECPX */ 11464 gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus); 11465 break; 11466 case 0x5a: /* FCVTNU */ 11467 case 0x5b: /* FCVTMU */ 11468 case 0x5c: /* FCVTAU */ 11469 case 0x7a: /* FCVTPU */ 11470 case 0x7b: /* FCVTZU */ 11471 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 11472 
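            /* The helper honours the rounding mode installed in tcg_fpstatus above. */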
break; 11473 case 0x6f: /* FNEG */ 11474 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 11475 break; 11476 case 0x7d: /* FRSQRTE */ 11477 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 11478 break; 11479 default: 11480 g_assert_not_reached(); 11481 } 11482 11483 /* limit any sign extension going on */ 11484 tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff); 11485 write_fp_sreg(s, rd, tcg_res); 11486 } else { 11487 for (pass = 0; pass < (is_q ? 8 : 4); pass++) { 11488 TCGv_i32 tcg_op = tcg_temp_new_i32(); 11489 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11490 11491 read_vec_element_i32(s, tcg_op, rn, pass, MO_16); 11492 11493 switch (fpop) { 11494 case 0x1a: /* FCVTNS */ 11495 case 0x1b: /* FCVTMS */ 11496 case 0x1c: /* FCVTAS */ 11497 case 0x3a: /* FCVTPS */ 11498 case 0x3b: /* FCVTZS */ 11499 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 11500 break; 11501 case 0x3d: /* FRECPE */ 11502 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 11503 break; 11504 case 0x5a: /* FCVTNU */ 11505 case 0x5b: /* FCVTMU */ 11506 case 0x5c: /* FCVTAU */ 11507 case 0x7a: /* FCVTPU */ 11508 case 0x7b: /* FCVTZU */ 11509 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 11510 break; 11511 case 0x18: /* FRINTN */ 11512 case 0x19: /* FRINTM */ 11513 case 0x38: /* FRINTP */ 11514 case 0x39: /* FRINTZ */ 11515 case 0x58: /* FRINTA */ 11516 case 0x79: /* FRINTI */ 11517 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus); 11518 break; 11519 case 0x59: /* FRINTX */ 11520 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus); 11521 break; 11522 case 0x2f: /* FABS */ 11523 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); 11524 break; 11525 case 0x6f: /* FNEG */ 11526 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 11527 break; 11528 case 0x7d: /* FRSQRTE */ 11529 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 11530 break; 11531 case 0x7f: /* FSQRT */ 11532 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus); 11533 break; 11534 default: 11535 g_assert_not_reached(); 11536 } 11537 11538 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 11539 } 11540 11541 clear_vec_high(s, is_q, rd); 11542 } 11543 11544 if (tcg_rmode) { 11545 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 11546 } 11547 } 11548 11549 /* C3.6 Data processing - SIMD, inc Crypto 11550 * 11551 * As the decode gets a little complex we are using a table based 11552 * approach for this part of the decode. 11553 */ 11554 static const AArch64DecodeTable data_proc_simd[] = { 11555 /* pattern , mask , fn */ 11556 { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, 11557 { 0x0f000400, 0x9f800400, disas_simd_shift_imm }, 11558 { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, 11559 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm }, 11560 { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 }, 11561 { 0x00000000, 0x00000000, NULL } 11562 }; 11563 11564 static void disas_data_proc_simd(DisasContext *s, uint32_t insn) 11565 { 11566 /* Note that this is called with all non-FP cases from 11567 * table C3-6 so it must UNDEF for entries not specifically 11568 * allocated to instructions in that table. 
11569 */ 11570 AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn); 11571 if (fn) { 11572 fn(s, insn); 11573 } else { 11574 unallocated_encoding(s); 11575 } 11576 } 11577 11578 /* C3.6 Data processing - SIMD and floating point */ 11579 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn) 11580 { 11581 if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) { 11582 disas_data_proc_fp(s, insn); 11583 } else { 11584 /* SIMD, including crypto */ 11585 disas_data_proc_simd(s, insn); 11586 } 11587 } 11588 11589 static bool trans_OK(DisasContext *s, arg_OK *a) 11590 { 11591 return true; 11592 } 11593 11594 static bool trans_FAIL(DisasContext *s, arg_OK *a) 11595 { 11596 s->is_nonstreaming = true; 11597 return true; 11598 } 11599 11600 /** 11601 * btype_destination_ok: 11602 * @insn: The instruction at the branch destination 11603 * @bt: SCTLR_ELx.BT 11604 * @btype: PSTATE.BTYPE, and is non-zero 11605 * 11606 * On a guarded page, there are a limited number of insns 11607 * that may be present at the branch target: 11608 * - branch target identifiers, 11609 * - paciasp, pacibsp, 11610 * - BRK insn 11611 * - HLT insn 11612 * Anything else causes a Branch Target Exception. 11613 * 11614 * Return true if the branch is compatible, false to raise BTITRAP. 11615 */ 11616 static bool btype_destination_ok(uint32_t insn, bool bt, int btype) 11617 { 11618 if ((insn & 0xfffff01fu) == 0xd503201fu) { 11619 /* HINT space */ 11620 switch (extract32(insn, 5, 7)) { 11621 case 0b011001: /* PACIASP */ 11622 case 0b011011: /* PACIBSP */ 11623 /* 11624 * If SCTLR_ELx.BT, then PACI*SP are not compatible 11625 * with btype == 3. Otherwise all btype are ok. 11626 */ 11627 return !bt || btype != 3; 11628 case 0b100000: /* BTI */ 11629 /* Not compatible with any btype. */ 11630 return false; 11631 case 0b100010: /* BTI c */ 11632 /* Not compatible with btype == 3 */ 11633 return btype != 3; 11634 case 0b100100: /* BTI j */ 11635 /* Not compatible with btype == 2 */ 11636 return btype != 2; 11637 case 0b100110: /* BTI jc */ 11638 /* Compatible with any btype. */ 11639 return true; 11640 } 11641 } else { 11642 switch (insn & 0xffe0001fu) { 11643 case 0xd4200000u: /* BRK */ 11644 case 0xd4400000u: /* HLT */ 11645 /* Give priority to the breakpoint exception. */ 11646 return true; 11647 } 11648 } 11649 return false; 11650 } 11651 11652 /* C3.1 A64 instruction index by encoding */ 11653 static void disas_a64_legacy(DisasContext *s, uint32_t insn) 11654 { 11655 switch (extract32(insn, 25, 4)) { 11656 case 0x5: 11657 case 0xd: /* Data processing - register */ 11658 disas_data_proc_reg(s, insn); 11659 break; 11660 case 0x7: 11661 case 0xf: /* Data processing - SIMD and floating point */ 11662 disas_data_proc_simd_fp(s, insn); 11663 break; 11664 default: 11665 unallocated_encoding(s); 11666 break; 11667 } 11668 } 11669 11670 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, 11671 CPUState *cpu) 11672 { 11673 DisasContext *dc = container_of(dcbase, DisasContext, base); 11674 CPUARMState *env = cpu_env(cpu); 11675 ARMCPU *arm_cpu = env_archcpu(env); 11676 CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb); 11677 int bound, core_mmu_idx; 11678 11679 dc->isar = &arm_cpu->isar; 11680 dc->condjmp = 0; 11681 dc->pc_save = dc->base.pc_first; 11682 dc->aarch64 = true; 11683 dc->thumb = false; 11684 dc->sctlr_b = 0; 11685 dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? 
MO_BE : MO_LE; 11686 dc->condexec_mask = 0; 11687 dc->condexec_cond = 0; 11688 core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX); 11689 dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx); 11690 dc->tbii = EX_TBFLAG_A64(tb_flags, TBII); 11691 dc->tbid = EX_TBFLAG_A64(tb_flags, TBID); 11692 dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA); 11693 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); 11694 #if !defined(CONFIG_USER_ONLY) 11695 dc->user = (dc->current_el == 0); 11696 #endif 11697 dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); 11698 dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); 11699 dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL); 11700 dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE); 11701 dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC); 11702 dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET); 11703 dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL); 11704 dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL); 11705 dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16; 11706 dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16; 11707 dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE); 11708 dc->bt = EX_TBFLAG_A64(tb_flags, BT); 11709 dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE); 11710 dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV); 11711 dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA); 11712 dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0); 11713 dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE); 11714 dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE); 11715 dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM); 11716 dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA); 11717 dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING); 11718 dc->naa = EX_TBFLAG_A64(tb_flags, NAA); 11719 dc->nv = EX_TBFLAG_A64(tb_flags, NV); 11720 dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1); 11721 dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2); 11722 dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); 11723 dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); 11724 dc->vec_len = 0; 11725 dc->vec_stride = 0; 11726 dc->cp_regs = arm_cpu->cp_regs; 11727 dc->features = env->features; 11728 dc->dcz_blocksize = arm_cpu->dcz_blocksize; 11729 dc->gm_blocksize = arm_cpu->gm_blocksize; 11730 11731 #ifdef CONFIG_USER_ONLY 11732 /* In sve_probe_page, we assume TBI is enabled. */ 11733 tcg_debug_assert(dc->tbid & 1); 11734 #endif 11735 11736 dc->lse2 = dc_isar_feature(aa64_lse2, dc); 11737 11738 /* Single step state. The code-generation logic here is: 11739 * SS_ACTIVE == 0: 11740 * generate code with no special handling for single-stepping (except 11741 * that anything that can make us go to SS_ACTIVE == 1 must end the TB; 11742 * this happens anyway because those changes are all system register or 11743 * PSTATE writes). 11744 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending) 11745 * emit code for one insn 11746 * emit code to clear PSTATE.SS 11747 * emit code to generate software step exception for completed step 11748 * end TB (as usual for having generated an exception) 11749 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending) 11750 * emit code to generate a software step exception 11751 * end the TB 11752 */ 11753 dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE); 11754 dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS); 11755 dc->is_ldex = false; 11756 11757 /* Bound the number of insns to execute to those left on the page. */ 11758 bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4; 11759 11760 /* If architectural single step active, limit to 1. 
*/ 11761 if (dc->ss_active) { 11762 bound = 1; 11763 } 11764 dc->base.max_insns = MIN(dc->base.max_insns, bound); 11765 } 11766 11767 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) 11768 { 11769 } 11770 11771 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) 11772 { 11773 DisasContext *dc = container_of(dcbase, DisasContext, base); 11774 target_ulong pc_arg = dc->base.pc_next; 11775 11776 if (tb_cflags(dcbase->tb) & CF_PCREL) { 11777 pc_arg &= ~TARGET_PAGE_MASK; 11778 } 11779 tcg_gen_insn_start(pc_arg, 0, 0); 11780 dc->insn_start_updated = false; 11781 } 11782 11783 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) 11784 { 11785 DisasContext *s = container_of(dcbase, DisasContext, base); 11786 CPUARMState *env = cpu_env(cpu); 11787 uint64_t pc = s->base.pc_next; 11788 uint32_t insn; 11789 11790 /* Singlestep exceptions have the highest priority. */ 11791 if (s->ss_active && !s->pstate_ss) { 11792 /* Singlestep state is Active-pending. 11793 * If we're in this state at the start of a TB then either 11794 * a) we just took an exception to an EL which is being debugged 11795 * and this is the first insn in the exception handler 11796 * b) debug exceptions were masked and we just unmasked them 11797 * without changing EL (eg by clearing PSTATE.D) 11798 * In either case we're going to take a swstep exception in the 11799 * "did not step an insn" case, and so the syndrome ISV and EX 11800 * bits should be zero. 11801 */ 11802 assert(s->base.num_insns == 1); 11803 gen_swstep_exception(s, 0, 0); 11804 s->base.is_jmp = DISAS_NORETURN; 11805 s->base.pc_next = pc + 4; 11806 return; 11807 } 11808 11809 if (pc & 3) { 11810 /* 11811 * PC alignment fault. This has priority over the instruction abort 11812 * that we would receive from a translation fault via arm_ldl_code. 11813 * This should only be possible after an indirect branch, at the 11814 * start of the TB. 11815 */ 11816 assert(s->base.num_insns == 1); 11817 gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc)); 11818 s->base.is_jmp = DISAS_NORETURN; 11819 s->base.pc_next = QEMU_ALIGN_UP(pc, 4); 11820 return; 11821 } 11822 11823 s->pc_curr = pc; 11824 insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b); 11825 s->insn = insn; 11826 s->base.pc_next = pc + 4; 11827 11828 s->fp_access_checked = false; 11829 s->sve_access_checked = false; 11830 11831 if (s->pstate_il) { 11832 /* 11833 * Illegal execution state. This has priority over BTI 11834 * exceptions, but comes after instruction abort exceptions. 11835 */ 11836 gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate()); 11837 return; 11838 } 11839 11840 if (dc_isar_feature(aa64_bti, s)) { 11841 if (s->base.num_insns == 1) { 11842 /* First insn can have btype set to non-zero. */ 11843 tcg_debug_assert(s->btype >= 0); 11844 11845 /* 11846 * Note that the Branch Target Exception has fairly high 11847 * priority -- below debugging exceptions but above most 11848 * everything else. This allows us to handle this now 11849 * instead of waiting until the insn is otherwise decoded. 11850 * 11851 * We can check all but the guarded page check here; 11852 * defer the latter to a helper. 11853 */ 11854 if (s->btype != 0 11855 && !btype_destination_ok(insn, s->bt, s->btype)) { 11856 gen_helper_guarded_page_check(tcg_env); 11857 } 11858 } else { 11859 /* Not the first insn: btype must be 0. 
*/ 11860 tcg_debug_assert(s->btype == 0); 11861 } 11862 } 11863 11864 s->is_nonstreaming = false; 11865 if (s->sme_trap_nonstreaming) { 11866 disas_sme_fa64(s, insn); 11867 } 11868 11869 if (!disas_a64(s, insn) && 11870 !disas_sme(s, insn) && 11871 !disas_sve(s, insn)) { 11872 disas_a64_legacy(s, insn); 11873 } 11874 11875 /* 11876 * After execution of most insns, btype is reset to 0. 11877 * Note that we set btype == -1 when the insn sets btype. 11878 */ 11879 if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) { 11880 reset_btype(s); 11881 } 11882 } 11883 11884 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) 11885 { 11886 DisasContext *dc = container_of(dcbase, DisasContext, base); 11887 11888 if (unlikely(dc->ss_active)) { 11889 /* Note that this means single stepping WFI doesn't halt the CPU. 11890 * For conditional branch insns this is harmless unreachable code as 11891 * gen_goto_tb() has already handled emitting the debug exception 11892 * (and thus a tb-jump is not possible when singlestepping). 11893 */ 11894 switch (dc->base.is_jmp) { 11895 default: 11896 gen_a64_update_pc(dc, 4); 11897 /* fall through */ 11898 case DISAS_EXIT: 11899 case DISAS_JUMP: 11900 gen_step_complete_exception(dc); 11901 break; 11902 case DISAS_NORETURN: 11903 break; 11904 } 11905 } else { 11906 switch (dc->base.is_jmp) { 11907 case DISAS_NEXT: 11908 case DISAS_TOO_MANY: 11909 gen_goto_tb(dc, 1, 4); 11910 break; 11911 default: 11912 case DISAS_UPDATE_EXIT: 11913 gen_a64_update_pc(dc, 4); 11914 /* fall through */ 11915 case DISAS_EXIT: 11916 tcg_gen_exit_tb(NULL, 0); 11917 break; 11918 case DISAS_UPDATE_NOCHAIN: 11919 gen_a64_update_pc(dc, 4); 11920 /* fall through */ 11921 case DISAS_JUMP: 11922 tcg_gen_lookup_and_goto_ptr(); 11923 break; 11924 case DISAS_NORETURN: 11925 case DISAS_SWI: 11926 break; 11927 case DISAS_WFE: 11928 gen_a64_update_pc(dc, 4); 11929 gen_helper_wfe(tcg_env); 11930 break; 11931 case DISAS_YIELD: 11932 gen_a64_update_pc(dc, 4); 11933 gen_helper_yield(tcg_env); 11934 break; 11935 case DISAS_WFI: 11936 /* 11937 * This is a special case because we don't want to just halt 11938 * the CPU if trying to debug across a WFI. 11939 */ 11940 gen_a64_update_pc(dc, 4); 11941 gen_helper_wfi(tcg_env, tcg_constant_i32(4)); 11942 /* 11943 * The helper doesn't necessarily throw an exception, but we 11944 * must go back to the main loop to check for interrupts anyway. 11945 */ 11946 tcg_gen_exit_tb(NULL, 0); 11947 break; 11948 } 11949 } 11950 } 11951 11952 const TranslatorOps aarch64_translator_ops = { 11953 .init_disas_context = aarch64_tr_init_disas_context, 11954 .tb_start = aarch64_tr_tb_start, 11955 .insn_start = aarch64_tr_insn_start, 11956 .translate_insn = aarch64_tr_translate_insn, 11957 .tb_stop = aarch64_tr_tb_stop, 11958 }; 11959