/*
 * AArch64 translation
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "exec/exec-all.h"
#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"
#include "cpregs.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Helpers for extracting complex instruction fields
 */

/*
 * For load/store with an unsigned 12 bit immediate scaled by the element
 * size. The input has the immediate field in bits [14:3] and the element
 * size in [2:0].
 */
static int uimm_scaled(DisasContext *s, int x)
{
    unsigned imm = x >> 3;
    unsigned scale = extract32(x, 0, 3);
    return imm << scale;
}

/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
static int scale_by_log2_tag_granule(DisasContext *s, int x)
{
    return x << LOG2_TAG_GRANULE;
}

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* initialize TCG globals. */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(tcg_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 load/store insns which
 * have an "unprivileged load/store" variant. Those insns access
 * EL0 if executed from an EL which has control over EL0 (usually
 * EL1) but behave like normal loads and stores if executed from
 * elsewhere (eg EL3).
 *
 * @unpriv : true for the unprivileged encoding; false for the
 *           normal encoding (in which case we will return the same
 *           thing as get_mem_index()).
 */
static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (unpriv && s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55. */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

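/*
 * A worked sketch of the two-range case in gen_top_byte_ignore() above
 * (an explanatory note, not architecture text): after the sextract,
 * dst has bits [63:56] equal to bit 55 of src.  ANDing with src
 * (tbi == 1) keeps that extension only when bit 55 is 0, ORing with
 * src (tbi == 2) keeps it only when bit 55 is 1, and tbi == 3 keeps
 * it unconditionally.
 */
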
/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register.  But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(tcg_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access.  This probes a single
 * address, the exact one specified.  The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      MemOp memop, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, MemOp memop)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int total_size, MemOp single_mop)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

/*
 * Generate the special alignment check that applies to AccType_ATOMIC
 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
 * naturally aligned, but it must not cross a 16-byte boundary.
 * See AArch64.CheckAlignment().
 */
static void check_lse2_align(DisasContext *s, int rn, int imm,
                             bool is_write, MemOp mop)
{
    TCGv_i32 tmp;
    TCGv_i64 addr;
    TCGLabel *over_label;
    MMUAccessType type;
    int mmu_idx;

    tmp = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
    tcg_gen_addi_i32(tmp, tmp, imm & 15);
    tcg_gen_andi_i32(tmp, tmp, 15);
    tcg_gen_addi_i32(tmp, tmp, memop_size(mop));

    over_label = gen_new_label();
    tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);

    type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
    mmu_idx = get_mem_index(s);
    gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
                                tcg_constant_i32(mmu_idx));

    gen_set_label(over_label);
}

/* Handle the alignment check for AccType_ATOMIC instructions. */
static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }

    /*
     * If size == MO_128, this is a LDXP, and the operation is single-copy
     * atomic for each doubleword, not the entire quadword; it still must
     * be quadword aligned.
     */
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (dc_isar_feature(aa64_lse2, s)) {
        check_lse2_align(s, rn, 0, true, mop);
    } else {
        mop |= MO_ALIGN;
    }
    return finalize_memop(s, mop);
}

/* Handle the alignment check for AccType_ORDERED instructions. */
static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
                                 bool is_write, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (!dc_isar_feature(aa64_lse2, s)) {
        mop |= MO_ALIGN;
    } else if (!s->naa) {
        check_lse2_align(s, rn, imm, is_write, mop);
    }
    return finalize_memop(s, mop);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments.  For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path.  A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, tcg_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper. */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper. */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 4-operand operation using an out-of-line helper. */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}

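/*
 * A note on flag storage (the usual QEMU ARM convention, stated here
 * for reference): cpu_NF and cpu_VF hold the flag in bit 31, cpu_CF
 * holds 0 or 1, and cpu_ZF holds a value that is zero if and only if
 * the Z flag is set.  The flag-setting helpers in this file produce
 * values in that form.
 */
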
/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 result, flag, tmp;
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    tcg_gen_movi_i64(tmp, 0);
    tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    gen_set_NZ64(result);

    tcg_gen_xor_i64(flag, result, t0);
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_andc_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);

    tcg_gen_mov_i64(dest, result);
}

static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp = tcg_temp_new_i32();

    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_add64_CC(dest, t0, t1);
    } else {
        gen_add32_CC(dest, t0, t1);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 64 bit arithmetic */
    TCGv_i64 result, flag, tmp;

    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tcg_gen_sub_i64(result, t0, t1);

    gen_set_NZ64(result);

    tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    tcg_gen_xor_i64(flag, result, t0);
    tmp = tcg_temp_new_i64();
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_and_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);
    tcg_gen_mov_i64(dest, result);
}

static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 32 bit arithmetic */
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp;

    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_sub64_CC(dest, t0, t1);
    } else {
        gen_sub32_CC(dest, t0, t1);
    }
}

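/*
 * A note on the V flag computation used above and below (an
 * explanatory sketch): signed overflow of an addition is
 * (result ^ t0) & ~(t0 ^ t1), i.e. both operands had the same sign
 * and the result's sign differs; for subtraction the second term is
 * (t0 ^ t1) instead, since overflow there requires differing operand
 * signs.
 */
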
/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();

    tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i64 tmphi = tcg_temp_new_i64();
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);

        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);

        tmphi = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
    }

    tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = true;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        assert(dc_isar_feature(aa64_sme, s));
        if (!sme_sm_enabled_check(s)) {
            goto fail_exit;
        }
    } else if (s->sve_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        goto fail_exit;
    }
    s->sve_access_checked = true;
    return fp_access_check(s);

 fail_exit:
    /* Assert that we only raise one exception per instruction. */
    assert(!s->sve_access_checked);
    s->sve_access_checked = true;
    return false;
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority.  This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        s->fp_access_checked = true;
        return sme_access_check(s);
    }
    return fp_access_check_only(s);
}

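/*
 * For the helper below, @req is a bitmask in SVCR format naming the
 * PSTATE bits (SM and/or ZA) that the instruction requires to be set;
 * the matching SME exception is raised for the first one found missing.
 */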
/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * Expanders for AdvSIMD translation functions.
 */

static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
                            gen_helper_gvec_2 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
    }
    return true;
}

static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
    }
    return true;
}

static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_8) {
        return false;
    }
    return do_gvec_fn3_no64(s, a, fn);
}

static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
    tcg_gen_shli_i64(tcg_out, tcg_out, shift);
}

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder.  It is
 * intended to be used when the relevant bits for decode are too
 * awkwardly placed and switch/if based logic would be confusing and
 * deeply nested.  Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

static bool trans_B(DisasContext *s, arg_i *a)
{
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_BL(DisasContext *s, arg_i *a)
{
    gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_CBZ(DisasContext *s, arg_cbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_TBZ(DisasContext *s, arg_tbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
{
    /* BC.cond is only present with FEAT_HBC */
    if (a->c && !dc_isar_feature(aa64_hbc, s)) {
        return false;
    }
    reset_btype(s);
    if (a->cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(a->cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, a->imm);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, a->imm);
    }
    return true;
}

static void set_btype_for_br(DisasContext *s, int rn)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BR to {x16,x17} or !guard -> 1, else 3.  */
        if (rn == 16 || rn == 17) {
            set_btype(s, 1);
        } else {
            TCGv_i64 pc = tcg_temp_new_i64();
            gen_pc_plus_diff(s, pc, 0);
            gen_helper_guarded_page_br(tcg_env, pc);
            s->btype = -1;
        }
    }
}

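/*
 * A short summary of the BTYPE values used here (mirroring the checks
 * coded above and below, not a restatement of the architecture text):
 * an indirect branch to x16/x17, or one from an unguarded page,
 * records BTYPE 1; other BR-style branches record 3; BLR-style
 * branches record 2.
 */
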
static void set_btype_for_blr(DisasContext *s)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BLR sets BTYPE to 2, regardless of source guarded page.  */
        set_btype(s, 2);
    }
}

static bool trans_BR(DisasContext *s, arg_r *a)
{
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLR(DisasContext *s, arg_r *a)
{
    TCGv_i64 dst = cpu_reg(s, a->rn);
    TCGv_i64 lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RET(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
                                   TCGv_i64 modifier, bool use_key_a)
{
    TCGv_i64 truedst;
    /*
     * Return the branch target for a BRAA/RETA/etc, which is either
     * just the destination dst, or that value with the pauth check
     * done and the code removed from the high bits.
     */
    if (!s->pauth_active) {
        return dst;
    }

    truedst = tcg_temp_new_i64();
    if (use_key_a) {
        gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
    } else {
        gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
    }
    return truedst;
}

static bool trans_BRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst, lr;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
    gen_a64_set_pc(s, dst);
    set_btype_for_br(s, a->rn);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

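/*
 * Note for the BLR-style translators above and below: when the branch
 * register is also x30, the branch target and the link register alias,
 * so the target is copied to a temporary before the link register is
 * written.
 */
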
static bool trans_BLRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst, lr;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
    lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_ERET(DisasContext *s, arg_ERET *a)
{
    TCGv_i64 dst;

    if (s->current_el == 0) {
        return false;
    }
    if (s->trap_eret) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
        return true;
    }
    dst = tcg_temp_new_i64();
    tcg_gen_ld_i64(dst, tcg_env,
                   offsetof(CPUARMState, elr_el[s->current_el]));

    translator_io_start(&s->base);

    gen_helper_exception_return(tcg_env, dst);
    /* Must exit loop to check un-masked IRQs */
    s->base.is_jmp = DISAS_EXIT;
    return true;
}

static bool trans_ERETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    if (s->current_el == 0) {
        return false;
    }
    /* The FGT trap takes precedence over an auth trap. */
    if (s->trap_eret) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2);
        return true;
    }
    dst = tcg_temp_new_i64();
    tcg_gen_ld_i64(dst, tcg_env,
                   offsetof(CPUARMState, elr_el[s->current_el]));

    dst = auth_branch_target(s, dst, cpu_X[31], !a->m);

    translator_io_start(&s->base);

    gen_helper_exception_return(tcg_env, dst);
    /* Must exit loop to check un-masked IRQs */
    s->base.is_jmp = DISAS_EXIT;
    return true;
}

static bool trans_NOP(DisasContext *s, arg_NOP *a)
{
    return true;
}

static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
{
    /*
     * When running in MTTCG we don't generate jumps to the yield and
     * WFE helpers as it won't affect the scheduling of other vCPUs.
     * If we wanted to more completely model WFE/SEV so we don't busy
     * spin unnecessarily we would need to do something more involved.
     */
    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
        s->base.is_jmp = DISAS_YIELD;
    }
    return true;
}

static bool trans_WFI(DisasContext *s, arg_WFI *a)
{
    s->base.is_jmp = DISAS_WFI;
    return true;
}

static bool trans_WFE(DisasContext *s, arg_WFI *a)
{
    /*
     * When running in MTTCG we don't generate jumps to the yield and
     * WFE helpers as it won't affect the scheduling of other vCPUs.
     * If we wanted to more completely model WFE/SEV so we don't busy
     * spin unnecessarily we would need to do something more involved.
     */
    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
        s->base.is_jmp = DISAS_WFE;
    }
    return true;
}

static bool trans_WFIT(DisasContext *s, arg_WFIT *a)
{
    if (!dc_isar_feature(aa64_wfxt, s)) {
        return false;
    }

    /*
     * Because we need to pass the register value to the helper,
     * it's easier to emit the code now, unlike trans_WFI which
     * defers it to aarch64_tr_tb_stop(). That means we need to
     * check ss_active so that single-stepping a WFIT doesn't halt.
     */
    if (s->ss_active) {
        /* Act like a NOP under architectural singlestep */
        return true;
    }

    gen_a64_update_pc(s, 4);
    gen_helper_wfit(tcg_env, cpu_reg(s, a->rd));
    /* Go back to the main loop to check for interrupts */
    s->base.is_jmp = DISAS_EXIT;
    return true;
}

static bool trans_WFET(DisasContext *s, arg_WFET *a)
{
    if (!dc_isar_feature(aa64_wfxt, s)) {
        return false;
    }

    /*
     * We rely here on our WFE implementation being a NOP, so we
     * don't need to do anything different to handle the WFET timeout
     * from what trans_WFE does.
     */
    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
        s->base.is_jmp = DISAS_WFE;
    }
    return true;
}

static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a)
{
    if (s->pauth_active) {
        gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]);
    }
    return true;
}

static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a)
{
    if (s->pauth_active) {
        gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
    }
    return true;
}

static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a)
{
    if (s->pauth_active) {
        gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
    }
    return true;
}

static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a)
{
    if (s->pauth_active) {
        gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
    }
    return true;
}

static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a)
{
    if (s->pauth_active) {
        gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]);
    }
    return true;
}

static bool trans_ESB(DisasContext *s, arg_ESB *a)
{
    /* Without RAS, we must implement this as NOP. */
    if (dc_isar_feature(aa64_ras, s)) {
        /*
         * QEMU does not have a source of physical SErrors,
         * so we are only concerned with virtual SErrors.
         * The pseudocode in the ARM for this case is
         *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
         *     AArch64.vESBOperation();
         * Most of the condition can be evaluated at translation time.
         * Test for EL2 present, and defer test for SEL2 to runtime.
         */
        if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
            gen_helper_vesb(tcg_env);
        }
    }
    return true;
}

static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a)
{
    if (s->pauth_active) {
        gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
    }
    return true;
}

static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a)
{
    if (s->pauth_active) {
        gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
    }
    return true;
}

static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a)
{
    if (s->pauth_active) {
        gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
    }
    return true;
}

static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a)
{
    if (s->pauth_active) {
        gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
    }
    return true;
}

static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a)
{
    if (s->pauth_active) {
        gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
    }
    return true;
}

static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a)
{
    if (s->pauth_active) {
        gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
    }
    return true;
}

static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a)
{
    if (s->pauth_active) {
        gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0));
    }
    return true;
}

static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a)
{
    if (s->pauth_active) {
        gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]);
    }
    return true;
}

static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
    return true;
}

static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a)
{
    /* We handle DSB and DMB the same way */
    TCGBar bar;

    switch (a->types) {
    case 1: /* MBReqTypes_Reads */
        bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
        break;
    case 2: /* MBReqTypes_Writes */
        bar = TCG_BAR_SC | TCG_MO_ST_ST;
        break;
    default: /* MBReqTypes_All */
        bar = TCG_BAR_SC | TCG_MO_ALL;
        break;
    }
    tcg_gen_mb(bar);
    return true;
}

static bool trans_ISB(DisasContext *s, arg_ISB *a)
{
    /*
     * We need to break the TB after this insn to execute
     * self-modifying code correctly and also to take
     * any pending interrupts immediately.
     */
    reset_btype(s);
    gen_goto_tb(s, 0, 4);
    return true;
}

static bool trans_SB(DisasContext *s, arg_SB *a)
{
    if (!dc_isar_feature(aa64_sb, s)) {
        return false;
    }
    /*
     * TODO: There is no speculation barrier opcode for TCG;
     * MB and end the TB instead.
     */
    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
    gen_goto_tb(s, 0, 4);
    return true;
}

static bool trans_CFINV(DisasContext *s, arg_CFINV *a)
{
    if (!dc_isar_feature(aa64_condm_4, s)) {
        return false;
    }
    tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
    return true;
}

static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a)
{
    TCGv_i32 z;

    if (!dc_isar_feature(aa64_condm_5, s)) {
        return false;
    }

    z = tcg_temp_new_i32();

    tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);

    /*
     * (!C & !Z) << 31
     * (!(C | Z)) << 31
     * ~((C | Z) << 31)
     * ~-(C | Z)
     * (C | Z) - 1
     */
    tcg_gen_or_i32(cpu_NF, cpu_CF, z);
    tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);

    /* !(Z & C) */
    tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
    tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);

    /* (!C & Z) << 31 -> -(Z & ~C) */
    tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);

    /* C | Z */
    tcg_gen_or_i32(cpu_CF, cpu_CF, z);

    return true;
}

static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a)
{
    if (!dc_isar_feature(aa64_condm_5, s)) {
        return false;
    }

    tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
    tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */

    /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
    tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);

    tcg_gen_movi_i32(cpu_NF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);

    return true;
}

static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a)
{
    if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
        return false;
    }
    if (a->imm & 1) {
        set_pstate_bits(PSTATE_UAO);
    } else {
        clear_pstate_bits(PSTATE_UAO);
    }
    gen_rebuild_hflags(s);
    s->base.is_jmp = DISAS_TOO_MANY;
    return true;
}

static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a)
{
    if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
        return false;
    }
    if (a->imm & 1) {
        set_pstate_bits(PSTATE_PAN);
    } else {
        clear_pstate_bits(PSTATE_PAN);
    }
    gen_rebuild_hflags(s);
    s->base.is_jmp = DISAS_TOO_MANY;
    return true;
}

static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a)
{
    if (s->current_el == 0) {
        return false;
    }
    gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP));
    s->base.is_jmp = DISAS_TOO_MANY;
    return true;
}

static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a)
{
    if (!dc_isar_feature(aa64_ssbs, s)) {
        return false;
    }
    if (a->imm & 1) {
        set_pstate_bits(PSTATE_SSBS);
    } else {
        clear_pstate_bits(PSTATE_SSBS);
    }
    /* Don't need to rebuild hflags since SSBS is a nop */
    s->base.is_jmp = DISAS_TOO_MANY;
    return true;
}

static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a)
{
    if (!dc_isar_feature(aa64_dit, s)) {
        return false;
    }
    if (a->imm & 1) {
        set_pstate_bits(PSTATE_DIT);
    } else {
        clear_pstate_bits(PSTATE_DIT);
    }
    /* There's no need to rebuild hflags because DIT is a nop */
    s->base.is_jmp = DISAS_TOO_MANY;
    return true;
}

static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a)
{
    if (dc_isar_feature(aa64_mte, s)) {
        /* Full MTE is enabled -- set the TCO bit as directed.  */
*/ 2109 if (a->imm & 1) { 2110 set_pstate_bits(PSTATE_TCO); 2111 } else { 2112 clear_pstate_bits(PSTATE_TCO); 2113 } 2114 gen_rebuild_hflags(s); 2115 /* Many factors, including TCO, go into MTE_ACTIVE. */ 2116 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2117 return true; 2118 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { 2119 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ 2120 return true; 2121 } else { 2122 /* Insn not present */ 2123 return false; 2124 } 2125 } 2126 2127 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) 2128 { 2129 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm)); 2130 s->base.is_jmp = DISAS_TOO_MANY; 2131 return true; 2132 } 2133 2134 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) 2135 { 2136 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm)); 2137 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2138 s->base.is_jmp = DISAS_UPDATE_EXIT; 2139 return true; 2140 } 2141 2142 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a) 2143 { 2144 if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) { 2145 return false; 2146 } 2147 2148 if (a->imm == 0) { 2149 clear_pstate_bits(PSTATE_ALLINT); 2150 } else if (s->current_el > 1) { 2151 set_pstate_bits(PSTATE_ALLINT); 2152 } else { 2153 gen_helper_msr_set_allint_el1(tcg_env); 2154 } 2155 2156 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2157 s->base.is_jmp = DISAS_UPDATE_EXIT; 2158 return true; 2159 } 2160 2161 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) 2162 { 2163 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { 2164 return false; 2165 } 2166 if (sme_access_check(s)) { 2167 int old = s->pstate_sm | (s->pstate_za << 1); 2168 int new = a->imm * 3; 2169 2170 if ((old ^ new) & a->mask) { 2171 /* At least one bit changes. */ 2172 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new), 2173 tcg_constant_i32(a->mask)); 2174 s->base.is_jmp = DISAS_TOO_MANY; 2175 } 2176 } 2177 return true; 2178 } 2179 2180 static void gen_get_nzcv(TCGv_i64 tcg_rt) 2181 { 2182 TCGv_i32 tmp = tcg_temp_new_i32(); 2183 TCGv_i32 nzcv = tcg_temp_new_i32(); 2184 2185 /* build bit 31, N */ 2186 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31)); 2187 /* build bit 30, Z */ 2188 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0); 2189 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1); 2190 /* build bit 29, C */ 2191 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1); 2192 /* build bit 28, V */ 2193 tcg_gen_shri_i32(tmp, cpu_VF, 31); 2194 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); 2195 /* generate result */ 2196 tcg_gen_extu_i32_i64(tcg_rt, nzcv); 2197 } 2198 2199 static void gen_set_nzcv(TCGv_i64 tcg_rt) 2200 { 2201 TCGv_i32 nzcv = tcg_temp_new_i32(); 2202 2203 /* take NZCV from R[t] */ 2204 tcg_gen_extrl_i64_i32(nzcv, tcg_rt); 2205 2206 /* bit 31, N */ 2207 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); 2208 /* bit 30, Z */ 2209 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30)); 2210 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0); 2211 /* bit 29, C */ 2212 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29)); 2213 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29); 2214 /* bit 28, V */ 2215 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); 2216 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); 2217 } 2218 2219 static void gen_sysreg_undef(DisasContext *s, bool isread, 2220 uint8_t op0, uint8_t op1, uint8_t op2, 2221 uint8_t crn, uint8_t crm, uint8_t rt) 2222 { 2223 /* 2224 * Generate code to emit an UNDEF with correct syndrome 2225 * information for a failed system register access. 
2226 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases, 2227 * but if FEAT_IDST is implemented then read accesses to registers 2228 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP 2229 * syndrome. 2230 */ 2231 uint32_t syndrome; 2232 2233 if (isread && dc_isar_feature(aa64_ids, s) && 2234 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { 2235 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2236 } else { 2237 syndrome = syn_uncategorized(); 2238 } 2239 gen_exception_insn(s, 0, EXCP_UDEF, syndrome); 2240 } 2241 2242 /* MRS - move from system register 2243 * MSR (register) - move to system register 2244 * SYS 2245 * SYSL 2246 * These are all essentially the same insn in 'read' and 'write' 2247 * versions, with varying op0 fields. 2248 */ 2249 static void handle_sys(DisasContext *s, bool isread, 2250 unsigned int op0, unsigned int op1, unsigned int op2, 2251 unsigned int crn, unsigned int crm, unsigned int rt) 2252 { 2253 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2254 crn, crm, op0, op1, op2); 2255 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 2256 bool need_exit_tb = false; 2257 bool nv_trap_to_el2 = false; 2258 bool nv_redirect_reg = false; 2259 bool skip_fp_access_checks = false; 2260 bool nv2_mem_redirect = false; 2261 TCGv_ptr tcg_ri = NULL; 2262 TCGv_i64 tcg_rt; 2263 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2264 2265 if (crn == 11 || crn == 15) { 2266 /* 2267 * Check for TIDCP trap, which must take precedence over 2268 * the UNDEF for "no such register" etc. 2269 */ 2270 switch (s->current_el) { 2271 case 0: 2272 if (dc_isar_feature(aa64_tidcp1, s)) { 2273 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome)); 2274 } 2275 break; 2276 case 1: 2277 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome)); 2278 break; 2279 } 2280 } 2281 2282 if (!ri) { 2283 /* Unknown register; this might be a guest error or a QEMU 2284 * unimplemented feature. 2285 */ 2286 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " 2287 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", 2288 isread ? "read" : "write", op0, op1, crn, crm, op2); 2289 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2290 return; 2291 } 2292 2293 if (s->nv2 && ri->nv2_redirect_offset) { 2294 /* 2295 * Some registers always redirect to memory; some only do so if 2296 * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in 2297 * pairs which share an offset; see the table in R_CSRPQ). 2298 */ 2299 if (ri->nv2_redirect_offset & NV2_REDIR_NV1) { 2300 nv2_mem_redirect = s->nv1; 2301 } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) { 2302 nv2_mem_redirect = !s->nv1; 2303 } else { 2304 nv2_mem_redirect = true; 2305 } 2306 } 2307 2308 /* Check access permissions */ 2309 if (!cp_access_ok(s->current_el, ri, isread)) { 2310 /* 2311 * FEAT_NV/NV2 handling does not do the usual FP access checks 2312 * for registers only accessible at EL2 (though it *does* do them 2313 * for registers accessible at EL1). 2314 */ 2315 skip_fp_access_checks = true; 2316 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) { 2317 /* 2318 * This is one of the few EL2 registers which should redirect 2319 * to the equivalent EL1 register. We do that after running 2320 * the EL2 register's accessfn. 2321 */ 2322 nv_redirect_reg = true; 2323 assert(!nv2_mem_redirect); 2324 } else if (nv2_mem_redirect) { 2325 /* 2326 * NV2 redirect-to-memory takes precedence over trap to EL2 or 2327 * UNDEF to EL1. 
2328 */ 2329 } else if (s->nv && arm_cpreg_traps_in_nv(ri)) { 2330 /* 2331 * This register / instruction exists and is an EL2 register, so 2332 * we must trap to EL2 if accessed in nested virtualization EL1 2333 * instead of UNDEFing. We'll do that after the usual access checks. 2334 * (This makes a difference only for a couple of registers like 2335 * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority 2336 * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have 2337 * an accessfn which does nothing when called from EL1, because 2338 * the trap-to-EL3 controls which would apply to that register 2339 * at EL2 don't take priority over the FEAT_NV trap-to-EL2.) 2340 */ 2341 nv_trap_to_el2 = true; 2342 } else { 2343 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2344 return; 2345 } 2346 } 2347 2348 if (ri->accessfn || (ri->fgt && s->fgt_active)) { 2349 /* Emit code to perform further access permissions checks at 2350 * runtime; this may result in an exception. 2351 */ 2352 gen_a64_update_pc(s, 0); 2353 tcg_ri = tcg_temp_new_ptr(); 2354 gen_helper_access_check_cp_reg(tcg_ri, tcg_env, 2355 tcg_constant_i32(key), 2356 tcg_constant_i32(syndrome), 2357 tcg_constant_i32(isread)); 2358 } else if (ri->type & ARM_CP_RAISES_EXC) { 2359 /* 2360 * The readfn or writefn might raise an exception; 2361 * synchronize the CPU state in case it does. 2362 */ 2363 gen_a64_update_pc(s, 0); 2364 } 2365 2366 if (!skip_fp_access_checks) { 2367 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 2368 return; 2369 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 2370 return; 2371 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 2372 return; 2373 } 2374 } 2375 2376 if (nv_trap_to_el2) { 2377 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2378 return; 2379 } 2380 2381 if (nv_redirect_reg) { 2382 /* 2383 * FEAT_NV2 redirection of an EL2 register to an EL1 register. 2384 * Conveniently in all cases the encoding of the EL1 register is 2385 * identical to the EL2 register except that opc1 is 0. 2386 * Get the reginfo for the EL1 register to use for the actual access. 2387 * We don't use the EL1 register's access function, and 2388 * fine-grained-traps on EL1 also do not apply here. 2389 */ 2390 key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2391 crn, crm, op0, 0, op2); 2392 ri = get_arm_cp_reginfo(s->cp_regs, key); 2393 assert(ri); 2394 assert(cp_access_ok(s->current_el, ri, isread)); 2395 /* 2396 * We might not have done an update_pc earlier, so check we don't 2397 * need it. We could support this in future if necessary. 2398 */ 2399 assert(!(ri->type & ARM_CP_RAISES_EXC)); 2400 } 2401 2402 if (nv2_mem_redirect) { 2403 /* 2404 * This system register is being redirected into an EL2 memory access. 2405 * This means it is not an IO operation, doesn't change hflags, 2406 * and need not end the TB, because it has no side effects. 2407 * 2408 * The access is 64-bit single copy atomic, guaranteed aligned because 2409 * of the definition of VCNR_EL2. Its endianness depends on 2410 * SCTLR_EL2.EE, not on the data endianness of EL1. 2411 * It is done under either the EL2 translation regime or the EL2&0 2412 * translation regime, depending on HCR_EL2.E2H. It behaves as if 2413 * PSTATE.PAN is 0. 2414 */ 2415 TCGv_i64 ptr = tcg_temp_new_i64(); 2416 MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN; 2417 ARMMMUIdx armmemidx = s->nv2_mem_e20 ? 
ARMMMUIdx_E20_2 : ARMMMUIdx_E2; 2418 int memidx = arm_to_core_mmu_idx(armmemidx); 2419 uint32_t syn; 2420 2421 mop |= (s->nv2_mem_be ? MO_BE : MO_LE); 2422 2423 tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2)); 2424 tcg_gen_addi_i64(ptr, ptr, 2425 (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK)); 2426 tcg_rt = cpu_reg(s, rt); 2427 2428 syn = syn_data_abort_vncr(0, !isread, 0); 2429 disas_set_insn_syndrome(s, syn); 2430 if (isread) { 2431 tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop); 2432 } else { 2433 tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop); 2434 } 2435 return; 2436 } 2437 2438 /* Handle special cases first */ 2439 switch (ri->type & ARM_CP_SPECIAL_MASK) { 2440 case 0: 2441 break; 2442 case ARM_CP_NOP: 2443 return; 2444 case ARM_CP_NZCV: 2445 tcg_rt = cpu_reg(s, rt); 2446 if (isread) { 2447 gen_get_nzcv(tcg_rt); 2448 } else { 2449 gen_set_nzcv(tcg_rt); 2450 } 2451 return; 2452 case ARM_CP_CURRENTEL: 2453 { 2454 /* 2455 * Reads as current EL value from pstate, which is 2456 * guaranteed to be constant by the tb flags. 2457 * For nested virt we should report EL2. 2458 */ 2459 int el = s->nv ? 2 : s->current_el; 2460 tcg_rt = cpu_reg(s, rt); 2461 tcg_gen_movi_i64(tcg_rt, el << 2); 2462 return; 2463 } 2464 case ARM_CP_DC_ZVA: 2465 /* Writes clear the aligned block of memory which rt points into. */ 2466 if (s->mte_active[0]) { 2467 int desc = 0; 2468 2469 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 2470 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 2471 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 2472 2473 tcg_rt = tcg_temp_new_i64(); 2474 gen_helper_mte_check_zva(tcg_rt, tcg_env, 2475 tcg_constant_i32(desc), cpu_reg(s, rt)); 2476 } else { 2477 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 2478 } 2479 gen_helper_dc_zva(tcg_env, tcg_rt); 2480 return; 2481 case ARM_CP_DC_GVA: 2482 { 2483 TCGv_i64 clean_addr, tag; 2484 2485 /* 2486 * DC_GVA, like DC_ZVA, requires that we supply the original 2487 * pointer for an invalid page. Probe that address first. 2488 */ 2489 tcg_rt = cpu_reg(s, rt); 2490 clean_addr = clean_data_tbi(s, tcg_rt); 2491 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 2492 2493 if (s->ata[0]) { 2494 /* Extract the tag from the register to match STZGM. */ 2495 tag = tcg_temp_new_i64(); 2496 tcg_gen_shri_i64(tag, tcg_rt, 56); 2497 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2498 } 2499 } 2500 return; 2501 case ARM_CP_DC_GZVA: 2502 { 2503 TCGv_i64 clean_addr, tag; 2504 2505 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 2506 tcg_rt = cpu_reg(s, rt); 2507 clean_addr = clean_data_tbi(s, tcg_rt); 2508 gen_helper_dc_zva(tcg_env, clean_addr); 2509 2510 if (s->ata[0]) { 2511 /* Extract the tag from the register to match STZGM. 
*/ 2512 tag = tcg_temp_new_i64(); 2513 tcg_gen_shri_i64(tag, tcg_rt, 56); 2514 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2515 } 2516 } 2517 return; 2518 default: 2519 g_assert_not_reached(); 2520 } 2521 2522 if (ri->type & ARM_CP_IO) { 2523 /* I/O operations must end the TB here (whether read or write) */ 2524 need_exit_tb = translator_io_start(&s->base); 2525 } 2526 2527 tcg_rt = cpu_reg(s, rt); 2528 2529 if (isread) { 2530 if (ri->type & ARM_CP_CONST) { 2531 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2532 } else if (ri->readfn) { 2533 if (!tcg_ri) { 2534 tcg_ri = gen_lookup_cp_reg(key); 2535 } 2536 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri); 2537 } else { 2538 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset); 2539 } 2540 } else { 2541 if (ri->type & ARM_CP_CONST) { 2542 /* If not forbidden by access permissions, treat as WI */ 2543 return; 2544 } else if (ri->writefn) { 2545 if (!tcg_ri) { 2546 tcg_ri = gen_lookup_cp_reg(key); 2547 } 2548 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt); 2549 } else { 2550 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset); 2551 } 2552 } 2553 2554 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2555 /* 2556 * A write to any coprocessor register that ends a TB 2557 * must rebuild the hflags for the next TB. 2558 */ 2559 gen_rebuild_hflags(s); 2560 /* 2561 * We default to ending the TB on a coprocessor register write, 2562 * but allow this to be suppressed by the register definition 2563 * (usually only necessary to work around guest bugs). 2564 */ 2565 need_exit_tb = true; 2566 } 2567 if (need_exit_tb) { 2568 s->base.is_jmp = DISAS_UPDATE_EXIT; 2569 } 2570 } 2571 2572 static bool trans_SYS(DisasContext *s, arg_SYS *a) 2573 { 2574 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); 2575 return true; 2576 } 2577 2578 static bool trans_SVC(DisasContext *s, arg_i *a) 2579 { 2580 /* 2581 * For SVC, HVC and SMC we advance the single-step state 2582 * machine before taking the exception. This is architecturally 2583 * mandated, to ensure that single-stepping a system call 2584 * instruction works properly. 2585 */ 2586 uint32_t syndrome = syn_aa64_svc(a->imm); 2587 if (s->fgt_svc) { 2588 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2589 return true; 2590 } 2591 gen_ss_advance(s); 2592 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2593 return true; 2594 } 2595 2596 static bool trans_HVC(DisasContext *s, arg_i *a) 2597 { 2598 int target_el = s->current_el == 3 ? 3 : 2; 2599 2600 if (s->current_el == 0) { 2601 unallocated_encoding(s); 2602 return true; 2603 } 2604 /* 2605 * The pre HVC helper handles cases when HVC gets trapped 2606 * as an undefined insn by runtime configuration. 
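 * (e.g. because SCR_EL3.HCE is 0 or HCR_EL2.HCD is 1).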
2607 */ 2608 gen_a64_update_pc(s, 0); 2609 gen_helper_pre_hvc(tcg_env); 2610 /* Architecture requires ss advance before we do the actual work */ 2611 gen_ss_advance(s); 2612 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el); 2613 return true; 2614 } 2615 2616 static bool trans_SMC(DisasContext *s, arg_i *a) 2617 { 2618 if (s->current_el == 0) { 2619 unallocated_encoding(s); 2620 return true; 2621 } 2622 gen_a64_update_pc(s, 0); 2623 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm))); 2624 /* Architecture requires ss advance before we do the actual work */ 2625 gen_ss_advance(s); 2626 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); 2627 return true; 2628 } 2629 2630 static bool trans_BRK(DisasContext *s, arg_i *a) 2631 { 2632 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); 2633 return true; 2634 } 2635 2636 static bool trans_HLT(DisasContext *s, arg_i *a) 2637 { 2638 /* 2639 * HLT. This has two purposes. 2640 * Architecturally, it is an external halting debug instruction. 2641 * Since QEMU doesn't implement external debug, we treat this as 2642 * the architecture requires when halting debug is disabled: it will UNDEF. 2643 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2644 */ 2645 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { 2646 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2647 } else { 2648 unallocated_encoding(s); 2649 } 2650 return true; 2651 } 2652 2653 /* 2654 * Load/Store exclusive instructions are implemented by remembering 2655 * the value/address loaded, and seeing if these are the same 2656 * when the store is performed. This is not actually the architecturally 2657 * mandated semantics, but it works for typical guest code sequences 2658 * and avoids having to monitor regular stores. 2659 * 2660 * The store exclusive uses the atomic cmpxchg primitives to avoid 2661 * races in multi-threaded linux-user and when MTTCG softmmu is 2662 * enabled.
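 *
 * For example, a typical guest retry loop such as
 *     retry:
 *         ldxr    x0, [x2]
 *         add     x0, x0, #1
 *         stxr    w1, x0, [x2]
 *         cbnz    w1, retry
 * is handled by remembering the value loaded by the LDXR and then
 * having the STXR do a cmpxchg of the new value against that
 * remembered value at the remembered address.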
2663 */ 2664 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, 2665 int size, bool is_pair) 2666 { 2667 int idx = get_mem_index(s); 2668 TCGv_i64 dirty_addr, clean_addr; 2669 MemOp memop = check_atomic_align(s, rn, size + is_pair); 2670 2671 s->is_ldex = true; 2672 dirty_addr = cpu_reg_sp(s, rn); 2673 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); 2674 2675 g_assert(size <= 3); 2676 if (is_pair) { 2677 g_assert(size >= 2); 2678 if (size == 2) { 2679 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2680 if (s->be_data == MO_LE) { 2681 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2682 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2683 } else { 2684 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2685 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2686 } 2687 } else { 2688 TCGv_i128 t16 = tcg_temp_new_i128(); 2689 2690 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); 2691 2692 if (s->be_data == MO_LE) { 2693 tcg_gen_extr_i128_i64(cpu_exclusive_val, 2694 cpu_exclusive_high, t16); 2695 } else { 2696 tcg_gen_extr_i128_i64(cpu_exclusive_high, 2697 cpu_exclusive_val, t16); 2698 } 2699 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2700 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2701 } 2702 } else { 2703 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2704 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2705 } 2706 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); 2707 } 2708 2709 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2710 int rn, int size, int is_pair) 2711 { 2712 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2713 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2714 * [addr] = {Rt}; 2715 * if (is_pair) { 2716 * [addr + datasize] = {Rt2}; 2717 * } 2718 * {Rd} = 0; 2719 * } else { 2720 * {Rd} = 1; 2721 * } 2722 * env->exclusive_addr = -1; 2723 */ 2724 TCGLabel *fail_label = gen_new_label(); 2725 TCGLabel *done_label = gen_new_label(); 2726 TCGv_i64 tmp, clean_addr; 2727 MemOp memop; 2728 2729 /* 2730 * FIXME: We are out of spec here. We have recorded only the address 2731 * from load_exclusive, not the entire range, and we assume that the 2732 * size of the access on both sides match. The architecture allows the 2733 * store to be smaller than the load, so long as the stored bytes are 2734 * within the range recorded by the load. 2735 */ 2736 2737 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */ 2738 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); 2739 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); 2740 2741 /* 2742 * The write, and any associated faults, only happen if the virtual 2743 * and physical addresses pass the exclusive monitor check. These 2744 * faults are exceedingly unlikely, because normally the guest uses 2745 * the exact same address register for the load_exclusive, and we 2746 * would have recognized these faults there. 2747 * 2748 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an 2749 * unaligned 4-byte write within the range of an aligned 8-byte load. 2750 * With LSE2, the store would need to cross a 16-byte boundary when the 2751 * load did not, which would mean the store is outside the range 2752 * recorded for the monitor, which would have failed a corrected monitor 2753 * check above. 
For now, we assume no size change and retain the 2754 * MO_ALIGN to let tcg know what we checked in the load_exclusive. 2755 * 2756 * It is possible to trigger an MTE fault, by performing the load with 2757 * a virtual address with a valid tag and performing the store with the 2758 * same virtual address and a different invalid tag. 2759 */ 2760 memop = size + is_pair; 2761 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { 2762 memop |= MO_ALIGN; 2763 } 2764 memop = finalize_memop(s, memop); 2765 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2766 2767 tmp = tcg_temp_new_i64(); 2768 if (is_pair) { 2769 if (size == 2) { 2770 if (s->be_data == MO_LE) { 2771 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2772 } else { 2773 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2774 } 2775 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2776 cpu_exclusive_val, tmp, 2777 get_mem_index(s), memop); 2778 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2779 } else { 2780 TCGv_i128 t16 = tcg_temp_new_i128(); 2781 TCGv_i128 c16 = tcg_temp_new_i128(); 2782 TCGv_i64 a, b; 2783 2784 if (s->be_data == MO_LE) { 2785 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2786 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2787 cpu_exclusive_high); 2788 } else { 2789 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 2790 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 2791 cpu_exclusive_val); 2792 } 2793 2794 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 2795 get_mem_index(s), memop); 2796 2797 a = tcg_temp_new_i64(); 2798 b = tcg_temp_new_i64(); 2799 if (s->be_data == MO_LE) { 2800 tcg_gen_extr_i128_i64(a, b, t16); 2801 } else { 2802 tcg_gen_extr_i128_i64(b, a, t16); 2803 } 2804 2805 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 2806 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 2807 tcg_gen_or_i64(tmp, a, b); 2808 2809 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 2810 } 2811 } else { 2812 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 2813 cpu_reg(s, rt), get_mem_index(s), memop); 2814 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2815 } 2816 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 2817 tcg_gen_br(done_label); 2818 2819 gen_set_label(fail_label); 2820 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 2821 gen_set_label(done_label); 2822 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2823 } 2824 2825 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 2826 int rn, int size) 2827 { 2828 TCGv_i64 tcg_rs = cpu_reg(s, rs); 2829 TCGv_i64 tcg_rt = cpu_reg(s, rt); 2830 int memidx = get_mem_index(s); 2831 TCGv_i64 clean_addr; 2832 MemOp memop; 2833 2834 if (rn == 31) { 2835 gen_check_sp_alignment(s); 2836 } 2837 memop = check_atomic_align(s, rn, size); 2838 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2839 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, 2840 memidx, memop); 2841 } 2842 2843 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 2844 int rn, int size) 2845 { 2846 TCGv_i64 s1 = cpu_reg(s, rs); 2847 TCGv_i64 s2 = cpu_reg(s, rs + 1); 2848 TCGv_i64 t1 = cpu_reg(s, rt); 2849 TCGv_i64 t2 = cpu_reg(s, rt + 1); 2850 TCGv_i64 clean_addr; 2851 int memidx = get_mem_index(s); 2852 MemOp memop; 2853 2854 if (rn == 31) { 2855 gen_check_sp_alignment(s); 2856 } 2857 2858 /* This is a single atomic access, despite the "pair". 
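 * For the 32-bit form it is a single 64-bit cmpxchg of the
 * concatenated registers, and for the 64-bit form a single
 * 128-bit cmpxchg.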
*/ 2859 memop = check_atomic_align(s, rn, size + 1); 2860 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2861 2862 if (size == 2) { 2863 TCGv_i64 cmp = tcg_temp_new_i64(); 2864 TCGv_i64 val = tcg_temp_new_i64(); 2865 2866 if (s->be_data == MO_LE) { 2867 tcg_gen_concat32_i64(val, t1, t2); 2868 tcg_gen_concat32_i64(cmp, s1, s2); 2869 } else { 2870 tcg_gen_concat32_i64(val, t2, t1); 2871 tcg_gen_concat32_i64(cmp, s2, s1); 2872 } 2873 2874 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); 2875 2876 if (s->be_data == MO_LE) { 2877 tcg_gen_extr32_i64(s1, s2, cmp); 2878 } else { 2879 tcg_gen_extr32_i64(s2, s1, cmp); 2880 } 2881 } else { 2882 TCGv_i128 cmp = tcg_temp_new_i128(); 2883 TCGv_i128 val = tcg_temp_new_i128(); 2884 2885 if (s->be_data == MO_LE) { 2886 tcg_gen_concat_i64_i128(val, t1, t2); 2887 tcg_gen_concat_i64_i128(cmp, s1, s2); 2888 } else { 2889 tcg_gen_concat_i64_i128(val, t2, t1); 2890 tcg_gen_concat_i64_i128(cmp, s2, s1); 2891 } 2892 2893 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); 2894 2895 if (s->be_data == MO_LE) { 2896 tcg_gen_extr_i128_i64(s1, s2, cmp); 2897 } else { 2898 tcg_gen_extr_i128_i64(s2, s1, cmp); 2899 } 2900 } 2901 } 2902 2903 /* 2904 * Compute the ISS.SF bit for syndrome information if an exception 2905 * is taken on a load or store. This indicates whether the instruction 2906 * is accessing a 32-bit or 64-bit register. This logic is derived 2907 * from the ARMv8 specs for LDR (Shared decode for all encodings). 2908 */ 2909 static bool ldst_iss_sf(int size, bool sign, bool ext) 2910 { 2911 2912 if (sign) { 2913 /* 2914 * Signed loads are 64 bit results if we are not going to 2915 * do a zero-extend from 32 to 64 after the load. 2916 * (For a store, sign and ext are always false.) 2917 */ 2918 return !ext; 2919 } else { 2920 /* Unsigned loads/stores work at the specified size */ 2921 return size == MO_64; 2922 } 2923 } 2924 2925 static bool trans_STXR(DisasContext *s, arg_stxr *a) 2926 { 2927 if (a->rn == 31) { 2928 gen_check_sp_alignment(s); 2929 } 2930 if (a->lasr) { 2931 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2932 } 2933 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); 2934 return true; 2935 } 2936 2937 static bool trans_LDXR(DisasContext *s, arg_stxr *a) 2938 { 2939 if (a->rn == 31) { 2940 gen_check_sp_alignment(s); 2941 } 2942 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); 2943 if (a->lasr) { 2944 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2945 } 2946 return true; 2947 } 2948 2949 static bool trans_STLR(DisasContext *s, arg_stlr *a) 2950 { 2951 TCGv_i64 clean_addr; 2952 MemOp memop; 2953 bool iss_sf = ldst_iss_sf(a->sz, false, false); 2954 2955 /* 2956 * StoreLORelease is the same as Store-Release for QEMU, but 2957 * needs the feature-test. 2958 */ 2959 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 2960 return false; 2961 } 2962 /* Generate ISS for non-exclusive accesses including LASR. 
*/ 2963 if (a->rn == 31) { 2964 gen_check_sp_alignment(s); 2965 } 2966 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2967 memop = check_ordered_align(s, a->rn, 0, true, a->sz); 2968 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 2969 true, a->rn != 31, memop); 2970 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, 2971 iss_sf, a->lasr); 2972 return true; 2973 } 2974 2975 static bool trans_LDAR(DisasContext *s, arg_stlr *a) 2976 { 2977 TCGv_i64 clean_addr; 2978 MemOp memop; 2979 bool iss_sf = ldst_iss_sf(a->sz, false, false); 2980 2981 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 2982 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 2983 return false; 2984 } 2985 /* Generate ISS for non-exclusive accesses including LASR. */ 2986 if (a->rn == 31) { 2987 gen_check_sp_alignment(s); 2988 } 2989 memop = check_ordered_align(s, a->rn, 0, false, a->sz); 2990 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 2991 false, a->rn != 31, memop); 2992 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, 2993 a->rt, iss_sf, a->lasr); 2994 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2995 return true; 2996 } 2997 2998 static bool trans_STXP(DisasContext *s, arg_stxr *a) 2999 { 3000 if (a->rn == 31) { 3001 gen_check_sp_alignment(s); 3002 } 3003 if (a->lasr) { 3004 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3005 } 3006 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); 3007 return true; 3008 } 3009 3010 static bool trans_LDXP(DisasContext *s, arg_stxr *a) 3011 { 3012 if (a->rn == 31) { 3013 gen_check_sp_alignment(s); 3014 } 3015 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); 3016 if (a->lasr) { 3017 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3018 } 3019 return true; 3020 } 3021 3022 static bool trans_CASP(DisasContext *s, arg_CASP *a) 3023 { 3024 if (!dc_isar_feature(aa64_atomics, s)) { 3025 return false; 3026 } 3027 if (((a->rt | a->rs) & 1) != 0) { 3028 return false; 3029 } 3030 3031 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); 3032 return true; 3033 } 3034 3035 static bool trans_CAS(DisasContext *s, arg_CAS *a) 3036 { 3037 if (!dc_isar_feature(aa64_atomics, s)) { 3038 return false; 3039 } 3040 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); 3041 return true; 3042 } 3043 3044 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) 3045 { 3046 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); 3047 TCGv_i64 tcg_rt = cpu_reg(s, a->rt); 3048 TCGv_i64 clean_addr = tcg_temp_new_i64(); 3049 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3050 3051 gen_pc_plus_diff(s, clean_addr, a->imm); 3052 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3053 false, true, a->rt, iss_sf, false); 3054 return true; 3055 } 3056 3057 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) 3058 { 3059 /* Load register (literal), vector version */ 3060 TCGv_i64 clean_addr; 3061 MemOp memop; 3062 3063 if (!fp_access_check(s)) { 3064 return true; 3065 } 3066 memop = finalize_memop_asimd(s, a->sz); 3067 clean_addr = tcg_temp_new_i64(); 3068 gen_pc_plus_diff(s, clean_addr, a->imm); 3069 do_fp_ld(s, a->rt, clean_addr, memop); 3070 return true; 3071 } 3072 3073 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, 3074 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3075 uint64_t offset, bool is_store, MemOp mop) 3076 { 3077 if (a->rn == 31) { 3078 gen_check_sp_alignment(s); 3079 } 3080 3081 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3082 if (!a->p) { 3083 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3084 } 3085 3086 *clean_addr = gen_mte_checkN(s, 
*dirty_addr, is_store, 3087 (a->w || a->rn != 31), 2 << a->sz, mop); 3088 } 3089 3090 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, 3091 TCGv_i64 dirty_addr, uint64_t offset) 3092 { 3093 if (a->w) { 3094 if (a->p) { 3095 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3096 } 3097 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3098 } 3099 } 3100 3101 static bool trans_STP(DisasContext *s, arg_ldstpair *a) 3102 { 3103 uint64_t offset = a->imm << a->sz; 3104 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3105 MemOp mop = finalize_memop(s, a->sz); 3106 3107 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3108 tcg_rt = cpu_reg(s, a->rt); 3109 tcg_rt2 = cpu_reg(s, a->rt2); 3110 /* 3111 * We built mop above for the single logical access -- rebuild it 3112 * now for the paired operation. 3113 * 3114 * With LSE2, non-sign-extending pairs are treated atomically if 3115 * aligned, and if unaligned one of the pair will be completely 3116 * within a 16-byte block and that element will be atomic. 3117 * Otherwise each element is separately atomic. 3118 * In all cases, issue one operation with the correct atomicity. 3119 */ 3120 mop = a->sz + 1; 3121 if (s->align_mem) { 3122 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3123 } 3124 mop = finalize_memop_pair(s, mop); 3125 if (a->sz == 2) { 3126 TCGv_i64 tmp = tcg_temp_new_i64(); 3127 3128 if (s->be_data == MO_LE) { 3129 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); 3130 } else { 3131 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); 3132 } 3133 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); 3134 } else { 3135 TCGv_i128 tmp = tcg_temp_new_i128(); 3136 3137 if (s->be_data == MO_LE) { 3138 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3139 } else { 3140 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3141 } 3142 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3143 } 3144 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3145 return true; 3146 } 3147 3148 static bool trans_LDP(DisasContext *s, arg_ldstpair *a) 3149 { 3150 uint64_t offset = a->imm << a->sz; 3151 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3152 MemOp mop = finalize_memop(s, a->sz); 3153 3154 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3155 tcg_rt = cpu_reg(s, a->rt); 3156 tcg_rt2 = cpu_reg(s, a->rt2); 3157 3158 /* 3159 * We built mop above for the single logical access -- rebuild it 3160 * now for the paired operation. 3161 * 3162 * With LSE2, non-sign-extending pairs are treated atomically if 3163 * aligned, and if unaligned one of the pair will be completely 3164 * within a 16-byte block and that element will be atomic. 3165 * Otherwise each element is separately atomic. 3166 * In all cases, issue one operation with the correct atomicity. 3167 * 3168 * This treats sign-extending loads like zero-extending loads, 3169 * since that reuses the most code below. 3170 */ 3171 mop = a->sz + 1; 3172 if (s->align_mem) { 3173 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3174 } 3175 mop = finalize_memop_pair(s, mop); 3176 if (a->sz == 2) { 3177 int o2 = s->be_data == MO_LE ? 
32 : 0; 3178 int o1 = o2 ^ 32; 3179 3180 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); 3181 if (a->sign) { 3182 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); 3183 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); 3184 } else { 3185 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); 3186 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); 3187 } 3188 } else { 3189 TCGv_i128 tmp = tcg_temp_new_i128(); 3190 3191 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); 3192 if (s->be_data == MO_LE) { 3193 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); 3194 } else { 3195 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); 3196 } 3197 } 3198 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3199 return true; 3200 } 3201 3202 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) 3203 { 3204 uint64_t offset = a->imm << a->sz; 3205 TCGv_i64 clean_addr, dirty_addr; 3206 MemOp mop; 3207 3208 if (!fp_access_check(s)) { 3209 return true; 3210 } 3211 3212 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3213 mop = finalize_memop_asimd(s, a->sz); 3214 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3215 do_fp_st(s, a->rt, clean_addr, mop); 3216 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3217 do_fp_st(s, a->rt2, clean_addr, mop); 3218 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3219 return true; 3220 } 3221 3222 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) 3223 { 3224 uint64_t offset = a->imm << a->sz; 3225 TCGv_i64 clean_addr, dirty_addr; 3226 MemOp mop; 3227 3228 if (!fp_access_check(s)) { 3229 return true; 3230 } 3231 3232 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3233 mop = finalize_memop_asimd(s, a->sz); 3234 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3235 do_fp_ld(s, a->rt, clean_addr, mop); 3236 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3237 do_fp_ld(s, a->rt2, clean_addr, mop); 3238 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3239 return true; 3240 } 3241 3242 static bool trans_STGP(DisasContext *s, arg_ldstpair *a) 3243 { 3244 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3245 uint64_t offset = a->imm << LOG2_TAG_GRANULE; 3246 MemOp mop; 3247 TCGv_i128 tmp; 3248 3249 /* STGP only comes in one size. */ 3250 tcg_debug_assert(a->sz == MO_64); 3251 3252 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3253 return false; 3254 } 3255 3256 if (a->rn == 31) { 3257 gen_check_sp_alignment(s); 3258 } 3259 3260 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3261 if (!a->p) { 3262 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3263 } 3264 3265 clean_addr = clean_data_tbi(s, dirty_addr); 3266 tcg_rt = cpu_reg(s, a->rt); 3267 tcg_rt2 = cpu_reg(s, a->rt2); 3268 3269 /* 3270 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, 3271 * and one tag operation. We implement it as one single aligned 16-byte 3272 * memory operation for convenience. Note that the alignment ensures 3273 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. 3274 */ 3275 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); 3276 3277 tmp = tcg_temp_new_i128(); 3278 if (s->be_data == MO_LE) { 3279 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3280 } else { 3281 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3282 } 3283 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3284 3285 /* Perform the tag store, if tag access enabled. 
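 * (STGP takes the allocation tag from the transfer address itself,
 * which is why dirty_addr supplies both helper arguments.)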
*/ 3286 if (s->ata[0]) { 3287 if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3288 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr); 3289 } else { 3290 gen_helper_stg(tcg_env, dirty_addr, dirty_addr); 3291 } 3292 } 3293 3294 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3295 return true; 3296 } 3297 3298 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, 3299 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3300 uint64_t offset, bool is_store, MemOp mop) 3301 { 3302 int memidx; 3303 3304 if (a->rn == 31) { 3305 gen_check_sp_alignment(s); 3306 } 3307 3308 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3309 if (!a->p) { 3310 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3311 } 3312 memidx = get_a64_user_mem_index(s, a->unpriv); 3313 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, 3314 a->w || a->rn != 31, 3315 mop, a->unpriv, memidx); 3316 } 3317 3318 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, 3319 TCGv_i64 dirty_addr, uint64_t offset) 3320 { 3321 if (a->w) { 3322 if (a->p) { 3323 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3324 } 3325 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3326 } 3327 } 3328 3329 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) 3330 { 3331 bool iss_sf, iss_valid = !a->w; 3332 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3333 int memidx = get_a64_user_mem_index(s, a->unpriv); 3334 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3335 3336 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3337 3338 tcg_rt = cpu_reg(s, a->rt); 3339 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3340 3341 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, 3342 iss_valid, a->rt, iss_sf, false); 3343 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3344 return true; 3345 } 3346 3347 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) 3348 { 3349 bool iss_sf, iss_valid = !a->w; 3350 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3351 int memidx = get_a64_user_mem_index(s, a->unpriv); 3352 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3353 3354 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3355 3356 tcg_rt = cpu_reg(s, a->rt); 3357 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3358 3359 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, 3360 a->ext, memidx, iss_valid, a->rt, iss_sf, false); 3361 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3362 return true; 3363 } 3364 3365 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) 3366 { 3367 TCGv_i64 clean_addr, dirty_addr; 3368 MemOp mop; 3369 3370 if (!fp_access_check(s)) { 3371 return true; 3372 } 3373 mop = finalize_memop_asimd(s, a->sz); 3374 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3375 do_fp_st(s, a->rt, clean_addr, mop); 3376 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3377 return true; 3378 } 3379 3380 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) 3381 { 3382 TCGv_i64 clean_addr, dirty_addr; 3383 MemOp mop; 3384 3385 if (!fp_access_check(s)) { 3386 return true; 3387 } 3388 mop = finalize_memop_asimd(s, a->sz); 3389 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3390 do_fp_ld(s, a->rt, clean_addr, mop); 3391 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3392 return true; 3393 } 3394 3395 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, 3396 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3397 bool is_store, MemOp memop) 3398 { 3399 TCGv_i64 tcg_rm; 3400 3401 if (a->rn == 31) { 3402 
gen_check_sp_alignment(s); 3403 } 3404 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3405 3406 tcg_rm = read_cpu_reg(s, a->rm, 1); 3407 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); 3408 3409 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); 3410 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); 3411 } 3412 3413 static bool trans_LDR(DisasContext *s, arg_ldst *a) 3414 { 3415 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3416 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3417 MemOp memop; 3418 3419 if (extract32(a->opt, 1, 1) == 0) { 3420 return false; 3421 } 3422 3423 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3424 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3425 tcg_rt = cpu_reg(s, a->rt); 3426 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3427 a->ext, true, a->rt, iss_sf, false); 3428 return true; 3429 } 3430 3431 static bool trans_STR(DisasContext *s, arg_ldst *a) 3432 { 3433 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3434 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3435 MemOp memop; 3436 3437 if (extract32(a->opt, 1, 1) == 0) { 3438 return false; 3439 } 3440 3441 memop = finalize_memop(s, a->sz); 3442 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3443 tcg_rt = cpu_reg(s, a->rt); 3444 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); 3445 return true; 3446 } 3447 3448 static bool trans_LDR_v(DisasContext *s, arg_ldst *a) 3449 { 3450 TCGv_i64 clean_addr, dirty_addr; 3451 MemOp memop; 3452 3453 if (extract32(a->opt, 1, 1) == 0) { 3454 return false; 3455 } 3456 3457 if (!fp_access_check(s)) { 3458 return true; 3459 } 3460 3461 memop = finalize_memop_asimd(s, a->sz); 3462 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3463 do_fp_ld(s, a->rt, clean_addr, memop); 3464 return true; 3465 } 3466 3467 static bool trans_STR_v(DisasContext *s, arg_ldst *a) 3468 { 3469 TCGv_i64 clean_addr, dirty_addr; 3470 MemOp memop; 3471 3472 if (extract32(a->opt, 1, 1) == 0) { 3473 return false; 3474 } 3475 3476 if (!fp_access_check(s)) { 3477 return true; 3478 } 3479 3480 memop = finalize_memop_asimd(s, a->sz); 3481 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3482 do_fp_st(s, a->rt, clean_addr, memop); 3483 return true; 3484 } 3485 3486 3487 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, 3488 int sign, bool invert) 3489 { 3490 MemOp mop = a->sz | sign; 3491 TCGv_i64 clean_addr, tcg_rs, tcg_rt; 3492 3493 if (a->rn == 31) { 3494 gen_check_sp_alignment(s); 3495 } 3496 mop = check_atomic_align(s, a->rn, mop); 3497 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3498 a->rn != 31, mop); 3499 tcg_rs = read_cpu_reg(s, a->rs, true); 3500 tcg_rt = cpu_reg(s, a->rt); 3501 if (invert) { 3502 tcg_gen_not_i64(tcg_rs, tcg_rs); 3503 } 3504 /* 3505 * The tcg atomic primitives are all full barriers. Therefore we 3506 * can ignore the Acquire and Release bits of this instruction. 
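 * For example, LDADDAL requires no extra fences here compared
 * with plain LDADD.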
3507 */ 3508 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3509 3510 if (mop & MO_SIGN) { 3511 switch (a->sz) { 3512 case MO_8: 3513 tcg_gen_ext8u_i64(tcg_rt, tcg_rt); 3514 break; 3515 case MO_16: 3516 tcg_gen_ext16u_i64(tcg_rt, tcg_rt); 3517 break; 3518 case MO_32: 3519 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3520 break; 3521 case MO_64: 3522 break; 3523 default: 3524 g_assert_not_reached(); 3525 } 3526 } 3527 return true; 3528 } 3529 3530 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) 3531 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) 3532 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) 3533 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) 3534 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) 3535 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) 3536 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 3537 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) 3538 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) 3539 3540 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) 3541 { 3542 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3543 TCGv_i64 clean_addr; 3544 MemOp mop; 3545 3546 if (!dc_isar_feature(aa64_atomics, s) || 3547 !dc_isar_feature(aa64_rcpc_8_3, s)) { 3548 return false; 3549 } 3550 if (a->rn == 31) { 3551 gen_check_sp_alignment(s); 3552 } 3553 mop = check_ordered_align(s, a->rn, 0, false, a->sz); 3554 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3555 a->rn != 31, mop); 3556 /* 3557 * LDAPR* are a special case because they are a simple load, not a 3558 * fetch-and-do-something op. 3559 * The architectural consistency requirements here are weaker than 3560 * full load-acquire (we only need "load-acquire processor consistent"), 3561 * but we choose to implement them as full LDAQ. 3562 */ 3563 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, 3564 true, a->rt, iss_sf, true); 3565 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3566 return true; 3567 } 3568 3569 static bool trans_LDRA(DisasContext *s, arg_LDRA *a) 3570 { 3571 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3572 MemOp memop; 3573 3574 /* Load with pointer authentication */ 3575 if (!dc_isar_feature(aa64_pauth, s)) { 3576 return false; 3577 } 3578 3579 if (a->rn == 31) { 3580 gen_check_sp_alignment(s); 3581 } 3582 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3583 3584 if (s->pauth_active) { 3585 if (!a->m) { 3586 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr, 3587 tcg_constant_i64(0)); 3588 } else { 3589 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr, 3590 tcg_constant_i64(0)); 3591 } 3592 } 3593 3594 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3595 3596 memop = finalize_memop(s, MO_64); 3597 3598 /* Note that "clean" and "dirty" here refer to TBI not PAC. 
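 * (Any pointer authentication code has already been checked and
 * removed by the autda/autdb helper above.)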
*/ 3599 clean_addr = gen_mte_check1(s, dirty_addr, false, 3600 a->w || a->rn != 31, memop); 3601 3602 tcg_rt = cpu_reg(s, a->rt); 3603 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3604 /* extend */ false, /* iss_valid */ !a->w, 3605 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); 3606 3607 if (a->w) { 3608 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3609 } 3610 return true; 3611 } 3612 3613 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3614 { 3615 TCGv_i64 clean_addr, dirty_addr; 3616 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); 3617 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3618 3619 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3620 return false; 3621 } 3622 3623 if (a->rn == 31) { 3624 gen_check_sp_alignment(s); 3625 } 3626 3627 mop = check_ordered_align(s, a->rn, a->imm, false, mop); 3628 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3629 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3630 clean_addr = clean_data_tbi(s, dirty_addr); 3631 3632 /* 3633 * Load-AcquirePC semantics; we implement as the slightly more 3634 * restrictive Load-Acquire. 3635 */ 3636 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, 3637 a->rt, iss_sf, true); 3638 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3639 return true; 3640 } 3641 3642 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3643 { 3644 TCGv_i64 clean_addr, dirty_addr; 3645 MemOp mop = a->sz; 3646 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3647 3648 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3649 return false; 3650 } 3651 3652 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3653 3654 if (a->rn == 31) { 3655 gen_check_sp_alignment(s); 3656 } 3657 3658 mop = check_ordered_align(s, a->rn, a->imm, true, mop); 3659 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3660 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3661 clean_addr = clean_data_tbi(s, dirty_addr); 3662 3663 /* Store-Release semantics */ 3664 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3665 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); 3666 return true; 3667 } 3668 3669 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) 3670 { 3671 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3672 MemOp endian, align, mop; 3673 3674 int total; /* total bytes */ 3675 int elements; /* elements per vector */ 3676 int r; 3677 int size = a->sz; 3678 3679 if (!a->p && a->rm != 0) { 3680 /* For non-postindexed accesses the Rm field must be 0 */ 3681 return false; 3682 } 3683 if (size == 3 && !a->q && a->selem != 1) { 3684 return false; 3685 } 3686 if (!fp_access_check(s)) { 3687 return true; 3688 } 3689 3690 if (a->rn == 31) { 3691 gen_check_sp_alignment(s); 3692 } 3693 3694 /* For our purposes, bytes are always little-endian. */ 3695 endian = s->be_data; 3696 if (size == 0) { 3697 endian = MO_LE; 3698 } 3699 3700 total = a->rpt * a->selem * (a->q ? 16 : 8); 3701 tcg_rn = cpu_reg_sp(s, a->rn); 3702 3703 /* 3704 * Issue the MTE check vs the logical repeat count, before we 3705 * promote consecutive little-endian elements below. 3706 */ 3707 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, 3708 finalize_memop_asimd(s, size)); 3709 3710 /* 3711 * Consecutive little-endian elements from a single register 3712 * can be promoted to a larger little-endian operation. 3713 */ 3714 align = MO_ALIGN; 3715 if (a->selem == 1 && endian == MO_LE) { 3716 align = pow2_align(size); 3717 size = 3; 3718 } 3719 if (!s->align_mem) { 3720 align = 0; 3721 } 3722 mop = endian | size | align; 3723 3724 elements = (a->q ? 
16 : 8) >> size; 3725 tcg_ebytes = tcg_constant_i64(1 << size); 3726 for (r = 0; r < a->rpt; r++) { 3727 int e; 3728 for (e = 0; e < elements; e++) { 3729 int xs; 3730 for (xs = 0; xs < a->selem; xs++) { 3731 int tt = (a->rt + r + xs) % 32; 3732 do_vec_ld(s, tt, e, clean_addr, mop); 3733 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3734 } 3735 } 3736 } 3737 3738 /* 3739 * For non-quad operations, setting a slice of the low 64 bits of 3740 * the register clears the high 64 bits (in the ARM ARM pseudocode 3741 * this is implicit in the fact that 'rval' is a 64 bit wide 3742 * variable). For quad operations, we might still need to zero 3743 * the high bits of SVE. 3744 */ 3745 for (r = 0; r < a->rpt * a->selem; r++) { 3746 int tt = (a->rt + r) % 32; 3747 clear_vec_high(s, a->q, tt); 3748 } 3749 3750 if (a->p) { 3751 if (a->rm == 31) { 3752 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3753 } else { 3754 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3755 } 3756 } 3757 return true; 3758 } 3759 3760 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) 3761 { 3762 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3763 MemOp endian, align, mop; 3764 3765 int total; /* total bytes */ 3766 int elements; /* elements per vector */ 3767 int r; 3768 int size = a->sz; 3769 3770 if (!a->p && a->rm != 0) { 3771 /* For non-postindexed accesses the Rm field must be 0 */ 3772 return false; 3773 } 3774 if (size == 3 && !a->q && a->selem != 1) { 3775 return false; 3776 } 3777 if (!fp_access_check(s)) { 3778 return true; 3779 } 3780 3781 if (a->rn == 31) { 3782 gen_check_sp_alignment(s); 3783 } 3784 3785 /* For our purposes, bytes are always little-endian. */ 3786 endian = s->be_data; 3787 if (size == 0) { 3788 endian = MO_LE; 3789 } 3790 3791 total = a->rpt * a->selem * (a->q ? 16 : 8); 3792 tcg_rn = cpu_reg_sp(s, a->rn); 3793 3794 /* 3795 * Issue the MTE check vs the logical repeat count, before we 3796 * promote consecutive little-endian elements below. 3797 */ 3798 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, 3799 finalize_memop_asimd(s, size)); 3800 3801 /* 3802 * Consecutive little-endian elements from a single register 3803 * can be promoted to a larger little-endian operation. 3804 */ 3805 align = MO_ALIGN; 3806 if (a->selem == 1 && endian == MO_LE) { 3807 align = pow2_align(size); 3808 size = 3; 3809 } 3810 if (!s->align_mem) { 3811 align = 0; 3812 } 3813 mop = endian | size | align; 3814 3815 elements = (a->q ? 
16 : 8) >> size; 3816 tcg_ebytes = tcg_constant_i64(1 << size); 3817 for (r = 0; r < a->rpt; r++) { 3818 int e; 3819 for (e = 0; e < elements; e++) { 3820 int xs; 3821 for (xs = 0; xs < a->selem; xs++) { 3822 int tt = (a->rt + r + xs) % 32; 3823 do_vec_st(s, tt, e, clean_addr, mop); 3824 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3825 } 3826 } 3827 } 3828 3829 if (a->p) { 3830 if (a->rm == 31) { 3831 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3832 } else { 3833 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3834 } 3835 } 3836 return true; 3837 } 3838 3839 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) 3840 { 3841 int xs, total, rt; 3842 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3843 MemOp mop; 3844 3845 if (!a->p && a->rm != 0) { 3846 return false; 3847 } 3848 if (!fp_access_check(s)) { 3849 return true; 3850 } 3851 3852 if (a->rn == 31) { 3853 gen_check_sp_alignment(s); 3854 } 3855 3856 total = a->selem << a->scale; 3857 tcg_rn = cpu_reg_sp(s, a->rn); 3858 3859 mop = finalize_memop_asimd(s, a->scale); 3860 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, 3861 total, mop); 3862 3863 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3864 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3865 do_vec_st(s, rt, a->index, clean_addr, mop); 3866 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3867 } 3868 3869 if (a->p) { 3870 if (a->rm == 31) { 3871 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3872 } else { 3873 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3874 } 3875 } 3876 return true; 3877 } 3878 3879 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) 3880 { 3881 int xs, total, rt; 3882 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3883 MemOp mop; 3884 3885 if (!a->p && a->rm != 0) { 3886 return false; 3887 } 3888 if (!fp_access_check(s)) { 3889 return true; 3890 } 3891 3892 if (a->rn == 31) { 3893 gen_check_sp_alignment(s); 3894 } 3895 3896 total = a->selem << a->scale; 3897 tcg_rn = cpu_reg_sp(s, a->rn); 3898 3899 mop = finalize_memop_asimd(s, a->scale); 3900 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3901 total, mop); 3902 3903 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3904 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3905 do_vec_ld(s, rt, a->index, clean_addr, mop); 3906 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3907 } 3908 3909 if (a->p) { 3910 if (a->rm == 31) { 3911 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3912 } else { 3913 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3914 } 3915 } 3916 return true; 3917 } 3918 3919 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) 3920 { 3921 int xs, total, rt; 3922 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3923 MemOp mop; 3924 3925 if (!a->p && a->rm != 0) { 3926 return false; 3927 } 3928 if (!fp_access_check(s)) { 3929 return true; 3930 } 3931 3932 if (a->rn == 31) { 3933 gen_check_sp_alignment(s); 3934 } 3935 3936 total = a->selem << a->scale; 3937 tcg_rn = cpu_reg_sp(s, a->rn); 3938 3939 mop = finalize_memop_asimd(s, a->scale); 3940 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3941 total, mop); 3942 3943 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3944 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3945 /* Load and replicate to all elements */ 3946 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 3947 3948 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 3949 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), 3950 (a->q + 
1) * 8, vec_full_reg_size(s), tcg_tmp); 3951 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3952 } 3953 3954 if (a->p) { 3955 if (a->rm == 31) { 3956 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3957 } else { 3958 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3959 } 3960 } 3961 return true; 3962 } 3963 3964 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) 3965 { 3966 TCGv_i64 addr, clean_addr, tcg_rt; 3967 int size = 4 << s->dcz_blocksize; 3968 3969 if (!dc_isar_feature(aa64_mte, s)) { 3970 return false; 3971 } 3972 if (s->current_el == 0) { 3973 return false; 3974 } 3975 3976 if (a->rn == 31) { 3977 gen_check_sp_alignment(s); 3978 } 3979 3980 addr = read_cpu_reg_sp(s, a->rn, true); 3981 tcg_gen_addi_i64(addr, addr, a->imm); 3982 tcg_rt = cpu_reg(s, a->rt); 3983 3984 if (s->ata[0]) { 3985 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt); 3986 } 3987 /* 3988 * The non-tags portion of STZGM is mostly like DC_ZVA, 3989 * except the alignment happens before the access. 3990 */ 3991 clean_addr = clean_data_tbi(s, addr); 3992 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 3993 gen_helper_dc_zva(tcg_env, clean_addr); 3994 return true; 3995 } 3996 3997 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) 3998 { 3999 TCGv_i64 addr, clean_addr, tcg_rt; 4000 4001 if (!dc_isar_feature(aa64_mte, s)) { 4002 return false; 4003 } 4004 if (s->current_el == 0) { 4005 return false; 4006 } 4007 4008 if (a->rn == 31) { 4009 gen_check_sp_alignment(s); 4010 } 4011 4012 addr = read_cpu_reg_sp(s, a->rn, true); 4013 tcg_gen_addi_i64(addr, addr, a->imm); 4014 tcg_rt = cpu_reg(s, a->rt); 4015 4016 if (s->ata[0]) { 4017 gen_helper_stgm(tcg_env, addr, tcg_rt); 4018 } else { 4019 MMUAccessType acc = MMU_DATA_STORE; 4020 int size = 4 << s->gm_blocksize; 4021 4022 clean_addr = clean_data_tbi(s, addr); 4023 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4024 gen_probe_access(s, clean_addr, acc, size); 4025 } 4026 return true; 4027 } 4028 4029 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) 4030 { 4031 TCGv_i64 addr, clean_addr, tcg_rt; 4032 4033 if (!dc_isar_feature(aa64_mte, s)) { 4034 return false; 4035 } 4036 if (s->current_el == 0) { 4037 return false; 4038 } 4039 4040 if (a->rn == 31) { 4041 gen_check_sp_alignment(s); 4042 } 4043 4044 addr = read_cpu_reg_sp(s, a->rn, true); 4045 tcg_gen_addi_i64(addr, addr, a->imm); 4046 tcg_rt = cpu_reg(s, a->rt); 4047 4048 if (s->ata[0]) { 4049 gen_helper_ldgm(tcg_rt, tcg_env, addr); 4050 } else { 4051 MMUAccessType acc = MMU_DATA_LOAD; 4052 int size = 4 << s->gm_blocksize; 4053 4054 clean_addr = clean_data_tbi(s, addr); 4055 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4056 gen_probe_access(s, clean_addr, acc, size); 4057 /* The result tags are zeros. */ 4058 tcg_gen_movi_i64(tcg_rt, 0); 4059 } 4060 return true; 4061 } 4062 4063 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) 4064 { 4065 TCGv_i64 addr, clean_addr, tcg_rt; 4066 4067 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 4068 return false; 4069 } 4070 4071 if (a->rn == 31) { 4072 gen_check_sp_alignment(s); 4073 } 4074 4075 addr = read_cpu_reg_sp(s, a->rn, true); 4076 if (!a->p) { 4077 /* pre-index or signed offset */ 4078 tcg_gen_addi_i64(addr, addr, a->imm); 4079 } 4080 4081 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); 4082 tcg_rt = cpu_reg(s, a->rt); 4083 if (s->ata[0]) { 4084 gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt); 4085 } else { 4086 /* 4087 * Tag access disabled: we must check for aborts on the load 4088 * load from [rn+offset], and then insert a 0 tag into rt. 
4089 */ 4090 clean_addr = clean_data_tbi(s, addr); 4091 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 4092 gen_address_with_allocation_tag0(tcg_rt, tcg_rt); 4093 } 4094 4095 if (a->w) { 4096 /* pre-index or post-index */ 4097 if (a->p) { 4098 /* post-index */ 4099 tcg_gen_addi_i64(addr, addr, a->imm); 4100 } 4101 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4102 } 4103 return true; 4104 } 4105 4106 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) 4107 { 4108 TCGv_i64 addr, tcg_rt; 4109 4110 if (a->rn == 31) { 4111 gen_check_sp_alignment(s); 4112 } 4113 4114 addr = read_cpu_reg_sp(s, a->rn, true); 4115 if (!a->p) { 4116 /* pre-index or signed offset */ 4117 tcg_gen_addi_i64(addr, addr, a->imm); 4118 } 4119 tcg_rt = cpu_reg_sp(s, a->rt); 4120 if (!s->ata[0]) { 4121 /* 4122 * For STG and ST2G, we need to check alignment and probe memory. 4123 * TODO: For STZG and STZ2G, we could rely on the stores below, 4124 * at least for system mode; user-only won't enforce alignment. 4125 */ 4126 if (is_pair) { 4127 gen_helper_st2g_stub(tcg_env, addr); 4128 } else { 4129 gen_helper_stg_stub(tcg_env, addr); 4130 } 4131 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 4132 if (is_pair) { 4133 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt); 4134 } else { 4135 gen_helper_stg_parallel(tcg_env, addr, tcg_rt); 4136 } 4137 } else { 4138 if (is_pair) { 4139 gen_helper_st2g(tcg_env, addr, tcg_rt); 4140 } else { 4141 gen_helper_stg(tcg_env, addr, tcg_rt); 4142 } 4143 } 4144 4145 if (is_zero) { 4146 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 4147 TCGv_i64 zero64 = tcg_constant_i64(0); 4148 TCGv_i128 zero128 = tcg_temp_new_i128(); 4149 int mem_index = get_mem_index(s); 4150 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN); 4151 4152 tcg_gen_concat_i64_i128(zero128, zero64, zero64); 4153 4154 /* This is 1 or 2 atomic 16-byte operations. */ 4155 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4156 if (is_pair) { 4157 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4158 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4159 } 4160 } 4161 4162 if (a->w) { 4163 /* pre-index or post-index */ 4164 if (a->p) { 4165 /* post-index */ 4166 tcg_gen_addi_i64(addr, addr, a->imm); 4167 } 4168 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4169 } 4170 return true; 4171 } 4172 4173 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) 4174 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) 4175 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) 4176 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) 4177 4178 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32); 4179 4180 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, 4181 bool is_setg, SetFn fn) 4182 { 4183 int memidx; 4184 uint32_t syndrome, desc = 0; 4185 4186 if (is_setg && !dc_isar_feature(aa64_mte, s)) { 4187 return false; 4188 } 4189 4190 /* 4191 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4192 * us to pull this check before the CheckMOPSEnabled() test 4193 * (which we do in the helper function) 4194 */ 4195 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4196 a->rd == 31 || a->rn == 31) { 4197 return false; 4198 } 4199 4200 memidx = get_a64_user_mem_index(s, a->unpriv); 4201 4202 /* 4203 * We pass option_a == true, matching our implementation; 4204 * we pass wrong_option == false: helper function may set that bit. 
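 * If the helper does have to raise a MOPS exception (for example because
 * it finds register state that was set up for the other implementation
 * option), this syndrome describes the insn to the exception handler.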
4205 */ 4206 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv, 4207 is_epilogue, false, true, a->rd, a->rs, a->rn); 4208 4209 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) { 4210 /* We may need to do MTE tag checking, so assemble the descriptor */ 4211 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4212 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4213 desc = FIELD_DP32(desc, MTEDESC, WRITE, true); 4214 /* SIZEM1 and ALIGN we leave 0 (byte write) */ 4215 } 4216 /* The helper function always needs the memidx even with MTE disabled */ 4217 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx); 4218 4219 /* 4220 * The helper needs the register numbers, but since they're in 4221 * the syndrome anyway, we let it extract them from there rather 4222 * than passing in an extra three integer arguments. 4223 */ 4224 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc)); 4225 return true; 4226 } 4227 4228 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp) 4229 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm) 4230 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete) 4231 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp) 4232 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm) 4233 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge) 4234 4235 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32); 4236 4237 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) 4238 { 4239 int rmemidx, wmemidx; 4240 uint32_t syndrome, rdesc = 0, wdesc = 0; 4241 bool wunpriv = extract32(a->options, 0, 1); 4242 bool runpriv = extract32(a->options, 1, 1); 4243 4244 /* 4245 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4246 * us to pull this check before the CheckMOPSEnabled() test 4247 * (which we do in the helper function) 4248 */ 4249 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4250 a->rd == 31 || a->rs == 31 || a->rn == 31) { 4251 return false; 4252 } 4253 4254 rmemidx = get_a64_user_mem_index(s, runpriv); 4255 wmemidx = get_a64_user_mem_index(s, wunpriv); 4256 4257 /* 4258 * We pass option_a == true, matching our implementation; 4259 * we pass wrong_option == false: helper function may set that bit. 4260 */ 4261 syndrome = syn_mop(false, false, a->options, is_epilogue, 4262 false, true, a->rd, a->rs, a->rn); 4263 4264 /* If we need to do MTE tag checking, assemble the descriptors */ 4265 if (s->mte_active[runpriv]) { 4266 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid); 4267 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma); 4268 } 4269 if (s->mte_active[wunpriv]) { 4270 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid); 4271 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma); 4272 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true); 4273 } 4274 /* The helper function needs these parts of the descriptor regardless */ 4275 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx); 4276 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx); 4277 4278 /* 4279 * The helper needs the register numbers, but since they're in 4280 * the syndrome anyway, we let it extract them from there rather 4281 * than passing in an extra three integer arguments. 
4282 */ 4283 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc), 4284 tcg_constant_i32(rdesc)); 4285 return true; 4286 } 4287 4288 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp) 4289 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym) 4290 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye) 4291 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp) 4292 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm) 4293 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe) 4294 4295 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); 4296 4297 static bool gen_rri(DisasContext *s, arg_rri_sf *a, 4298 bool rd_sp, bool rn_sp, ArithTwoOp *fn) 4299 { 4300 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn); 4301 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd); 4302 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm); 4303 4304 fn(tcg_rd, tcg_rn, tcg_imm); 4305 if (!a->sf) { 4306 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4307 } 4308 return true; 4309 } 4310 4311 /* 4312 * PC-rel. addressing 4313 */ 4314 4315 static bool trans_ADR(DisasContext *s, arg_ri *a) 4316 { 4317 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm); 4318 return true; 4319 } 4320 4321 static bool trans_ADRP(DisasContext *s, arg_ri *a) 4322 { 4323 int64_t offset = (int64_t)a->imm << 12; 4324 4325 /* The page offset is ok for CF_PCREL. */ 4326 offset -= s->pc_curr & 0xfff; 4327 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset); 4328 return true; 4329 } 4330 4331 /* 4332 * Add/subtract (immediate) 4333 */ 4334 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64) 4335 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64) 4336 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) 4337 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) 4338 4339 /* 4340 * Add/subtract (immediate, with tags) 4341 */ 4342 4343 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, 4344 bool sub_op) 4345 { 4346 TCGv_i64 tcg_rn, tcg_rd; 4347 int imm; 4348 4349 imm = a->uimm6 << LOG2_TAG_GRANULE; 4350 if (sub_op) { 4351 imm = -imm; 4352 } 4353 4354 tcg_rn = cpu_reg_sp(s, a->rn); 4355 tcg_rd = cpu_reg_sp(s, a->rd); 4356 4357 if (s->ata[0]) { 4358 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn, 4359 tcg_constant_i32(imm), 4360 tcg_constant_i32(a->uimm4)); 4361 } else { 4362 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 4363 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 4364 } 4365 return true; 4366 } 4367 4368 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false) 4369 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true) 4370 4371 /* The input should be a value in the bottom e bits (with higher 4372 * bits zero); returns that value replicated into every element 4373 * of size e in a 64 bit integer. 4374 */ 4375 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 4376 { 4377 assert(e != 0); 4378 while (e < 64) { 4379 mask |= mask << e; 4380 e *= 2; 4381 } 4382 return mask; 4383 } 4384 4385 /* 4386 * Logical (immediate) 4387 */ 4388 4389 /* 4390 * Simplified variant of pseudocode DecodeBitMasks() for the case where we 4391 * only require the wmask. Returns false if the imms/immr/immn are a reserved 4392 * value (ie should cause a guest UNDEF exception), and true if they are 4393 * valid, in which case the decoded bit pattern is written to result. 
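 *
 * For example (values worked from the encoding described in the function
 * body below): immn=1, imms=0b000111, immr=0b000010 selects 64-bit elements
 * with a run of 8 ones rotated right by 2, giving 0xc00000000000003f;
 * immn=0, imms=0b000011, immr=0b000001 selects 32-bit elements with a run
 * of 4 ones rotated right by 1, replicated to 0x8000000780000007.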
4394 */ 4395 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, 4396 unsigned int imms, unsigned int immr) 4397 { 4398 uint64_t mask; 4399 unsigned e, levels, s, r; 4400 int len; 4401 4402 assert(immn < 2 && imms < 64 && immr < 64); 4403 4404 /* The bit patterns we create here are 64 bit patterns which 4405 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or 4406 * 64 bits each. Each element contains the same value: a run 4407 * of between 1 and e-1 non-zero bits, rotated within the 4408 * element by between 0 and e-1 bits. 4409 * 4410 * The element size and run length are encoded into immn (1 bit) 4411 * and imms (6 bits) as follows: 4412 * 64 bit elements: immn = 1, imms = <length of run - 1> 4413 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1> 4414 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1> 4415 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1> 4416 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1> 4417 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1> 4418 * Notice that immn = 0, imms = 11111x is the only combination 4419 * not covered by one of the above options; this is reserved. 4420 * Further, <length of run - 1> all-ones is a reserved pattern. 4421 * 4422 * In all cases the rotation is by immr % e (and immr is 6 bits). 4423 */ 4424 4425 /* First determine the element size */ 4426 len = 31 - clz32((immn << 6) | (~imms & 0x3f)); 4427 if (len < 1) { 4428 /* This is the immn == 0, imms == 0x11111x case */ 4429 return false; 4430 } 4431 e = 1 << len; 4432 4433 levels = e - 1; 4434 s = imms & levels; 4435 r = immr & levels; 4436 4437 if (s == levels) { 4438 /* <length of run - 1> mustn't be all-ones. */ 4439 return false; 4440 } 4441 4442 /* Create the value of one element: s+1 set bits rotated 4443 * by r within the element (which is e bits wide)... 4444 */ 4445 mask = MAKE_64BIT_MASK(0, s + 1); 4446 if (r) { 4447 mask = (mask >> r) | (mask << (e - r)); 4448 mask &= MAKE_64BIT_MASK(0, e); 4449 } 4450 /* ...then replicate the element over the whole 64 bit value */ 4451 mask = bitfield_replicate(mask, e); 4452 *result = mask; 4453 return true; 4454 } 4455 4456 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc, 4457 void (*fn)(TCGv_i64, TCGv_i64, int64_t)) 4458 { 4459 TCGv_i64 tcg_rd, tcg_rn; 4460 uint64_t imm; 4461 4462 /* Some immediate field values are reserved. */ 4463 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 4464 extract32(a->dbm, 0, 6), 4465 extract32(a->dbm, 6, 6))) { 4466 return false; 4467 } 4468 if (!a->sf) { 4469 imm &= 0xffffffffull; 4470 } 4471 4472 tcg_rd = set_cc ? 
cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd); 4473 tcg_rn = cpu_reg(s, a->rn); 4474 4475 fn(tcg_rd, tcg_rn, imm); 4476 if (set_cc) { 4477 gen_logic_CC(a->sf, tcg_rd); 4478 } 4479 if (!a->sf) { 4480 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4481 } 4482 return true; 4483 } 4484 4485 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64) 4486 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64) 4487 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64) 4488 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64) 4489 4490 /* 4491 * Move wide (immediate) 4492 */ 4493 4494 static bool trans_MOVZ(DisasContext *s, arg_movw *a) 4495 { 4496 int pos = a->hw << 4; 4497 tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos); 4498 return true; 4499 } 4500 4501 static bool trans_MOVN(DisasContext *s, arg_movw *a) 4502 { 4503 int pos = a->hw << 4; 4504 uint64_t imm = a->imm; 4505 4506 imm = ~(imm << pos); 4507 if (!a->sf) { 4508 imm = (uint32_t)imm; 4509 } 4510 tcg_gen_movi_i64(cpu_reg(s, a->rd), imm); 4511 return true; 4512 } 4513 4514 static bool trans_MOVK(DisasContext *s, arg_movw *a) 4515 { 4516 int pos = a->hw << 4; 4517 TCGv_i64 tcg_rd, tcg_im; 4518 4519 tcg_rd = cpu_reg(s, a->rd); 4520 tcg_im = tcg_constant_i64(a->imm); 4521 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16); 4522 if (!a->sf) { 4523 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4524 } 4525 return true; 4526 } 4527 4528 /* 4529 * Bitfield 4530 */ 4531 4532 static bool trans_SBFM(DisasContext *s, arg_SBFM *a) 4533 { 4534 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4535 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4536 unsigned int bitsize = a->sf ? 64 : 32; 4537 unsigned int ri = a->immr; 4538 unsigned int si = a->imms; 4539 unsigned int pos, len; 4540 4541 if (si >= ri) { 4542 /* Wd<s-r:0> = Wn<s:r> */ 4543 len = (si - ri) + 1; 4544 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len); 4545 if (!a->sf) { 4546 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4547 } 4548 } else { 4549 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4550 len = si + 1; 4551 pos = (bitsize - ri) & (bitsize - 1); 4552 4553 if (len < ri) { 4554 /* 4555 * Sign extend the destination field from len to fill the 4556 * balance of the word. Let the deposit below insert all 4557 * of those sign bits. 4558 */ 4559 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len); 4560 len = ri; 4561 } 4562 4563 /* 4564 * We start with zero, and we haven't modified any bits outside 4565 * bitsize, therefore no final zero-extension is needed for !sf. 4566 */ 4567 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4568 } 4569 return true; 4570 } 4571 4572 static bool trans_UBFM(DisasContext *s, arg_UBFM *a) 4573 { 4574 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4575 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4576 unsigned int bitsize = a->sf ? 64 : 32; 4577 unsigned int ri = a->immr; 4578 unsigned int si = a->imms; 4579 unsigned int pos, len; 4580 4584 if (si >= ri) { 4585 /* Wd<s-r:0> = Wn<s:r> */ 4586 len = (si - ri) + 1; 4587 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); 4588 } else { 4589 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4590 len = si + 1; 4591 pos = (bitsize - ri) & (bitsize - 1); 4592 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4593 } 4594 return true; 4595 } 4596 4597 static bool trans_BFM(DisasContext *s, arg_BFM *a) 4598 { 4599 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4600 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4601 unsigned int bitsize = a->sf ?
64 : 32; 4602 unsigned int ri = a->immr; 4603 unsigned int si = a->imms; 4604 unsigned int pos, len; 4605 4609 if (si >= ri) { 4610 /* Wd<s-r:0> = Wn<s:r> */ 4611 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); 4612 len = (si - ri) + 1; 4613 pos = 0; 4614 } else { 4615 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4616 len = si + 1; 4617 pos = (bitsize - ri) & (bitsize - 1); 4618 } 4619 4620 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 4621 if (!a->sf) { 4622 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4623 } 4624 return true; 4625 } 4626 4627 static bool trans_EXTR(DisasContext *s, arg_extract *a) 4628 { 4629 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 4630 4631 tcg_rd = cpu_reg(s, a->rd); 4632 4633 if (unlikely(a->imm == 0)) { 4634 /* 4635 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 4636 * so an extract from bit 0 is a special case. 4637 */ 4638 if (a->sf) { 4639 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm)); 4640 } else { 4641 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm)); 4642 } 4643 } else { 4644 tcg_rm = cpu_reg(s, a->rm); 4645 tcg_rn = cpu_reg(s, a->rn); 4646 4647 if (a->sf) { 4648 /* Specialization to ROR happens in EXTRACT2. */ 4649 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm); 4650 } else { 4651 TCGv_i32 t0 = tcg_temp_new_i32(); 4652 4653 tcg_gen_extrl_i64_i32(t0, tcg_rm); 4654 if (a->rm == a->rn) { 4655 tcg_gen_rotri_i32(t0, t0, a->imm); 4656 } else { 4657 TCGv_i32 t1 = tcg_temp_new_i32(); 4658 tcg_gen_extrl_i64_i32(t1, tcg_rn); 4659 tcg_gen_extract2_i32(t0, t0, t1, a->imm); 4660 } 4661 tcg_gen_extu_i32_i64(tcg_rd, t0); 4662 } 4663 } 4664 return true; 4665 } 4666 4667 /* 4668 * Cryptographic AES, SHA, SHA512 4669 */ 4670 4671 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese) 4672 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd) 4673 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc) 4674 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc) 4675 4676 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c) 4677 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p) 4678 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m) 4679 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0) 4680 4681 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h) 4682 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2) 4683 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1) 4684 4685 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h) 4686 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1) 4687 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0) 4688 4689 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h) 4690 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2) 4691 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1) 4692 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1) 4693 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1) 4694 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2) 4695 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey) 4696 4697
TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0) 4698 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e) 4699 4700 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3) 4701 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax) 4702 4703 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a) 4704 { 4705 if (!dc_isar_feature(aa64_sm3, s)) { 4706 return false; 4707 } 4708 if (fp_access_check(s)) { 4709 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 4710 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 4711 TCGv_i32 tcg_op3 = tcg_temp_new_i32(); 4712 TCGv_i32 tcg_res = tcg_temp_new_i32(); 4713 unsigned vsz, dofs; 4714 4715 read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32); 4716 read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32); 4717 read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32); 4718 4719 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); 4720 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); 4721 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); 4722 tcg_gen_rotri_i32(tcg_res, tcg_res, 25); 4723 4724 /* Clear the whole register first, then store bits [127:96]. */ 4725 vsz = vec_full_reg_size(s); 4726 dofs = vec_full_reg_offset(s, a->rd); 4727 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 4728 write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32); 4729 } 4730 return true; 4731 } 4732 4733 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn) 4734 { 4735 if (fp_access_check(s)) { 4736 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn); 4737 } 4738 return true; 4739 } 4740 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a) 4741 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b) 4742 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a) 4743 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b) 4744 4745 static bool trans_XAR(DisasContext *s, arg_XAR *a) 4746 { 4747 if (!dc_isar_feature(aa64_sha3, s)) { 4748 return false; 4749 } 4750 if (fp_access_check(s)) { 4751 gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd), 4752 vec_full_reg_offset(s, a->rn), 4753 vec_full_reg_offset(s, a->rm), a->imm, 16, 4754 vec_full_reg_size(s)); 4755 } 4756 return true; 4757 } 4758 4759 /* 4760 * Advanced SIMD copy 4761 */ 4762 4763 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx) 4764 { 4765 unsigned esz = ctz32(imm); 4766 if (esz <= MO_64) { 4767 *pesz = esz; 4768 *pidx = imm >> (esz + 1); 4769 return true; 4770 } 4771 return false; 4772 } 4773 4774 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a) 4775 { 4776 MemOp esz; 4777 unsigned idx; 4778 4779 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4780 return false; 4781 } 4782 if (fp_access_check(s)) { 4783 /* 4784 * This instruction just extracts the specified element and 4785 * zero-extends it into the bottom of the destination register. 4786 */ 4787 TCGv_i64 tmp = tcg_temp_new_i64(); 4788 read_vec_element(s, tmp, a->rn, idx, esz); 4789 write_fp_dreg(s, a->rd, tmp); 4790 } 4791 return true; 4792 } 4793 4794 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a) 4795 { 4796 MemOp esz; 4797 unsigned idx; 4798 4799 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4800 return false; 4801 } 4802 if (esz == MO_64 && !a->q) { 4803 return false; 4804 } 4805 if (fp_access_check(s)) { 4806 tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd), 4807 vec_reg_offset(s, a->rn, idx, esz), 4808 a->q ? 
16 : 8, vec_full_reg_size(s)); 4809 } 4810 return true; 4811 } 4812 4813 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a) 4814 { 4815 MemOp esz; 4816 unsigned idx; 4817 4818 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4819 return false; 4820 } 4821 if (esz == MO_64 && !a->q) { 4822 return false; 4823 } 4824 if (fp_access_check(s)) { 4825 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 4826 a->q ? 16 : 8, vec_full_reg_size(s), 4827 cpu_reg(s, a->rn)); 4828 } 4829 return true; 4830 } 4831 4832 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed) 4833 { 4834 MemOp esz; 4835 unsigned idx; 4836 4837 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4838 return false; 4839 } 4840 if (is_signed) { 4841 if (esz == MO_64 || (esz == MO_32 && !a->q)) { 4842 return false; 4843 } 4844 } else { 4845 if (esz == MO_64 ? !a->q : a->q) { 4846 return false; 4847 } 4848 } 4849 if (fp_access_check(s)) { 4850 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4851 read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed); 4852 if (is_signed && !a->q) { 4853 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4854 } 4855 } 4856 return true; 4857 } 4858 4859 TRANS(SMOV, do_smov_umov, a, MO_SIGN) 4860 TRANS(UMOV, do_smov_umov, a, 0) 4861 4862 static bool trans_INS_general(DisasContext *s, arg_INS_general *a) 4863 { 4864 MemOp esz; 4865 unsigned idx; 4866 4867 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4868 return false; 4869 } 4870 if (fp_access_check(s)) { 4871 write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz); 4872 clear_vec_high(s, true, a->rd); 4873 } 4874 return true; 4875 } 4876 4877 static bool trans_INS_element(DisasContext *s, arg_INS_element *a) 4878 { 4879 MemOp esz; 4880 unsigned didx, sidx; 4881 4882 if (!decode_esz_idx(a->di, &esz, &didx)) { 4883 return false; 4884 } 4885 sidx = a->si >> esz; 4886 if (fp_access_check(s)) { 4887 TCGv_i64 tmp = tcg_temp_new_i64(); 4888 4889 read_vec_element(s, tmp, a->rn, sidx, esz); 4890 write_vec_element(s, tmp, a->rd, didx, esz); 4891 4892 /* INS is considered a 128-bit write for SVE. 
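 * That is, any vector bits above bit 127 are zeroed, which is what the
 * clear_vec_high() call below does.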
*/ 4893 clear_vec_high(s, true, a->rd); 4894 } 4895 return true; 4896 } 4897 4898 /* 4899 * Advanced SIMD three same 4900 */ 4901 4902 typedef struct FPScalar { 4903 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 4904 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 4905 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); 4906 } FPScalar; 4907 4908 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) 4909 { 4910 switch (a->esz) { 4911 case MO_64: 4912 if (fp_access_check(s)) { 4913 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 4914 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 4915 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 4916 write_fp_dreg(s, a->rd, t0); 4917 } 4918 break; 4919 case MO_32: 4920 if (fp_access_check(s)) { 4921 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 4922 TCGv_i32 t1 = read_fp_sreg(s, a->rm); 4923 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 4924 write_fp_sreg(s, a->rd, t0); 4925 } 4926 break; 4927 case MO_16: 4928 if (!dc_isar_feature(aa64_fp16, s)) { 4929 return false; 4930 } 4931 if (fp_access_check(s)) { 4932 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 4933 TCGv_i32 t1 = read_fp_hreg(s, a->rm); 4934 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); 4935 write_fp_sreg(s, a->rd, t0); 4936 } 4937 break; 4938 default: 4939 return false; 4940 } 4941 return true; 4942 } 4943 4944 static const FPScalar f_scalar_fadd = { 4945 gen_helper_vfp_addh, 4946 gen_helper_vfp_adds, 4947 gen_helper_vfp_addd, 4948 }; 4949 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd) 4950 4951 static const FPScalar f_scalar_fsub = { 4952 gen_helper_vfp_subh, 4953 gen_helper_vfp_subs, 4954 gen_helper_vfp_subd, 4955 }; 4956 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub) 4957 4958 static const FPScalar f_scalar_fdiv = { 4959 gen_helper_vfp_divh, 4960 gen_helper_vfp_divs, 4961 gen_helper_vfp_divd, 4962 }; 4963 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv) 4964 4965 static const FPScalar f_scalar_fmul = { 4966 gen_helper_vfp_mulh, 4967 gen_helper_vfp_muls, 4968 gen_helper_vfp_muld, 4969 }; 4970 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul) 4971 4972 static const FPScalar f_scalar_fmax = { 4973 gen_helper_advsimd_maxh, 4974 gen_helper_vfp_maxs, 4975 gen_helper_vfp_maxd, 4976 }; 4977 TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax) 4978 4979 static const FPScalar f_scalar_fmin = { 4980 gen_helper_advsimd_minh, 4981 gen_helper_vfp_mins, 4982 gen_helper_vfp_mind, 4983 }; 4984 TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin) 4985 4986 static const FPScalar f_scalar_fmaxnm = { 4987 gen_helper_advsimd_maxnumh, 4988 gen_helper_vfp_maxnums, 4989 gen_helper_vfp_maxnumd, 4990 }; 4991 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm) 4992 4993 static const FPScalar f_scalar_fminnm = { 4994 gen_helper_advsimd_minnumh, 4995 gen_helper_vfp_minnums, 4996 gen_helper_vfp_minnumd, 4997 }; 4998 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm) 4999 5000 static const FPScalar f_scalar_fmulx = { 5001 gen_helper_advsimd_mulxh, 5002 gen_helper_vfp_mulxs, 5003 gen_helper_vfp_mulxd, 5004 }; 5005 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx) 5006 5007 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5008 { 5009 gen_helper_vfp_mulh(d, n, m, s); 5010 gen_vfp_negh(d, d); 5011 } 5012 5013 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5014 { 5015 gen_helper_vfp_muls(d, n, m, s); 5016 gen_vfp_negs(d, d); 5017 } 5018 5019 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5020 { 5021 gen_helper_vfp_muld(d, n, m, s); 5022 gen_vfp_negd(d, 
d); 5023 } 5024 5025 static const FPScalar f_scalar_fnmul = { 5026 gen_fnmul_h, 5027 gen_fnmul_s, 5028 gen_fnmul_d, 5029 }; 5030 TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul) 5031 5032 static const FPScalar f_scalar_fcmeq = { 5033 gen_helper_advsimd_ceq_f16, 5034 gen_helper_neon_ceq_f32, 5035 gen_helper_neon_ceq_f64, 5036 }; 5037 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq) 5038 5039 static const FPScalar f_scalar_fcmge = { 5040 gen_helper_advsimd_cge_f16, 5041 gen_helper_neon_cge_f32, 5042 gen_helper_neon_cge_f64, 5043 }; 5044 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge) 5045 5046 static const FPScalar f_scalar_fcmgt = { 5047 gen_helper_advsimd_cgt_f16, 5048 gen_helper_neon_cgt_f32, 5049 gen_helper_neon_cgt_f64, 5050 }; 5051 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt) 5052 5053 static const FPScalar f_scalar_facge = { 5054 gen_helper_advsimd_acge_f16, 5055 gen_helper_neon_acge_f32, 5056 gen_helper_neon_acge_f64, 5057 }; 5058 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge) 5059 5060 static const FPScalar f_scalar_facgt = { 5061 gen_helper_advsimd_acgt_f16, 5062 gen_helper_neon_acgt_f32, 5063 gen_helper_neon_acgt_f64, 5064 }; 5065 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt) 5066 5067 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5068 { 5069 gen_helper_vfp_subh(d, n, m, s); 5070 gen_vfp_absh(d, d); 5071 } 5072 5073 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5074 { 5075 gen_helper_vfp_subs(d, n, m, s); 5076 gen_vfp_abss(d, d); 5077 } 5078 5079 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5080 { 5081 gen_helper_vfp_subd(d, n, m, s); 5082 gen_vfp_absd(d, d); 5083 } 5084 5085 static const FPScalar f_scalar_fabd = { 5086 gen_fabd_h, 5087 gen_fabd_s, 5088 gen_fabd_d, 5089 }; 5090 TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) 5091 5092 static const FPScalar f_scalar_frecps = { 5093 gen_helper_recpsf_f16, 5094 gen_helper_recpsf_f32, 5095 gen_helper_recpsf_f64, 5096 }; 5097 TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps) 5098 5099 static const FPScalar f_scalar_frsqrts = { 5100 gen_helper_rsqrtsf_f16, 5101 gen_helper_rsqrtsf_f32, 5102 gen_helper_rsqrtsf_f64, 5103 }; 5104 TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) 5105 5106 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a, 5107 MemOp sgn_n, MemOp sgn_m, 5108 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp), 5109 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 5110 { 5111 TCGv_i64 t0, t1, t2, qc; 5112 MemOp esz = a->esz; 5113 5114 if (!fp_access_check(s)) { 5115 return true; 5116 } 5117 5118 t0 = tcg_temp_new_i64(); 5119 t1 = tcg_temp_new_i64(); 5120 t2 = tcg_temp_new_i64(); 5121 qc = tcg_temp_new_i64(); 5122 read_vec_element(s, t1, a->rn, 0, esz | sgn_n); 5123 read_vec_element(s, t2, a->rm, 0, esz | sgn_m); 5124 tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5125 5126 if (esz == MO_64) { 5127 gen_d(t0, qc, t1, t2); 5128 } else { 5129 gen_bhs(t0, qc, t1, t2, esz); 5130 tcg_gen_ext_i64(t0, t0, esz); 5131 } 5132 5133 write_fp_dreg(s, a->rd, t0); 5134 tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5135 return true; 5136 } 5137 5138 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d) 5139 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d) 5140 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d) 5141 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d) 5142 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, 
gen_suqadd_bhs, gen_suqadd_d) 5143 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d) 5144 5145 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a, 5146 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64)) 5147 { 5148 if (fp_access_check(s)) { 5149 TCGv_i64 t0 = tcg_temp_new_i64(); 5150 TCGv_i64 t1 = tcg_temp_new_i64(); 5151 5152 read_vec_element(s, t0, a->rn, 0, MO_64); 5153 read_vec_element(s, t1, a->rm, 0, MO_64); 5154 fn(t0, t0, t1); 5155 write_fp_dreg(s, a->rd, t0); 5156 } 5157 return true; 5158 } 5159 5160 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64) 5161 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64) 5162 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64) 5163 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64) 5164 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64) 5165 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64) 5166 5167 typedef struct ENVScalar2 { 5168 NeonGenTwoOpEnvFn *gen_bhs[3]; 5169 NeonGenTwo64OpEnvFn *gen_d; 5170 } ENVScalar2; 5171 5172 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f) 5173 { 5174 if (!fp_access_check(s)) { 5175 return true; 5176 } 5177 if (a->esz == MO_64) { 5178 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5179 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5180 f->gen_d(t0, tcg_env, t0, t1); 5181 write_fp_dreg(s, a->rd, t0); 5182 } else { 5183 TCGv_i32 t0 = tcg_temp_new_i32(); 5184 TCGv_i32 t1 = tcg_temp_new_i32(); 5185 5186 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5187 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5188 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 5189 write_fp_sreg(s, a->rd, t0); 5190 } 5191 return true; 5192 } 5193 5194 static const ENVScalar2 f_scalar_sqshl = { 5195 { gen_helper_neon_qshl_s8, 5196 gen_helper_neon_qshl_s16, 5197 gen_helper_neon_qshl_s32 }, 5198 gen_helper_neon_qshl_s64, 5199 }; 5200 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl) 5201 5202 static const ENVScalar2 f_scalar_uqshl = { 5203 { gen_helper_neon_qshl_u8, 5204 gen_helper_neon_qshl_u16, 5205 gen_helper_neon_qshl_u32 }, 5206 gen_helper_neon_qshl_u64, 5207 }; 5208 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl) 5209 5210 static const ENVScalar2 f_scalar_sqrshl = { 5211 { gen_helper_neon_qrshl_s8, 5212 gen_helper_neon_qrshl_s16, 5213 gen_helper_neon_qrshl_s32 }, 5214 gen_helper_neon_qrshl_s64, 5215 }; 5216 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl) 5217 5218 static const ENVScalar2 f_scalar_uqrshl = { 5219 { gen_helper_neon_qrshl_u8, 5220 gen_helper_neon_qrshl_u16, 5221 gen_helper_neon_qrshl_u32 }, 5222 gen_helper_neon_qrshl_u64, 5223 }; 5224 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl) 5225 5226 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a, 5227 const ENVScalar2 *f) 5228 { 5229 if (a->esz == MO_16 || a->esz == MO_32) { 5230 return do_env_scalar2(s, a, f); 5231 } 5232 return false; 5233 } 5234 5235 static const ENVScalar2 f_scalar_sqdmulh = { 5236 { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 } 5237 }; 5238 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh) 5239 5240 static const ENVScalar2 f_scalar_sqrdmulh = { 5241 { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 } 5242 }; 5243 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh) 5244 5245 typedef struct ENVScalar3 { 5246 NeonGenThreeOpEnvFn *gen_hs[2]; 5247 } ENVScalar3; 5248 5249 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a, 5250 const ENVScalar3 *f) 5251 { 5252 TCGv_i32 t0, t1, t2; 5253 5254 if (a->esz != MO_16 && a->esz != MO_32) { 5255 
return false; 5256 } 5257 if (!fp_access_check(s)) { 5258 return true; 5259 } 5260 5261 t0 = tcg_temp_new_i32(); 5262 t1 = tcg_temp_new_i32(); 5263 t2 = tcg_temp_new_i32(); 5264 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5265 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5266 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 5267 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 5268 write_fp_sreg(s, a->rd, t0); 5269 return true; 5270 } 5271 5272 static const ENVScalar3 f_scalar_sqrdmlah = { 5273 { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 } 5274 }; 5275 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah) 5276 5277 static const ENVScalar3 f_scalar_sqrdmlsh = { 5278 { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 } 5279 }; 5280 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh) 5281 5282 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond) 5283 { 5284 if (fp_access_check(s)) { 5285 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5286 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5287 tcg_gen_negsetcond_i64(cond, t0, t0, t1); 5288 write_fp_dreg(s, a->rd, t0); 5289 } 5290 return true; 5291 } 5292 5293 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT) 5294 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU) 5295 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE) 5296 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) 5297 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) 5298 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) 5299 5300 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, 5301 gen_helper_gvec_3_ptr * const fns[3]) 5302 { 5303 MemOp esz = a->esz; 5304 5305 switch (esz) { 5306 case MO_64: 5307 if (!a->q) { 5308 return false; 5309 } 5310 break; 5311 case MO_32: 5312 break; 5313 case MO_16: 5314 if (!dc_isar_feature(aa64_fp16, s)) { 5315 return false; 5316 } 5317 break; 5318 default: 5319 return false; 5320 } 5321 if (fp_access_check(s)) { 5322 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 5323 esz == MO_16, data, fns[esz - 1]); 5324 } 5325 return true; 5326 } 5327 5328 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { 5329 gen_helper_gvec_fadd_h, 5330 gen_helper_gvec_fadd_s, 5331 gen_helper_gvec_fadd_d, 5332 }; 5333 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd) 5334 5335 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = { 5336 gen_helper_gvec_fsub_h, 5337 gen_helper_gvec_fsub_s, 5338 gen_helper_gvec_fsub_d, 5339 }; 5340 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub) 5341 5342 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = { 5343 gen_helper_gvec_fdiv_h, 5344 gen_helper_gvec_fdiv_s, 5345 gen_helper_gvec_fdiv_d, 5346 }; 5347 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv) 5348 5349 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = { 5350 gen_helper_gvec_fmul_h, 5351 gen_helper_gvec_fmul_s, 5352 gen_helper_gvec_fmul_d, 5353 }; 5354 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul) 5355 5356 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { 5357 gen_helper_gvec_fmax_h, 5358 gen_helper_gvec_fmax_s, 5359 gen_helper_gvec_fmax_d, 5360 }; 5361 TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax) 5362 5363 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { 5364 gen_helper_gvec_fmin_h, 5365 gen_helper_gvec_fmin_s, 5366 gen_helper_gvec_fmin_d, 5367 }; 5368 TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin) 5369 5370 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { 5371 gen_helper_gvec_fmaxnum_h, 5372 gen_helper_gvec_fmaxnum_s, 5373 gen_helper_gvec_fmaxnum_d, 5374 }; 5375 TRANS(FMAXNM_v, do_fp3_vector, 
a, 0, f_vector_fmaxnm) 5376 5377 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = { 5378 gen_helper_gvec_fminnum_h, 5379 gen_helper_gvec_fminnum_s, 5380 gen_helper_gvec_fminnum_d, 5381 }; 5382 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm) 5383 5384 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = { 5385 gen_helper_gvec_fmulx_h, 5386 gen_helper_gvec_fmulx_s, 5387 gen_helper_gvec_fmulx_d, 5388 }; 5389 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx) 5390 5391 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = { 5392 gen_helper_gvec_vfma_h, 5393 gen_helper_gvec_vfma_s, 5394 gen_helper_gvec_vfma_d, 5395 }; 5396 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla) 5397 5398 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { 5399 gen_helper_gvec_vfms_h, 5400 gen_helper_gvec_vfms_s, 5401 gen_helper_gvec_vfms_d, 5402 }; 5403 TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls) 5404 5405 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { 5406 gen_helper_gvec_fceq_h, 5407 gen_helper_gvec_fceq_s, 5408 gen_helper_gvec_fceq_d, 5409 }; 5410 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq) 5411 5412 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = { 5413 gen_helper_gvec_fcge_h, 5414 gen_helper_gvec_fcge_s, 5415 gen_helper_gvec_fcge_d, 5416 }; 5417 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge) 5418 5419 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = { 5420 gen_helper_gvec_fcgt_h, 5421 gen_helper_gvec_fcgt_s, 5422 gen_helper_gvec_fcgt_d, 5423 }; 5424 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt) 5425 5426 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = { 5427 gen_helper_gvec_facge_h, 5428 gen_helper_gvec_facge_s, 5429 gen_helper_gvec_facge_d, 5430 }; 5431 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge) 5432 5433 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = { 5434 gen_helper_gvec_facgt_h, 5435 gen_helper_gvec_facgt_s, 5436 gen_helper_gvec_facgt_d, 5437 }; 5438 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt) 5439 5440 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { 5441 gen_helper_gvec_fabd_h, 5442 gen_helper_gvec_fabd_s, 5443 gen_helper_gvec_fabd_d, 5444 }; 5445 TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd) 5446 5447 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { 5448 gen_helper_gvec_recps_h, 5449 gen_helper_gvec_recps_s, 5450 gen_helper_gvec_recps_d, 5451 }; 5452 TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps) 5453 5454 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { 5455 gen_helper_gvec_rsqrts_h, 5456 gen_helper_gvec_rsqrts_s, 5457 gen_helper_gvec_rsqrts_d, 5458 }; 5459 TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts) 5460 5461 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { 5462 gen_helper_gvec_faddp_h, 5463 gen_helper_gvec_faddp_s, 5464 gen_helper_gvec_faddp_d, 5465 }; 5466 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp) 5467 5468 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { 5469 gen_helper_gvec_fmaxp_h, 5470 gen_helper_gvec_fmaxp_s, 5471 gen_helper_gvec_fmaxp_d, 5472 }; 5473 TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp) 5474 5475 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { 5476 gen_helper_gvec_fminp_h, 5477 gen_helper_gvec_fminp_s, 5478 gen_helper_gvec_fminp_d, 5479 }; 5480 TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp) 5481 5482 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { 5483 gen_helper_gvec_fmaxnump_h, 5484 gen_helper_gvec_fmaxnump_s, 5485 gen_helper_gvec_fmaxnump_d, 5486 }; 
5487 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp) 5488 5489 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = { 5490 gen_helper_gvec_fminnump_h, 5491 gen_helper_gvec_fminnump_s, 5492 gen_helper_gvec_fminnump_d, 5493 }; 5494 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp) 5495 5496 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) 5497 { 5498 if (fp_access_check(s)) { 5499 int data = (is_2 << 1) | is_s; 5500 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 5501 vec_full_reg_offset(s, a->rn), 5502 vec_full_reg_offset(s, a->rm), tcg_env, 5503 a->q ? 16 : 8, vec_full_reg_size(s), 5504 data, gen_helper_gvec_fmlal_a64); 5505 } 5506 return true; 5507 } 5508 5509 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false) 5510 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false) 5511 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true) 5512 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true) 5513 5514 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp) 5515 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp) 5516 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp) 5517 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp) 5518 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp) 5519 5520 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and) 5521 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc) 5522 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or) 5523 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc) 5524 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor) 5525 5526 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c) 5527 { 5528 if (fp_access_check(s)) { 5529 gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0); 5530 } 5531 return true; 5532 } 5533 5534 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm) 5535 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd) 5536 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn) 5537 5538 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc) 5539 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc) 5540 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc) 5541 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc) 5542 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc) 5543 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc) 5544 5545 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl) 5546 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl) 5547 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl) 5548 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl) 5549 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl) 5550 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl) 5551 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl) 5552 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl) 5553 5554 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add) 5555 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub) 5556 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd) 5557 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd) 5558 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub) 5559 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub) 5560 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd) 5561 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd) 5562 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax) 5563 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax) 5564 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin) 5565 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin) 5566 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba) 5567 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba) 5568 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd) 5569 
TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd) 5570 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul) 5571 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b) 5572 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla) 5573 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls) 5574 5575 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond) 5576 { 5577 if (a->esz == MO_64 && !a->q) { 5578 return false; 5579 } 5580 if (fp_access_check(s)) { 5581 tcg_gen_gvec_cmp(cond, a->esz, 5582 vec_full_reg_offset(s, a->rd), 5583 vec_full_reg_offset(s, a->rn), 5584 vec_full_reg_offset(s, a->rm), 5585 a->q ? 16 : 8, vec_full_reg_size(s)); 5586 } 5587 return true; 5588 } 5589 5590 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT) 5591 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU) 5592 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE) 5593 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU) 5594 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ) 5595 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst) 5596 5597 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc) 5598 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc) 5599 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc) 5600 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc) 5601 5602 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a, 5603 gen_helper_gvec_4 *fn) 5604 { 5605 if (fp_access_check(s)) { 5606 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 5607 } 5608 return true; 5609 } 5610 5611 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b) 5612 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b) 5613 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b) 5614 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector, a, gen_helper_gvec_bfdot) 5615 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector, a, gen_helper_gvec_bfmmla) 5616 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b) 5617 TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b) 5618 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b) 5619 5620 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) 5621 { 5622 if (!dc_isar_feature(aa64_bf16, s)) { 5623 return false; 5624 } 5625 if (fp_access_check(s)) { 5626 /* Q bit selects BFMLALB vs BFMLALT. 
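 * It is passed through as the helper's 'data' argument, selecting the
 * even (B) or odd (T) bfloat16 source elements.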
*/ 5627 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q, 5628 gen_helper_gvec_bfmlal); 5629 } 5630 return true; 5631 } 5632 5633 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { 5634 gen_helper_gvec_fcaddh, 5635 gen_helper_gvec_fcadds, 5636 gen_helper_gvec_fcaddd, 5637 }; 5638 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd) 5639 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd) 5640 5641 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) 5642 { 5643 gen_helper_gvec_4_ptr *fn; 5644 5645 if (!dc_isar_feature(aa64_fcma, s)) { 5646 return false; 5647 } 5648 switch (a->esz) { 5649 case MO_64: 5650 if (!a->q) { 5651 return false; 5652 } 5653 fn = gen_helper_gvec_fcmlad; 5654 break; 5655 case MO_32: 5656 fn = gen_helper_gvec_fcmlas; 5657 break; 5658 case MO_16: 5659 if (!dc_isar_feature(aa64_fp16, s)) { 5660 return false; 5661 } 5662 fn = gen_helper_gvec_fcmlah; 5663 break; 5664 default: 5665 return false; 5666 } 5667 if (fp_access_check(s)) { 5668 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 5669 a->esz == MO_16, a->rot, fn); 5670 } 5671 return true; 5672 } 5673 5674 /* 5675 * Widening vector x vector/indexed. 5676 * 5677 * These read from the top or bottom half of a 128-bit vector. 5678 * After widening, optionally accumulate with a 128-bit vector. 5679 * Implement these inline, as the number of elements is limited 5680 * and the related SVE and SME operations on larger vectors use 5681 * even/odd elements instead of top/bottom half. 5682 * 5683 * If idx >= 0, operand 2 is indexed, otherwise vector. 5684 * If acc, operand 0 is loaded with rd. 5685 */ 5686 5688 static bool do_3op_widening(DisasContext *s, MemOp memop, int top, 5689 int rd, int rn, int rm, int idx, 5690 NeonGenTwo64OpFn *fn, bool acc) 5691 { 5692 TCGv_i64 tcg_op0 = tcg_temp_new_i64(); 5693 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 5694 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 5695 MemOp esz = memop & MO_SIZE; 5696 int half = 8 >> esz; 5697 int top_swap, top_half; 5698 5699 /* There are no 64x64->128 bit operations. */ 5700 if (esz >= MO_64) { 5701 return false; 5702 } 5703 if (!fp_access_check(s)) { 5704 return true; 5705 } 5706 5707 if (idx >= 0) { 5708 read_vec_element(s, tcg_op2, rm, idx, memop); 5709 } 5710 5711 /* 5712 * For top half inputs, iterate forward; backward for bottom half. 5713 * This means the store to the destination will not occur until 5714 * overlapping inputs are consumed. 5715 * Use top_swap to conditionally invert the forward iteration index. 5716 */ 5717 top_swap = top ? 0 : half - 1; 5718 top_half = top ?
half : 0; 5719 5720 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 5721 int elt = elt_fwd ^ top_swap; 5722 5723 read_vec_element(s, tcg_op1, rn, elt + top_half, memop); 5724 if (idx < 0) { 5725 read_vec_element(s, tcg_op2, rm, elt + top_half, memop); 5726 } 5727 if (acc) { 5728 read_vec_element(s, tcg_op0, rd, elt, memop + 1); 5729 } 5730 fn(tcg_op0, tcg_op1, tcg_op2); 5731 write_vec_element(s, tcg_op0, rd, elt, esz + 1); 5732 } 5733 clear_vec_high(s, 1, rd); 5734 return true; 5735 } 5736 5737 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5738 { 5739 TCGv_i64 t = tcg_temp_new_i64(); 5740 tcg_gen_mul_i64(t, n, m); 5741 tcg_gen_add_i64(d, d, t); 5742 } 5743 5744 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5745 { 5746 TCGv_i64 t = tcg_temp_new_i64(); 5747 tcg_gen_mul_i64(t, n, m); 5748 tcg_gen_sub_i64(d, d, t); 5749 } 5750 5751 TRANS(SMULL_v, do_3op_widening, 5752 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5753 tcg_gen_mul_i64, false) 5754 TRANS(UMULL_v, do_3op_widening, 5755 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5756 tcg_gen_mul_i64, false) 5757 TRANS(SMLAL_v, do_3op_widening, 5758 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5759 gen_muladd_i64, true) 5760 TRANS(UMLAL_v, do_3op_widening, 5761 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5762 gen_muladd_i64, true) 5763 TRANS(SMLSL_v, do_3op_widening, 5764 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5765 gen_mulsub_i64, true) 5766 TRANS(UMLSL_v, do_3op_widening, 5767 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5768 gen_mulsub_i64, true) 5769 5770 TRANS(SMULL_vi, do_3op_widening, 5771 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5772 tcg_gen_mul_i64, false) 5773 TRANS(UMULL_vi, do_3op_widening, 5774 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 5775 tcg_gen_mul_i64, false) 5776 TRANS(SMLAL_vi, do_3op_widening, 5777 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5778 gen_muladd_i64, true) 5779 TRANS(UMLAL_vi, do_3op_widening, 5780 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 5781 gen_muladd_i64, true) 5782 TRANS(SMLSL_vi, do_3op_widening, 5783 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5784 gen_mulsub_i64, true) 5785 TRANS(UMLSL_vi, do_3op_widening, 5786 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 5787 gen_mulsub_i64, true) 5788 5789 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5790 { 5791 TCGv_i64 t1 = tcg_temp_new_i64(); 5792 TCGv_i64 t2 = tcg_temp_new_i64(); 5793 5794 tcg_gen_sub_i64(t1, n, m); 5795 tcg_gen_sub_i64(t2, m, n); 5796 tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2); 5797 } 5798 5799 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5800 { 5801 TCGv_i64 t1 = tcg_temp_new_i64(); 5802 TCGv_i64 t2 = tcg_temp_new_i64(); 5803 5804 tcg_gen_sub_i64(t1, n, m); 5805 tcg_gen_sub_i64(t2, m, n); 5806 tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2); 5807 } 5808 5809 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5810 { 5811 TCGv_i64 t = tcg_temp_new_i64(); 5812 gen_sabd_i64(t, n, m); 5813 tcg_gen_add_i64(d, d, t); 5814 } 5815 5816 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5817 { 5818 TCGv_i64 t = tcg_temp_new_i64(); 5819 gen_uabd_i64(t, n, m); 5820 tcg_gen_add_i64(d, d, t); 5821 } 5822 5823 TRANS(SADDL_v, do_3op_widening, 5824 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5825 tcg_gen_add_i64, false) 5826 TRANS(UADDL_v, do_3op_widening, 5827 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5828 tcg_gen_add_i64, false) 5829 TRANS(SSUBL_v, do_3op_widening, 5830 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5831 
tcg_gen_sub_i64, false) 5832 TRANS(USUBL_v, do_3op_widening, 5833 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5834 tcg_gen_sub_i64, false) 5835 TRANS(SABDL_v, do_3op_widening, 5836 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5837 gen_sabd_i64, false) 5838 TRANS(UABDL_v, do_3op_widening, 5839 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5840 gen_uabd_i64, false) 5841 TRANS(SABAL_v, do_3op_widening, 5842 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5843 gen_saba_i64, true) 5844 TRANS(UABAL_v, do_3op_widening, 5845 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5846 gen_uaba_i64, true) 5847 5848 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5849 { 5850 tcg_gen_mul_i64(d, n, m); 5851 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d); 5852 } 5853 5854 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5855 { 5856 tcg_gen_mul_i64(d, n, m); 5857 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d); 5858 } 5859 5860 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5861 { 5862 TCGv_i64 t = tcg_temp_new_i64(); 5863 5864 tcg_gen_mul_i64(t, n, m); 5865 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 5866 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 5867 } 5868 5869 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5870 { 5871 TCGv_i64 t = tcg_temp_new_i64(); 5872 5873 tcg_gen_mul_i64(t, n, m); 5874 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 5875 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 5876 } 5877 5878 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5879 { 5880 TCGv_i64 t = tcg_temp_new_i64(); 5881 5882 tcg_gen_mul_i64(t, n, m); 5883 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 5884 tcg_gen_neg_i64(t, t); 5885 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 5886 } 5887 5888 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5889 { 5890 TCGv_i64 t = tcg_temp_new_i64(); 5891 5892 tcg_gen_mul_i64(t, n, m); 5893 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 5894 tcg_gen_neg_i64(t, t); 5895 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 5896 } 5897 5898 TRANS(SQDMULL_v, do_3op_widening, 5899 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5900 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 5901 TRANS(SQDMLAL_v, do_3op_widening, 5902 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5903 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 5904 TRANS(SQDMLSL_v, do_3op_widening, 5905 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5906 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true) 5907 5908 TRANS(SQDMULL_vi, do_3op_widening, 5909 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5910 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 5911 TRANS(SQDMLAL_vi, do_3op_widening, 5912 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5913 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 5914 TRANS(SQDMLSL_vi, do_3op_widening, 5915 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5916 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true) 5917 5918 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a, 5919 MemOp sign, bool sub) 5920 { 5921 TCGv_i64 tcg_op0, tcg_op1; 5922 MemOp esz = a->esz; 5923 int half = 8 >> esz; 5924 bool top = a->q; 5925 int top_swap = top ? 0 : half - 1; 5926 int top_half = top ? half : 0; 5927 5928 /* There are no 64x64->128 bit operations. 
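     * Rm supplies the narrow elements at size esz, while Rn and the
     * destination use the doubled element size esz + 1.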
     */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }
    tcg_op0 = tcg_temp_new_i64();
    tcg_op1 = tcg_temp_new_i64();

    for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
        int elt = elt_fwd ^ top_swap;

        read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign);
        read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
        if (sub) {
            tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
        } else {
            tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
        }
        write_vec_element(s, tcg_op0, a->rd, elt, esz + 1);
    }
    clear_vec_high(s, 1, a->rd);
    return true;
}

TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false)
TRANS(UADDW, do_addsub_wide, a, 0, false)
TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true)
TRANS(USUBW, do_addsub_wide, a, 0, true)

static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a,
                                 bool sub, bool round)
{
    TCGv_i64 tcg_op0, tcg_op1;
    MemOp esz = a->esz;
    int half = 8 >> esz;
    bool top = a->q;
    int ebits = 8 << esz;
    uint64_t rbit = 1ull << (ebits - 1);
    int top_swap, top_half;

    /* There are no 128x128->64 bit operations. */
    if (esz >= MO_64) {
        return false;
    }
    if (!fp_access_check(s)) {
        return true;
    }
    tcg_op0 = tcg_temp_new_i64();
    tcg_op1 = tcg_temp_new_i64();

    /*
     * For top half inputs, iterate backward; forward for bottom half.
     * This means the store to the destination will not occur until
     * overlapping inputs are consumed.
     */
    top_swap = top ? half - 1 : 0;
    top_half = top ? half : 0;

    for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) {
        int elt = elt_fwd ^ top_swap;

        read_vec_element(s, tcg_op1, a->rm, elt, esz + 1);
        read_vec_element(s, tcg_op0, a->rn, elt, esz + 1);
        if (sub) {
            tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1);
        } else {
            tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1);
        }
        if (round) {
            tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit);
        }
        tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits);
        write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz);
    }
    clear_vec_high(s, top, a->rd);
    return true;
}

TRANS(ADDHN, do_addsub_highnarrow, a, false, false)
TRANS(SUBHN, do_addsub_highnarrow, a, true, false)
TRANS(RADDHN, do_addsub_highnarrow, a, false, true)
TRANS(RSUBHN, do_addsub_highnarrow, a, true, true)

static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn)
{
    if (fp_access_check(s)) {
        /* The Q field specifies lo/hi half input for these insns.
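         * PMULL reads the low 64 bits of each source and PMULL2 the high
         * 64 bits, so the call below hard-codes is_q and instead passes
         * a->q to the helper as its data argument.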
*/ 6017 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn); 6018 } 6019 return true; 6020 } 6021 6022 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h) 6023 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q) 6024 6025 /* 6026 * Advanced SIMD scalar/vector x indexed element 6027 */ 6028 6029 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) 6030 { 6031 switch (a->esz) { 6032 case MO_64: 6033 if (fp_access_check(s)) { 6034 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 6035 TCGv_i64 t1 = tcg_temp_new_i64(); 6036 6037 read_vec_element(s, t1, a->rm, a->idx, MO_64); 6038 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 6039 write_fp_dreg(s, a->rd, t0); 6040 } 6041 break; 6042 case MO_32: 6043 if (fp_access_check(s)) { 6044 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 6045 TCGv_i32 t1 = tcg_temp_new_i32(); 6046 6047 read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); 6048 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 6049 write_fp_sreg(s, a->rd, t0); 6050 } 6051 break; 6052 case MO_16: 6053 if (!dc_isar_feature(aa64_fp16, s)) { 6054 return false; 6055 } 6056 if (fp_access_check(s)) { 6057 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 6058 TCGv_i32 t1 = tcg_temp_new_i32(); 6059 6060 read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); 6061 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); 6062 write_fp_sreg(s, a->rd, t0); 6063 } 6064 break; 6065 default: 6066 g_assert_not_reached(); 6067 } 6068 return true; 6069 } 6070 6071 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul) 6072 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx) 6073 6074 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) 6075 { 6076 switch (a->esz) { 6077 case MO_64: 6078 if (fp_access_check(s)) { 6079 TCGv_i64 t0 = read_fp_dreg(s, a->rd); 6080 TCGv_i64 t1 = read_fp_dreg(s, a->rn); 6081 TCGv_i64 t2 = tcg_temp_new_i64(); 6082 6083 read_vec_element(s, t2, a->rm, a->idx, MO_64); 6084 if (neg) { 6085 gen_vfp_negd(t1, t1); 6086 } 6087 gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR)); 6088 write_fp_dreg(s, a->rd, t0); 6089 } 6090 break; 6091 case MO_32: 6092 if (fp_access_check(s)) { 6093 TCGv_i32 t0 = read_fp_sreg(s, a->rd); 6094 TCGv_i32 t1 = read_fp_sreg(s, a->rn); 6095 TCGv_i32 t2 = tcg_temp_new_i32(); 6096 6097 read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); 6098 if (neg) { 6099 gen_vfp_negs(t1, t1); 6100 } 6101 gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR)); 6102 write_fp_sreg(s, a->rd, t0); 6103 } 6104 break; 6105 case MO_16: 6106 if (!dc_isar_feature(aa64_fp16, s)) { 6107 return false; 6108 } 6109 if (fp_access_check(s)) { 6110 TCGv_i32 t0 = read_fp_hreg(s, a->rd); 6111 TCGv_i32 t1 = read_fp_hreg(s, a->rn); 6112 TCGv_i32 t2 = tcg_temp_new_i32(); 6113 6114 read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); 6115 if (neg) { 6116 gen_vfp_negh(t1, t1); 6117 } 6118 gen_helper_advsimd_muladdh(t0, t1, t2, t0, 6119 fpstatus_ptr(FPST_FPCR_F16)); 6120 write_fp_sreg(s, a->rd, t0); 6121 } 6122 break; 6123 default: 6124 g_assert_not_reached(); 6125 } 6126 return true; 6127 } 6128 6129 TRANS(FMLA_si, do_fmla_scalar_idx, a, false) 6130 TRANS(FMLS_si, do_fmla_scalar_idx, a, true) 6131 6132 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a, 6133 const ENVScalar2 *f) 6134 { 6135 if (a->esz < MO_16 || a->esz > MO_32) { 6136 return false; 6137 } 6138 if (fp_access_check(s)) { 6139 TCGv_i32 t0 = tcg_temp_new_i32(); 6140 TCGv_i32 t1 = tcg_temp_new_i32(); 6141 6142 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6143 read_vec_element_i32(s, 
t1, a->rm, a->idx, a->esz); 6144 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 6145 write_fp_sreg(s, a->rd, t0); 6146 } 6147 return true; 6148 } 6149 6150 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh) 6151 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh) 6152 6153 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a, 6154 const ENVScalar3 *f) 6155 { 6156 if (a->esz < MO_16 || a->esz > MO_32) { 6157 return false; 6158 } 6159 if (fp_access_check(s)) { 6160 TCGv_i32 t0 = tcg_temp_new_i32(); 6161 TCGv_i32 t1 = tcg_temp_new_i32(); 6162 TCGv_i32 t2 = tcg_temp_new_i32(); 6163 6164 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6165 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6166 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 6167 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 6168 write_fp_sreg(s, a->rd, t0); 6169 } 6170 return true; 6171 } 6172 6173 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah) 6174 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh) 6175 6176 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a, 6177 NeonGenTwo64OpFn *fn, bool acc) 6178 { 6179 if (fp_access_check(s)) { 6180 TCGv_i64 t0 = tcg_temp_new_i64(); 6181 TCGv_i64 t1 = tcg_temp_new_i64(); 6182 TCGv_i64 t2 = tcg_temp_new_i64(); 6183 unsigned vsz, dofs; 6184 6185 if (acc) { 6186 read_vec_element(s, t0, a->rd, 0, a->esz + 1); 6187 } 6188 read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN); 6189 read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN); 6190 fn(t0, t1, t2); 6191 6192 /* Clear the whole register first, then store scalar. */ 6193 vsz = vec_full_reg_size(s); 6194 dofs = vec_full_reg_offset(s, a->rd); 6195 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 6196 write_vec_element(s, t0, a->rd, 0, a->esz + 1); 6197 } 6198 return true; 6199 } 6200 6201 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a, 6202 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6203 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a, 6204 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6205 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a, 6206 a->esz == MO_16 ? 
gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6207 6208 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6209 gen_helper_gvec_3_ptr * const fns[3]) 6210 { 6211 MemOp esz = a->esz; 6212 6213 switch (esz) { 6214 case MO_64: 6215 if (!a->q) { 6216 return false; 6217 } 6218 break; 6219 case MO_32: 6220 break; 6221 case MO_16: 6222 if (!dc_isar_feature(aa64_fp16, s)) { 6223 return false; 6224 } 6225 break; 6226 default: 6227 g_assert_not_reached(); 6228 } 6229 if (fp_access_check(s)) { 6230 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 6231 esz == MO_16, a->idx, fns[esz - 1]); 6232 } 6233 return true; 6234 } 6235 6236 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = { 6237 gen_helper_gvec_fmul_idx_h, 6238 gen_helper_gvec_fmul_idx_s, 6239 gen_helper_gvec_fmul_idx_d, 6240 }; 6241 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul) 6242 6243 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = { 6244 gen_helper_gvec_fmulx_idx_h, 6245 gen_helper_gvec_fmulx_idx_s, 6246 gen_helper_gvec_fmulx_idx_d, 6247 }; 6248 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) 6249 6250 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) 6251 { 6252 static gen_helper_gvec_4_ptr * const fns[3] = { 6253 gen_helper_gvec_fmla_idx_h, 6254 gen_helper_gvec_fmla_idx_s, 6255 gen_helper_gvec_fmla_idx_d, 6256 }; 6257 MemOp esz = a->esz; 6258 6259 switch (esz) { 6260 case MO_64: 6261 if (!a->q) { 6262 return false; 6263 } 6264 break; 6265 case MO_32: 6266 break; 6267 case MO_16: 6268 if (!dc_isar_feature(aa64_fp16, s)) { 6269 return false; 6270 } 6271 break; 6272 default: 6273 g_assert_not_reached(); 6274 } 6275 if (fp_access_check(s)) { 6276 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6277 esz == MO_16, (a->idx << 1) | neg, 6278 fns[esz - 1]); 6279 } 6280 return true; 6281 } 6282 6283 TRANS(FMLA_vi, do_fmla_vector_idx, a, false) 6284 TRANS(FMLS_vi, do_fmla_vector_idx, a, true) 6285 6286 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2) 6287 { 6288 if (fp_access_check(s)) { 6289 int data = (a->idx << 2) | (is_2 << 1) | is_s; 6290 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 6291 vec_full_reg_offset(s, a->rn), 6292 vec_full_reg_offset(s, a->rm), tcg_env, 6293 a->q ? 
16 : 8, vec_full_reg_size(s), 6294 data, gen_helper_gvec_fmlal_idx_a64); 6295 } 6296 return true; 6297 } 6298 6299 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false) 6300 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false) 6301 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true) 6302 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true) 6303 6304 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6305 gen_helper_gvec_3 * const fns[2]) 6306 { 6307 assert(a->esz == MO_16 || a->esz == MO_32); 6308 if (fp_access_check(s)) { 6309 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]); 6310 } 6311 return true; 6312 } 6313 6314 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = { 6315 gen_helper_gvec_mul_idx_h, 6316 gen_helper_gvec_mul_idx_s, 6317 }; 6318 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul) 6319 6320 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub) 6321 { 6322 static gen_helper_gvec_4 * const fns[2][2] = { 6323 { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h }, 6324 { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s }, 6325 }; 6326 6327 assert(a->esz == MO_16 || a->esz == MO_32); 6328 if (fp_access_check(s)) { 6329 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 6330 a->idx, fns[a->esz - 1][sub]); 6331 } 6332 return true; 6333 } 6334 6335 TRANS(MLA_vi, do_mla_vector_idx, a, false) 6336 TRANS(MLS_vi, do_mla_vector_idx, a, true) 6337 6338 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a, 6339 gen_helper_gvec_4 * const fns[2]) 6340 { 6341 assert(a->esz == MO_16 || a->esz == MO_32); 6342 if (fp_access_check(s)) { 6343 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), 6344 vec_full_reg_offset(s, a->rn), 6345 vec_full_reg_offset(s, a->rm), 6346 offsetof(CPUARMState, vfp.qc), 6347 a->q ? 
16 : 8, vec_full_reg_size(s), 6348 a->idx, fns[a->esz - 1]); 6349 } 6350 return true; 6351 } 6352 6353 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = { 6354 gen_helper_neon_sqdmulh_idx_h, 6355 gen_helper_neon_sqdmulh_idx_s, 6356 }; 6357 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh) 6358 6359 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = { 6360 gen_helper_neon_sqrdmulh_idx_h, 6361 gen_helper_neon_sqrdmulh_idx_s, 6362 }; 6363 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh) 6364 6365 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = { 6366 gen_helper_neon_sqrdmlah_idx_h, 6367 gen_helper_neon_sqrdmlah_idx_s, 6368 }; 6369 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6370 f_vector_idx_sqrdmlah) 6371 6372 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = { 6373 gen_helper_neon_sqrdmlsh_idx_h, 6374 gen_helper_neon_sqrdmlsh_idx_s, 6375 }; 6376 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6377 f_vector_idx_sqrdmlsh) 6378 6379 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a, 6380 gen_helper_gvec_4 *fn) 6381 { 6382 if (fp_access_check(s)) { 6383 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6384 } 6385 return true; 6386 } 6387 6388 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b) 6389 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b) 6390 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6391 gen_helper_gvec_sudot_idx_b) 6392 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6393 gen_helper_gvec_usdot_idx_b) 6394 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx, a, 6395 gen_helper_gvec_bfdot_idx) 6396 6397 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) 6398 { 6399 if (!dc_isar_feature(aa64_bf16, s)) { 6400 return false; 6401 } 6402 if (fp_access_check(s)) { 6403 /* Q bit selects BFMLALB vs BFMLALT. 
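         * As in the vector form, Q selects the even (B) or odd (T)
         * elements of Rn, while the single Rm element is fixed by a->idx;
         * both values are packed into the helper data below.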
*/ 6404 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0, 6405 (a->idx << 1) | a->q, 6406 gen_helper_gvec_bfmlal_idx); 6407 } 6408 return true; 6409 } 6410 6411 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) 6412 { 6413 gen_helper_gvec_4_ptr *fn; 6414 6415 if (!dc_isar_feature(aa64_fcma, s)) { 6416 return false; 6417 } 6418 switch (a->esz) { 6419 case MO_16: 6420 if (!dc_isar_feature(aa64_fp16, s)) { 6421 return false; 6422 } 6423 fn = gen_helper_gvec_fcmlah_idx; 6424 break; 6425 case MO_32: 6426 fn = gen_helper_gvec_fcmlas_idx; 6427 break; 6428 default: 6429 g_assert_not_reached(); 6430 } 6431 if (fp_access_check(s)) { 6432 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6433 a->esz == MO_16, (a->idx << 2) | a->rot, fn); 6434 } 6435 return true; 6436 } 6437 6438 /* 6439 * Advanced SIMD scalar pairwise 6440 */ 6441 6442 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) 6443 { 6444 switch (a->esz) { 6445 case MO_64: 6446 if (fp_access_check(s)) { 6447 TCGv_i64 t0 = tcg_temp_new_i64(); 6448 TCGv_i64 t1 = tcg_temp_new_i64(); 6449 6450 read_vec_element(s, t0, a->rn, 0, MO_64); 6451 read_vec_element(s, t1, a->rn, 1, MO_64); 6452 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 6453 write_fp_dreg(s, a->rd, t0); 6454 } 6455 break; 6456 case MO_32: 6457 if (fp_access_check(s)) { 6458 TCGv_i32 t0 = tcg_temp_new_i32(); 6459 TCGv_i32 t1 = tcg_temp_new_i32(); 6460 6461 read_vec_element_i32(s, t0, a->rn, 0, MO_32); 6462 read_vec_element_i32(s, t1, a->rn, 1, MO_32); 6463 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 6464 write_fp_sreg(s, a->rd, t0); 6465 } 6466 break; 6467 case MO_16: 6468 if (!dc_isar_feature(aa64_fp16, s)) { 6469 return false; 6470 } 6471 if (fp_access_check(s)) { 6472 TCGv_i32 t0 = tcg_temp_new_i32(); 6473 TCGv_i32 t1 = tcg_temp_new_i32(); 6474 6475 read_vec_element_i32(s, t0, a->rn, 0, MO_16); 6476 read_vec_element_i32(s, t1, a->rn, 1, MO_16); 6477 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); 6478 write_fp_sreg(s, a->rd, t0); 6479 } 6480 break; 6481 default: 6482 g_assert_not_reached(); 6483 } 6484 return true; 6485 } 6486 6487 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) 6488 TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax) 6489 TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin) 6490 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) 6491 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) 6492 6493 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a) 6494 { 6495 if (fp_access_check(s)) { 6496 TCGv_i64 t0 = tcg_temp_new_i64(); 6497 TCGv_i64 t1 = tcg_temp_new_i64(); 6498 6499 read_vec_element(s, t0, a->rn, 0, MO_64); 6500 read_vec_element(s, t1, a->rn, 1, MO_64); 6501 tcg_gen_add_i64(t0, t0, t1); 6502 write_fp_dreg(s, a->rd, t0); 6503 } 6504 return true; 6505 } 6506 6507 /* 6508 * Floating-point conditional select 6509 */ 6510 6511 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a) 6512 { 6513 TCGv_i64 t_true, t_false; 6514 DisasCompare64 c; 6515 6516 switch (a->esz) { 6517 case MO_32: 6518 case MO_64: 6519 break; 6520 case MO_16: 6521 if (!dc_isar_feature(aa64_fp16, s)) { 6522 return false; 6523 } 6524 break; 6525 default: 6526 return false; 6527 } 6528 6529 if (!fp_access_check(s)) { 6530 return true; 6531 } 6532 6533 /* Zero extend sreg & hreg inputs to 64 bits now. 
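     * read_vec_element() with MO_16/MO_32 zero-extends into the 64-bit
     * temporaries, so a single 64-bit movcond below covers all element
     * sizes.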
*/ 6534 t_true = tcg_temp_new_i64(); 6535 t_false = tcg_temp_new_i64(); 6536 read_vec_element(s, t_true, a->rn, 0, a->esz); 6537 read_vec_element(s, t_false, a->rm, 0, a->esz); 6538 6539 a64_test_cc(&c, a->cond); 6540 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 6541 t_true, t_false); 6542 6543 /* 6544 * Note that sregs & hregs write back zeros to the high bits, 6545 * and we've already done the zero-extension. 6546 */ 6547 write_fp_dreg(s, a->rd, t_true); 6548 return true; 6549 } 6550 6551 /* 6552 * Floating-point data-processing (3 source) 6553 */ 6554 6555 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) 6556 { 6557 TCGv_ptr fpst; 6558 6559 /* 6560 * These are fused multiply-add. Note that doing the negations here 6561 * as separate steps is correct: an input NaN should come out with 6562 * its sign bit flipped if it is a negated-input. 6563 */ 6564 switch (a->esz) { 6565 case MO_64: 6566 if (fp_access_check(s)) { 6567 TCGv_i64 tn = read_fp_dreg(s, a->rn); 6568 TCGv_i64 tm = read_fp_dreg(s, a->rm); 6569 TCGv_i64 ta = read_fp_dreg(s, a->ra); 6570 6571 if (neg_a) { 6572 gen_vfp_negd(ta, ta); 6573 } 6574 if (neg_n) { 6575 gen_vfp_negd(tn, tn); 6576 } 6577 fpst = fpstatus_ptr(FPST_FPCR); 6578 gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); 6579 write_fp_dreg(s, a->rd, ta); 6580 } 6581 break; 6582 6583 case MO_32: 6584 if (fp_access_check(s)) { 6585 TCGv_i32 tn = read_fp_sreg(s, a->rn); 6586 TCGv_i32 tm = read_fp_sreg(s, a->rm); 6587 TCGv_i32 ta = read_fp_sreg(s, a->ra); 6588 6589 if (neg_a) { 6590 gen_vfp_negs(ta, ta); 6591 } 6592 if (neg_n) { 6593 gen_vfp_negs(tn, tn); 6594 } 6595 fpst = fpstatus_ptr(FPST_FPCR); 6596 gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); 6597 write_fp_sreg(s, a->rd, ta); 6598 } 6599 break; 6600 6601 case MO_16: 6602 if (!dc_isar_feature(aa64_fp16, s)) { 6603 return false; 6604 } 6605 if (fp_access_check(s)) { 6606 TCGv_i32 tn = read_fp_hreg(s, a->rn); 6607 TCGv_i32 tm = read_fp_hreg(s, a->rm); 6608 TCGv_i32 ta = read_fp_hreg(s, a->ra); 6609 6610 if (neg_a) { 6611 gen_vfp_negh(ta, ta); 6612 } 6613 if (neg_n) { 6614 gen_vfp_negh(tn, tn); 6615 } 6616 fpst = fpstatus_ptr(FPST_FPCR_F16); 6617 gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); 6618 write_fp_sreg(s, a->rd, ta); 6619 } 6620 break; 6621 6622 default: 6623 return false; 6624 } 6625 return true; 6626 } 6627 6628 TRANS(FMADD, do_fmadd, a, false, false) 6629 TRANS(FNMADD, do_fmadd, a, true, true) 6630 TRANS(FMSUB, do_fmadd, a, false, true) 6631 TRANS(FNMSUB, do_fmadd, a, true, false) 6632 6633 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 6634 * Note that it is the caller's responsibility to ensure that the 6635 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 6636 * mandated semantics for out of range shifts. 6637 */ 6638 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 6639 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 6640 { 6641 switch (shift_type) { 6642 case A64_SHIFT_TYPE_LSL: 6643 tcg_gen_shl_i64(dst, src, shift_amount); 6644 break; 6645 case A64_SHIFT_TYPE_LSR: 6646 tcg_gen_shr_i64(dst, src, shift_amount); 6647 break; 6648 case A64_SHIFT_TYPE_ASR: 6649 if (!sf) { 6650 tcg_gen_ext32s_i64(dst, src); 6651 } 6652 tcg_gen_sar_i64(dst, sf ? 
src : dst, shift_amount); 6653 break; 6654 case A64_SHIFT_TYPE_ROR: 6655 if (sf) { 6656 tcg_gen_rotr_i64(dst, src, shift_amount); 6657 } else { 6658 TCGv_i32 t0, t1; 6659 t0 = tcg_temp_new_i32(); 6660 t1 = tcg_temp_new_i32(); 6661 tcg_gen_extrl_i64_i32(t0, src); 6662 tcg_gen_extrl_i64_i32(t1, shift_amount); 6663 tcg_gen_rotr_i32(t0, t0, t1); 6664 tcg_gen_extu_i32_i64(dst, t0); 6665 } 6666 break; 6667 default: 6668 assert(FALSE); /* all shift types should be handled */ 6669 break; 6670 } 6671 6672 if (!sf) { /* zero extend final result */ 6673 tcg_gen_ext32u_i64(dst, dst); 6674 } 6675 } 6676 6677 /* Shift a TCGv src by immediate, put result in dst. 6678 * The shift amount must be in range (this should always be true as the 6679 * relevant instructions will UNDEF on bad shift immediates). 6680 */ 6681 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, 6682 enum a64_shift_type shift_type, unsigned int shift_i) 6683 { 6684 assert(shift_i < (sf ? 64 : 32)); 6685 6686 if (shift_i == 0) { 6687 tcg_gen_mov_i64(dst, src); 6688 } else { 6689 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); 6690 } 6691 } 6692 6693 /* Logical (shifted register) 6694 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 6695 * +----+-----+-----------+-------+---+------+--------+------+------+ 6696 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd | 6697 * +----+-----+-----------+-------+---+------+--------+------+------+ 6698 */ 6699 static void disas_logic_reg(DisasContext *s, uint32_t insn) 6700 { 6701 TCGv_i64 tcg_rd, tcg_rn, tcg_rm; 6702 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd; 6703 6704 sf = extract32(insn, 31, 1); 6705 opc = extract32(insn, 29, 2); 6706 shift_type = extract32(insn, 22, 2); 6707 invert = extract32(insn, 21, 1); 6708 rm = extract32(insn, 16, 5); 6709 shift_amount = extract32(insn, 10, 6); 6710 rn = extract32(insn, 5, 5); 6711 rd = extract32(insn, 0, 5); 6712 6713 if (!sf && (shift_amount & (1 << 5))) { 6714 unallocated_encoding(s); 6715 return; 6716 } 6717 6718 tcg_rd = cpu_reg(s, rd); 6719 6720 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) { 6721 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for 6722 * register-register MOV and MVN, so it is worth special casing. 
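         * For example, "MOV X0, X1" assembles as "ORR X0, XZR, X1" and
         * "MVN W0, W1" as "ORN W0, WZR, W1".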
6723 */ 6724 tcg_rm = cpu_reg(s, rm); 6725 if (invert) { 6726 tcg_gen_not_i64(tcg_rd, tcg_rm); 6727 if (!sf) { 6728 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 6729 } 6730 } else { 6731 if (sf) { 6732 tcg_gen_mov_i64(tcg_rd, tcg_rm); 6733 } else { 6734 tcg_gen_ext32u_i64(tcg_rd, tcg_rm); 6735 } 6736 } 6737 return; 6738 } 6739 6740 tcg_rm = read_cpu_reg(s, rm, sf); 6741 6742 if (shift_amount) { 6743 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount); 6744 } 6745 6746 tcg_rn = cpu_reg(s, rn); 6747 6748 switch (opc | (invert << 2)) { 6749 case 0: /* AND */ 6750 case 3: /* ANDS */ 6751 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm); 6752 break; 6753 case 1: /* ORR */ 6754 tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm); 6755 break; 6756 case 2: /* EOR */ 6757 tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm); 6758 break; 6759 case 4: /* BIC */ 6760 case 7: /* BICS */ 6761 tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm); 6762 break; 6763 case 5: /* ORN */ 6764 tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm); 6765 break; 6766 case 6: /* EON */ 6767 tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm); 6768 break; 6769 default: 6770 assert(FALSE); 6771 break; 6772 } 6773 6774 if (!sf) { 6775 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 6776 } 6777 6778 if (opc == 3) { 6779 gen_logic_CC(sf, tcg_rd); 6780 } 6781 } 6782 6783 /* 6784 * Add/subtract (extended register) 6785 * 6786 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0| 6787 * +--+--+--+-----------+-----+--+-------+------+------+----+----+ 6788 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd | 6789 * +--+--+--+-----------+-----+--+-------+------+------+----+----+ 6790 * 6791 * sf: 0 -> 32bit, 1 -> 64bit 6792 * op: 0 -> add , 1 -> sub 6793 * S: 1 -> set flags 6794 * opt: 00 6795 * option: extension type (see DecodeRegExtend) 6796 * imm3: optional shift to Rm 6797 * 6798 * Rd = Rn + LSL(extend(Rm), amount) 6799 */ 6800 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn) 6801 { 6802 int rd = extract32(insn, 0, 5); 6803 int rn = extract32(insn, 5, 5); 6804 int imm3 = extract32(insn, 10, 3); 6805 int option = extract32(insn, 13, 3); 6806 int rm = extract32(insn, 16, 5); 6807 int opt = extract32(insn, 22, 2); 6808 bool setflags = extract32(insn, 29, 1); 6809 bool sub_op = extract32(insn, 30, 1); 6810 bool sf = extract32(insn, 31, 1); 6811 6812 TCGv_i64 tcg_rm, tcg_rn; /* temps */ 6813 TCGv_i64 tcg_rd; 6814 TCGv_i64 tcg_result; 6815 6816 if (imm3 > 4 || opt != 0) { 6817 unallocated_encoding(s); 6818 return; 6819 } 6820 6821 /* non-flag setting ops may use SP */ 6822 if (!setflags) { 6823 tcg_rd = cpu_reg_sp(s, rd); 6824 } else { 6825 tcg_rd = cpu_reg(s, rd); 6826 } 6827 tcg_rn = read_cpu_reg_sp(s, rn, sf); 6828 6829 tcg_rm = read_cpu_reg(s, rm, sf); 6830 ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3); 6831 6832 tcg_result = tcg_temp_new_i64(); 6833 6834 if (!setflags) { 6835 if (sub_op) { 6836 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 6837 } else { 6838 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 6839 } 6840 } else { 6841 if (sub_op) { 6842 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 6843 } else { 6844 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 6845 } 6846 } 6847 6848 if (sf) { 6849 tcg_gen_mov_i64(tcg_rd, tcg_result); 6850 } else { 6851 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 6852 } 6853 } 6854 6855 /* 6856 * Add/subtract (shifted register) 6857 * 6858 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 6859 * +--+--+--+-----------+-----+--+-------+---------+------+------+ 6860 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd | 6861 * 
+--+--+--+-----------+-----+--+-------+---------+------+------+ 6862 * 6863 * sf: 0 -> 32bit, 1 -> 64bit 6864 * op: 0 -> add , 1 -> sub 6865 * S: 1 -> set flags 6866 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED 6867 * imm6: Shift amount to apply to Rm before the add/sub 6868 */ 6869 static void disas_add_sub_reg(DisasContext *s, uint32_t insn) 6870 { 6871 int rd = extract32(insn, 0, 5); 6872 int rn = extract32(insn, 5, 5); 6873 int imm6 = extract32(insn, 10, 6); 6874 int rm = extract32(insn, 16, 5); 6875 int shift_type = extract32(insn, 22, 2); 6876 bool setflags = extract32(insn, 29, 1); 6877 bool sub_op = extract32(insn, 30, 1); 6878 bool sf = extract32(insn, 31, 1); 6879 6880 TCGv_i64 tcg_rd = cpu_reg(s, rd); 6881 TCGv_i64 tcg_rn, tcg_rm; 6882 TCGv_i64 tcg_result; 6883 6884 if ((shift_type == 3) || (!sf && (imm6 > 31))) { 6885 unallocated_encoding(s); 6886 return; 6887 } 6888 6889 tcg_rn = read_cpu_reg(s, rn, sf); 6890 tcg_rm = read_cpu_reg(s, rm, sf); 6891 6892 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6); 6893 6894 tcg_result = tcg_temp_new_i64(); 6895 6896 if (!setflags) { 6897 if (sub_op) { 6898 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 6899 } else { 6900 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 6901 } 6902 } else { 6903 if (sub_op) { 6904 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 6905 } else { 6906 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 6907 } 6908 } 6909 6910 if (sf) { 6911 tcg_gen_mov_i64(tcg_rd, tcg_result); 6912 } else { 6913 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 6914 } 6915 } 6916 6917 /* Data-processing (3 source) 6918 * 6919 * 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0 6920 * +--+------+-----------+------+------+----+------+------+------+ 6921 * |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd | 6922 * +--+------+-----------+------+------+----+------+------+------+ 6923 */ 6924 static void disas_data_proc_3src(DisasContext *s, uint32_t insn) 6925 { 6926 int rd = extract32(insn, 0, 5); 6927 int rn = extract32(insn, 5, 5); 6928 int ra = extract32(insn, 10, 5); 6929 int rm = extract32(insn, 16, 5); 6930 int op_id = (extract32(insn, 29, 3) << 4) | 6931 (extract32(insn, 21, 3) << 1) | 6932 extract32(insn, 15, 1); 6933 bool sf = extract32(insn, 31, 1); 6934 bool is_sub = extract32(op_id, 0, 1); 6935 bool is_high = extract32(op_id, 2, 1); 6936 bool is_signed = false; 6937 TCGv_i64 tcg_op1; 6938 TCGv_i64 tcg_op2; 6939 TCGv_i64 tcg_tmp; 6940 6941 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */ 6942 switch (op_id) { 6943 case 0x42: /* SMADDL */ 6944 case 0x43: /* SMSUBL */ 6945 case 0x44: /* SMULH */ 6946 is_signed = true; 6947 break; 6948 case 0x0: /* MADD (32bit) */ 6949 case 0x1: /* MSUB (32bit) */ 6950 case 0x40: /* MADD (64bit) */ 6951 case 0x41: /* MSUB (64bit) */ 6952 case 0x4a: /* UMADDL */ 6953 case 0x4b: /* UMSUBL */ 6954 case 0x4c: /* UMULH */ 6955 break; 6956 default: 6957 unallocated_encoding(s); 6958 return; 6959 } 6960 6961 if (is_high) { 6962 TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */ 6963 TCGv_i64 tcg_rd = cpu_reg(s, rd); 6964 TCGv_i64 tcg_rn = cpu_reg(s, rn); 6965 TCGv_i64 tcg_rm = cpu_reg(s, rm); 6966 6967 if (is_signed) { 6968 tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 6969 } else { 6970 tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 6971 } 6972 return; 6973 } 6974 6975 tcg_op1 = tcg_temp_new_i64(); 6976 tcg_op2 = tcg_temp_new_i64(); 6977 tcg_tmp = tcg_temp_new_i64(); 6978 6979 if (op_id < 0x42) { 6980 tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn)); 6981 
tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm)); 6982 } else { 6983 if (is_signed) { 6984 tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn)); 6985 tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm)); 6986 } else { 6987 tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn)); 6988 tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm)); 6989 } 6990 } 6991 6992 if (ra == 31 && !is_sub) { 6993 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 6994 tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2); 6995 } else { 6996 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 6997 if (is_sub) { 6998 tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 6999 } else { 7000 tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 7001 } 7002 } 7003 7004 if (!sf) { 7005 tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd)); 7006 } 7007 } 7008 7009 /* Add/subtract (with carry) 7010 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0 7011 * +--+--+--+------------------------+------+-------------+------+-----+ 7012 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd | 7013 * +--+--+--+------------------------+------+-------------+------+-----+ 7014 */ 7015 7016 static void disas_adc_sbc(DisasContext *s, uint32_t insn) 7017 { 7018 unsigned int sf, op, setflags, rm, rn, rd; 7019 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 7020 7021 sf = extract32(insn, 31, 1); 7022 op = extract32(insn, 30, 1); 7023 setflags = extract32(insn, 29, 1); 7024 rm = extract32(insn, 16, 5); 7025 rn = extract32(insn, 5, 5); 7026 rd = extract32(insn, 0, 5); 7027 7028 tcg_rd = cpu_reg(s, rd); 7029 tcg_rn = cpu_reg(s, rn); 7030 7031 if (op) { 7032 tcg_y = tcg_temp_new_i64(); 7033 tcg_gen_not_i64(tcg_y, cpu_reg(s, rm)); 7034 } else { 7035 tcg_y = cpu_reg(s, rm); 7036 } 7037 7038 if (setflags) { 7039 gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y); 7040 } else { 7041 gen_adc(sf, tcg_rd, tcg_rn, tcg_y); 7042 } 7043 } 7044 7045 /* 7046 * Rotate right into flags 7047 * 31 30 29 21 15 10 5 4 0 7048 * +--+--+--+-----------------+--------+-----------+------+--+------+ 7049 * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask | 7050 * +--+--+--+-----------------+--------+-----------+------+--+------+ 7051 */ 7052 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn) 7053 { 7054 int mask = extract32(insn, 0, 4); 7055 int o2 = extract32(insn, 4, 1); 7056 int rn = extract32(insn, 5, 5); 7057 int imm6 = extract32(insn, 15, 6); 7058 int sf_op_s = extract32(insn, 29, 3); 7059 TCGv_i64 tcg_rn; 7060 TCGv_i32 nzcv; 7061 7062 if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) { 7063 unallocated_encoding(s); 7064 return; 7065 } 7066 7067 tcg_rn = read_cpu_reg(s, rn, 1); 7068 tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6); 7069 7070 nzcv = tcg_temp_new_i32(); 7071 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 7072 7073 if (mask & 8) { /* N */ 7074 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 7075 } 7076 if (mask & 4) { /* Z */ 7077 tcg_gen_not_i32(cpu_ZF, nzcv); 7078 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 7079 } 7080 if (mask & 2) { /* C */ 7081 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 7082 } 7083 if (mask & 1) { /* V */ 7084 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 7085 } 7086 } 7087 7088 /* 7089 * Evaluate into flags 7090 * 31 30 29 21 15 14 10 5 4 0 7091 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 7092 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 | Rn |o3| mask | 7093 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 7094 */ 7095 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn) 7096 { 7097 int 
o3_mask = extract32(insn, 0, 5); 7098 int rn = extract32(insn, 5, 5); 7099 int o2 = extract32(insn, 15, 6); 7100 int sz = extract32(insn, 14, 1); 7101 int sf_op_s = extract32(insn, 29, 3); 7102 TCGv_i32 tmp; 7103 int shift; 7104 7105 if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd || 7106 !dc_isar_feature(aa64_condm_4, s)) { 7107 unallocated_encoding(s); 7108 return; 7109 } 7110 shift = sz ? 16 : 24; /* SETF16 or SETF8 */ 7111 7112 tmp = tcg_temp_new_i32(); 7113 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 7114 tcg_gen_shli_i32(cpu_NF, tmp, shift); 7115 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 7116 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 7117 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 7118 } 7119 7120 /* Conditional compare (immediate / register) 7121 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 7122 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 7123 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv | 7124 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 7125 * [1] y [0] [0] 7126 */ 7127 static void disas_cc(DisasContext *s, uint32_t insn) 7128 { 7129 unsigned int sf, op, y, cond, rn, nzcv, is_imm; 7130 TCGv_i32 tcg_t0, tcg_t1, tcg_t2; 7131 TCGv_i64 tcg_tmp, tcg_y, tcg_rn; 7132 DisasCompare c; 7133 7134 if (!extract32(insn, 29, 1)) { 7135 unallocated_encoding(s); 7136 return; 7137 } 7138 if (insn & (1 << 10 | 1 << 4)) { 7139 unallocated_encoding(s); 7140 return; 7141 } 7142 sf = extract32(insn, 31, 1); 7143 op = extract32(insn, 30, 1); 7144 is_imm = extract32(insn, 11, 1); 7145 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */ 7146 cond = extract32(insn, 12, 4); 7147 rn = extract32(insn, 5, 5); 7148 nzcv = extract32(insn, 0, 4); 7149 7150 /* Set T0 = !COND. */ 7151 tcg_t0 = tcg_temp_new_i32(); 7152 arm_test_cc(&c, cond); 7153 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 7154 7155 /* Load the arguments for the new comparison. */ 7156 if (is_imm) { 7157 tcg_y = tcg_temp_new_i64(); 7158 tcg_gen_movi_i64(tcg_y, y); 7159 } else { 7160 tcg_y = cpu_reg(s, y); 7161 } 7162 tcg_rn = cpu_reg(s, rn); 7163 7164 /* Set the flags for the new comparison. */ 7165 tcg_tmp = tcg_temp_new_i64(); 7166 if (op) { 7167 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y); 7168 } else { 7169 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y); 7170 } 7171 7172 /* If COND was false, force the flags to #nzcv. Compute two masks 7173 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 7174 * For tcg hosts that support ANDC, we can make do with just T1. 7175 * In either case, allow the tcg optimizer to delete any unused mask. 
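     * For example, when the requested C is 1, OR-ing T0 into CF sets it
     * only when COND failed; when the requested C is 0, AND-ing CF with
     * ~T1 (an ANDC with T1, or an AND with T2) clears it only when COND
     * failed.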
7176 */ 7177 tcg_t1 = tcg_temp_new_i32(); 7178 tcg_t2 = tcg_temp_new_i32(); 7179 tcg_gen_neg_i32(tcg_t1, tcg_t0); 7180 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 7181 7182 if (nzcv & 8) { /* N */ 7183 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 7184 } else { 7185 if (TCG_TARGET_HAS_andc_i32) { 7186 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 7187 } else { 7188 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 7189 } 7190 } 7191 if (nzcv & 4) { /* Z */ 7192 if (TCG_TARGET_HAS_andc_i32) { 7193 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 7194 } else { 7195 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 7196 } 7197 } else { 7198 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 7199 } 7200 if (nzcv & 2) { /* C */ 7201 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 7202 } else { 7203 if (TCG_TARGET_HAS_andc_i32) { 7204 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 7205 } else { 7206 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 7207 } 7208 } 7209 if (nzcv & 1) { /* V */ 7210 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 7211 } else { 7212 if (TCG_TARGET_HAS_andc_i32) { 7213 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 7214 } else { 7215 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 7216 } 7217 } 7218 } 7219 7220 /* Conditional select 7221 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0 7222 * +----+----+---+-----------------+------+------+-----+------+------+ 7223 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd | 7224 * +----+----+---+-----------------+------+------+-----+------+------+ 7225 */ 7226 static void disas_cond_select(DisasContext *s, uint32_t insn) 7227 { 7228 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd; 7229 TCGv_i64 tcg_rd, zero; 7230 DisasCompare64 c; 7231 7232 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) { 7233 /* S == 1 or op2<1> == 1 */ 7234 unallocated_encoding(s); 7235 return; 7236 } 7237 sf = extract32(insn, 31, 1); 7238 else_inv = extract32(insn, 30, 1); 7239 rm = extract32(insn, 16, 5); 7240 cond = extract32(insn, 12, 4); 7241 else_inc = extract32(insn, 10, 1); 7242 rn = extract32(insn, 5, 5); 7243 rd = extract32(insn, 0, 5); 7244 7245 tcg_rd = cpu_reg(s, rd); 7246 7247 a64_test_cc(&c, cond); 7248 zero = tcg_constant_i64(0); 7249 7250 if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) { 7251 /* CSET & CSETM. 
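         * CSET Rd, <cond> is an alias of CSINC Rd, ZR, ZR, invert(<cond>),
         * and CSETM of CSINV Rd, ZR, ZR, invert(<cond>): the encoded
         * condition is the inverse of the one being tested, hence the
         * tcg_invert_cond() below.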
*/ 7252 if (else_inv) { 7253 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), 7254 tcg_rd, c.value, zero); 7255 } else { 7256 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), 7257 tcg_rd, c.value, zero); 7258 } 7259 } else { 7260 TCGv_i64 t_true = cpu_reg(s, rn); 7261 TCGv_i64 t_false = read_cpu_reg(s, rm, 1); 7262 if (else_inv && else_inc) { 7263 tcg_gen_neg_i64(t_false, t_false); 7264 } else if (else_inv) { 7265 tcg_gen_not_i64(t_false, t_false); 7266 } else if (else_inc) { 7267 tcg_gen_addi_i64(t_false, t_false, 1); 7268 } 7269 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 7270 } 7271 7272 if (!sf) { 7273 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7274 } 7275 } 7276 7277 static void handle_clz(DisasContext *s, unsigned int sf, 7278 unsigned int rn, unsigned int rd) 7279 { 7280 TCGv_i64 tcg_rd, tcg_rn; 7281 tcg_rd = cpu_reg(s, rd); 7282 tcg_rn = cpu_reg(s, rn); 7283 7284 if (sf) { 7285 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 7286 } else { 7287 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 7288 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 7289 tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32); 7290 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 7291 } 7292 } 7293 7294 static void handle_cls(DisasContext *s, unsigned int sf, 7295 unsigned int rn, unsigned int rd) 7296 { 7297 TCGv_i64 tcg_rd, tcg_rn; 7298 tcg_rd = cpu_reg(s, rd); 7299 tcg_rn = cpu_reg(s, rn); 7300 7301 if (sf) { 7302 tcg_gen_clrsb_i64(tcg_rd, tcg_rn); 7303 } else { 7304 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 7305 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 7306 tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32); 7307 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 7308 } 7309 } 7310 7311 static void handle_rbit(DisasContext *s, unsigned int sf, 7312 unsigned int rn, unsigned int rd) 7313 { 7314 TCGv_i64 tcg_rd, tcg_rn; 7315 tcg_rd = cpu_reg(s, rd); 7316 tcg_rn = cpu_reg(s, rn); 7317 7318 if (sf) { 7319 gen_helper_rbit64(tcg_rd, tcg_rn); 7320 } else { 7321 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 7322 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 7323 gen_helper_rbit(tcg_tmp32, tcg_tmp32); 7324 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 7325 } 7326 } 7327 7328 /* REV with sf==1, opcode==3 ("REV64") */ 7329 static void handle_rev64(DisasContext *s, unsigned int sf, 7330 unsigned int rn, unsigned int rd) 7331 { 7332 if (!sf) { 7333 unallocated_encoding(s); 7334 return; 7335 } 7336 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn)); 7337 } 7338 7339 /* REV with sf==0, opcode==2 7340 * REV32 (sf==1, opcode==2) 7341 */ 7342 static void handle_rev32(DisasContext *s, unsigned int sf, 7343 unsigned int rn, unsigned int rd) 7344 { 7345 TCGv_i64 tcg_rd = cpu_reg(s, rd); 7346 TCGv_i64 tcg_rn = cpu_reg(s, rn); 7347 7348 if (sf) { 7349 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 7350 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 7351 } else { 7352 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 7353 } 7354 } 7355 7356 /* REV16 (opcode==1) */ 7357 static void handle_rev16(DisasContext *s, unsigned int sf, 7358 unsigned int rn, unsigned int rd) 7359 { 7360 TCGv_i64 tcg_rd = cpu_reg(s, rd); 7361 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 7362 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 7363 TCGv_i64 mask = tcg_constant_i64(sf ? 
0x00ff00ff00ff00ffull : 0x00ff00ff); 7364 7365 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 7366 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 7367 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 7368 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 7369 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 7370 } 7371 7372 /* Data-processing (1 source) 7373 * 31 30 29 28 21 20 16 15 10 9 5 4 0 7374 * +----+---+---+-----------------+---------+--------+------+------+ 7375 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd | 7376 * +----+---+---+-----------------+---------+--------+------+------+ 7377 */ 7378 static void disas_data_proc_1src(DisasContext *s, uint32_t insn) 7379 { 7380 unsigned int sf, opcode, opcode2, rn, rd; 7381 TCGv_i64 tcg_rd; 7382 7383 if (extract32(insn, 29, 1)) { 7384 unallocated_encoding(s); 7385 return; 7386 } 7387 7388 sf = extract32(insn, 31, 1); 7389 opcode = extract32(insn, 10, 6); 7390 opcode2 = extract32(insn, 16, 5); 7391 rn = extract32(insn, 5, 5); 7392 rd = extract32(insn, 0, 5); 7393 7394 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7)) 7395 7396 switch (MAP(sf, opcode2, opcode)) { 7397 case MAP(0, 0x00, 0x00): /* RBIT */ 7398 case MAP(1, 0x00, 0x00): 7399 handle_rbit(s, sf, rn, rd); 7400 break; 7401 case MAP(0, 0x00, 0x01): /* REV16 */ 7402 case MAP(1, 0x00, 0x01): 7403 handle_rev16(s, sf, rn, rd); 7404 break; 7405 case MAP(0, 0x00, 0x02): /* REV/REV32 */ 7406 case MAP(1, 0x00, 0x02): 7407 handle_rev32(s, sf, rn, rd); 7408 break; 7409 case MAP(1, 0x00, 0x03): /* REV64 */ 7410 handle_rev64(s, sf, rn, rd); 7411 break; 7412 case MAP(0, 0x00, 0x04): /* CLZ */ 7413 case MAP(1, 0x00, 0x04): 7414 handle_clz(s, sf, rn, rd); 7415 break; 7416 case MAP(0, 0x00, 0x05): /* CLS */ 7417 case MAP(1, 0x00, 0x05): 7418 handle_cls(s, sf, rn, rd); 7419 break; 7420 case MAP(1, 0x01, 0x00): /* PACIA */ 7421 if (s->pauth_active) { 7422 tcg_rd = cpu_reg(s, rd); 7423 gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 7424 } else if (!dc_isar_feature(aa64_pauth, s)) { 7425 goto do_unallocated; 7426 } 7427 break; 7428 case MAP(1, 0x01, 0x01): /* PACIB */ 7429 if (s->pauth_active) { 7430 tcg_rd = cpu_reg(s, rd); 7431 gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 7432 } else if (!dc_isar_feature(aa64_pauth, s)) { 7433 goto do_unallocated; 7434 } 7435 break; 7436 case MAP(1, 0x01, 0x02): /* PACDA */ 7437 if (s->pauth_active) { 7438 tcg_rd = cpu_reg(s, rd); 7439 gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 7440 } else if (!dc_isar_feature(aa64_pauth, s)) { 7441 goto do_unallocated; 7442 } 7443 break; 7444 case MAP(1, 0x01, 0x03): /* PACDB */ 7445 if (s->pauth_active) { 7446 tcg_rd = cpu_reg(s, rd); 7447 gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 7448 } else if (!dc_isar_feature(aa64_pauth, s)) { 7449 goto do_unallocated; 7450 } 7451 break; 7452 case MAP(1, 0x01, 0x04): /* AUTIA */ 7453 if (s->pauth_active) { 7454 tcg_rd = cpu_reg(s, rd); 7455 gen_helper_autia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 7456 } else if (!dc_isar_feature(aa64_pauth, s)) { 7457 goto do_unallocated; 7458 } 7459 break; 7460 case MAP(1, 0x01, 0x05): /* AUTIB */ 7461 if (s->pauth_active) { 7462 tcg_rd = cpu_reg(s, rd); 7463 gen_helper_autib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 7464 } else if (!dc_isar_feature(aa64_pauth, s)) { 7465 goto do_unallocated; 7466 } 7467 break; 7468 case MAP(1, 0x01, 0x06): /* AUTDA */ 7469 if (s->pauth_active) { 7470 tcg_rd = cpu_reg(s, rd); 7471 gen_helper_autda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 7472 } else if 
(!dc_isar_feature(aa64_pauth, s)) { 7473 goto do_unallocated; 7474 } 7475 break; 7476 case MAP(1, 0x01, 0x07): /* AUTDB */ 7477 if (s->pauth_active) { 7478 tcg_rd = cpu_reg(s, rd); 7479 gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 7480 } else if (!dc_isar_feature(aa64_pauth, s)) { 7481 goto do_unallocated; 7482 } 7483 break; 7484 case MAP(1, 0x01, 0x08): /* PACIZA */ 7485 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 7486 goto do_unallocated; 7487 } else if (s->pauth_active) { 7488 tcg_rd = cpu_reg(s, rd); 7489 gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 7490 } 7491 break; 7492 case MAP(1, 0x01, 0x09): /* PACIZB */ 7493 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 7494 goto do_unallocated; 7495 } else if (s->pauth_active) { 7496 tcg_rd = cpu_reg(s, rd); 7497 gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 7498 } 7499 break; 7500 case MAP(1, 0x01, 0x0a): /* PACDZA */ 7501 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 7502 goto do_unallocated; 7503 } else if (s->pauth_active) { 7504 tcg_rd = cpu_reg(s, rd); 7505 gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 7506 } 7507 break; 7508 case MAP(1, 0x01, 0x0b): /* PACDZB */ 7509 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 7510 goto do_unallocated; 7511 } else if (s->pauth_active) { 7512 tcg_rd = cpu_reg(s, rd); 7513 gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 7514 } 7515 break; 7516 case MAP(1, 0x01, 0x0c): /* AUTIZA */ 7517 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 7518 goto do_unallocated; 7519 } else if (s->pauth_active) { 7520 tcg_rd = cpu_reg(s, rd); 7521 gen_helper_autia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 7522 } 7523 break; 7524 case MAP(1, 0x01, 0x0d): /* AUTIZB */ 7525 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 7526 goto do_unallocated; 7527 } else if (s->pauth_active) { 7528 tcg_rd = cpu_reg(s, rd); 7529 gen_helper_autib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 7530 } 7531 break; 7532 case MAP(1, 0x01, 0x0e): /* AUTDZA */ 7533 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 7534 goto do_unallocated; 7535 } else if (s->pauth_active) { 7536 tcg_rd = cpu_reg(s, rd); 7537 gen_helper_autda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 7538 } 7539 break; 7540 case MAP(1, 0x01, 0x0f): /* AUTDZB */ 7541 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 7542 goto do_unallocated; 7543 } else if (s->pauth_active) { 7544 tcg_rd = cpu_reg(s, rd); 7545 gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 7546 } 7547 break; 7548 case MAP(1, 0x01, 0x10): /* XPACI */ 7549 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 7550 goto do_unallocated; 7551 } else if (s->pauth_active) { 7552 tcg_rd = cpu_reg(s, rd); 7553 gen_helper_xpaci(tcg_rd, tcg_env, tcg_rd); 7554 } 7555 break; 7556 case MAP(1, 0x01, 0x11): /* XPACD */ 7557 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 7558 goto do_unallocated; 7559 } else if (s->pauth_active) { 7560 tcg_rd = cpu_reg(s, rd); 7561 gen_helper_xpacd(tcg_rd, tcg_env, tcg_rd); 7562 } 7563 break; 7564 default: 7565 do_unallocated: 7566 unallocated_encoding(s); 7567 break; 7568 } 7569 7570 #undef MAP 7571 } 7572 7573 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf, 7574 unsigned int rm, unsigned int rn, unsigned int rd) 7575 { 7576 TCGv_i64 tcg_n, tcg_m, tcg_rd; 7577 tcg_rd = cpu_reg(s, rd); 7578 7579 if (!sf && is_signed) { 7580 tcg_n = tcg_temp_new_i64(); 7581 tcg_m = tcg_temp_new_i64(); 7582 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn)); 
7583 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm)); 7584 } else { 7585 tcg_n = read_cpu_reg(s, rn, sf); 7586 tcg_m = read_cpu_reg(s, rm, sf); 7587 } 7588 7589 if (is_signed) { 7590 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 7591 } else { 7592 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 7593 } 7594 7595 if (!sf) { /* zero extend final result */ 7596 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7597 } 7598 } 7599 7600 /* LSLV, LSRV, ASRV, RORV */ 7601 static void handle_shift_reg(DisasContext *s, 7602 enum a64_shift_type shift_type, unsigned int sf, 7603 unsigned int rm, unsigned int rn, unsigned int rd) 7604 { 7605 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 7606 TCGv_i64 tcg_rd = cpu_reg(s, rd); 7607 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 7608 7609 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31); 7610 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift); 7611 } 7612 7613 /* CRC32[BHWX], CRC32C[BHWX] */ 7614 static void handle_crc32(DisasContext *s, 7615 unsigned int sf, unsigned int sz, bool crc32c, 7616 unsigned int rm, unsigned int rn, unsigned int rd) 7617 { 7618 TCGv_i64 tcg_acc, tcg_val; 7619 TCGv_i32 tcg_bytes; 7620 7621 if (!dc_isar_feature(aa64_crc32, s) 7622 || (sf == 1 && sz != 3) 7623 || (sf == 0 && sz == 3)) { 7624 unallocated_encoding(s); 7625 return; 7626 } 7627 7628 if (sz == 3) { 7629 tcg_val = cpu_reg(s, rm); 7630 } else { 7631 uint64_t mask; 7632 switch (sz) { 7633 case 0: 7634 mask = 0xFF; 7635 break; 7636 case 1: 7637 mask = 0xFFFF; 7638 break; 7639 case 2: 7640 mask = 0xFFFFFFFF; 7641 break; 7642 default: 7643 g_assert_not_reached(); 7644 } 7645 tcg_val = tcg_temp_new_i64(); 7646 tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask); 7647 } 7648 7649 tcg_acc = cpu_reg(s, rn); 7650 tcg_bytes = tcg_constant_i32(1 << sz); 7651 7652 if (crc32c) { 7653 gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 7654 } else { 7655 gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 7656 } 7657 } 7658 7659 /* Data-processing (2 source) 7660 * 31 30 29 28 21 20 16 15 10 9 5 4 0 7661 * +----+---+---+-----------------+------+--------+------+------+ 7662 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd | 7663 * +----+---+---+-----------------+------+--------+------+------+ 7664 */ 7665 static void disas_data_proc_2src(DisasContext *s, uint32_t insn) 7666 { 7667 unsigned int sf, rm, opcode, rn, rd, setflag; 7668 sf = extract32(insn, 31, 1); 7669 setflag = extract32(insn, 29, 1); 7670 rm = extract32(insn, 16, 5); 7671 opcode = extract32(insn, 10, 6); 7672 rn = extract32(insn, 5, 5); 7673 rd = extract32(insn, 0, 5); 7674 7675 if (setflag && opcode != 0) { 7676 unallocated_encoding(s); 7677 return; 7678 } 7679 7680 switch (opcode) { 7681 case 0: /* SUBP(S) */ 7682 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 7683 goto do_unallocated; 7684 } else { 7685 TCGv_i64 tcg_n, tcg_m, tcg_d; 7686 7687 tcg_n = read_cpu_reg_sp(s, rn, true); 7688 tcg_m = read_cpu_reg_sp(s, rm, true); 7689 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); 7690 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 7691 tcg_d = cpu_reg(s, rd); 7692 7693 if (setflag) { 7694 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 7695 } else { 7696 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 7697 } 7698 } 7699 break; 7700 case 2: /* UDIV */ 7701 handle_div(s, false, sf, rm, rn, rd); 7702 break; 7703 case 3: /* SDIV */ 7704 handle_div(s, true, sf, rm, rn, rd); 7705 break; 7706 case 4: /* IRG */ 7707 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 7708 goto do_unallocated; 7709 } 7710 if (s->ata[0]) { 7711 
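            /*
             * Allocation tags enabled: the helper picks a random tag
             * outside the exclusion mask formed from Xm and GCR_EL1.
             */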
gen_helper_irg(cpu_reg_sp(s, rd), tcg_env, 7712 cpu_reg_sp(s, rn), cpu_reg(s, rm)); 7713 } else { 7714 gen_address_with_allocation_tag0(cpu_reg_sp(s, rd), 7715 cpu_reg_sp(s, rn)); 7716 } 7717 break; 7718 case 5: /* GMI */ 7719 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 7720 goto do_unallocated; 7721 } else { 7722 TCGv_i64 t = tcg_temp_new_i64(); 7723 7724 tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4); 7725 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 7726 tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t); 7727 } 7728 break; 7729 case 8: /* LSLV */ 7730 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); 7731 break; 7732 case 9: /* LSRV */ 7733 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd); 7734 break; 7735 case 10: /* ASRV */ 7736 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd); 7737 break; 7738 case 11: /* RORV */ 7739 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd); 7740 break; 7741 case 12: /* PACGA */ 7742 if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) { 7743 goto do_unallocated; 7744 } 7745 gen_helper_pacga(cpu_reg(s, rd), tcg_env, 7746 cpu_reg(s, rn), cpu_reg_sp(s, rm)); 7747 break; 7748 case 16: 7749 case 17: 7750 case 18: 7751 case 19: 7752 case 20: 7753 case 21: 7754 case 22: 7755 case 23: /* CRC32 */ 7756 { 7757 int sz = extract32(opcode, 0, 2); 7758 bool crc32c = extract32(opcode, 2, 1); 7759 handle_crc32(s, sf, sz, crc32c, rm, rn, rd); 7760 break; 7761 } 7762 default: 7763 do_unallocated: 7764 unallocated_encoding(s); 7765 break; 7766 } 7767 } 7768 7769 /* 7770 * Data processing - register 7771 * 31 30 29 28 25 21 20 16 10 0 7772 * +--+---+--+---+-------+-----+-------+-------+---------+ 7773 * | |op0| |op1| 1 0 1 | op2 | | op3 | | 7774 * +--+---+--+---+-------+-----+-------+-------+---------+ 7775 */ 7776 static void disas_data_proc_reg(DisasContext *s, uint32_t insn) 7777 { 7778 int op0 = extract32(insn, 30, 1); 7779 int op1 = extract32(insn, 28, 1); 7780 int op2 = extract32(insn, 21, 4); 7781 int op3 = extract32(insn, 10, 6); 7782 7783 if (!op1) { 7784 if (op2 & 8) { 7785 if (op2 & 1) { 7786 /* Add/sub (extended register) */ 7787 disas_add_sub_ext_reg(s, insn); 7788 } else { 7789 /* Add/sub (shifted register) */ 7790 disas_add_sub_reg(s, insn); 7791 } 7792 } else { 7793 /* Logical (shifted register) */ 7794 disas_logic_reg(s, insn); 7795 } 7796 return; 7797 } 7798 7799 switch (op2) { 7800 case 0x0: 7801 switch (op3) { 7802 case 0x00: /* Add/subtract (with carry) */ 7803 disas_adc_sbc(s, insn); 7804 break; 7805 7806 case 0x01: /* Rotate right into flags */ 7807 case 0x21: 7808 disas_rotate_right_into_flags(s, insn); 7809 break; 7810 7811 case 0x02: /* Evaluate into flags */ 7812 case 0x12: 7813 case 0x22: 7814 case 0x32: 7815 disas_evaluate_into_flags(s, insn); 7816 break; 7817 7818 default: 7819 goto do_unallocated; 7820 } 7821 break; 7822 7823 case 0x2: /* Conditional compare */ 7824 disas_cc(s, insn); /* both imm and reg forms */ 7825 break; 7826 7827 case 0x4: /* Conditional select */ 7828 disas_cond_select(s, insn); 7829 break; 7830 7831 case 0x6: /* Data-processing */ 7832 if (op0) { /* (1 source) */ 7833 disas_data_proc_1src(s, insn); 7834 } else { /* (2 source) */ 7835 disas_data_proc_2src(s, insn); 7836 } 7837 break; 7838 case 0x8 ... 
0xf: /* (3 source) */ 7839 disas_data_proc_3src(s, insn); 7840 break; 7841 7842 default: 7843 do_unallocated: 7844 unallocated_encoding(s); 7845 break; 7846 } 7847 } 7848 7849 static void handle_fp_compare(DisasContext *s, int size, 7850 unsigned int rn, unsigned int rm, 7851 bool cmp_with_zero, bool signal_all_nans) 7852 { 7853 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 7854 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 7855 7856 if (size == MO_64) { 7857 TCGv_i64 tcg_vn, tcg_vm; 7858 7859 tcg_vn = read_fp_dreg(s, rn); 7860 if (cmp_with_zero) { 7861 tcg_vm = tcg_constant_i64(0); 7862 } else { 7863 tcg_vm = read_fp_dreg(s, rm); 7864 } 7865 if (signal_all_nans) { 7866 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7867 } else { 7868 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7869 } 7870 } else { 7871 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 7872 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 7873 7874 read_vec_element_i32(s, tcg_vn, rn, 0, size); 7875 if (cmp_with_zero) { 7876 tcg_gen_movi_i32(tcg_vm, 0); 7877 } else { 7878 read_vec_element_i32(s, tcg_vm, rm, 0, size); 7879 } 7880 7881 switch (size) { 7882 case MO_32: 7883 if (signal_all_nans) { 7884 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7885 } else { 7886 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7887 } 7888 break; 7889 case MO_16: 7890 if (signal_all_nans) { 7891 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7892 } else { 7893 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 7894 } 7895 break; 7896 default: 7897 g_assert_not_reached(); 7898 } 7899 } 7900 7901 gen_set_nzcv(tcg_flags); 7902 } 7903 7904 /* Floating point compare 7905 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0 7906 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 7907 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 | 7908 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 7909 */ 7910 static void disas_fp_compare(DisasContext *s, uint32_t insn) 7911 { 7912 unsigned int mos, type, rm, op, rn, opc, op2r; 7913 int size; 7914 7915 mos = extract32(insn, 29, 3); 7916 type = extract32(insn, 22, 2); 7917 rm = extract32(insn, 16, 5); 7918 op = extract32(insn, 14, 2); 7919 rn = extract32(insn, 5, 5); 7920 opc = extract32(insn, 3, 2); 7921 op2r = extract32(insn, 0, 3); 7922 7923 if (mos || op || op2r) { 7924 unallocated_encoding(s); 7925 return; 7926 } 7927 7928 switch (type) { 7929 case 0: 7930 size = MO_32; 7931 break; 7932 case 1: 7933 size = MO_64; 7934 break; 7935 case 3: 7936 size = MO_16; 7937 if (dc_isar_feature(aa64_fp16, s)) { 7938 break; 7939 } 7940 /* fallthru */ 7941 default: 7942 unallocated_encoding(s); 7943 return; 7944 } 7945 7946 if (!fp_access_check(s)) { 7947 return; 7948 } 7949 7950 handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2); 7951 } 7952 7953 /* Floating point conditional compare 7954 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 7955 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 7956 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv | 7957 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 7958 */ 7959 static void disas_fp_ccomp(DisasContext *s, uint32_t insn) 7960 { 7961 unsigned int mos, type, rm, cond, rn, op, nzcv; 7962 TCGLabel *label_continue = NULL; 7963 int size; 7964 7965 mos = extract32(insn, 29, 3); 7966 type = extract32(insn, 22, 2); 7967 rm = extract32(insn, 16, 5); 7968 cond 
= extract32(insn, 12, 4); 7969 rn = extract32(insn, 5, 5); 7970 op = extract32(insn, 4, 1); 7971 nzcv = extract32(insn, 0, 4); 7972 7973 if (mos) { 7974 unallocated_encoding(s); 7975 return; 7976 } 7977 7978 switch (type) { 7979 case 0: 7980 size = MO_32; 7981 break; 7982 case 1: 7983 size = MO_64; 7984 break; 7985 case 3: 7986 size = MO_16; 7987 if (dc_isar_feature(aa64_fp16, s)) { 7988 break; 7989 } 7990 /* fallthru */ 7991 default: 7992 unallocated_encoding(s); 7993 return; 7994 } 7995 7996 if (!fp_access_check(s)) { 7997 return; 7998 } 7999 8000 if (cond < 0x0e) { /* not always */ 8001 TCGLabel *label_match = gen_new_label(); 8002 label_continue = gen_new_label(); 8003 arm_gen_test_cc(cond, label_match); 8004 /* nomatch: */ 8005 gen_set_nzcv(tcg_constant_i64(nzcv << 28)); 8006 tcg_gen_br(label_continue); 8007 gen_set_label(label_match); 8008 } 8009 8010 handle_fp_compare(s, size, rn, rm, false, op); 8011 8012 if (cond < 0x0e) { 8013 gen_set_label(label_continue); 8014 } 8015 } 8016 8017 /* Floating-point data-processing (1 source) - half precision */ 8018 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) 8019 { 8020 TCGv_ptr fpst = NULL; 8021 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 8022 TCGv_i32 tcg_res = tcg_temp_new_i32(); 8023 8024 switch (opcode) { 8025 case 0x0: /* FMOV */ 8026 tcg_gen_mov_i32(tcg_res, tcg_op); 8027 break; 8028 case 0x1: /* FABS */ 8029 gen_vfp_absh(tcg_res, tcg_op); 8030 break; 8031 case 0x2: /* FNEG */ 8032 gen_vfp_negh(tcg_res, tcg_op); 8033 break; 8034 case 0x3: /* FSQRT */ 8035 fpst = fpstatus_ptr(FPST_FPCR_F16); 8036 gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); 8037 break; 8038 case 0x8: /* FRINTN */ 8039 case 0x9: /* FRINTP */ 8040 case 0xa: /* FRINTM */ 8041 case 0xb: /* FRINTZ */ 8042 case 0xc: /* FRINTA */ 8043 { 8044 TCGv_i32 tcg_rmode; 8045 8046 fpst = fpstatus_ptr(FPST_FPCR_F16); 8047 tcg_rmode = gen_set_rmode(opcode & 7, fpst); 8048 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 8049 gen_restore_rmode(tcg_rmode, fpst); 8050 break; 8051 } 8052 case 0xe: /* FRINTX */ 8053 fpst = fpstatus_ptr(FPST_FPCR_F16); 8054 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst); 8055 break; 8056 case 0xf: /* FRINTI */ 8057 fpst = fpstatus_ptr(FPST_FPCR_F16); 8058 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 8059 break; 8060 default: 8061 g_assert_not_reached(); 8062 } 8063 8064 write_fp_sreg(s, rd, tcg_res); 8065 } 8066 8067 /* Floating-point data-processing (1 source) - single precision */ 8068 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) 8069 { 8070 void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr); 8071 TCGv_i32 tcg_op, tcg_res; 8072 TCGv_ptr fpst; 8073 int rmode = -1; 8074 8075 tcg_op = read_fp_sreg(s, rn); 8076 tcg_res = tcg_temp_new_i32(); 8077 8078 switch (opcode) { 8079 case 0x0: /* FMOV */ 8080 tcg_gen_mov_i32(tcg_res, tcg_op); 8081 goto done; 8082 case 0x1: /* FABS */ 8083 gen_vfp_abss(tcg_res, tcg_op); 8084 goto done; 8085 case 0x2: /* FNEG */ 8086 gen_vfp_negs(tcg_res, tcg_op); 8087 goto done; 8088 case 0x3: /* FSQRT */ 8089 gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); 8090 goto done; 8091 case 0x6: /* BFCVT */ 8092 gen_fpst = gen_helper_bfcvt; 8093 break; 8094 case 0x8: /* FRINTN */ 8095 case 0x9: /* FRINTP */ 8096 case 0xa: /* FRINTM */ 8097 case 0xb: /* FRINTZ */ 8098 case 0xc: /* FRINTA */ 8099 rmode = opcode & 7; 8100 gen_fpst = gen_helper_rints; 8101 break; 8102 case 0xe: /* FRINTX */ 8103 gen_fpst = gen_helper_rints_exact; 8104 break; 8105 case 0xf: /* FRINTI */ 8106 gen_fpst = 
gen_helper_rints; 8107 break; 8108 case 0x10: /* FRINT32Z */ 8109 rmode = FPROUNDING_ZERO; 8110 gen_fpst = gen_helper_frint32_s; 8111 break; 8112 case 0x11: /* FRINT32X */ 8113 gen_fpst = gen_helper_frint32_s; 8114 break; 8115 case 0x12: /* FRINT64Z */ 8116 rmode = FPROUNDING_ZERO; 8117 gen_fpst = gen_helper_frint64_s; 8118 break; 8119 case 0x13: /* FRINT64X */ 8120 gen_fpst = gen_helper_frint64_s; 8121 break; 8122 default: 8123 g_assert_not_reached(); 8124 } 8125 8126 fpst = fpstatus_ptr(FPST_FPCR); 8127 if (rmode >= 0) { 8128 TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); 8129 gen_fpst(tcg_res, tcg_op, fpst); 8130 gen_restore_rmode(tcg_rmode, fpst); 8131 } else { 8132 gen_fpst(tcg_res, tcg_op, fpst); 8133 } 8134 8135 done: 8136 write_fp_sreg(s, rd, tcg_res); 8137 } 8138 8139 /* Floating-point data-processing (1 source) - double precision */ 8140 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) 8141 { 8142 void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr); 8143 TCGv_i64 tcg_op, tcg_res; 8144 TCGv_ptr fpst; 8145 int rmode = -1; 8146 8147 switch (opcode) { 8148 case 0x0: /* FMOV */ 8149 gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0); 8150 return; 8151 } 8152 8153 tcg_op = read_fp_dreg(s, rn); 8154 tcg_res = tcg_temp_new_i64(); 8155 8156 switch (opcode) { 8157 case 0x1: /* FABS */ 8158 gen_vfp_absd(tcg_res, tcg_op); 8159 goto done; 8160 case 0x2: /* FNEG */ 8161 gen_vfp_negd(tcg_res, tcg_op); 8162 goto done; 8163 case 0x3: /* FSQRT */ 8164 gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env); 8165 goto done; 8166 case 0x8: /* FRINTN */ 8167 case 0x9: /* FRINTP */ 8168 case 0xa: /* FRINTM */ 8169 case 0xb: /* FRINTZ */ 8170 case 0xc: /* FRINTA */ 8171 rmode = opcode & 7; 8172 gen_fpst = gen_helper_rintd; 8173 break; 8174 case 0xe: /* FRINTX */ 8175 gen_fpst = gen_helper_rintd_exact; 8176 break; 8177 case 0xf: /* FRINTI */ 8178 gen_fpst = gen_helper_rintd; 8179 break; 8180 case 0x10: /* FRINT32Z */ 8181 rmode = FPROUNDING_ZERO; 8182 gen_fpst = gen_helper_frint32_d; 8183 break; 8184 case 0x11: /* FRINT32X */ 8185 gen_fpst = gen_helper_frint32_d; 8186 break; 8187 case 0x12: /* FRINT64Z */ 8188 rmode = FPROUNDING_ZERO; 8189 gen_fpst = gen_helper_frint64_d; 8190 break; 8191 case 0x13: /* FRINT64X */ 8192 gen_fpst = gen_helper_frint64_d; 8193 break; 8194 default: 8195 g_assert_not_reached(); 8196 } 8197 8198 fpst = fpstatus_ptr(FPST_FPCR); 8199 if (rmode >= 0) { 8200 TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); 8201 gen_fpst(tcg_res, tcg_op, fpst); 8202 gen_restore_rmode(tcg_rmode, fpst); 8203 } else { 8204 gen_fpst(tcg_res, tcg_op, fpst); 8205 } 8206 8207 done: 8208 write_fp_dreg(s, rd, tcg_res); 8209 } 8210 8211 static void handle_fp_fcvt(DisasContext *s, int opcode, 8212 int rd, int rn, int dtype, int ntype) 8213 { 8214 switch (ntype) { 8215 case 0x0: 8216 { 8217 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 8218 if (dtype == 1) { 8219 /* Single to double */ 8220 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 8221 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env); 8222 write_fp_dreg(s, rd, tcg_rd); 8223 } else { 8224 /* Single to half */ 8225 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8226 TCGv_i32 ahp = get_ahp_flag(); 8227 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 8228 8229 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp); 8230 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 8231 write_fp_sreg(s, rd, tcg_rd); 8232 } 8233 break; 8234 } 8235 case 0x1: 8236 { 8237 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 8238 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8239 if (dtype == 
0) { 8240 /* Double to single */ 8241 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env); 8242 } else { 8243 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 8244 TCGv_i32 ahp = get_ahp_flag(); 8245 /* Double to half */ 8246 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 8247 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 8248 } 8249 write_fp_sreg(s, rd, tcg_rd); 8250 break; 8251 } 8252 case 0x3: 8253 { 8254 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 8255 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR); 8256 TCGv_i32 tcg_ahp = get_ahp_flag(); 8257 tcg_gen_ext16u_i32(tcg_rn, tcg_rn); 8258 if (dtype == 0) { 8259 /* Half to single */ 8260 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 8261 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 8262 write_fp_sreg(s, rd, tcg_rd); 8263 } else { 8264 /* Half to double */ 8265 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 8266 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 8267 write_fp_dreg(s, rd, tcg_rd); 8268 } 8269 break; 8270 } 8271 default: 8272 g_assert_not_reached(); 8273 } 8274 } 8275 8276 /* Floating point data-processing (1 source) 8277 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0 8278 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 8279 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd | 8280 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 8281 */ 8282 static void disas_fp_1src(DisasContext *s, uint32_t insn) 8283 { 8284 int mos = extract32(insn, 29, 3); 8285 int type = extract32(insn, 22, 2); 8286 int opcode = extract32(insn, 15, 6); 8287 int rn = extract32(insn, 5, 5); 8288 int rd = extract32(insn, 0, 5); 8289 8290 if (mos) { 8291 goto do_unallocated; 8292 } 8293 8294 switch (opcode) { 8295 case 0x4: case 0x5: case 0x7: 8296 { 8297 /* FCVT between half, single and double precision */ 8298 int dtype = extract32(opcode, 0, 2); 8299 if (type == 2 || dtype == type) { 8300 goto do_unallocated; 8301 } 8302 if (!fp_access_check(s)) { 8303 return; 8304 } 8305 8306 handle_fp_fcvt(s, opcode, rd, rn, dtype, type); 8307 break; 8308 } 8309 8310 case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */ 8311 if (type > 1 || !dc_isar_feature(aa64_frint, s)) { 8312 goto do_unallocated; 8313 } 8314 /* fall through */ 8315 case 0x0 ... 0x3: 8316 case 0x8 ... 0xc: 8317 case 0xe ... 
0xf: 8318 /* 32-to-32 and 64-to-64 ops */ 8319 switch (type) { 8320 case 0: 8321 if (!fp_access_check(s)) { 8322 return; 8323 } 8324 handle_fp_1src_single(s, opcode, rd, rn); 8325 break; 8326 case 1: 8327 if (!fp_access_check(s)) { 8328 return; 8329 } 8330 handle_fp_1src_double(s, opcode, rd, rn); 8331 break; 8332 case 3: 8333 if (!dc_isar_feature(aa64_fp16, s)) { 8334 goto do_unallocated; 8335 } 8336 8337 if (!fp_access_check(s)) { 8338 return; 8339 } 8340 handle_fp_1src_half(s, opcode, rd, rn); 8341 break; 8342 default: 8343 goto do_unallocated; 8344 } 8345 break; 8346 8347 case 0x6: 8348 switch (type) { 8349 case 1: /* BFCVT */ 8350 if (!dc_isar_feature(aa64_bf16, s)) { 8351 goto do_unallocated; 8352 } 8353 if (!fp_access_check(s)) { 8354 return; 8355 } 8356 handle_fp_1src_single(s, opcode, rd, rn); 8357 break; 8358 default: 8359 goto do_unallocated; 8360 } 8361 break; 8362 8363 default: 8364 do_unallocated: 8365 unallocated_encoding(s); 8366 break; 8367 } 8368 } 8369 8370 /* Floating point immediate 8371 * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0 8372 * +---+---+---+-----------+------+---+------------+-------+------+------+ 8373 * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd | 8374 * +---+---+---+-----------+------+---+------------+-------+------+------+ 8375 */ 8376 static void disas_fp_imm(DisasContext *s, uint32_t insn) 8377 { 8378 int rd = extract32(insn, 0, 5); 8379 int imm5 = extract32(insn, 5, 5); 8380 int imm8 = extract32(insn, 13, 8); 8381 int type = extract32(insn, 22, 2); 8382 int mos = extract32(insn, 29, 3); 8383 uint64_t imm; 8384 MemOp sz; 8385 8386 if (mos || imm5) { 8387 unallocated_encoding(s); 8388 return; 8389 } 8390 8391 switch (type) { 8392 case 0: 8393 sz = MO_32; 8394 break; 8395 case 1: 8396 sz = MO_64; 8397 break; 8398 case 3: 8399 sz = MO_16; 8400 if (dc_isar_feature(aa64_fp16, s)) { 8401 break; 8402 } 8403 /* fallthru */ 8404 default: 8405 unallocated_encoding(s); 8406 return; 8407 } 8408 8409 if (!fp_access_check(s)) { 8410 return; 8411 } 8412 8413 imm = vfp_expand_imm(sz, imm8); 8414 write_fp_dreg(s, rd, tcg_constant_i64(imm)); 8415 } 8416 8417 /* Handle floating point <=> fixed point conversions. Note that we can 8418 * also deal with fp <=> integer conversions as a special case (scale == 64) 8419 * OPTME: consider handling that special case specially or at least skipping 8420 * the call to scalbn in the helpers for zero shifts. 8421 */ 8422 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, 8423 bool itof, int rmode, int scale, int sf, int type) 8424 { 8425 bool is_signed = !(opcode & 1); 8426 TCGv_ptr tcg_fpstatus; 8427 TCGv_i32 tcg_shift, tcg_single; 8428 TCGv_i64 tcg_double; 8429 8430 tcg_fpstatus = fpstatus_ptr(type == 3 ? 
FPST_FPCR_F16 : FPST_FPCR); 8431 8432 tcg_shift = tcg_constant_i32(64 - scale); 8433 8434 if (itof) { 8435 TCGv_i64 tcg_int = cpu_reg(s, rn); 8436 if (!sf) { 8437 TCGv_i64 tcg_extend = tcg_temp_new_i64(); 8438 8439 if (is_signed) { 8440 tcg_gen_ext32s_i64(tcg_extend, tcg_int); 8441 } else { 8442 tcg_gen_ext32u_i64(tcg_extend, tcg_int); 8443 } 8444 8445 tcg_int = tcg_extend; 8446 } 8447 8448 switch (type) { 8449 case 1: /* float64 */ 8450 tcg_double = tcg_temp_new_i64(); 8451 if (is_signed) { 8452 gen_helper_vfp_sqtod(tcg_double, tcg_int, 8453 tcg_shift, tcg_fpstatus); 8454 } else { 8455 gen_helper_vfp_uqtod(tcg_double, tcg_int, 8456 tcg_shift, tcg_fpstatus); 8457 } 8458 write_fp_dreg(s, rd, tcg_double); 8459 break; 8460 8461 case 0: /* float32 */ 8462 tcg_single = tcg_temp_new_i32(); 8463 if (is_signed) { 8464 gen_helper_vfp_sqtos(tcg_single, tcg_int, 8465 tcg_shift, tcg_fpstatus); 8466 } else { 8467 gen_helper_vfp_uqtos(tcg_single, tcg_int, 8468 tcg_shift, tcg_fpstatus); 8469 } 8470 write_fp_sreg(s, rd, tcg_single); 8471 break; 8472 8473 case 3: /* float16 */ 8474 tcg_single = tcg_temp_new_i32(); 8475 if (is_signed) { 8476 gen_helper_vfp_sqtoh(tcg_single, tcg_int, 8477 tcg_shift, tcg_fpstatus); 8478 } else { 8479 gen_helper_vfp_uqtoh(tcg_single, tcg_int, 8480 tcg_shift, tcg_fpstatus); 8481 } 8482 write_fp_sreg(s, rd, tcg_single); 8483 break; 8484 8485 default: 8486 g_assert_not_reached(); 8487 } 8488 } else { 8489 TCGv_i64 tcg_int = cpu_reg(s, rd); 8490 TCGv_i32 tcg_rmode; 8491 8492 if (extract32(opcode, 2, 1)) { 8493 /* There are too many rounding modes to all fit into rmode, 8494 * so FCVTA[US] is a special case. 8495 */ 8496 rmode = FPROUNDING_TIEAWAY; 8497 } 8498 8499 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 8500 8501 switch (type) { 8502 case 1: /* float64 */ 8503 tcg_double = read_fp_dreg(s, rn); 8504 if (is_signed) { 8505 if (!sf) { 8506 gen_helper_vfp_tosld(tcg_int, tcg_double, 8507 tcg_shift, tcg_fpstatus); 8508 } else { 8509 gen_helper_vfp_tosqd(tcg_int, tcg_double, 8510 tcg_shift, tcg_fpstatus); 8511 } 8512 } else { 8513 if (!sf) { 8514 gen_helper_vfp_tould(tcg_int, tcg_double, 8515 tcg_shift, tcg_fpstatus); 8516 } else { 8517 gen_helper_vfp_touqd(tcg_int, tcg_double, 8518 tcg_shift, tcg_fpstatus); 8519 } 8520 } 8521 if (!sf) { 8522 tcg_gen_ext32u_i64(tcg_int, tcg_int); 8523 } 8524 break; 8525 8526 case 0: /* float32 */ 8527 tcg_single = read_fp_sreg(s, rn); 8528 if (sf) { 8529 if (is_signed) { 8530 gen_helper_vfp_tosqs(tcg_int, tcg_single, 8531 tcg_shift, tcg_fpstatus); 8532 } else { 8533 gen_helper_vfp_touqs(tcg_int, tcg_single, 8534 tcg_shift, tcg_fpstatus); 8535 } 8536 } else { 8537 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 8538 if (is_signed) { 8539 gen_helper_vfp_tosls(tcg_dest, tcg_single, 8540 tcg_shift, tcg_fpstatus); 8541 } else { 8542 gen_helper_vfp_touls(tcg_dest, tcg_single, 8543 tcg_shift, tcg_fpstatus); 8544 } 8545 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 8546 } 8547 break; 8548 8549 case 3: /* float16 */ 8550 tcg_single = read_fp_sreg(s, rn); 8551 if (sf) { 8552 if (is_signed) { 8553 gen_helper_vfp_tosqh(tcg_int, tcg_single, 8554 tcg_shift, tcg_fpstatus); 8555 } else { 8556 gen_helper_vfp_touqh(tcg_int, tcg_single, 8557 tcg_shift, tcg_fpstatus); 8558 } 8559 } else { 8560 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 8561 if (is_signed) { 8562 gen_helper_vfp_toslh(tcg_dest, tcg_single, 8563 tcg_shift, tcg_fpstatus); 8564 } else { 8565 gen_helper_vfp_toulh(tcg_dest, tcg_single, 8566 tcg_shift, tcg_fpstatus); 8567 } 8568 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 8569 
} 8570 break; 8571 8572 default: 8573 g_assert_not_reached(); 8574 } 8575 8576 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 8577 } 8578 } 8579 8580 /* Floating point <-> fixed point conversions 8581 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 8582 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 8583 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd | 8584 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 8585 */ 8586 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn) 8587 { 8588 int rd = extract32(insn, 0, 5); 8589 int rn = extract32(insn, 5, 5); 8590 int scale = extract32(insn, 10, 6); 8591 int opcode = extract32(insn, 16, 3); 8592 int rmode = extract32(insn, 19, 2); 8593 int type = extract32(insn, 22, 2); 8594 bool sbit = extract32(insn, 29, 1); 8595 bool sf = extract32(insn, 31, 1); 8596 bool itof; 8597 8598 if (sbit || (!sf && scale < 32)) { 8599 unallocated_encoding(s); 8600 return; 8601 } 8602 8603 switch (type) { 8604 case 0: /* float32 */ 8605 case 1: /* float64 */ 8606 break; 8607 case 3: /* float16 */ 8608 if (dc_isar_feature(aa64_fp16, s)) { 8609 break; 8610 } 8611 /* fallthru */ 8612 default: 8613 unallocated_encoding(s); 8614 return; 8615 } 8616 8617 switch ((rmode << 3) | opcode) { 8618 case 0x2: /* SCVTF */ 8619 case 0x3: /* UCVTF */ 8620 itof = true; 8621 break; 8622 case 0x18: /* FCVTZS */ 8623 case 0x19: /* FCVTZU */ 8624 itof = false; 8625 break; 8626 default: 8627 unallocated_encoding(s); 8628 return; 8629 } 8630 8631 if (!fp_access_check(s)) { 8632 return; 8633 } 8634 8635 handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type); 8636 } 8637 8638 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) 8639 { 8640 /* FMOV: gpr to or from float, double, or top half of quad fp reg, 8641 * without conversion. 8642 */ 8643 8644 if (itof) { 8645 TCGv_i64 tcg_rn = cpu_reg(s, rn); 8646 TCGv_i64 tmp; 8647 8648 switch (type) { 8649 case 0: 8650 /* 32 bit */ 8651 tmp = tcg_temp_new_i64(); 8652 tcg_gen_ext32u_i64(tmp, tcg_rn); 8653 write_fp_dreg(s, rd, tmp); 8654 break; 8655 case 1: 8656 /* 64 bit */ 8657 write_fp_dreg(s, rd, tcg_rn); 8658 break; 8659 case 2: 8660 /* 64 bit to top half. 
*/ 8661 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, rd)); 8662 clear_vec_high(s, true, rd); 8663 break; 8664 case 3: 8665 /* 16 bit */ 8666 tmp = tcg_temp_new_i64(); 8667 tcg_gen_ext16u_i64(tmp, tcg_rn); 8668 write_fp_dreg(s, rd, tmp); 8669 break; 8670 default: 8671 g_assert_not_reached(); 8672 } 8673 } else { 8674 TCGv_i64 tcg_rd = cpu_reg(s, rd); 8675 8676 switch (type) { 8677 case 0: 8678 /* 32 bit */ 8679 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_32)); 8680 break; 8681 case 1: 8682 /* 64 bit */ 8683 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_64)); 8684 break; 8685 case 2: 8686 /* 64 bits from top half */ 8687 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, rn)); 8688 break; 8689 case 3: 8690 /* 16 bit */ 8691 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_16)); 8692 break; 8693 default: 8694 g_assert_not_reached(); 8695 } 8696 } 8697 } 8698 8699 static void handle_fjcvtzs(DisasContext *s, int rd, int rn) 8700 { 8701 TCGv_i64 t = read_fp_dreg(s, rn); 8702 TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR); 8703 8704 gen_helper_fjcvtzs(t, t, fpstatus); 8705 8706 tcg_gen_ext32u_i64(cpu_reg(s, rd), t); 8707 tcg_gen_extrh_i64_i32(cpu_ZF, t); 8708 tcg_gen_movi_i32(cpu_CF, 0); 8709 tcg_gen_movi_i32(cpu_NF, 0); 8710 tcg_gen_movi_i32(cpu_VF, 0); 8711 } 8712 8713 /* Floating point <-> integer conversions 8714 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 8715 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 8716 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd | 8717 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 8718 */ 8719 static void disas_fp_int_conv(DisasContext *s, uint32_t insn) 8720 { 8721 int rd = extract32(insn, 0, 5); 8722 int rn = extract32(insn, 5, 5); 8723 int opcode = extract32(insn, 16, 3); 8724 int rmode = extract32(insn, 19, 2); 8725 int type = extract32(insn, 22, 2); 8726 bool sbit = extract32(insn, 29, 1); 8727 bool sf = extract32(insn, 31, 1); 8728 bool itof = false; 8729 8730 if (sbit) { 8731 goto do_unallocated; 8732 } 8733 8734 switch (opcode) { 8735 case 2: /* SCVTF */ 8736 case 3: /* UCVTF */ 8737 itof = true; 8738 /* fallthru */ 8739 case 4: /* FCVTAS */ 8740 case 5: /* FCVTAU */ 8741 if (rmode != 0) { 8742 goto do_unallocated; 8743 } 8744 /* fallthru */ 8745 case 0: /* FCVT[NPMZ]S */ 8746 case 1: /* FCVT[NPMZ]U */ 8747 switch (type) { 8748 case 0: /* float32 */ 8749 case 1: /* float64 */ 8750 break; 8751 case 3: /* float16 */ 8752 if (!dc_isar_feature(aa64_fp16, s)) { 8753 goto do_unallocated; 8754 } 8755 break; 8756 default: 8757 goto do_unallocated; 8758 } 8759 if (!fp_access_check(s)) { 8760 return; 8761 } 8762 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type); 8763 break; 8764 8765 default: 8766 switch (sf << 7 | type << 5 | rmode << 3 | opcode) { 8767 case 0b01100110: /* FMOV half <-> 32-bit int */ 8768 case 0b01100111: 8769 case 0b11100110: /* FMOV half <-> 64-bit int */ 8770 case 0b11100111: 8771 if (!dc_isar_feature(aa64_fp16, s)) { 8772 goto do_unallocated; 8773 } 8774 /* fallthru */ 8775 case 0b00000110: /* FMOV 32-bit */ 8776 case 0b00000111: 8777 case 0b10100110: /* FMOV 64-bit */ 8778 case 0b10100111: 8779 case 0b11001110: /* FMOV top half of 128-bit */ 8780 case 0b11001111: 8781 if (!fp_access_check(s)) { 8782 return; 8783 } 8784 itof = opcode & 1; 8785 handle_fmov(s, rd, rn, type, itof); 8786 break; 8787 8788 case 0b00111110: /* FJCVTZS */ 8789 if (!dc_isar_feature(aa64_jscvt, s)) { 8790 goto 
do_unallocated; 8791 } else if (fp_access_check(s)) { 8792 handle_fjcvtzs(s, rd, rn); 8793 } 8794 break; 8795 8796 default: 8797 do_unallocated: 8798 unallocated_encoding(s); 8799 return; 8800 } 8801 break; 8802 } 8803 } 8804 8805 /* FP-specific subcases of table C3-6 (SIMD and FP data processing) 8806 * 31 30 29 28 25 24 0 8807 * +---+---+---+---------+-----------------------------+ 8808 * | | 0 | | 1 1 1 1 | | 8809 * +---+---+---+---------+-----------------------------+ 8810 */ 8811 static void disas_data_proc_fp(DisasContext *s, uint32_t insn) 8812 { 8813 if (extract32(insn, 24, 1)) { 8814 unallocated_encoding(s); /* in decodetree */ 8815 } else if (extract32(insn, 21, 1) == 0) { 8816 /* Floating point to fixed point conversions */ 8817 disas_fp_fixed_conv(s, insn); 8818 } else { 8819 switch (extract32(insn, 10, 2)) { 8820 case 1: 8821 /* Floating point conditional compare */ 8822 disas_fp_ccomp(s, insn); 8823 break; 8824 case 2: 8825 /* Floating point data-processing (2 source) */ 8826 unallocated_encoding(s); /* in decodetree */ 8827 break; 8828 case 3: 8829 /* Floating point conditional select */ 8830 unallocated_encoding(s); /* in decodetree */ 8831 break; 8832 case 0: 8833 switch (ctz32(extract32(insn, 12, 4))) { 8834 case 0: /* [15:12] == xxx1 */ 8835 /* Floating point immediate */ 8836 disas_fp_imm(s, insn); 8837 break; 8838 case 1: /* [15:12] == xx10 */ 8839 /* Floating point compare */ 8840 disas_fp_compare(s, insn); 8841 break; 8842 case 2: /* [15:12] == x100 */ 8843 /* Floating point data-processing (1 source) */ 8844 disas_fp_1src(s, insn); 8845 break; 8846 case 3: /* [15:12] == 1000 */ 8847 unallocated_encoding(s); 8848 break; 8849 default: /* [15:12] == 0000 */ 8850 /* Floating point <-> integer conversions */ 8851 disas_fp_int_conv(s, insn); 8852 break; 8853 } 8854 break; 8855 } 8856 } 8857 } 8858 8859 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right, 8860 int pos) 8861 { 8862 /* Extract 64 bits from the middle of two concatenated 64 bit 8863 * vector register slices left:right. The extracted bits start 8864 * at 'pos' bits into the right (least significant) side. 8865 * We return the result in tcg_right, and guarantee not to 8866 * trash tcg_left. 8867 */ 8868 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8869 assert(pos > 0 && pos < 64); 8870 8871 tcg_gen_shri_i64(tcg_right, tcg_right, pos); 8872 tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos); 8873 tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp); 8874 } 8875 8876 /* EXT 8877 * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0 8878 * +---+---+-------------+-----+---+------+---+------+---+------+------+ 8879 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd | 8880 * +---+---+-------------+-----+---+------+---+------+---+------+------+ 8881 */ 8882 static void disas_simd_ext(DisasContext *s, uint32_t insn) 8883 { 8884 int is_q = extract32(insn, 30, 1); 8885 int op2 = extract32(insn, 22, 2); 8886 int imm4 = extract32(insn, 11, 4); 8887 int rm = extract32(insn, 16, 5); 8888 int rn = extract32(insn, 5, 5); 8889 int rd = extract32(insn, 0, 5); 8890 int pos = imm4 << 3; 8891 TCGv_i64 tcg_resl, tcg_resh; 8892 8893 if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) { 8894 unallocated_encoding(s); 8895 return; 8896 } 8897 8898 if (!fp_access_check(s)) { 8899 return; 8900 } 8901 8902 tcg_resh = tcg_temp_new_i64(); 8903 tcg_resl = tcg_temp_new_i64(); 8904 8905 /* Vd gets bits starting at pos bits into Vm:Vn. 
This is 8906 * either extracting 128 bits from a 128:128 concatenation, or 8907 * extracting 64 bits from a 64:64 concatenation. 8908 */ 8909 if (!is_q) { 8910 read_vec_element(s, tcg_resl, rn, 0, MO_64); 8911 if (pos != 0) { 8912 read_vec_element(s, tcg_resh, rm, 0, MO_64); 8913 do_ext64(s, tcg_resh, tcg_resl, pos); 8914 } 8915 } else { 8916 TCGv_i64 tcg_hh; 8917 typedef struct { 8918 int reg; 8919 int elt; 8920 } EltPosns; 8921 EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} }; 8922 EltPosns *elt = eltposns; 8923 8924 if (pos >= 64) { 8925 elt++; 8926 pos -= 64; 8927 } 8928 8929 read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64); 8930 elt++; 8931 read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64); 8932 elt++; 8933 if (pos != 0) { 8934 do_ext64(s, tcg_resh, tcg_resl, pos); 8935 tcg_hh = tcg_temp_new_i64(); 8936 read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64); 8937 do_ext64(s, tcg_hh, tcg_resh, pos); 8938 } 8939 } 8940 8941 write_vec_element(s, tcg_resl, rd, 0, MO_64); 8942 if (is_q) { 8943 write_vec_element(s, tcg_resh, rd, 1, MO_64); 8944 } 8945 clear_vec_high(s, is_q, rd); 8946 } 8947 8948 /* TBL/TBX 8949 * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0 8950 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ 8951 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd | 8952 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ 8953 */ 8954 static void disas_simd_tb(DisasContext *s, uint32_t insn) 8955 { 8956 int op2 = extract32(insn, 22, 2); 8957 int is_q = extract32(insn, 30, 1); 8958 int rm = extract32(insn, 16, 5); 8959 int rn = extract32(insn, 5, 5); 8960 int rd = extract32(insn, 0, 5); 8961 int is_tbx = extract32(insn, 12, 1); 8962 int len = (extract32(insn, 13, 2) + 1) * 16; 8963 8964 if (op2 != 0) { 8965 unallocated_encoding(s); 8966 return; 8967 } 8968 8969 if (!fp_access_check(s)) { 8970 return; 8971 } 8972 8973 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 8974 vec_full_reg_offset(s, rm), tcg_env, 8975 is_q ? 16 : 8, vec_full_reg_size(s), 8976 (len << 6) | (is_tbx << 5) | rn, 8977 gen_helper_simd_tblx); 8978 } 8979 8980 /* ZIP/UZP/TRN 8981 * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 8982 * +---+---+-------------+------+---+------+---+------------------+------+ 8983 * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd | 8984 * +---+---+-------------+------+---+------+---+------------------+------+ 8985 */ 8986 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) 8987 { 8988 int rd = extract32(insn, 0, 5); 8989 int rn = extract32(insn, 5, 5); 8990 int rm = extract32(insn, 16, 5); 8991 int size = extract32(insn, 22, 2); 8992 /* opc field bits [1:0] indicate ZIP/UZP/TRN; 8993 * bit 2 indicates 1 vs 2 variant of the insn. 8994 */ 8995 int opcode = extract32(insn, 12, 2); 8996 bool part = extract32(insn, 14, 1); 8997 bool is_q = extract32(insn, 30, 1); 8998 int esize = 8 << size; 8999 int i; 9000 int datasize = is_q ? 128 : 64; 9001 int elements = datasize / esize; 9002 TCGv_i64 tcg_res[2], tcg_ele; 9003 9004 if (opcode == 0 || (size == 3 && !is_q)) { 9005 unallocated_encoding(s); 9006 return; 9007 } 9008 9009 if (!fp_access_check(s)) { 9010 return; 9011 } 9012 9013 tcg_res[0] = tcg_temp_new_i64(); 9014 tcg_res[1] = is_q ? 
tcg_temp_new_i64() : NULL; 9015 tcg_ele = tcg_temp_new_i64(); 9016 9017 for (i = 0; i < elements; i++) { 9018 int o, w; 9019 9020 switch (opcode) { 9021 case 1: /* UZP1/2 */ 9022 { 9023 int midpoint = elements / 2; 9024 if (i < midpoint) { 9025 read_vec_element(s, tcg_ele, rn, 2 * i + part, size); 9026 } else { 9027 read_vec_element(s, tcg_ele, rm, 9028 2 * (i - midpoint) + part, size); 9029 } 9030 break; 9031 } 9032 case 2: /* TRN1/2 */ 9033 if (i & 1) { 9034 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size); 9035 } else { 9036 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size); 9037 } 9038 break; 9039 case 3: /* ZIP1/2 */ 9040 { 9041 int base = part * elements / 2; 9042 if (i & 1) { 9043 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size); 9044 } else { 9045 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size); 9046 } 9047 break; 9048 } 9049 default: 9050 g_assert_not_reached(); 9051 } 9052 9053 w = (i * esize) / 64; 9054 o = (i * esize) % 64; 9055 if (o == 0) { 9056 tcg_gen_mov_i64(tcg_res[w], tcg_ele); 9057 } else { 9058 tcg_gen_shli_i64(tcg_ele, tcg_ele, o); 9059 tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele); 9060 } 9061 } 9062 9063 for (i = 0; i <= is_q; ++i) { 9064 write_vec_element(s, tcg_res[i], rd, i, MO_64); 9065 } 9066 clear_vec_high(s, is_q, rd); 9067 } 9068 9069 /* 9070 * do_reduction_op helper 9071 * 9072 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 9073 * important for correct NaN propagation that we do these 9074 * operations in exactly the order specified by the pseudocode. 9075 * 9076 * This is a recursive function, TCG temps should be freed by the 9077 * calling function once it is done with the values. 9078 */ 9079 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn, 9080 int esize, int size, int vmap, TCGv_ptr fpst) 9081 { 9082 if (esize == size) { 9083 int element; 9084 MemOp msize = esize == 16 ? 
MO_16 : MO_32; 9085 TCGv_i32 tcg_elem; 9086 9087 /* We should have one register left here */ 9088 assert(ctpop8(vmap) == 1); 9089 element = ctz32(vmap); 9090 assert(element < 8); 9091 9092 tcg_elem = tcg_temp_new_i32(); 9093 read_vec_element_i32(s, tcg_elem, rn, element, msize); 9094 return tcg_elem; 9095 } else { 9096 int bits = size / 2; 9097 int shift = ctpop8(vmap) / 2; 9098 int vmap_lo = (vmap >> shift) & vmap; 9099 int vmap_hi = (vmap & ~vmap_lo); 9100 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 9101 9102 tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst); 9103 tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst); 9104 tcg_res = tcg_temp_new_i32(); 9105 9106 switch (fpopcode) { 9107 case 0x0c: /* fmaxnmv half-precision */ 9108 gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst); 9109 break; 9110 case 0x0f: /* fmaxv half-precision */ 9111 gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst); 9112 break; 9113 case 0x1c: /* fminnmv half-precision */ 9114 gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst); 9115 break; 9116 case 0x1f: /* fminv half-precision */ 9117 gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst); 9118 break; 9119 case 0x2c: /* fmaxnmv */ 9120 gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst); 9121 break; 9122 case 0x2f: /* fmaxv */ 9123 gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst); 9124 break; 9125 case 0x3c: /* fminnmv */ 9126 gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst); 9127 break; 9128 case 0x3f: /* fminv */ 9129 gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst); 9130 break; 9131 default: 9132 g_assert_not_reached(); 9133 } 9134 return tcg_res; 9135 } 9136 } 9137 9138 /* AdvSIMD across lanes 9139 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 9140 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 9141 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | 9142 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 9143 */ 9144 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) 9145 { 9146 int rd = extract32(insn, 0, 5); 9147 int rn = extract32(insn, 5, 5); 9148 int size = extract32(insn, 22, 2); 9149 int opcode = extract32(insn, 12, 5); 9150 bool is_q = extract32(insn, 30, 1); 9151 bool is_u = extract32(insn, 29, 1); 9152 bool is_fp = false; 9153 bool is_min = false; 9154 int esize; 9155 int elements; 9156 int i; 9157 TCGv_i64 tcg_res, tcg_elt; 9158 9159 switch (opcode) { 9160 case 0x1b: /* ADDV */ 9161 if (is_u) { 9162 unallocated_encoding(s); 9163 return; 9164 } 9165 /* fall through */ 9166 case 0x3: /* SADDLV, UADDLV */ 9167 case 0xa: /* SMAXV, UMAXV */ 9168 case 0x1a: /* SMINV, UMINV */ 9169 if (size == 3 || (size == 2 && !is_q)) { 9170 unallocated_encoding(s); 9171 return; 9172 } 9173 break; 9174 case 0xc: /* FMAXNMV, FMINNMV */ 9175 case 0xf: /* FMAXV, FMINV */ 9176 /* Bit 1 of size field encodes min vs max and the actual size 9177 * depends on the encoding of the U bit. If not set (and FP16 9178 * enabled) then we do half-precision float instead of single 9179 * precision. 
9180 */ 9181 is_min = extract32(size, 1, 1); 9182 is_fp = true; 9183 if (!is_u && dc_isar_feature(aa64_fp16, s)) { 9184 size = 1; 9185 } else if (!is_u || !is_q || extract32(size, 0, 1)) { 9186 unallocated_encoding(s); 9187 return; 9188 } else { 9189 size = 2; 9190 } 9191 break; 9192 default: 9193 unallocated_encoding(s); 9194 return; 9195 } 9196 9197 if (!fp_access_check(s)) { 9198 return; 9199 } 9200 9201 esize = 8 << size; 9202 elements = (is_q ? 128 : 64) / esize; 9203 9204 tcg_res = tcg_temp_new_i64(); 9205 tcg_elt = tcg_temp_new_i64(); 9206 9207 /* These instructions operate across all lanes of a vector 9208 * to produce a single result. We can guarantee that a 64 9209 * bit intermediate is sufficient: 9210 * + for [US]ADDLV the maximum element size is 32 bits, and 9211 * the result type is 64 bits 9212 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the 9213 * same as the element size, which is 32 bits at most 9214 * For the integer operations we can choose to work at 64 9215 * or 32 bits and truncate at the end; for simplicity 9216 * we use 64 bits always. The floating point 9217 * ops do require 32 bit intermediates, though. 9218 */ 9219 if (!is_fp) { 9220 read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN)); 9221 9222 for (i = 1; i < elements; i++) { 9223 read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN)); 9224 9225 switch (opcode) { 9226 case 0x03: /* SADDLV / UADDLV */ 9227 case 0x1b: /* ADDV */ 9228 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt); 9229 break; 9230 case 0x0a: /* SMAXV / UMAXV */ 9231 if (is_u) { 9232 tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt); 9233 } else { 9234 tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt); 9235 } 9236 break; 9237 case 0x1a: /* SMINV / UMINV */ 9238 if (is_u) { 9239 tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt); 9240 } else { 9241 tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt); 9242 } 9243 break; 9244 default: 9245 g_assert_not_reached(); 9246 } 9247 9248 } 9249 } else { 9250 /* Floating point vector reduction ops which work across 32 9251 * bit (single) or 16 bit (half-precision) intermediates. 9252 * Note that correct NaN propagation requires that we do these 9253 * operations in exactly the order specified by the pseudocode. 9254 */ 9255 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 9256 int fpopcode = opcode | is_min << 4 | is_u << 5; 9257 int vmap = (1 << elements) - 1; 9258 TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize, 9259 (is_q ? 
128 : 64), vmap, fpst); 9260 tcg_gen_extu_i32_i64(tcg_res, tcg_res32); 9261 } 9262 9263 /* Now truncate the result to the width required for the final output */ 9264 if (opcode == 0x03) { 9265 /* SADDLV, UADDLV: result is 2*esize */ 9266 size++; 9267 } 9268 9269 switch (size) { 9270 case 0: 9271 tcg_gen_ext8u_i64(tcg_res, tcg_res); 9272 break; 9273 case 1: 9274 tcg_gen_ext16u_i64(tcg_res, tcg_res); 9275 break; 9276 case 2: 9277 tcg_gen_ext32u_i64(tcg_res, tcg_res); 9278 break; 9279 case 3: 9280 break; 9281 default: 9282 g_assert_not_reached(); 9283 } 9284 9285 write_fp_dreg(s, rd, tcg_res); 9286 } 9287 9288 /* AdvSIMD modified immediate 9289 * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0 9290 * +---+---+----+---------------------+-----+-------+----+---+-------+------+ 9291 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd | 9292 * +---+---+----+---------------------+-----+-------+----+---+-------+------+ 9293 * 9294 * There are a number of operations that can be carried out here: 9295 * MOVI - move (shifted) imm into register 9296 * MVNI - move inverted (shifted) imm into register 9297 * ORR - bitwise OR of (shifted) imm with register 9298 * BIC - bitwise clear of (shifted) imm with register 9299 * With ARMv8.2 we also have: 9300 * FMOV half-precision 9301 */ 9302 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) 9303 { 9304 int rd = extract32(insn, 0, 5); 9305 int cmode = extract32(insn, 12, 4); 9306 int o2 = extract32(insn, 11, 1); 9307 uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5); 9308 bool is_neg = extract32(insn, 29, 1); 9309 bool is_q = extract32(insn, 30, 1); 9310 uint64_t imm = 0; 9311 9312 if (o2) { 9313 if (cmode != 0xf || is_neg) { 9314 unallocated_encoding(s); 9315 return; 9316 } 9317 /* FMOV (vector, immediate) - half-precision */ 9318 if (!dc_isar_feature(aa64_fp16, s)) { 9319 unallocated_encoding(s); 9320 return; 9321 } 9322 imm = vfp_expand_imm(MO_16, abcdefgh); 9323 /* now duplicate across the lanes */ 9324 imm = dup_const(MO_16, imm); 9325 } else { 9326 if (cmode == 0xf && is_neg && !is_q) { 9327 unallocated_encoding(s); 9328 return; 9329 } 9330 imm = asimd_imm_const(abcdefgh, cmode, is_neg); 9331 } 9332 9333 if (!fp_access_check(s)) { 9334 return; 9335 } 9336 9337 if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) { 9338 /* MOVI or MVNI, with MVNI negation handled above. */ 9339 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8, 9340 vec_full_reg_size(s), imm); 9341 } else { 9342 /* ORR or BIC, with BIC negation to AND handled above. */ 9343 if (is_neg) { 9344 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64); 9345 } else { 9346 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64); 9347 } 9348 } 9349 } 9350 9351 /* 9352 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate) 9353 * 9354 * This handles the common shift logic and is used by both 9355 * the vector and scalar code.
9356 */ 9357 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src, 9358 TCGv_i64 tcg_rnd, bool accumulate, 9359 bool is_u, int size, int shift) 9360 { 9361 bool extended_result = false; 9362 bool round = tcg_rnd != NULL; 9363 int ext_lshift = 0; 9364 TCGv_i64 tcg_src_hi; 9365 9366 if (round && size == 3) { 9367 extended_result = true; 9368 ext_lshift = 64 - shift; 9369 tcg_src_hi = tcg_temp_new_i64(); 9370 } else if (shift == 64) { 9371 if (!accumulate && is_u) { 9372 /* result is zero */ 9373 tcg_gen_movi_i64(tcg_res, 0); 9374 return; 9375 } 9376 } 9377 9378 /* Deal with the rounding step */ 9379 if (round) { 9380 if (extended_result) { 9381 TCGv_i64 tcg_zero = tcg_constant_i64(0); 9382 if (!is_u) { 9383 /* take care of sign extending tcg_res */ 9384 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63); 9385 tcg_gen_add2_i64(tcg_src, tcg_src_hi, 9386 tcg_src, tcg_src_hi, 9387 tcg_rnd, tcg_zero); 9388 } else { 9389 tcg_gen_add2_i64(tcg_src, tcg_src_hi, 9390 tcg_src, tcg_zero, 9391 tcg_rnd, tcg_zero); 9392 } 9393 } else { 9394 tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd); 9395 } 9396 } 9397 9398 /* Now do the shift right */ 9399 if (round && extended_result) { 9400 /* extended case, >64 bit precision required */ 9401 if (ext_lshift == 0) { 9402 /* special case, only high bits matter */ 9403 tcg_gen_mov_i64(tcg_src, tcg_src_hi); 9404 } else { 9405 tcg_gen_shri_i64(tcg_src, tcg_src, shift); 9406 tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift); 9407 tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi); 9408 } 9409 } else { 9410 if (is_u) { 9411 if (shift == 64) { 9412 /* essentially shifting in 64 zeros */ 9413 tcg_gen_movi_i64(tcg_src, 0); 9414 } else { 9415 tcg_gen_shri_i64(tcg_src, tcg_src, shift); 9416 } 9417 } else { 9418 if (shift == 64) { 9419 /* effectively extending the sign-bit */ 9420 tcg_gen_sari_i64(tcg_src, tcg_src, 63); 9421 } else { 9422 tcg_gen_sari_i64(tcg_src, tcg_src, shift); 9423 } 9424 } 9425 } 9426 9427 if (accumulate) { 9428 tcg_gen_add_i64(tcg_res, tcg_res, tcg_src); 9429 } else { 9430 tcg_gen_mov_i64(tcg_res, tcg_src); 9431 } 9432 } 9433 9434 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ 9435 static void handle_scalar_simd_shri(DisasContext *s, 9436 bool is_u, int immh, int immb, 9437 int opcode, int rn, int rd) 9438 { 9439 const int size = 3; 9440 int immhb = immh << 3 | immb; 9441 int shift = 2 * (8 << size) - immhb; 9442 bool accumulate = false; 9443 bool round = false; 9444 bool insert = false; 9445 TCGv_i64 tcg_rn; 9446 TCGv_i64 tcg_rd; 9447 TCGv_i64 tcg_round; 9448 9449 if (!extract32(immh, 3, 1)) { 9450 unallocated_encoding(s); 9451 return; 9452 } 9453 9454 if (!fp_access_check(s)) { 9455 return; 9456 } 9457 9458 switch (opcode) { 9459 case 0x02: /* SSRA / USRA (accumulate) */ 9460 accumulate = true; 9461 break; 9462 case 0x04: /* SRSHR / URSHR (rounding) */ 9463 round = true; 9464 break; 9465 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 9466 accumulate = round = true; 9467 break; 9468 case 0x08: /* SRI */ 9469 insert = true; 9470 break; 9471 } 9472 9473 if (round) { 9474 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 9475 } else { 9476 tcg_round = NULL; 9477 } 9478 9479 tcg_rn = read_fp_dreg(s, rn); 9480 tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); 9481 9482 if (insert) { 9483 /* shift count same as element size is valid but does nothing; 9484 * special case to avoid potential shift by 64. 
9485 */ 9486 int esize = 8 << size; 9487 if (shift != esize) { 9488 tcg_gen_shri_i64(tcg_rn, tcg_rn, shift); 9489 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift); 9490 } 9491 } else { 9492 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 9493 accumulate, is_u, size, shift); 9494 } 9495 9496 write_fp_dreg(s, rd, tcg_rd); 9497 } 9498 9499 /* SHL/SLI - Scalar shift left */ 9500 static void handle_scalar_simd_shli(DisasContext *s, bool insert, 9501 int immh, int immb, int opcode, 9502 int rn, int rd) 9503 { 9504 int size = 32 - clz32(immh) - 1; 9505 int immhb = immh << 3 | immb; 9506 int shift = immhb - (8 << size); 9507 TCGv_i64 tcg_rn; 9508 TCGv_i64 tcg_rd; 9509 9510 if (!extract32(immh, 3, 1)) { 9511 unallocated_encoding(s); 9512 return; 9513 } 9514 9515 if (!fp_access_check(s)) { 9516 return; 9517 } 9518 9519 tcg_rn = read_fp_dreg(s, rn); 9520 tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); 9521 9522 if (insert) { 9523 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift); 9524 } else { 9525 tcg_gen_shli_i64(tcg_rd, tcg_rn, shift); 9526 } 9527 9528 write_fp_dreg(s, rd, tcg_rd); 9529 } 9530 9531 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with 9532 * (signed/unsigned) narrowing */ 9533 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, 9534 bool is_u_shift, bool is_u_narrow, 9535 int immh, int immb, int opcode, 9536 int rn, int rd) 9537 { 9538 int immhb = immh << 3 | immb; 9539 int size = 32 - clz32(immh) - 1; 9540 int esize = 8 << size; 9541 int shift = (2 * esize) - immhb; 9542 int elements = is_scalar ? 1 : (64 / esize); 9543 bool round = extract32(opcode, 0, 1); 9544 MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN); 9545 TCGv_i64 tcg_rn, tcg_rd, tcg_round; 9546 TCGv_i32 tcg_rd_narrowed; 9547 TCGv_i64 tcg_final; 9548 9549 static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = { 9550 { gen_helper_neon_narrow_sat_s8, 9551 gen_helper_neon_unarrow_sat8 }, 9552 { gen_helper_neon_narrow_sat_s16, 9553 gen_helper_neon_unarrow_sat16 }, 9554 { gen_helper_neon_narrow_sat_s32, 9555 gen_helper_neon_unarrow_sat32 }, 9556 { NULL, NULL }, 9557 }; 9558 static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = { 9559 gen_helper_neon_narrow_sat_u8, 9560 gen_helper_neon_narrow_sat_u16, 9561 gen_helper_neon_narrow_sat_u32, 9562 NULL 9563 }; 9564 NeonGenNarrowEnvFn *narrowfn; 9565 9566 int i; 9567 9568 assert(size < 4); 9569 9570 if (extract32(immh, 3, 1)) { 9571 unallocated_encoding(s); 9572 return; 9573 } 9574 9575 if (!fp_access_check(s)) { 9576 return; 9577 } 9578 9579 if (is_u_shift) { 9580 narrowfn = unsigned_narrow_fns[size]; 9581 } else { 9582 narrowfn = signed_narrow_fns[size][is_u_narrow ? 
1 : 0]; 9583 } 9584 9585 tcg_rn = tcg_temp_new_i64(); 9586 tcg_rd = tcg_temp_new_i64(); 9587 tcg_rd_narrowed = tcg_temp_new_i32(); 9588 tcg_final = tcg_temp_new_i64(); 9589 9590 if (round) { 9591 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 9592 } else { 9593 tcg_round = NULL; 9594 } 9595 9596 for (i = 0; i < elements; i++) { 9597 read_vec_element(s, tcg_rn, rn, i, ldop); 9598 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 9599 false, is_u_shift, size+1, shift); 9600 narrowfn(tcg_rd_narrowed, tcg_env, tcg_rd); 9601 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed); 9602 if (i == 0) { 9603 tcg_gen_extract_i64(tcg_final, tcg_rd, 0, esize); 9604 } else { 9605 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); 9606 } 9607 } 9608 9609 if (!is_q) { 9610 write_vec_element(s, tcg_final, rd, 0, MO_64); 9611 } else { 9612 write_vec_element(s, tcg_final, rd, 1, MO_64); 9613 } 9614 clear_vec_high(s, is_q, rd); 9615 } 9616 9617 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */ 9618 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, 9619 bool src_unsigned, bool dst_unsigned, 9620 int immh, int immb, int rn, int rd) 9621 { 9622 int immhb = immh << 3 | immb; 9623 int size = 32 - clz32(immh) - 1; 9624 int shift = immhb - (8 << size); 9625 int pass; 9626 9627 assert(immh != 0); 9628 assert(!(scalar && is_q)); 9629 9630 if (!scalar) { 9631 if (!is_q && extract32(immh, 3, 1)) { 9632 unallocated_encoding(s); 9633 return; 9634 } 9635 9636 /* Since we use the variable-shift helpers we must 9637 * replicate the shift count into each element of 9638 * the tcg_shift value. 9639 */ 9640 switch (size) { 9641 case 0: 9642 shift |= shift << 8; 9643 /* fall through */ 9644 case 1: 9645 shift |= shift << 16; 9646 break; 9647 case 2: 9648 case 3: 9649 break; 9650 default: 9651 g_assert_not_reached(); 9652 } 9653 } 9654 9655 if (!fp_access_check(s)) { 9656 return; 9657 } 9658 9659 if (size == 3) { 9660 TCGv_i64 tcg_shift = tcg_constant_i64(shift); 9661 static NeonGenTwo64OpEnvFn * const fns[2][2] = { 9662 { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 }, 9663 { NULL, gen_helper_neon_qshl_u64 }, 9664 }; 9665 NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned]; 9666 int maxpass = is_q ? 2 : 1; 9667 9668 for (pass = 0; pass < maxpass; pass++) { 9669 TCGv_i64 tcg_op = tcg_temp_new_i64(); 9670 9671 read_vec_element(s, tcg_op, rn, pass, MO_64); 9672 genfn(tcg_op, tcg_env, tcg_op, tcg_shift); 9673 write_vec_element(s, tcg_op, rd, pass, MO_64); 9674 } 9675 clear_vec_high(s, is_q, rd); 9676 } else { 9677 TCGv_i32 tcg_shift = tcg_constant_i32(shift); 9678 static NeonGenTwoOpEnvFn * const fns[2][2][3] = { 9679 { 9680 { gen_helper_neon_qshl_s8, 9681 gen_helper_neon_qshl_s16, 9682 gen_helper_neon_qshl_s32 }, 9683 { gen_helper_neon_qshlu_s8, 9684 gen_helper_neon_qshlu_s16, 9685 gen_helper_neon_qshlu_s32 } 9686 }, { 9687 { NULL, NULL, NULL }, 9688 { gen_helper_neon_qshl_u8, 9689 gen_helper_neon_qshl_u16, 9690 gen_helper_neon_qshl_u32 } 9691 } 9692 }; 9693 NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size]; 9694 MemOp memop = scalar ? size : MO_32; 9695 int maxpass = scalar ? 1 : is_q ? 
4 : 2; 9696 9697 for (pass = 0; pass < maxpass; pass++) { 9698 TCGv_i32 tcg_op = tcg_temp_new_i32(); 9699 9700 read_vec_element_i32(s, tcg_op, rn, pass, memop); 9701 genfn(tcg_op, tcg_env, tcg_op, tcg_shift); 9702 if (scalar) { 9703 switch (size) { 9704 case 0: 9705 tcg_gen_ext8u_i32(tcg_op, tcg_op); 9706 break; 9707 case 1: 9708 tcg_gen_ext16u_i32(tcg_op, tcg_op); 9709 break; 9710 case 2: 9711 break; 9712 default: 9713 g_assert_not_reached(); 9714 } 9715 write_fp_sreg(s, rd, tcg_op); 9716 } else { 9717 write_vec_element_i32(s, tcg_op, rd, pass, MO_32); 9718 } 9719 } 9720 9721 if (!scalar) { 9722 clear_vec_high(s, is_q, rd); 9723 } 9724 } 9725 } 9726 9727 /* Common vector code for handling integer to FP conversion */ 9728 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, 9729 int elements, int is_signed, 9730 int fracbits, int size) 9731 { 9732 TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 9733 TCGv_i32 tcg_shift = NULL; 9734 9735 MemOp mop = size | (is_signed ? MO_SIGN : 0); 9736 int pass; 9737 9738 if (fracbits || size == MO_64) { 9739 tcg_shift = tcg_constant_i32(fracbits); 9740 } 9741 9742 if (size == MO_64) { 9743 TCGv_i64 tcg_int64 = tcg_temp_new_i64(); 9744 TCGv_i64 tcg_double = tcg_temp_new_i64(); 9745 9746 for (pass = 0; pass < elements; pass++) { 9747 read_vec_element(s, tcg_int64, rn, pass, mop); 9748 9749 if (is_signed) { 9750 gen_helper_vfp_sqtod(tcg_double, tcg_int64, 9751 tcg_shift, tcg_fpst); 9752 } else { 9753 gen_helper_vfp_uqtod(tcg_double, tcg_int64, 9754 tcg_shift, tcg_fpst); 9755 } 9756 if (elements == 1) { 9757 write_fp_dreg(s, rd, tcg_double); 9758 } else { 9759 write_vec_element(s, tcg_double, rd, pass, MO_64); 9760 } 9761 } 9762 } else { 9763 TCGv_i32 tcg_int32 = tcg_temp_new_i32(); 9764 TCGv_i32 tcg_float = tcg_temp_new_i32(); 9765 9766 for (pass = 0; pass < elements; pass++) { 9767 read_vec_element_i32(s, tcg_int32, rn, pass, mop); 9768 9769 switch (size) { 9770 case MO_32: 9771 if (fracbits) { 9772 if (is_signed) { 9773 gen_helper_vfp_sltos(tcg_float, tcg_int32, 9774 tcg_shift, tcg_fpst); 9775 } else { 9776 gen_helper_vfp_ultos(tcg_float, tcg_int32, 9777 tcg_shift, tcg_fpst); 9778 } 9779 } else { 9780 if (is_signed) { 9781 gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst); 9782 } else { 9783 gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst); 9784 } 9785 } 9786 break; 9787 case MO_16: 9788 if (fracbits) { 9789 if (is_signed) { 9790 gen_helper_vfp_sltoh(tcg_float, tcg_int32, 9791 tcg_shift, tcg_fpst); 9792 } else { 9793 gen_helper_vfp_ultoh(tcg_float, tcg_int32, 9794 tcg_shift, tcg_fpst); 9795 } 9796 } else { 9797 if (is_signed) { 9798 gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst); 9799 } else { 9800 gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst); 9801 } 9802 } 9803 break; 9804 default: 9805 g_assert_not_reached(); 9806 } 9807 9808 if (elements == 1) { 9809 write_fp_sreg(s, rd, tcg_float); 9810 } else { 9811 write_vec_element_i32(s, tcg_float, rd, pass, size); 9812 } 9813 } 9814 } 9815 9816 clear_vec_high(s, elements << size == 16, rd); 9817 } 9818 9819 /* UCVTF/SCVTF - Integer to FP conversion */ 9820 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, 9821 bool is_q, bool is_u, 9822 int immh, int immb, int opcode, 9823 int rn, int rd) 9824 { 9825 int size, elements, fracbits; 9826 int immhb = immh << 3 | immb; 9827 9828 if (immh & 8) { 9829 size = MO_64; 9830 if (!is_scalar && !is_q) { 9831 unallocated_encoding(s); 9832 return; 9833 } 9834 } else if (immh & 4) { 9835 size 
= MO_32; 9836 } else if (immh & 2) { 9837 size = MO_16; 9838 if (!dc_isar_feature(aa64_fp16, s)) { 9839 unallocated_encoding(s); 9840 return; 9841 } 9842 } else { 9843 /* immh == 0 would be a failure of the decode logic */ 9844 g_assert(immh == 1); 9845 unallocated_encoding(s); 9846 return; 9847 } 9848 9849 if (is_scalar) { 9850 elements = 1; 9851 } else { 9852 elements = (8 << is_q) >> size; 9853 } 9854 fracbits = (16 << size) - immhb; 9855 9856 if (!fp_access_check(s)) { 9857 return; 9858 } 9859 9860 handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size); 9861 } 9862 9863 /* FCVTZS, FCVTZU - FP to fixed-point conversion */ 9864 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, 9865 bool is_q, bool is_u, 9866 int immh, int immb, int rn, int rd) 9867 { 9868 int immhb = immh << 3 | immb; 9869 int pass, size, fracbits; 9870 TCGv_ptr tcg_fpstatus; 9871 TCGv_i32 tcg_rmode, tcg_shift; 9872 9873 if (immh & 0x8) { 9874 size = MO_64; 9875 if (!is_scalar && !is_q) { 9876 unallocated_encoding(s); 9877 return; 9878 } 9879 } else if (immh & 0x4) { 9880 size = MO_32; 9881 } else if (immh & 0x2) { 9882 size = MO_16; 9883 if (!dc_isar_feature(aa64_fp16, s)) { 9884 unallocated_encoding(s); 9885 return; 9886 } 9887 } else { 9888 /* Should have split out AdvSIMD modified immediate earlier. */ 9889 assert(immh == 1); 9890 unallocated_encoding(s); 9891 return; 9892 } 9893 9894 if (!fp_access_check(s)) { 9895 return; 9896 } 9897 9898 assert(!(is_scalar && is_q)); 9899 9900 tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 9901 tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus); 9902 fracbits = (16 << size) - immhb; 9903 tcg_shift = tcg_constant_i32(fracbits); 9904 9905 if (size == MO_64) { 9906 int maxpass = is_scalar ? 1 : 2; 9907 9908 for (pass = 0; pass < maxpass; pass++) { 9909 TCGv_i64 tcg_op = tcg_temp_new_i64(); 9910 9911 read_vec_element(s, tcg_op, rn, pass, MO_64); 9912 if (is_u) { 9913 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 9914 } else { 9915 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 9916 } 9917 write_vec_element(s, tcg_op, rd, pass, MO_64); 9918 } 9919 clear_vec_high(s, is_q, rd); 9920 } else { 9921 void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 9922 int maxpass = is_scalar ?
1 : ((8 << is_q) >> size); 9923 9924 switch (size) { 9925 case MO_16: 9926 if (is_u) { 9927 fn = gen_helper_vfp_touhh; 9928 } else { 9929 fn = gen_helper_vfp_toshh; 9930 } 9931 break; 9932 case MO_32: 9933 if (is_u) { 9934 fn = gen_helper_vfp_touls; 9935 } else { 9936 fn = gen_helper_vfp_tosls; 9937 } 9938 break; 9939 default: 9940 g_assert_not_reached(); 9941 } 9942 9943 for (pass = 0; pass < maxpass; pass++) { 9944 TCGv_i32 tcg_op = tcg_temp_new_i32(); 9945 9946 read_vec_element_i32(s, tcg_op, rn, pass, size); 9947 fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 9948 if (is_scalar) { 9949 if (size == MO_16 && !is_u) { 9950 tcg_gen_ext16u_i32(tcg_op, tcg_op); 9951 } 9952 write_fp_sreg(s, rd, tcg_op); 9953 } else { 9954 write_vec_element_i32(s, tcg_op, rd, pass, size); 9955 } 9956 } 9957 if (!is_scalar) { 9958 clear_vec_high(s, is_q, rd); 9959 } 9960 } 9961 9962 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 9963 } 9964 9965 /* AdvSIMD scalar shift by immediate 9966 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 9967 * +-----+---+-------------+------+------+--------+---+------+------+ 9968 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | 9969 * +-----+---+-------------+------+------+--------+---+------+------+ 9970 * 9971 * This is the scalar version so it works on a fixed size register 9972 */ 9973 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) 9974 { 9975 int rd = extract32(insn, 0, 5); 9976 int rn = extract32(insn, 5, 5); 9977 int opcode = extract32(insn, 11, 5); 9978 int immb = extract32(insn, 16, 3); 9979 int immh = extract32(insn, 19, 4); 9980 bool is_u = extract32(insn, 29, 1); 9981 9982 if (immh == 0) { 9983 unallocated_encoding(s); 9984 return; 9985 } 9986 9987 switch (opcode) { 9988 case 0x08: /* SRI */ 9989 if (!is_u) { 9990 unallocated_encoding(s); 9991 return; 9992 } 9993 /* fall through */ 9994 case 0x00: /* SSHR / USHR */ 9995 case 0x02: /* SSRA / USRA */ 9996 case 0x04: /* SRSHR / URSHR */ 9997 case 0x06: /* SRSRA / URSRA */ 9998 handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd); 9999 break; 10000 case 0x0a: /* SHL / SLI */ 10001 handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd); 10002 break; 10003 case 0x1c: /* SCVTF, UCVTF */ 10004 handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb, 10005 opcode, rn, rd); 10006 break; 10007 case 0x10: /* SQSHRUN, SQSHRUN2 */ 10008 case 0x11: /* SQRSHRUN, SQRSHRUN2 */ 10009 if (!is_u) { 10010 unallocated_encoding(s); 10011 return; 10012 } 10013 handle_vec_simd_sqshrn(s, true, false, false, true, 10014 immh, immb, opcode, rn, rd); 10015 break; 10016 case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */ 10017 case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */ 10018 handle_vec_simd_sqshrn(s, true, false, is_u, is_u, 10019 immh, immb, opcode, rn, rd); 10020 break; 10021 case 0xc: /* SQSHLU */ 10022 if (!is_u) { 10023 unallocated_encoding(s); 10024 return; 10025 } 10026 handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd); 10027 break; 10028 case 0xe: /* SQSHL, UQSHL */ 10029 handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd); 10030 break; 10031 case 0x1f: /* FCVTZS, FCVTZU */ 10032 handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd); 10033 break; 10034 default: 10035 unallocated_encoding(s); 10036 break; 10037 } 10038 } 10039 10040 static void handle_2misc_64(DisasContext *s, int opcode, bool u, 10041 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, 10042 TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus) 10043 { 10044 /* Handle 64->64 opcodes which are shared
between the scalar and 10045 * vector 2-reg-misc groups. We cover every integer opcode where size == 3 10046 * is valid in either group and also the double-precision fp ops. 10047 * The caller only need provide tcg_rmode and tcg_fpstatus if the op 10048 * requires them. 10049 */ 10050 TCGCond cond; 10051 10052 switch (opcode) { 10053 case 0x4: /* CLS, CLZ */ 10054 if (u) { 10055 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 10056 } else { 10057 tcg_gen_clrsb_i64(tcg_rd, tcg_rn); 10058 } 10059 break; 10060 case 0x5: /* NOT */ 10061 /* This opcode is shared with CNT and RBIT but we have earlier 10062 * enforced that size == 3 if and only if this is the NOT insn. 10063 */ 10064 tcg_gen_not_i64(tcg_rd, tcg_rn); 10065 break; 10066 case 0x7: /* SQABS, SQNEG */ 10067 if (u) { 10068 gen_helper_neon_qneg_s64(tcg_rd, tcg_env, tcg_rn); 10069 } else { 10070 gen_helper_neon_qabs_s64(tcg_rd, tcg_env, tcg_rn); 10071 } 10072 break; 10073 case 0xa: /* CMLT */ 10074 cond = TCG_COND_LT; 10075 do_cmop: 10076 /* 64 bit integer comparison against zero, result is test ? -1 : 0. */ 10077 tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0)); 10078 break; 10079 case 0x8: /* CMGT, CMGE */ 10080 cond = u ? TCG_COND_GE : TCG_COND_GT; 10081 goto do_cmop; 10082 case 0x9: /* CMEQ, CMLE */ 10083 cond = u ? TCG_COND_LE : TCG_COND_EQ; 10084 goto do_cmop; 10085 case 0xb: /* ABS, NEG */ 10086 if (u) { 10087 tcg_gen_neg_i64(tcg_rd, tcg_rn); 10088 } else { 10089 tcg_gen_abs_i64(tcg_rd, tcg_rn); 10090 } 10091 break; 10092 case 0x2f: /* FABS */ 10093 gen_vfp_absd(tcg_rd, tcg_rn); 10094 break; 10095 case 0x6f: /* FNEG */ 10096 gen_vfp_negd(tcg_rd, tcg_rn); 10097 break; 10098 case 0x7f: /* FSQRT */ 10099 gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env); 10100 break; 10101 case 0x1a: /* FCVTNS */ 10102 case 0x1b: /* FCVTMS */ 10103 case 0x1c: /* FCVTAS */ 10104 case 0x3a: /* FCVTPS */ 10105 case 0x3b: /* FCVTZS */ 10106 gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 10107 break; 10108 case 0x5a: /* FCVTNU */ 10109 case 0x5b: /* FCVTMU */ 10110 case 0x5c: /* FCVTAU */ 10111 case 0x7a: /* FCVTPU */ 10112 case 0x7b: /* FCVTZU */ 10113 gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 10114 break; 10115 case 0x18: /* FRINTN */ 10116 case 0x19: /* FRINTM */ 10117 case 0x38: /* FRINTP */ 10118 case 0x39: /* FRINTZ */ 10119 case 0x58: /* FRINTA */ 10120 case 0x79: /* FRINTI */ 10121 gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus); 10122 break; 10123 case 0x59: /* FRINTX */ 10124 gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus); 10125 break; 10126 case 0x1e: /* FRINT32Z */ 10127 case 0x5e: /* FRINT32X */ 10128 gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus); 10129 break; 10130 case 0x1f: /* FRINT64Z */ 10131 case 0x5f: /* FRINT64X */ 10132 gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus); 10133 break; 10134 default: 10135 g_assert_not_reached(); 10136 } 10137 } 10138 10139 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, 10140 bool is_scalar, bool is_u, bool is_q, 10141 int size, int rn, int rd) 10142 { 10143 bool is_double = (size == MO_64); 10144 TCGv_ptr fpst; 10145 10146 if (!fp_access_check(s)) { 10147 return; 10148 } 10149 10150 fpst = fpstatus_ptr(size == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 10151 10152 if (is_double) { 10153 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10154 TCGv_i64 tcg_zero = tcg_constant_i64(0); 10155 TCGv_i64 tcg_res = tcg_temp_new_i64(); 10156 NeonGenTwoDoubleOpFn *genfn; 10157 bool swap = false; 10158 int pass; 10159 10160 switch (opcode) { 10161 case 0x2e: /* FCMLT (zero) */ 10162 swap = true; 10163 /* fallthrough */ 10164 case 0x2c: /* FCMGT (zero) */ 10165 genfn = gen_helper_neon_cgt_f64; 10166 break; 10167 case 0x2d: /* FCMEQ (zero) */ 10168 genfn = gen_helper_neon_ceq_f64; 10169 break; 10170 case 0x6d: /* FCMLE (zero) */ 10171 swap = true; 10172 /* fall through */ 10173 case 0x6c: /* FCMGE (zero) */ 10174 genfn = gen_helper_neon_cge_f64; 10175 break; 10176 default: 10177 g_assert_not_reached(); 10178 } 10179 10180 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 10181 read_vec_element(s, tcg_op, rn, pass, MO_64); 10182 if (swap) { 10183 genfn(tcg_res, tcg_zero, tcg_op, fpst); 10184 } else { 10185 genfn(tcg_res, tcg_op, tcg_zero, fpst); 10186 } 10187 write_vec_element(s, tcg_res, rd, pass, MO_64); 10188 } 10189 10190 clear_vec_high(s, !is_scalar, rd); 10191 } else { 10192 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10193 TCGv_i32 tcg_zero = tcg_constant_i32(0); 10194 TCGv_i32 tcg_res = tcg_temp_new_i32(); 10195 NeonGenTwoSingleOpFn *genfn; 10196 bool swap = false; 10197 int pass, maxpasses; 10198 10199 if (size == MO_16) { 10200 switch (opcode) { 10201 case 0x2e: /* FCMLT (zero) */ 10202 swap = true; 10203 /* fall through */ 10204 case 0x2c: /* FCMGT (zero) */ 10205 genfn = gen_helper_advsimd_cgt_f16; 10206 break; 10207 case 0x2d: /* FCMEQ (zero) */ 10208 genfn = gen_helper_advsimd_ceq_f16; 10209 break; 10210 case 0x6d: /* FCMLE (zero) */ 10211 swap = true; 10212 /* fall through */ 10213 case 0x6c: /* FCMGE (zero) */ 10214 genfn = gen_helper_advsimd_cge_f16; 10215 break; 10216 default: 10217 g_assert_not_reached(); 10218 } 10219 } else { 10220 switch (opcode) { 10221 case 0x2e: /* FCMLT (zero) */ 10222 swap = true; 10223 /* fall through */ 10224 case 0x2c: /* FCMGT (zero) */ 10225 genfn = gen_helper_neon_cgt_f32; 10226 break; 10227 case 0x2d: /* FCMEQ (zero) */ 10228 genfn = gen_helper_neon_ceq_f32; 10229 break; 10230 case 0x6d: /* FCMLE (zero) */ 10231 swap = true; 10232 /* fall through */ 10233 case 0x6c: /* FCMGE (zero) */ 10234 genfn = gen_helper_neon_cge_f32; 10235 break; 10236 default: 10237 g_assert_not_reached(); 10238 } 10239 } 10240 10241 if (is_scalar) { 10242 maxpasses = 1; 10243 } else { 10244 int vector_size = 8 << is_q; 10245 maxpasses = vector_size >> size; 10246 } 10247 10248 for (pass = 0; pass < maxpasses; pass++) { 10249 read_vec_element_i32(s, tcg_op, rn, pass, size); 10250 if (swap) { 10251 genfn(tcg_res, tcg_zero, tcg_op, fpst); 10252 } else { 10253 genfn(tcg_res, tcg_op, tcg_zero, fpst); 10254 } 10255 if (is_scalar) { 10256 write_fp_sreg(s, rd, tcg_res); 10257 } else { 10258 write_vec_element_i32(s, tcg_res, rd, pass, size); 10259 } 10260 } 10261 10262 if (!is_scalar) { 10263 clear_vec_high(s, is_q, rd); 10264 } 10265 } 10266 } 10267 10268 static void handle_2misc_reciprocal(DisasContext *s, int opcode, 10269 bool is_scalar, bool is_u, bool is_q, 10270 int size, int rn, int rd) 10271 { 10272 bool is_double = (size == 3); 10273 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10274 10275 if (is_double) { 10276 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10277 TCGv_i64 tcg_res = tcg_temp_new_i64(); 10278 int pass; 10279 10280 for (pass = 0; pass < (is_scalar ? 
1 : 2); pass++) { 10281 read_vec_element(s, tcg_op, rn, pass, MO_64); 10282 switch (opcode) { 10283 case 0x3d: /* FRECPE */ 10284 gen_helper_recpe_f64(tcg_res, tcg_op, fpst); 10285 break; 10286 case 0x3f: /* FRECPX */ 10287 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst); 10288 break; 10289 case 0x7d: /* FRSQRTE */ 10290 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst); 10291 break; 10292 default: 10293 g_assert_not_reached(); 10294 } 10295 write_vec_element(s, tcg_res, rd, pass, MO_64); 10296 } 10297 clear_vec_high(s, !is_scalar, rd); 10298 } else { 10299 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10300 TCGv_i32 tcg_res = tcg_temp_new_i32(); 10301 int pass, maxpasses; 10302 10303 if (is_scalar) { 10304 maxpasses = 1; 10305 } else { 10306 maxpasses = is_q ? 4 : 2; 10307 } 10308 10309 for (pass = 0; pass < maxpasses; pass++) { 10310 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 10311 10312 switch (opcode) { 10313 case 0x3c: /* URECPE */ 10314 gen_helper_recpe_u32(tcg_res, tcg_op); 10315 break; 10316 case 0x3d: /* FRECPE */ 10317 gen_helper_recpe_f32(tcg_res, tcg_op, fpst); 10318 break; 10319 case 0x3f: /* FRECPX */ 10320 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst); 10321 break; 10322 case 0x7d: /* FRSQRTE */ 10323 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst); 10324 break; 10325 default: 10326 g_assert_not_reached(); 10327 } 10328 10329 if (is_scalar) { 10330 write_fp_sreg(s, rd, tcg_res); 10331 } else { 10332 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 10333 } 10334 } 10335 if (!is_scalar) { 10336 clear_vec_high(s, is_q, rd); 10337 } 10338 } 10339 } 10340 10341 static void handle_2misc_narrow(DisasContext *s, bool scalar, 10342 int opcode, bool u, bool is_q, 10343 int size, int rn, int rd) 10344 { 10345 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element 10346 * in the source becomes a size element in the destination). 10347 */ 10348 int pass; 10349 TCGv_i32 tcg_res[2]; 10350 int destelt = is_q ? 2 : 0; 10351 int passes = scalar ? 
1 : 2; 10352 10353 if (scalar) { 10354 tcg_res[1] = tcg_constant_i32(0); 10355 } 10356 10357 for (pass = 0; pass < passes; pass++) { 10358 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10359 NeonGenNarrowFn *genfn = NULL; 10360 NeonGenNarrowEnvFn *genenvfn = NULL; 10361 10362 if (scalar) { 10363 read_vec_element(s, tcg_op, rn, pass, size + 1); 10364 } else { 10365 read_vec_element(s, tcg_op, rn, pass, MO_64); 10366 } 10367 tcg_res[pass] = tcg_temp_new_i32(); 10368 10369 switch (opcode) { 10370 case 0x12: /* XTN, SQXTUN */ 10371 { 10372 static NeonGenNarrowFn * const xtnfns[3] = { 10373 gen_helper_neon_narrow_u8, 10374 gen_helper_neon_narrow_u16, 10375 tcg_gen_extrl_i64_i32, 10376 }; 10377 static NeonGenNarrowEnvFn * const sqxtunfns[3] = { 10378 gen_helper_neon_unarrow_sat8, 10379 gen_helper_neon_unarrow_sat16, 10380 gen_helper_neon_unarrow_sat32, 10381 }; 10382 if (u) { 10383 genenvfn = sqxtunfns[size]; 10384 } else { 10385 genfn = xtnfns[size]; 10386 } 10387 break; 10388 } 10389 case 0x14: /* SQXTN, UQXTN */ 10390 { 10391 static NeonGenNarrowEnvFn * const fns[3][2] = { 10392 { gen_helper_neon_narrow_sat_s8, 10393 gen_helper_neon_narrow_sat_u8 }, 10394 { gen_helper_neon_narrow_sat_s16, 10395 gen_helper_neon_narrow_sat_u16 }, 10396 { gen_helper_neon_narrow_sat_s32, 10397 gen_helper_neon_narrow_sat_u32 }, 10398 }; 10399 genenvfn = fns[size][u]; 10400 break; 10401 } 10402 case 0x16: /* FCVTN, FCVTN2 */ 10403 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ 10404 if (size == 2) { 10405 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, tcg_env); 10406 } else { 10407 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 10408 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 10409 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10410 TCGv_i32 ahp = get_ahp_flag(); 10411 10412 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op); 10413 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 10414 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 10415 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16); 10416 } 10417 break; 10418 case 0x36: /* BFCVTN, BFCVTN2 */ 10419 { 10420 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10421 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst); 10422 } 10423 break; 10424 case 0x56: /* FCVTXN, FCVTXN2 */ 10425 /* 64 bit to 32 bit float conversion 10426 * with von Neumann rounding (round to odd) 10427 */ 10428 assert(size == 2); 10429 gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, tcg_env); 10430 break; 10431 default: 10432 g_assert_not_reached(); 10433 } 10434 10435 if (genfn) { 10436 genfn(tcg_res[pass], tcg_op); 10437 } else if (genenvfn) { 10438 genenvfn(tcg_res[pass], tcg_env, tcg_op); 10439 } 10440 } 10441 10442 for (pass = 0; pass < 2; pass++) { 10443 write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32); 10444 } 10445 clear_vec_high(s, is_q, rd); 10446 } 10447 10448 /* AdvSIMD scalar two reg misc 10449 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 10450 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 10451 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 10452 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 10453 */ 10454 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) 10455 { 10456 int rd = extract32(insn, 0, 5); 10457 int rn = extract32(insn, 5, 5); 10458 int opcode = extract32(insn, 12, 5); 10459 int size = extract32(insn, 22, 2); 10460 bool u = extract32(insn, 29, 1); 10461 bool is_fcvt = false; 10462 int rmode; 10463 TCGv_i32 tcg_rmode; 10464 TCGv_ptr tcg_fpstatus; 
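/*
 * Added illustrative note: for the floating-point opcodes handled in the
 * 0xc..0xf, 0x16..0x1d and 0x1f groups below, U and size[1] are folded
 * into bits 6 and 5 of 'opcode' and size[0] then selects single vs double
 * precision.  Worked example, assuming the usual FPROUNDING_* encoding:
 * FCVTPS on a double (u = 0, size = 0b11, opcode = 0x1a) becomes
 * opcode = 0x3a, size = 3, and the rounding mode is recovered as
 * rmode = opcode[5] | (opcode[0] << 1) = FPROUNDING_POSINF.
 */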
10465 10466 switch (opcode) { 10467 case 0x7: /* SQABS / SQNEG */ 10468 break; 10469 case 0xa: /* CMLT */ 10470 if (u) { 10471 unallocated_encoding(s); 10472 return; 10473 } 10474 /* fall through */ 10475 case 0x8: /* CMGT, CMGE */ 10476 case 0x9: /* CMEQ, CMLE */ 10477 case 0xb: /* ABS, NEG */ 10478 if (size != 3) { 10479 unallocated_encoding(s); 10480 return; 10481 } 10482 break; 10483 case 0x12: /* SQXTUN */ 10484 if (!u) { 10485 unallocated_encoding(s); 10486 return; 10487 } 10488 /* fall through */ 10489 case 0x14: /* SQXTN, UQXTN */ 10490 if (size == 3) { 10491 unallocated_encoding(s); 10492 return; 10493 } 10494 if (!fp_access_check(s)) { 10495 return; 10496 } 10497 handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd); 10498 return; 10499 case 0xc ... 0xf: 10500 case 0x16 ... 0x1d: 10501 case 0x1f: 10502 /* Floating point: U, size[1] and opcode indicate operation; 10503 * size[0] indicates single or double precision. 10504 */ 10505 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 10506 size = extract32(size, 0, 1) ? 3 : 2; 10507 switch (opcode) { 10508 case 0x2c: /* FCMGT (zero) */ 10509 case 0x2d: /* FCMEQ (zero) */ 10510 case 0x2e: /* FCMLT (zero) */ 10511 case 0x6c: /* FCMGE (zero) */ 10512 case 0x6d: /* FCMLE (zero) */ 10513 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd); 10514 return; 10515 case 0x1d: /* SCVTF */ 10516 case 0x5d: /* UCVTF */ 10517 { 10518 bool is_signed = (opcode == 0x1d); 10519 if (!fp_access_check(s)) { 10520 return; 10521 } 10522 handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size); 10523 return; 10524 } 10525 case 0x3d: /* FRECPE */ 10526 case 0x3f: /* FRECPX */ 10527 case 0x7d: /* FRSQRTE */ 10528 if (!fp_access_check(s)) { 10529 return; 10530 } 10531 handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd); 10532 return; 10533 case 0x1a: /* FCVTNS */ 10534 case 0x1b: /* FCVTMS */ 10535 case 0x3a: /* FCVTPS */ 10536 case 0x3b: /* FCVTZS */ 10537 case 0x5a: /* FCVTNU */ 10538 case 0x5b: /* FCVTMU */ 10539 case 0x7a: /* FCVTPU */ 10540 case 0x7b: /* FCVTZU */ 10541 is_fcvt = true; 10542 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 10543 break; 10544 case 0x1c: /* FCVTAS */ 10545 case 0x5c: /* FCVTAU */ 10546 /* TIEAWAY doesn't fit in the usual rounding mode encoding */ 10547 is_fcvt = true; 10548 rmode = FPROUNDING_TIEAWAY; 10549 break; 10550 case 0x56: /* FCVTXN, FCVTXN2 */ 10551 if (size == 2) { 10552 unallocated_encoding(s); 10553 return; 10554 } 10555 if (!fp_access_check(s)) { 10556 return; 10557 } 10558 handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd); 10559 return; 10560 default: 10561 unallocated_encoding(s); 10562 return; 10563 } 10564 break; 10565 default: 10566 case 0x3: /* USQADD / SUQADD */ 10567 unallocated_encoding(s); 10568 return; 10569 } 10570 10571 if (!fp_access_check(s)) { 10572 return; 10573 } 10574 10575 if (is_fcvt) { 10576 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 10577 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 10578 } else { 10579 tcg_fpstatus = NULL; 10580 tcg_rmode = NULL; 10581 } 10582 10583 if (size == 3) { 10584 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 10585 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 10586 10587 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus); 10588 write_fp_dreg(s, rd, tcg_rd); 10589 } else { 10590 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 10591 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 10592 10593 read_vec_element_i32(s, tcg_rn, rn, 0, size); 10594 10595 switch (opcode) { 10596 case 0x7: /* SQABS, SQNEG */ 10597 { 10598 
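/*
 * Added note: the narrow scalar SQABS/SQNEG cases use the saturating
 * Neon env helpers (hence the tcg_env argument) so that the cumulative
 * saturation flag QC can be set when the operation saturates.
 */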
NeonGenOneOpEnvFn *genfn; 10599 static NeonGenOneOpEnvFn * const fns[3][2] = { 10600 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 10601 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 10602 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, 10603 }; 10604 genfn = fns[size][u]; 10605 genfn(tcg_rd, tcg_env, tcg_rn); 10606 break; 10607 } 10608 case 0x1a: /* FCVTNS */ 10609 case 0x1b: /* FCVTMS */ 10610 case 0x1c: /* FCVTAS */ 10611 case 0x3a: /* FCVTPS */ 10612 case 0x3b: /* FCVTZS */ 10613 gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0), 10614 tcg_fpstatus); 10615 break; 10616 case 0x5a: /* FCVTNU */ 10617 case 0x5b: /* FCVTMU */ 10618 case 0x5c: /* FCVTAU */ 10619 case 0x7a: /* FCVTPU */ 10620 case 0x7b: /* FCVTZU */ 10621 gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0), 10622 tcg_fpstatus); 10623 break; 10624 default: 10625 g_assert_not_reached(); 10626 } 10627 10628 write_fp_sreg(s, rd, tcg_rd); 10629 } 10630 10631 if (is_fcvt) { 10632 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 10633 } 10634 } 10635 10636 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */ 10637 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, 10638 int immh, int immb, int opcode, int rn, int rd) 10639 { 10640 int size = 32 - clz32(immh) - 1; 10641 int immhb = immh << 3 | immb; 10642 int shift = 2 * (8 << size) - immhb; 10643 GVecGen2iFn *gvec_fn; 10644 10645 if (extract32(immh, 3, 1) && !is_q) { 10646 unallocated_encoding(s); 10647 return; 10648 } 10649 tcg_debug_assert(size <= 3); 10650 10651 if (!fp_access_check(s)) { 10652 return; 10653 } 10654 10655 switch (opcode) { 10656 case 0x02: /* SSRA / USRA (accumulate) */ 10657 gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra; 10658 break; 10659 10660 case 0x08: /* SRI */ 10661 gvec_fn = gen_gvec_sri; 10662 break; 10663 10664 case 0x00: /* SSHR / USHR */ 10665 if (is_u) { 10666 if (shift == 8 << size) { 10667 /* Shift count the same size as element size produces zero. */ 10668 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd), 10669 is_q ? 16 : 8, vec_full_reg_size(s), 0); 10670 return; 10671 } 10672 gvec_fn = tcg_gen_gvec_shri; 10673 } else { 10674 /* Shift count the same size as element size produces all sign. */ 10675 if (shift == 8 << size) { 10676 shift -= 1; 10677 } 10678 gvec_fn = tcg_gen_gvec_sari; 10679 } 10680 break; 10681 10682 case 0x04: /* SRSHR / URSHR (rounding) */ 10683 gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr; 10684 break; 10685 10686 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 10687 gvec_fn = is_u ? 
gen_gvec_ursra : gen_gvec_srsra; 10688 break; 10689 10690 default: 10691 g_assert_not_reached(); 10692 } 10693 10694 gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size); 10695 } 10696 10697 /* SHL/SLI - Vector shift left */ 10698 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, 10699 int immh, int immb, int opcode, int rn, int rd) 10700 { 10701 int size = 32 - clz32(immh) - 1; 10702 int immhb = immh << 3 | immb; 10703 int shift = immhb - (8 << size); 10704 10705 /* Range of size is limited by decode: immh is a non-zero 4 bit field */ 10706 assert(size >= 0 && size <= 3); 10707 10708 if (extract32(immh, 3, 1) && !is_q) { 10709 unallocated_encoding(s); 10710 return; 10711 } 10712 10713 if (!fp_access_check(s)) { 10714 return; 10715 } 10716 10717 if (insert) { 10718 gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size); 10719 } else { 10720 gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size); 10721 } 10722 } 10723 10724 /* USHLL/SHLL - Vector shift left with widening */ 10725 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, 10726 int immh, int immb, int opcode, int rn, int rd) 10727 { 10728 int size = 32 - clz32(immh) - 1; 10729 int immhb = immh << 3 | immb; 10730 int shift = immhb - (8 << size); 10731 int dsize = 64; 10732 int esize = 8 << size; 10733 int elements = dsize/esize; 10734 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 10735 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 10736 int i; 10737 10738 if (size >= 3) { 10739 unallocated_encoding(s); 10740 return; 10741 } 10742 10743 if (!fp_access_check(s)) { 10744 return; 10745 } 10746 10747 /* For the LL variants the store is larger than the load, 10748 * so if rd == rn we would overwrite parts of our input. 10749 * So load everything right now and use shifts in the main loop. 10750 */ 10751 read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64); 10752 10753 for (i = 0; i < elements; i++) { 10754 tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize); 10755 ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0); 10756 tcg_gen_shli_i64(tcg_rd, tcg_rd, shift); 10757 write_vec_element(s, tcg_rd, rd, i, size + 1); 10758 } 10759 clear_vec_high(s, true, rd); 10760 } 10761 10762 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */ 10763 static void handle_vec_simd_shrn(DisasContext *s, bool is_q, 10764 int immh, int immb, int opcode, int rn, int rd) 10765 { 10766 int immhb = immh << 3 | immb; 10767 int size = 32 - clz32(immh) - 1; 10768 int dsize = 64; 10769 int esize = 8 << size; 10770 int elements = dsize/esize; 10771 int shift = (2 * esize) - immhb; 10772 bool round = extract32(opcode, 0, 1); 10773 TCGv_i64 tcg_rn, tcg_rd, tcg_final; 10774 TCGv_i64 tcg_round; 10775 int i; 10776 10777 if (extract32(immh, 3, 1)) { 10778 unallocated_encoding(s); 10779 return; 10780 } 10781 10782 if (!fp_access_check(s)) { 10783 return; 10784 } 10785 10786 tcg_rn = tcg_temp_new_i64(); 10787 tcg_rd = tcg_temp_new_i64(); 10788 tcg_final = tcg_temp_new_i64(); 10789 read_vec_element(s, tcg_final, rd, is_q ? 
1 : 0, MO_64); 10790 10791 if (round) { 10792 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 10793 } else { 10794 tcg_round = NULL; 10795 } 10796 10797 for (i = 0; i < elements; i++) { 10798 read_vec_element(s, tcg_rn, rn, i, size+1); 10799 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 10800 false, true, size+1, shift); 10801 10802 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); 10803 } 10804 10805 if (!is_q) { 10806 write_vec_element(s, tcg_final, rd, 0, MO_64); 10807 } else { 10808 write_vec_element(s, tcg_final, rd, 1, MO_64); 10809 } 10810 10811 clear_vec_high(s, is_q, rd); 10812 } 10813 10814 10815 /* AdvSIMD shift by immediate 10816 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 10817 * +---+---+---+-------------+------+------+--------+---+------+------+ 10818 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | 10819 * +---+---+---+-------------+------+------+--------+---+------+------+ 10820 */ 10821 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) 10822 { 10823 int rd = extract32(insn, 0, 5); 10824 int rn = extract32(insn, 5, 5); 10825 int opcode = extract32(insn, 11, 5); 10826 int immb = extract32(insn, 16, 3); 10827 int immh = extract32(insn, 19, 4); 10828 bool is_u = extract32(insn, 29, 1); 10829 bool is_q = extract32(insn, 30, 1); 10830 10831 /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */ 10832 assert(immh != 0); 10833 10834 switch (opcode) { 10835 case 0x08: /* SRI */ 10836 if (!is_u) { 10837 unallocated_encoding(s); 10838 return; 10839 } 10840 /* fall through */ 10841 case 0x00: /* SSHR / USHR */ 10842 case 0x02: /* SSRA / USRA (accumulate) */ 10843 case 0x04: /* SRSHR / URSHR (rounding) */ 10844 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 10845 handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd); 10846 break; 10847 case 0x0a: /* SHL / SLI */ 10848 handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd); 10849 break; 10850 case 0x10: /* SHRN */ 10851 case 0x11: /* RSHRN / SQRSHRUN */ 10852 if (is_u) { 10853 handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb, 10854 opcode, rn, rd); 10855 } else { 10856 handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd); 10857 } 10858 break; 10859 case 0x12: /* SQSHRN / UQSHRN */ 10860 case 0x13: /* SQRSHRN / UQRSHRN */ 10861 handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, 10862 opcode, rn, rd); 10863 break; 10864 case 0x14: /* SSHLL / USHLL */ 10865 handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); 10866 break; 10867 case 0x1c: /* SCVTF / UCVTF */ 10868 handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, 10869 opcode, rn, rd); 10870 break; 10871 case 0xc: /* SQSHLU */ 10872 if (!is_u) { 10873 unallocated_encoding(s); 10874 return; 10875 } 10876 handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd); 10877 break; 10878 case 0xe: /* SQSHL, UQSHL */ 10879 handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd); 10880 break; 10881 case 0x1f: /* FCVTZS/ FCVTZU */ 10882 handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd); 10883 return; 10884 default: 10885 unallocated_encoding(s); 10886 return; 10887 } 10888 } 10889 10890 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, 10891 int size, int rn, int rd) 10892 { 10893 /* Handle 2-reg-misc ops which are widening (so each size element 10894 * in the source becomes a 2*size element in the destination. 10895 * The only instruction like this is FCVTL. 
10896 */ 10897 int pass; 10898 10899 if (size == 3) { 10900 /* 32 -> 64 bit fp conversion */ 10901 TCGv_i64 tcg_res[2]; 10902 int srcelt = is_q ? 2 : 0; 10903 10904 for (pass = 0; pass < 2; pass++) { 10905 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10906 tcg_res[pass] = tcg_temp_new_i64(); 10907 10908 read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32); 10909 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env); 10910 } 10911 for (pass = 0; pass < 2; pass++) { 10912 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 10913 } 10914 } else { 10915 /* 16 -> 32 bit fp conversion */ 10916 int srcelt = is_q ? 4 : 0; 10917 TCGv_i32 tcg_res[4]; 10918 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10919 TCGv_i32 ahp = get_ahp_flag(); 10920 10921 for (pass = 0; pass < 4; pass++) { 10922 tcg_res[pass] = tcg_temp_new_i32(); 10923 10924 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); 10925 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], 10926 fpst, ahp); 10927 } 10928 for (pass = 0; pass < 4; pass++) { 10929 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); 10930 } 10931 } 10932 } 10933 10934 static void handle_rev(DisasContext *s, int opcode, bool u, 10935 bool is_q, int size, int rn, int rd) 10936 { 10937 int op = (opcode << 1) | u; 10938 int opsz = op + size; 10939 int grp_size = 3 - opsz; 10940 int dsize = is_q ? 128 : 64; 10941 int i; 10942 10943 if (opsz >= 3) { 10944 unallocated_encoding(s); 10945 return; 10946 } 10947 10948 if (!fp_access_check(s)) { 10949 return; 10950 } 10951 10952 if (size == 0) { 10953 /* Special case bytes, use bswap op on each group of elements */ 10954 int groups = dsize / (8 << grp_size); 10955 10956 for (i = 0; i < groups; i++) { 10957 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 10958 10959 read_vec_element(s, tcg_tmp, rn, i, grp_size); 10960 switch (grp_size) { 10961 case MO_16: 10962 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 10963 break; 10964 case MO_32: 10965 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 10966 break; 10967 case MO_64: 10968 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp); 10969 break; 10970 default: 10971 g_assert_not_reached(); 10972 } 10973 write_vec_element(s, tcg_tmp, rd, i, grp_size); 10974 } 10975 clear_vec_high(s, is_q, rd); 10976 } else { 10977 int revmask = (1 << grp_size) - 1; 10978 int esize = 8 << size; 10979 int elements = dsize / esize; 10980 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 10981 TCGv_i64 tcg_rd[2]; 10982 10983 for (i = 0; i < 2; i++) { 10984 tcg_rd[i] = tcg_temp_new_i64(); 10985 tcg_gen_movi_i64(tcg_rd[i], 0); 10986 } 10987 10988 for (i = 0; i < elements; i++) { 10989 int e_rev = (i & 0xf) ^ revmask; 10990 int w = (e_rev * esize) / 64; 10991 int o = (e_rev * esize) % 64; 10992 10993 read_vec_element(s, tcg_rn, rn, i, size); 10994 tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize); 10995 } 10996 10997 for (i = 0; i < 2; i++) { 10998 write_vec_element(s, tcg_rd[i], rd, i, MO_64); 10999 } 11000 clear_vec_high(s, true, rd); 11001 } 11002 } 11003 11004 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, 11005 bool is_q, int size, int rn, int rd) 11006 { 11007 /* Implement the pairwise operations from 2-misc: 11008 * SADDLP, UADDLP, SADALP, UADALP. 11009 * These all add pairs of elements in the input to produce a 11010 * double-width result element in the output (possibly accumulating). 11011 */ 11012 bool accum = (opcode == 0x6); 11013 int maxpass = is_q ? 
2 : 1; 11014 int pass; 11015 TCGv_i64 tcg_res[2]; 11016 11017 if (size == 2) { 11018 /* 32 + 32 -> 64 op */ 11019 MemOp memop = size + (u ? 0 : MO_SIGN); 11020 11021 for (pass = 0; pass < maxpass; pass++) { 11022 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 11023 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 11024 11025 tcg_res[pass] = tcg_temp_new_i64(); 11026 11027 read_vec_element(s, tcg_op1, rn, pass * 2, memop); 11028 read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop); 11029 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); 11030 if (accum) { 11031 read_vec_element(s, tcg_op1, rd, pass, MO_64); 11032 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1); 11033 } 11034 } 11035 } else { 11036 for (pass = 0; pass < maxpass; pass++) { 11037 TCGv_i64 tcg_op = tcg_temp_new_i64(); 11038 NeonGenOne64OpFn *genfn; 11039 static NeonGenOne64OpFn * const fns[2][2] = { 11040 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 }, 11041 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 }, 11042 }; 11043 11044 genfn = fns[size][u]; 11045 11046 tcg_res[pass] = tcg_temp_new_i64(); 11047 11048 read_vec_element(s, tcg_op, rn, pass, MO_64); 11049 genfn(tcg_res[pass], tcg_op); 11050 11051 if (accum) { 11052 read_vec_element(s, tcg_op, rd, pass, MO_64); 11053 if (size == 0) { 11054 gen_helper_neon_addl_u16(tcg_res[pass], 11055 tcg_res[pass], tcg_op); 11056 } else { 11057 gen_helper_neon_addl_u32(tcg_res[pass], 11058 tcg_res[pass], tcg_op); 11059 } 11060 } 11061 } 11062 } 11063 if (!is_q) { 11064 tcg_res[1] = tcg_constant_i64(0); 11065 } 11066 for (pass = 0; pass < 2; pass++) { 11067 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 11068 } 11069 } 11070 11071 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) 11072 { 11073 /* Implement SHLL and SHLL2 */ 11074 int pass; 11075 int part = is_q ? 
2 : 0; 11076 TCGv_i64 tcg_res[2]; 11077 11078 for (pass = 0; pass < 2; pass++) { 11079 static NeonGenWidenFn * const widenfns[3] = { 11080 gen_helper_neon_widen_u8, 11081 gen_helper_neon_widen_u16, 11082 tcg_gen_extu_i32_i64, 11083 }; 11084 NeonGenWidenFn *widenfn = widenfns[size]; 11085 TCGv_i32 tcg_op = tcg_temp_new_i32(); 11086 11087 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32); 11088 tcg_res[pass] = tcg_temp_new_i64(); 11089 widenfn(tcg_res[pass], tcg_op); 11090 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size); 11091 } 11092 11093 for (pass = 0; pass < 2; pass++) { 11094 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 11095 } 11096 } 11097 11098 /* AdvSIMD two reg misc 11099 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 11100 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 11101 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 11102 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 11103 */ 11104 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) 11105 { 11106 int size = extract32(insn, 22, 2); 11107 int opcode = extract32(insn, 12, 5); 11108 bool u = extract32(insn, 29, 1); 11109 bool is_q = extract32(insn, 30, 1); 11110 int rn = extract32(insn, 5, 5); 11111 int rd = extract32(insn, 0, 5); 11112 bool need_fpstatus = false; 11113 int rmode = -1; 11114 TCGv_i32 tcg_rmode; 11115 TCGv_ptr tcg_fpstatus; 11116 11117 switch (opcode) { 11118 case 0x0: /* REV64, REV32 */ 11119 case 0x1: /* REV16 */ 11120 handle_rev(s, opcode, u, is_q, size, rn, rd); 11121 return; 11122 case 0x5: /* CNT, NOT, RBIT */ 11123 if (u && size == 0) { 11124 /* NOT */ 11125 break; 11126 } else if (u && size == 1) { 11127 /* RBIT */ 11128 break; 11129 } else if (!u && size == 0) { 11130 /* CNT */ 11131 break; 11132 } 11133 unallocated_encoding(s); 11134 return; 11135 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */ 11136 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */ 11137 if (size == 3) { 11138 unallocated_encoding(s); 11139 return; 11140 } 11141 if (!fp_access_check(s)) { 11142 return; 11143 } 11144 11145 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd); 11146 return; 11147 case 0x4: /* CLS, CLZ */ 11148 if (size == 3) { 11149 unallocated_encoding(s); 11150 return; 11151 } 11152 break; 11153 case 0x2: /* SADDLP, UADDLP */ 11154 case 0x6: /* SADALP, UADALP */ 11155 if (size == 3) { 11156 unallocated_encoding(s); 11157 return; 11158 } 11159 if (!fp_access_check(s)) { 11160 return; 11161 } 11162 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd); 11163 return; 11164 case 0x13: /* SHLL, SHLL2 */ 11165 if (u == 0 || size == 3) { 11166 unallocated_encoding(s); 11167 return; 11168 } 11169 if (!fp_access_check(s)) { 11170 return; 11171 } 11172 handle_shll(s, is_q, size, rn, rd); 11173 return; 11174 case 0xa: /* CMLT */ 11175 if (u == 1) { 11176 unallocated_encoding(s); 11177 return; 11178 } 11179 /* fall through */ 11180 case 0x8: /* CMGT, CMGE */ 11181 case 0x9: /* CMEQ, CMLE */ 11182 case 0xb: /* ABS, NEG */ 11183 if (size == 3 && !is_q) { 11184 unallocated_encoding(s); 11185 return; 11186 } 11187 break; 11188 case 0x7: /* SQABS, SQNEG */ 11189 if (size == 3 && !is_q) { 11190 unallocated_encoding(s); 11191 return; 11192 } 11193 break; 11194 case 0xc ... 0xf: 11195 case 0x16 ... 0x1f: 11196 { 11197 /* Floating point: U, size[1] and opcode indicate operation; 11198 * size[0] indicates single or double precision. 
11199 */ 11200 int is_double = extract32(size, 0, 1); 11201 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 11202 size = is_double ? 3 : 2; 11203 switch (opcode) { 11204 case 0x2f: /* FABS */ 11205 case 0x6f: /* FNEG */ 11206 if (size == 3 && !is_q) { 11207 unallocated_encoding(s); 11208 return; 11209 } 11210 break; 11211 case 0x1d: /* SCVTF */ 11212 case 0x5d: /* UCVTF */ 11213 { 11214 bool is_signed = (opcode == 0x1d) ? true : false; 11215 int elements = is_double ? 2 : is_q ? 4 : 2; 11216 if (is_double && !is_q) { 11217 unallocated_encoding(s); 11218 return; 11219 } 11220 if (!fp_access_check(s)) { 11221 return; 11222 } 11223 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size); 11224 return; 11225 } 11226 case 0x2c: /* FCMGT (zero) */ 11227 case 0x2d: /* FCMEQ (zero) */ 11228 case 0x2e: /* FCMLT (zero) */ 11229 case 0x6c: /* FCMGE (zero) */ 11230 case 0x6d: /* FCMLE (zero) */ 11231 if (size == 3 && !is_q) { 11232 unallocated_encoding(s); 11233 return; 11234 } 11235 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd); 11236 return; 11237 case 0x7f: /* FSQRT */ 11238 if (size == 3 && !is_q) { 11239 unallocated_encoding(s); 11240 return; 11241 } 11242 break; 11243 case 0x1a: /* FCVTNS */ 11244 case 0x1b: /* FCVTMS */ 11245 case 0x3a: /* FCVTPS */ 11246 case 0x3b: /* FCVTZS */ 11247 case 0x5a: /* FCVTNU */ 11248 case 0x5b: /* FCVTMU */ 11249 case 0x7a: /* FCVTPU */ 11250 case 0x7b: /* FCVTZU */ 11251 need_fpstatus = true; 11252 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 11253 if (size == 3 && !is_q) { 11254 unallocated_encoding(s); 11255 return; 11256 } 11257 break; 11258 case 0x5c: /* FCVTAU */ 11259 case 0x1c: /* FCVTAS */ 11260 need_fpstatus = true; 11261 rmode = FPROUNDING_TIEAWAY; 11262 if (size == 3 && !is_q) { 11263 unallocated_encoding(s); 11264 return; 11265 } 11266 break; 11267 case 0x3c: /* URECPE */ 11268 if (size == 3) { 11269 unallocated_encoding(s); 11270 return; 11271 } 11272 /* fall through */ 11273 case 0x3d: /* FRECPE */ 11274 case 0x7d: /* FRSQRTE */ 11275 if (size == 3 && !is_q) { 11276 unallocated_encoding(s); 11277 return; 11278 } 11279 if (!fp_access_check(s)) { 11280 return; 11281 } 11282 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd); 11283 return; 11284 case 0x56: /* FCVTXN, FCVTXN2 */ 11285 if (size == 2) { 11286 unallocated_encoding(s); 11287 return; 11288 } 11289 /* fall through */ 11290 case 0x16: /* FCVTN, FCVTN2 */ 11291 /* handle_2misc_narrow does a 2*size -> size operation, but these 11292 * instructions encode the source size rather than dest size. 
11293 */ 11294 if (!fp_access_check(s)) { 11295 return; 11296 } 11297 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 11298 return; 11299 case 0x36: /* BFCVTN, BFCVTN2 */ 11300 if (!dc_isar_feature(aa64_bf16, s) || size != 2) { 11301 unallocated_encoding(s); 11302 return; 11303 } 11304 if (!fp_access_check(s)) { 11305 return; 11306 } 11307 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 11308 return; 11309 case 0x17: /* FCVTL, FCVTL2 */ 11310 if (!fp_access_check(s)) { 11311 return; 11312 } 11313 handle_2misc_widening(s, opcode, is_q, size, rn, rd); 11314 return; 11315 case 0x18: /* FRINTN */ 11316 case 0x19: /* FRINTM */ 11317 case 0x38: /* FRINTP */ 11318 case 0x39: /* FRINTZ */ 11319 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 11320 /* fall through */ 11321 case 0x59: /* FRINTX */ 11322 case 0x79: /* FRINTI */ 11323 need_fpstatus = true; 11324 if (size == 3 && !is_q) { 11325 unallocated_encoding(s); 11326 return; 11327 } 11328 break; 11329 case 0x58: /* FRINTA */ 11330 rmode = FPROUNDING_TIEAWAY; 11331 need_fpstatus = true; 11332 if (size == 3 && !is_q) { 11333 unallocated_encoding(s); 11334 return; 11335 } 11336 break; 11337 case 0x7c: /* URSQRTE */ 11338 if (size == 3) { 11339 unallocated_encoding(s); 11340 return; 11341 } 11342 break; 11343 case 0x1e: /* FRINT32Z */ 11344 case 0x1f: /* FRINT64Z */ 11345 rmode = FPROUNDING_ZERO; 11346 /* fall through */ 11347 case 0x5e: /* FRINT32X */ 11348 case 0x5f: /* FRINT64X */ 11349 need_fpstatus = true; 11350 if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) { 11351 unallocated_encoding(s); 11352 return; 11353 } 11354 break; 11355 default: 11356 unallocated_encoding(s); 11357 return; 11358 } 11359 break; 11360 } 11361 default: 11362 case 0x3: /* SUQADD, USQADD */ 11363 unallocated_encoding(s); 11364 return; 11365 } 11366 11367 if (!fp_access_check(s)) { 11368 return; 11369 } 11370 11371 if (need_fpstatus || rmode >= 0) { 11372 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 11373 } else { 11374 tcg_fpstatus = NULL; 11375 } 11376 if (rmode >= 0) { 11377 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 11378 } else { 11379 tcg_rmode = NULL; 11380 } 11381 11382 switch (opcode) { 11383 case 0x5: 11384 if (u && size == 0) { /* NOT */ 11385 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0); 11386 return; 11387 } 11388 break; 11389 case 0x8: /* CMGT, CMGE */ 11390 if (u) { 11391 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size); 11392 } else { 11393 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size); 11394 } 11395 return; 11396 case 0x9: /* CMEQ, CMLE */ 11397 if (u) { 11398 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size); 11399 } else { 11400 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size); 11401 } 11402 return; 11403 case 0xa: /* CMLT */ 11404 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size); 11405 return; 11406 case 0xb: 11407 if (u) { /* ABS, NEG */ 11408 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size); 11409 } else { 11410 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size); 11411 } 11412 return; 11413 } 11414 11415 if (size == 3) { 11416 /* All 64-bit element operations can be shared with scalar 2misc */ 11417 int pass; 11418 11419 /* Coverity claims (size == 3 && !is_q) has been eliminated 11420 * from all paths leading to here. 
11421 */ 11422 tcg_debug_assert(is_q); 11423 for (pass = 0; pass < 2; pass++) { 11424 TCGv_i64 tcg_op = tcg_temp_new_i64(); 11425 TCGv_i64 tcg_res = tcg_temp_new_i64(); 11426 11427 read_vec_element(s, tcg_op, rn, pass, MO_64); 11428 11429 handle_2misc_64(s, opcode, u, tcg_res, tcg_op, 11430 tcg_rmode, tcg_fpstatus); 11431 11432 write_vec_element(s, tcg_res, rd, pass, MO_64); 11433 } 11434 } else { 11435 int pass; 11436 11437 for (pass = 0; pass < (is_q ? 4 : 2); pass++) { 11438 TCGv_i32 tcg_op = tcg_temp_new_i32(); 11439 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11440 11441 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 11442 11443 if (size == 2) { 11444 /* Special cases for 32 bit elements */ 11445 switch (opcode) { 11446 case 0x4: /* CLS */ 11447 if (u) { 11448 tcg_gen_clzi_i32(tcg_res, tcg_op, 32); 11449 } else { 11450 tcg_gen_clrsb_i32(tcg_res, tcg_op); 11451 } 11452 break; 11453 case 0x7: /* SQABS, SQNEG */ 11454 if (u) { 11455 gen_helper_neon_qneg_s32(tcg_res, tcg_env, tcg_op); 11456 } else { 11457 gen_helper_neon_qabs_s32(tcg_res, tcg_env, tcg_op); 11458 } 11459 break; 11460 case 0x2f: /* FABS */ 11461 gen_vfp_abss(tcg_res, tcg_op); 11462 break; 11463 case 0x6f: /* FNEG */ 11464 gen_vfp_negs(tcg_res, tcg_op); 11465 break; 11466 case 0x7f: /* FSQRT */ 11467 gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); 11468 break; 11469 case 0x1a: /* FCVTNS */ 11470 case 0x1b: /* FCVTMS */ 11471 case 0x1c: /* FCVTAS */ 11472 case 0x3a: /* FCVTPS */ 11473 case 0x3b: /* FCVTZS */ 11474 gen_helper_vfp_tosls(tcg_res, tcg_op, 11475 tcg_constant_i32(0), tcg_fpstatus); 11476 break; 11477 case 0x5a: /* FCVTNU */ 11478 case 0x5b: /* FCVTMU */ 11479 case 0x5c: /* FCVTAU */ 11480 case 0x7a: /* FCVTPU */ 11481 case 0x7b: /* FCVTZU */ 11482 gen_helper_vfp_touls(tcg_res, tcg_op, 11483 tcg_constant_i32(0), tcg_fpstatus); 11484 break; 11485 case 0x18: /* FRINTN */ 11486 case 0x19: /* FRINTM */ 11487 case 0x38: /* FRINTP */ 11488 case 0x39: /* FRINTZ */ 11489 case 0x58: /* FRINTA */ 11490 case 0x79: /* FRINTI */ 11491 gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus); 11492 break; 11493 case 0x59: /* FRINTX */ 11494 gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus); 11495 break; 11496 case 0x7c: /* URSQRTE */ 11497 gen_helper_rsqrte_u32(tcg_res, tcg_op); 11498 break; 11499 case 0x1e: /* FRINT32Z */ 11500 case 0x5e: /* FRINT32X */ 11501 gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus); 11502 break; 11503 case 0x1f: /* FRINT64Z */ 11504 case 0x5f: /* FRINT64X */ 11505 gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus); 11506 break; 11507 default: 11508 g_assert_not_reached(); 11509 } 11510 } else { 11511 /* Use helpers for 8 and 16 bit elements */ 11512 switch (opcode) { 11513 case 0x5: /* CNT, RBIT */ 11514 /* For these two insns size is part of the opcode specifier 11515 * (handled earlier); they always operate on byte elements. 
11516 */ 11517 if (u) { 11518 gen_helper_neon_rbit_u8(tcg_res, tcg_op); 11519 } else { 11520 gen_helper_neon_cnt_u8(tcg_res, tcg_op); 11521 } 11522 break; 11523 case 0x7: /* SQABS, SQNEG */ 11524 { 11525 NeonGenOneOpEnvFn *genfn; 11526 static NeonGenOneOpEnvFn * const fns[2][2] = { 11527 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 11528 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 11529 }; 11530 genfn = fns[size][u]; 11531 genfn(tcg_res, tcg_env, tcg_op); 11532 break; 11533 } 11534 case 0x4: /* CLS, CLZ */ 11535 if (u) { 11536 if (size == 0) { 11537 gen_helper_neon_clz_u8(tcg_res, tcg_op); 11538 } else { 11539 gen_helper_neon_clz_u16(tcg_res, tcg_op); 11540 } 11541 } else { 11542 if (size == 0) { 11543 gen_helper_neon_cls_s8(tcg_res, tcg_op); 11544 } else { 11545 gen_helper_neon_cls_s16(tcg_res, tcg_op); 11546 } 11547 } 11548 break; 11549 default: 11550 g_assert_not_reached(); 11551 } 11552 } 11553 11554 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 11555 } 11556 } 11557 clear_vec_high(s, is_q, rd); 11558 11559 if (tcg_rmode) { 11560 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 11561 } 11562 } 11563 11564 /* AdvSIMD [scalar] two register miscellaneous (FP16) 11565 * 11566 * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0 11567 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 11568 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd | 11569 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 11570 * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00 11571 * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800 11572 * 11573 * This actually covers two groups where scalar access is governed by 11574 * bit 28. A bunch of the instructions (float to integral) only exist 11575 * in the vector form and are un-allocated for the scalar decode. Also 11576 * in the scalar decode Q is always 1. 11577 */ 11578 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) 11579 { 11580 int fpop, opcode, a, u; 11581 int rn, rd; 11582 bool is_q; 11583 bool is_scalar; 11584 bool only_in_vector = false; 11585 11586 int pass; 11587 TCGv_i32 tcg_rmode = NULL; 11588 TCGv_ptr tcg_fpstatus = NULL; 11589 bool need_fpst = true; 11590 int rmode = -1; 11591 11592 if (!dc_isar_feature(aa64_fp16, s)) { 11593 unallocated_encoding(s); 11594 return; 11595 } 11596 11597 rd = extract32(insn, 0, 5); 11598 rn = extract32(insn, 5, 5); 11599 11600 a = extract32(insn, 23, 1); 11601 u = extract32(insn, 29, 1); 11602 is_scalar = extract32(insn, 28, 1); 11603 is_q = extract32(insn, 30, 1); 11604 11605 opcode = extract32(insn, 12, 5); 11606 fpop = deposit32(opcode, 5, 1, a); 11607 fpop = deposit32(fpop, 6, 1, u); 11608 11609 switch (fpop) { 11610 case 0x1d: /* SCVTF */ 11611 case 0x5d: /* UCVTF */ 11612 { 11613 int elements; 11614 11615 if (is_scalar) { 11616 elements = 1; 11617 } else { 11618 elements = (is_q ? 
8 : 4); 11619 } 11620 11621 if (!fp_access_check(s)) { 11622 return; 11623 } 11624 handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16); 11625 return; 11626 } 11627 break; 11628 case 0x2c: /* FCMGT (zero) */ 11629 case 0x2d: /* FCMEQ (zero) */ 11630 case 0x2e: /* FCMLT (zero) */ 11631 case 0x6c: /* FCMGE (zero) */ 11632 case 0x6d: /* FCMLE (zero) */ 11633 handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd); 11634 return; 11635 case 0x3d: /* FRECPE */ 11636 case 0x3f: /* FRECPX */ 11637 break; 11638 case 0x18: /* FRINTN */ 11639 only_in_vector = true; 11640 rmode = FPROUNDING_TIEEVEN; 11641 break; 11642 case 0x19: /* FRINTM */ 11643 only_in_vector = true; 11644 rmode = FPROUNDING_NEGINF; 11645 break; 11646 case 0x38: /* FRINTP */ 11647 only_in_vector = true; 11648 rmode = FPROUNDING_POSINF; 11649 break; 11650 case 0x39: /* FRINTZ */ 11651 only_in_vector = true; 11652 rmode = FPROUNDING_ZERO; 11653 break; 11654 case 0x58: /* FRINTA */ 11655 only_in_vector = true; 11656 rmode = FPROUNDING_TIEAWAY; 11657 break; 11658 case 0x59: /* FRINTX */ 11659 case 0x79: /* FRINTI */ 11660 only_in_vector = true; 11661 /* current rounding mode */ 11662 break; 11663 case 0x1a: /* FCVTNS */ 11664 rmode = FPROUNDING_TIEEVEN; 11665 break; 11666 case 0x1b: /* FCVTMS */ 11667 rmode = FPROUNDING_NEGINF; 11668 break; 11669 case 0x1c: /* FCVTAS */ 11670 rmode = FPROUNDING_TIEAWAY; 11671 break; 11672 case 0x3a: /* FCVTPS */ 11673 rmode = FPROUNDING_POSINF; 11674 break; 11675 case 0x3b: /* FCVTZS */ 11676 rmode = FPROUNDING_ZERO; 11677 break; 11678 case 0x5a: /* FCVTNU */ 11679 rmode = FPROUNDING_TIEEVEN; 11680 break; 11681 case 0x5b: /* FCVTMU */ 11682 rmode = FPROUNDING_NEGINF; 11683 break; 11684 case 0x5c: /* FCVTAU */ 11685 rmode = FPROUNDING_TIEAWAY; 11686 break; 11687 case 0x7a: /* FCVTPU */ 11688 rmode = FPROUNDING_POSINF; 11689 break; 11690 case 0x7b: /* FCVTZU */ 11691 rmode = FPROUNDING_ZERO; 11692 break; 11693 case 0x2f: /* FABS */ 11694 case 0x6f: /* FNEG */ 11695 need_fpst = false; 11696 break; 11697 case 0x7d: /* FRSQRTE */ 11698 case 0x7f: /* FSQRT (vector) */ 11699 break; 11700 default: 11701 unallocated_encoding(s); 11702 return; 11703 } 11704 11705 11706 /* Check additional constraints for the scalar encoding */ 11707 if (is_scalar) { 11708 if (!is_q) { 11709 unallocated_encoding(s); 11710 return; 11711 } 11712 /* FRINTxx is only in the vector form */ 11713 if (only_in_vector) { 11714 unallocated_encoding(s); 11715 return; 11716 } 11717 } 11718 11719 if (!fp_access_check(s)) { 11720 return; 11721 } 11722 11723 if (rmode >= 0 || need_fpst) { 11724 tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16); 11725 } 11726 11727 if (rmode >= 0) { 11728 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 11729 } 11730 11731 if (is_scalar) { 11732 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 11733 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11734 11735 switch (fpop) { 11736 case 0x1a: /* FCVTNS */ 11737 case 0x1b: /* FCVTMS */ 11738 case 0x1c: /* FCVTAS */ 11739 case 0x3a: /* FCVTPS */ 11740 case 0x3b: /* FCVTZS */ 11741 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 11742 break; 11743 case 0x3d: /* FRECPE */ 11744 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 11745 break; 11746 case 0x3f: /* FRECPX */ 11747 gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus); 11748 break; 11749 case 0x5a: /* FCVTNU */ 11750 case 0x5b: /* FCVTMU */ 11751 case 0x5c: /* FCVTAU */ 11752 case 0x7a: /* FCVTPU */ 11753 case 0x7b: /* FCVTZU */ 11754 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 11755 
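/*
 * Added note: these five opcodes differ only in rounding mode; the
 * appropriate FPROUNDING_* value was installed above via gen_set_rmode(),
 * so a single helper covers FCVTNU/FCVTMU/FCVTAU/FCVTPU/FCVTZU here.
 */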
break; 11756 case 0x6f: /* FNEG */ 11757 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 11758 break; 11759 case 0x7d: /* FRSQRTE */ 11760 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 11761 break; 11762 default: 11763 g_assert_not_reached(); 11764 } 11765 11766 /* limit any sign extension going on */ 11767 tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff); 11768 write_fp_sreg(s, rd, tcg_res); 11769 } else { 11770 for (pass = 0; pass < (is_q ? 8 : 4); pass++) { 11771 TCGv_i32 tcg_op = tcg_temp_new_i32(); 11772 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11773 11774 read_vec_element_i32(s, tcg_op, rn, pass, MO_16); 11775 11776 switch (fpop) { 11777 case 0x1a: /* FCVTNS */ 11778 case 0x1b: /* FCVTMS */ 11779 case 0x1c: /* FCVTAS */ 11780 case 0x3a: /* FCVTPS */ 11781 case 0x3b: /* FCVTZS */ 11782 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 11783 break; 11784 case 0x3d: /* FRECPE */ 11785 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 11786 break; 11787 case 0x5a: /* FCVTNU */ 11788 case 0x5b: /* FCVTMU */ 11789 case 0x5c: /* FCVTAU */ 11790 case 0x7a: /* FCVTPU */ 11791 case 0x7b: /* FCVTZU */ 11792 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 11793 break; 11794 case 0x18: /* FRINTN */ 11795 case 0x19: /* FRINTM */ 11796 case 0x38: /* FRINTP */ 11797 case 0x39: /* FRINTZ */ 11798 case 0x58: /* FRINTA */ 11799 case 0x79: /* FRINTI */ 11800 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus); 11801 break; 11802 case 0x59: /* FRINTX */ 11803 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus); 11804 break; 11805 case 0x2f: /* FABS */ 11806 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); 11807 break; 11808 case 0x6f: /* FNEG */ 11809 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 11810 break; 11811 case 0x7d: /* FRSQRTE */ 11812 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 11813 break; 11814 case 0x7f: /* FSQRT */ 11815 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus); 11816 break; 11817 default: 11818 g_assert_not_reached(); 11819 } 11820 11821 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 11822 } 11823 11824 clear_vec_high(s, is_q, rd); 11825 } 11826 11827 if (tcg_rmode) { 11828 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 11829 } 11830 } 11831 11832 /* C3.6 Data processing - SIMD, inc Crypto 11833 * 11834 * As the decode gets a little complex we are using a table based 11835 * approach for this part of the decode. 11836 */ 11837 static const AArch64DecodeTable data_proc_simd[] = { 11838 /* pattern , mask , fn */ 11839 { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, 11840 { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes }, 11841 /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */ 11842 { 0x0f000400, 0x9ff80400, disas_simd_mod_imm }, 11843 { 0x0f000400, 0x9f800400, disas_simd_shift_imm }, 11844 { 0x0e000000, 0xbf208c00, disas_simd_tb }, 11845 { 0x0e000800, 0xbf208c00, disas_simd_zip_trn }, 11846 { 0x2e000000, 0xbf208400, disas_simd_ext }, 11847 { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, 11848 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm }, 11849 { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 }, 11850 { 0x00000000, 0x00000000, NULL } 11851 }; 11852 11853 static void disas_data_proc_simd(DisasContext *s, uint32_t insn) 11854 { 11855 /* Note that this is called with all non-FP cases from 11856 * table C3-6 so it must UNDEF for entries not specifically 11857 * allocated to instructions in that table. 

static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
{
    /* Note that this is called with all non-FP cases from
     * table C3-6 so it must UNDEF for entries not specifically
     * allocated to instructions in that table.
     */
    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
    if (fn) {
        fn(s, insn);
    } else {
        unallocated_encoding(s);
    }
}

/* C3.6 Data processing - SIMD and floating point */
static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
{
    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
        disas_data_proc_fp(s, insn);
    } else {
        /* SIMD, including crypto */
        disas_data_proc_simd(s, insn);
    }
}

static bool trans_OK(DisasContext *s, arg_OK *a)
{
    return true;
}

static bool trans_FAIL(DisasContext *s, arg_OK *a)
{
    s->is_nonstreaming = true;
    return true;
}

/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, and is non-zero
 *
 * On a guarded page, there are a limited number of insns
 * that may be present at the branch target:
 *  - branch target identifiers,
 *  - paciasp, pacibsp,
 *  - BRK insn
 *  - HLT insn
 * Anything else causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    if ((insn & 0xfffff01fu) == 0xd503201fu) {
        /* HINT space */
        switch (extract32(insn, 5, 7)) {
        case 0b011001: /* PACIASP */
        case 0b011011: /* PACIBSP */
            /*
             * If SCTLR_ELx.BT, then PACI*SP are not compatible
             * with btype == 3.  Otherwise all btype are ok.
             */
            return !bt || btype != 3;
        case 0b100000: /* BTI */
            /* Not compatible with any btype.  */
            return false;
        case 0b100010: /* BTI c */
            /* Not compatible with btype == 3 */
            return btype != 3;
        case 0b100100: /* BTI j */
            /* Not compatible with btype == 2 */
            return btype != 2;
        case 0b100110: /* BTI jc */
            /* Compatible with any btype.  */
            return true;
        }
    } else {
        switch (insn & 0xffe0001fu) {
        case 0xd4200000u: /* BRK */
        case 0xd4400000u: /* HLT */
            /* Give priority to the breakpoint exception.  */
            return true;
        }
    }
    return false;
}

/* C3.1 A64 instruction index by encoding */
static void disas_a64_legacy(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 25, 4)) {
    case 0x5:
    case 0xd:      /* Data processing - register */
        disas_data_proc_reg(s, insn);
        break;
    case 0x7:
    case 0xf:      /* Data processing - SIMD and floating point */
        disas_data_proc_simd_fp(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
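
/*
 * Note on the legacy dispatch above: extract32(insn, 25, 4) is the
 * top-level encoding-group field, insn[28:25].  The x101 values
 * (0x5, 0xd) select "Data processing - register" and the x111 values
 * (0x7, 0xf) select "Data processing - SIMD and floating point".
 * Groups already converted to decodetree are claimed by disas_a64()
 * and friends before this function is reached (see
 * aarch64_tr_translate_insn below), so every other group UNDEFs here.
 */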

static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    ARMCPU *arm_cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;
    dc->pc_save = dc->base.pc_first;
    dc->aarch64 = true;
    dc->thumb = false;
    dc->sctlr_b = 0;
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
    dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
    dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
    dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
    dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET);
    dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
    dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
    dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
    dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
    dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
    dc->bt = EX_TBFLAG_A64(tb_flags, BT);
    dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
    dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
    dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
    dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
    dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
    dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
    dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
    dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
    dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
    dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
    dc->nv = EX_TBFLAG_A64(tb_flags, NV);
    dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1);
    dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2);
    dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20);
    dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;
    dc->dcz_blocksize = arm_cpu->dcz_blocksize;
    dc->gm_blocksize = arm_cpu->gm_blocksize;

#ifdef CONFIG_USER_ONLY
    /* In sve_probe_page, we assume TBI is enabled. */
    tcg_debug_assert(dc->tbid & 1);
#endif

    dc->lse2 = dc_isar_feature(aa64_lse2, dc);

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;

    /* Bound the number of insns to execute to those left on the page. */
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
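    /*
     * Worked example of the bound computation above (assuming 4 KiB
     * pages): pc_first | TARGET_PAGE_MASK sets every bit above the
     * in-page offset, so negating it yields the number of bytes left
     * on the page.  With pc_first ending in ...ffc that is 4 bytes,
     * i.e. a bound of 1 instruction; at a page start it is
     * TARGET_PAGE_SIZE / 4 instructions.
     */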
    /* If architectural single step active, limit to 1.  */
    if (dc->ss_active) {
        bound = 1;
    }
    dc->base.max_insns = MIN(dc->base.max_insns, bound);
}

static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}

static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    target_ulong pc_arg = dc->base.pc_next;

    if (tb_cflags(dcbase->tb) & CF_PCREL) {
        pc_arg &= ~TARGET_PAGE_MASK;
    }
    tcg_gen_insn_start(pc_arg, 0, 0);
    dc->insn_start_updated = false;
}

static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *s = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    uint64_t pc = s->base.pc_next;
    uint32_t insn;

    /* Singlestep exceptions have the highest priority. */
    if (s->ss_active && !s->pstate_ss) {
        /* Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(s->base.num_insns == 1);
        gen_swstep_exception(s, 0, 0);
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = pc + 4;
        return;
    }

    if (pc & 3) {
        /*
         * PC alignment fault.  This has priority over the instruction abort
         * that we would receive from a translation fault via arm_ldl_code.
         * This should only be possible after an indirect branch, at the
         * start of the TB.
         */
        assert(s->base.num_insns == 1);
        gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
        return;
    }

    s->pc_curr = pc;
    insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next = pc + 4;

    s->fp_access_checked = false;
    s->sve_access_checked = false;

    if (s->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /* First insn can have btype set to non-zero.  */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above most
             * everything else.  This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             *
             * We can check all but the guarded page check here;
             * defer the latter to a helper.
             */
            if (s->btype != 0
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_helper_guarded_page_check(tcg_env);
            }
        } else {
            /* Not the first insn: btype must be 0.  */
            tcg_debug_assert(s->btype == 0);
        }
    }
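
    /*
     * Decode proper: disas_sme_fa64() only runs the SME FA64 pattern
     * set, whose trans_FAIL entries (see above) mark the insn as
     * is_nonstreaming so that the later access checks can raise the
     * SME non-streaming trap; the insn itself is then handed to the
     * generated decoders, with disas_a64_legacy() as the fallback when
     * none of them claim it.
     */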
    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }

    if (!disas_a64(s, insn) &&
        !disas_sme(s, insn) &&
        !disas_sve(s, insn)) {
        disas_a64_legacy(s, insn);
    }

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}

static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->ss_active)) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            gen_step_complete_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, 4);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_update_pc(dc, 4);
            gen_helper_wfe(tcg_env);
            break;
        case DISAS_YIELD:
            gen_a64_update_pc(dc, 4);
            gen_helper_yield(tcg_env);
            break;
        case DISAS_WFI:
            /*
             * This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            gen_a64_update_pc(dc, 4);
            gen_helper_wfi(tcg_env, tcg_constant_i32(4));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
    }
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
};
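
/*
 * Usage sketch (not part of this file; exact signature lives in the
 * generic translator code): the AArch64 front end is selected by the
 * per-TB flags in translate.c and then driven by the common loop,
 * roughly as
 *
 *     translator_loop(cpu, tb, max_insns, pc, host_pc,
 *                     &aarch64_translator_ops, &dc.base);
 *
 * which calls init_disas_context and tb_start once per block, then
 * insn_start and translate_insn for each guest instruction, and
 * finally tb_stop to close the block.
 */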