1 /* 2 * AArch64 translation 3 * 4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de> 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 #include "qemu/osdep.h" 20 21 #include "translate.h" 22 #include "translate-a64.h" 23 #include "qemu/log.h" 24 #include "disas/disas.h" 25 #include "arm_ldst.h" 26 #include "semihosting/semihost.h" 27 #include "cpregs.h" 28 29 static TCGv_i64 cpu_X[32]; 30 static TCGv_i64 cpu_pc; 31 32 /* Load/store exclusive handling */ 33 static TCGv_i64 cpu_exclusive_high; 34 35 static const char *regnames[] = { 36 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 37 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 38 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 39 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp" 40 }; 41 42 enum a64_shift_type { 43 A64_SHIFT_TYPE_LSL = 0, 44 A64_SHIFT_TYPE_LSR = 1, 45 A64_SHIFT_TYPE_ASR = 2, 46 A64_SHIFT_TYPE_ROR = 3 47 }; 48 49 /* 50 * Helpers for extracting complex instruction fields 51 */ 52 53 /* 54 * For load/store with an unsigned 12 bit immediate scaled by the element 55 * size. The input has the immediate field in bits [14:3] and the element 56 * size in [2:0]. 57 */ 58 static int uimm_scaled(DisasContext *s, int x) 59 { 60 unsigned imm = x >> 3; 61 unsigned scale = extract32(x, 0, 3); 62 return imm << scale; 63 } 64 65 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */ 66 static int scale_by_log2_tag_granule(DisasContext *s, int x) 67 { 68 return x << LOG2_TAG_GRANULE; 69 } 70 71 /* 72 * Include the generated decoders. 73 */ 74 75 #include "decode-sme-fa64.c.inc" 76 #include "decode-a64.c.inc" 77 78 /* Table based decoder typedefs - used when the relevant bits for decode 79 * are too awkwardly scattered across the instruction (eg SIMD). 80 */ 81 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn); 82 83 typedef struct AArch64DecodeTable { 84 uint32_t pattern; 85 uint32_t mask; 86 AArch64DecodeFn *disas_fn; 87 } AArch64DecodeTable; 88 89 /* initialize TCG globals. */ 90 void a64_translate_init(void) 91 { 92 int i; 93 94 cpu_pc = tcg_global_mem_new_i64(tcg_env, 95 offsetof(CPUARMState, pc), 96 "pc"); 97 for (i = 0; i < 32; i++) { 98 cpu_X[i] = tcg_global_mem_new_i64(tcg_env, 99 offsetof(CPUARMState, xregs[i]), 100 regnames[i]); 101 } 102 103 cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env, 104 offsetof(CPUARMState, exclusive_high), "exclusive_high"); 105 } 106 107 /* 108 * Return the core mmu_idx to use for A64 load/store insns which 109 * have a "unprivileged load/store" variant. Those insns access 110 * EL0 if executed from an EL which has control over EL0 (usually 111 * EL1) but behave like normal loads and stores if executed from 112 * elsewhere (eg EL3). 113 * 114 * @unpriv : true for the unprivileged encoding; false for the 115 * normal encoding (in which case we will return the same 116 * thing as get_mem_index(). 
117 */ 118 static int get_a64_user_mem_index(DisasContext *s, bool unpriv) 119 { 120 /* 121 * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL, 122 * which is the usual mmu_idx for this cpu state. 123 */ 124 ARMMMUIdx useridx = s->mmu_idx; 125 126 if (unpriv && s->unpriv) { 127 /* 128 * We have pre-computed the condition for AccType_UNPRIV. 129 * Therefore we should never get here with a mmu_idx for 130 * which we do not know the corresponding user mmu_idx. 131 */ 132 switch (useridx) { 133 case ARMMMUIdx_E10_1: 134 case ARMMMUIdx_E10_1_PAN: 135 useridx = ARMMMUIdx_E10_0; 136 break; 137 case ARMMMUIdx_E20_2: 138 case ARMMMUIdx_E20_2_PAN: 139 useridx = ARMMMUIdx_E20_0; 140 break; 141 default: 142 g_assert_not_reached(); 143 } 144 } 145 return arm_to_core_mmu_idx(useridx); 146 } 147 148 static void set_btype_raw(int val) 149 { 150 tcg_gen_st_i32(tcg_constant_i32(val), tcg_env, 151 offsetof(CPUARMState, btype)); 152 } 153 154 static void set_btype(DisasContext *s, int val) 155 { 156 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */ 157 tcg_debug_assert(val >= 1 && val <= 3); 158 set_btype_raw(val); 159 s->btype = -1; 160 } 161 162 static void reset_btype(DisasContext *s) 163 { 164 if (s->btype != 0) { 165 set_btype_raw(0); 166 s->btype = 0; 167 } 168 } 169 170 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff) 171 { 172 assert(s->pc_save != -1); 173 if (tb_cflags(s->base.tb) & CF_PCREL) { 174 tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff); 175 } else { 176 tcg_gen_movi_i64(dest, s->pc_curr + diff); 177 } 178 } 179 180 void gen_a64_update_pc(DisasContext *s, target_long diff) 181 { 182 gen_pc_plus_diff(s, cpu_pc, diff); 183 s->pc_save = s->pc_curr + diff; 184 } 185 186 /* 187 * Handle Top Byte Ignore (TBI) bits. 188 * 189 * If address tagging is enabled via the TCR TBI bits: 190 * + for EL2 and EL3 there is only one TBI bit, and if it is set 191 * then the address is zero-extended, clearing bits [63:56] 192 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0 193 * and TBI1 controls addresses with bit 55 == 1. 194 * If the appropriate TBI bit is set for the address then 195 * the address is sign-extended from bit 55 into bits [63:56] 196 * 197 * Here We have concatenated TBI{1,0} into tbi. 198 */ 199 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst, 200 TCGv_i64 src, int tbi) 201 { 202 if (tbi == 0) { 203 /* Load unmodified address */ 204 tcg_gen_mov_i64(dst, src); 205 } else if (!regime_has_2_ranges(s->mmu_idx)) { 206 /* Force tag byte to all zero */ 207 tcg_gen_extract_i64(dst, src, 0, 56); 208 } else { 209 /* Sign-extend from bit 55. */ 210 tcg_gen_sextract_i64(dst, src, 0, 56); 211 212 switch (tbi) { 213 case 1: 214 /* tbi0 but !tbi1: only use the extension if positive */ 215 tcg_gen_and_i64(dst, dst, src); 216 break; 217 case 2: 218 /* !tbi0 but tbi1: only use the extension if negative */ 219 tcg_gen_or_i64(dst, dst, src); 220 break; 221 case 3: 222 /* tbi0 and tbi1: always use the extension */ 223 break; 224 default: 225 g_assert_not_reached(); 226 } 227 } 228 } 229 230 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) 231 { 232 /* 233 * If address tagging is enabled for instructions via the TCR TBI bits, 234 * then loading an address into the PC will clear out any tag. 235 */ 236 gen_top_byte_ignore(s, cpu_pc, src, s->tbii); 237 s->pc_save = -1; 238 } 239 240 /* 241 * Handle MTE and/or TBI. 242 * 243 * For TBI, ideally, we would do nothing. 
Proper behaviour on fault is 244 * for the tag to be present in the FAR_ELx register. But for user-only 245 * mode we do not have a TLB with which to implement this, so we must 246 * remove the top byte now. 247 * 248 * Always return a fresh temporary that we can increment independently 249 * of the write-back address. 250 */ 251 252 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) 253 { 254 TCGv_i64 clean = tcg_temp_new_i64(); 255 #ifdef CONFIG_USER_ONLY 256 gen_top_byte_ignore(s, clean, addr, s->tbid); 257 #else 258 tcg_gen_mov_i64(clean, addr); 259 #endif 260 return clean; 261 } 262 263 /* Insert a zero tag into src, with the result at dst. */ 264 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src) 265 { 266 tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4)); 267 } 268 269 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, 270 MMUAccessType acc, int log2_size) 271 { 272 gen_helper_probe_access(tcg_env, ptr, 273 tcg_constant_i32(acc), 274 tcg_constant_i32(get_mem_index(s)), 275 tcg_constant_i32(1 << log2_size)); 276 } 277 278 /* 279 * For MTE, check a single logical or atomic access. This probes a single 280 * address, the exact one specified. The size and alignment of the access 281 * is not relevant to MTE, per se, but watchpoints do require the size, 282 * and we want to recognize those before making any other changes to state. 283 */ 284 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, 285 bool is_write, bool tag_checked, 286 MemOp memop, bool is_unpriv, 287 int core_idx) 288 { 289 if (tag_checked && s->mte_active[is_unpriv]) { 290 TCGv_i64 ret; 291 int desc = 0; 292 293 desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx); 294 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 295 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 296 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 297 desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop)); 298 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1); 299 300 ret = tcg_temp_new_i64(); 301 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); 302 303 return ret; 304 } 305 return clean_data_tbi(s, addr); 306 } 307 308 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, 309 bool tag_checked, MemOp memop) 310 { 311 return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop, 312 false, get_mem_index(s)); 313 } 314 315 /* 316 * For MTE, check multiple logical sequential accesses. 317 */ 318 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, 319 bool tag_checked, int total_size, MemOp single_mop) 320 { 321 if (tag_checked && s->mte_active[0]) { 322 TCGv_i64 ret; 323 int desc = 0; 324 325 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 326 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 327 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 328 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 329 desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop)); 330 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1); 331 332 ret = tcg_temp_new_i64(); 333 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); 334 335 return ret; 336 } 337 return clean_data_tbi(s, addr); 338 } 339 340 /* 341 * Generate the special alignment check that applies to AccType_ATOMIC 342 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be 343 * naturally aligned, but it must not cross a 16-byte boundary. 344 * See AArch64.CheckAlignment(). 
345 */ 346 static void check_lse2_align(DisasContext *s, int rn, int imm, 347 bool is_write, MemOp mop) 348 { 349 TCGv_i32 tmp; 350 TCGv_i64 addr; 351 TCGLabel *over_label; 352 MMUAccessType type; 353 int mmu_idx; 354 355 tmp = tcg_temp_new_i32(); 356 tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn)); 357 tcg_gen_addi_i32(tmp, tmp, imm & 15); 358 tcg_gen_andi_i32(tmp, tmp, 15); 359 tcg_gen_addi_i32(tmp, tmp, memop_size(mop)); 360 361 over_label = gen_new_label(); 362 tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label); 363 364 addr = tcg_temp_new_i64(); 365 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm); 366 367 type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD, 368 mmu_idx = get_mem_index(s); 369 gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type), 370 tcg_constant_i32(mmu_idx)); 371 372 gen_set_label(over_label); 373 374 } 375 376 /* Handle the alignment check for AccType_ATOMIC instructions. */ 377 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop) 378 { 379 MemOp size = mop & MO_SIZE; 380 381 if (size == MO_8) { 382 return mop; 383 } 384 385 /* 386 * If size == MO_128, this is a LDXP, and the operation is single-copy 387 * atomic for each doubleword, not the entire quadword; it still must 388 * be quadword aligned. 389 */ 390 if (size == MO_128) { 391 return finalize_memop_atom(s, MO_128 | MO_ALIGN, 392 MO_ATOM_IFALIGN_PAIR); 393 } 394 if (dc_isar_feature(aa64_lse2, s)) { 395 check_lse2_align(s, rn, 0, true, mop); 396 } else { 397 mop |= MO_ALIGN; 398 } 399 return finalize_memop(s, mop); 400 } 401 402 /* Handle the alignment check for AccType_ORDERED instructions. */ 403 static MemOp check_ordered_align(DisasContext *s, int rn, int imm, 404 bool is_write, MemOp mop) 405 { 406 MemOp size = mop & MO_SIZE; 407 408 if (size == MO_8) { 409 return mop; 410 } 411 if (size == MO_128) { 412 return finalize_memop_atom(s, MO_128 | MO_ALIGN, 413 MO_ATOM_IFALIGN_PAIR); 414 } 415 if (!dc_isar_feature(aa64_lse2, s)) { 416 mop |= MO_ALIGN; 417 } else if (!s->naa) { 418 check_lse2_align(s, rn, imm, is_write, mop); 419 } 420 return finalize_memop(s, mop); 421 } 422 423 typedef struct DisasCompare64 { 424 TCGCond cond; 425 TCGv_i64 value; 426 } DisasCompare64; 427 428 static void a64_test_cc(DisasCompare64 *c64, int cc) 429 { 430 DisasCompare c32; 431 432 arm_test_cc(&c32, cc); 433 434 /* 435 * Sign-extend the 32-bit value so that the GE/LT comparisons work 436 * properly. The NE/EQ comparisons are also fine with this choice. 437 */ 438 c64->cond = c32.cond; 439 c64->value = tcg_temp_new_i64(); 440 tcg_gen_ext_i32_i64(c64->value, c32.value); 441 } 442 443 static void gen_rebuild_hflags(DisasContext *s) 444 { 445 gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el)); 446 } 447 448 static void gen_exception_internal(int excp) 449 { 450 assert(excp_is_internal(excp)); 451 gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp)); 452 } 453 454 static void gen_exception_internal_insn(DisasContext *s, int excp) 455 { 456 gen_a64_update_pc(s, 0); 457 gen_exception_internal(excp); 458 s->base.is_jmp = DISAS_NORETURN; 459 } 460 461 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome) 462 { 463 gen_a64_update_pc(s, 0); 464 gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome)); 465 s->base.is_jmp = DISAS_NORETURN; 466 } 467 468 static void gen_step_complete_exception(DisasContext *s) 469 { 470 /* We just completed step of an insn. Move from Active-not-pending 471 * to Active-pending, and then also take the swstep exception. 
472 * This corresponds to making the (IMPDEF) choice to prioritize 473 * swstep exceptions over asynchronous exceptions taken to an exception 474 * level where debug is disabled. This choice has the advantage that 475 * we do not need to maintain internal state corresponding to the 476 * ISV/EX syndrome bits between completion of the step and generation 477 * of the exception, and our syndrome information is always correct. 478 */ 479 gen_ss_advance(s); 480 gen_swstep_exception(s, 1, s->is_ldex); 481 s->base.is_jmp = DISAS_NORETURN; 482 } 483 484 static inline bool use_goto_tb(DisasContext *s, uint64_t dest) 485 { 486 if (s->ss_active) { 487 return false; 488 } 489 return translator_use_goto_tb(&s->base, dest); 490 } 491 492 static void gen_goto_tb(DisasContext *s, int n, int64_t diff) 493 { 494 if (use_goto_tb(s, s->pc_curr + diff)) { 495 /* 496 * For pcrel, the pc must always be up-to-date on entry to 497 * the linked TB, so that it can use simple additions for all 498 * further adjustments. For !pcrel, the linked TB is compiled 499 * to know its full virtual address, so we can delay the 500 * update to pc to the unlinked path. A long chain of links 501 * can thus avoid many updates to the PC. 502 */ 503 if (tb_cflags(s->base.tb) & CF_PCREL) { 504 gen_a64_update_pc(s, diff); 505 tcg_gen_goto_tb(n); 506 } else { 507 tcg_gen_goto_tb(n); 508 gen_a64_update_pc(s, diff); 509 } 510 tcg_gen_exit_tb(s->base.tb, n); 511 s->base.is_jmp = DISAS_NORETURN; 512 } else { 513 gen_a64_update_pc(s, diff); 514 if (s->ss_active) { 515 gen_step_complete_exception(s); 516 } else { 517 tcg_gen_lookup_and_goto_ptr(); 518 s->base.is_jmp = DISAS_NORETURN; 519 } 520 } 521 } 522 523 /* 524 * Register access functions 525 * 526 * These functions are used for directly accessing a register in where 527 * changes to the final register value are likely to be made. If you 528 * need to use a register for temporary calculation (e.g. index type 529 * operations) use the read_* form. 530 * 531 * B1.2.1 Register mappings 532 * 533 * In instruction register encoding 31 can refer to ZR (zero register) or 534 * the SP (stack pointer) depending on context. In QEMU's case we map SP 535 * to cpu_X[31] and ZR accesses to a temporary which can be discarded. 536 * This is the point of the _sp forms. 537 */ 538 TCGv_i64 cpu_reg(DisasContext *s, int reg) 539 { 540 if (reg == 31) { 541 TCGv_i64 t = tcg_temp_new_i64(); 542 tcg_gen_movi_i64(t, 0); 543 return t; 544 } else { 545 return cpu_X[reg]; 546 } 547 } 548 549 /* register access for when 31 == SP */ 550 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg) 551 { 552 return cpu_X[reg]; 553 } 554 555 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64 556 * representing the register contents. This TCGv is an auto-freed 557 * temporary so it need not be explicitly freed, and may be modified. 558 */ 559 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf) 560 { 561 TCGv_i64 v = tcg_temp_new_i64(); 562 if (reg != 31) { 563 if (sf) { 564 tcg_gen_mov_i64(v, cpu_X[reg]); 565 } else { 566 tcg_gen_ext32u_i64(v, cpu_X[reg]); 567 } 568 } else { 569 tcg_gen_movi_i64(v, 0); 570 } 571 return v; 572 } 573 574 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf) 575 { 576 TCGv_i64 v = tcg_temp_new_i64(); 577 if (sf) { 578 tcg_gen_mov_i64(v, cpu_X[reg]); 579 } else { 580 tcg_gen_ext32u_i64(v, cpu_X[reg]); 581 } 582 return v; 583 } 584 585 /* Return the offset into CPUARMState of a slice (from 586 * the least significant end) of FP register Qn (ie 587 * Dn, Sn, Hn or Bn). 
588 * (Note that this is not the same mapping as for A32; see cpu.h) 589 */ 590 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size) 591 { 592 return vec_reg_offset(s, regno, 0, size); 593 } 594 595 /* Offset of the high half of the 128 bit vector Qn */ 596 static inline int fp_reg_hi_offset(DisasContext *s, int regno) 597 { 598 return vec_reg_offset(s, regno, 1, MO_64); 599 } 600 601 /* Convenience accessors for reading and writing single and double 602 * FP registers. Writing clears the upper parts of the associated 603 * 128 bit vector register, as required by the architecture. 604 * Note that unlike the GP register accessors, the values returned 605 * by the read functions must be manually freed. 606 */ 607 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg) 608 { 609 TCGv_i64 v = tcg_temp_new_i64(); 610 611 tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64)); 612 return v; 613 } 614 615 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg) 616 { 617 TCGv_i32 v = tcg_temp_new_i32(); 618 619 tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); 620 return v; 621 } 622 623 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) 624 { 625 TCGv_i32 v = tcg_temp_new_i32(); 626 627 tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); 628 return v; 629 } 630 631 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64). 632 * If SVE is not enabled, then there are only 128 bits in the vector. 633 */ 634 static void clear_vec_high(DisasContext *s, bool is_q, int rd) 635 { 636 unsigned ofs = fp_reg_offset(s, rd, MO_64); 637 unsigned vsz = vec_full_reg_size(s); 638 639 /* Nop move, with side effect of clearing the tail. */ 640 tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz); 641 } 642 643 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v) 644 { 645 unsigned ofs = fp_reg_offset(s, reg, MO_64); 646 647 tcg_gen_st_i64(v, tcg_env, ofs); 648 clear_vec_high(s, false, reg); 649 } 650 651 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) 652 { 653 TCGv_i64 tmp = tcg_temp_new_i64(); 654 655 tcg_gen_extu_i32_i64(tmp, v); 656 write_fp_dreg(s, reg, tmp); 657 } 658 659 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ 660 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, 661 GVecGen2Fn *gvec_fn, int vece) 662 { 663 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 664 is_q ? 16 : 8, vec_full_reg_size(s)); 665 } 666 667 /* Expand a 2-operand + immediate AdvSIMD vector operation using 668 * an expander function. 669 */ 670 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn, 671 int64_t imm, GVecGen2iFn *gvec_fn, int vece) 672 { 673 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 674 imm, is_q ? 16 : 8, vec_full_reg_size(s)); 675 } 676 677 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */ 678 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm, 679 GVecGen3Fn *gvec_fn, int vece) 680 { 681 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 682 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s)); 683 } 684 685 /* Expand a 4-operand AdvSIMD vector operation using an expander function. 
*/ 686 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm, 687 int rx, GVecGen4Fn *gvec_fn, int vece) 688 { 689 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 690 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx), 691 is_q ? 16 : 8, vec_full_reg_size(s)); 692 } 693 694 /* Expand a 2-operand operation using an out-of-line helper. */ 695 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd, 696 int rn, int data, gen_helper_gvec_2 *fn) 697 { 698 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 699 vec_full_reg_offset(s, rn), 700 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 701 } 702 703 /* Expand a 3-operand operation using an out-of-line helper. */ 704 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, 705 int rn, int rm, int data, gen_helper_gvec_3 *fn) 706 { 707 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 708 vec_full_reg_offset(s, rn), 709 vec_full_reg_offset(s, rm), 710 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 711 } 712 713 /* Expand a 3-operand + fpstatus pointer + simd data value operation using 714 * an out-of-line helper. 715 */ 716 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, 717 int rm, bool is_fp16, int data, 718 gen_helper_gvec_3_ptr *fn) 719 { 720 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 721 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 722 vec_full_reg_offset(s, rn), 723 vec_full_reg_offset(s, rm), fpst, 724 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 725 } 726 727 /* Expand a 3-operand + qc + operation using an out-of-line helper. */ 728 static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn, 729 int rm, gen_helper_gvec_3_ptr *fn) 730 { 731 TCGv_ptr qc_ptr = tcg_temp_new_ptr(); 732 733 tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); 734 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 735 vec_full_reg_offset(s, rn), 736 vec_full_reg_offset(s, rm), qc_ptr, 737 is_q ? 16 : 8, vec_full_reg_size(s), 0, fn); 738 } 739 740 /* Expand a 4-operand operation using an out-of-line helper. */ 741 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn, 742 int rm, int ra, int data, gen_helper_gvec_4 *fn) 743 { 744 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 745 vec_full_reg_offset(s, rn), 746 vec_full_reg_offset(s, rm), 747 vec_full_reg_offset(s, ra), 748 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 749 } 750 751 /* 752 * Expand a 4-operand + fpstatus pointer + simd data value operation using 753 * an out-of-line helper. 754 */ 755 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, 756 int rm, int ra, bool is_fp16, int data, 757 gen_helper_gvec_4_ptr *fn) 758 { 759 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 760 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 761 vec_full_reg_offset(s, rn), 762 vec_full_reg_offset(s, rm), 763 vec_full_reg_offset(s, ra), fpst, 764 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 765 } 766 767 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier 768 * than the 32 bit equivalent. 769 */ 770 static inline void gen_set_NZ64(TCGv_i64 result) 771 { 772 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result); 773 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF); 774 } 775 776 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. 
*/ 777 static inline void gen_logic_CC(int sf, TCGv_i64 result) 778 { 779 if (sf) { 780 gen_set_NZ64(result); 781 } else { 782 tcg_gen_extrl_i64_i32(cpu_ZF, result); 783 tcg_gen_mov_i32(cpu_NF, cpu_ZF); 784 } 785 tcg_gen_movi_i32(cpu_CF, 0); 786 tcg_gen_movi_i32(cpu_VF, 0); 787 } 788 789 /* dest = T0 + T1; compute C, N, V and Z flags */ 790 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 791 { 792 TCGv_i64 result, flag, tmp; 793 result = tcg_temp_new_i64(); 794 flag = tcg_temp_new_i64(); 795 tmp = tcg_temp_new_i64(); 796 797 tcg_gen_movi_i64(tmp, 0); 798 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp); 799 800 tcg_gen_extrl_i64_i32(cpu_CF, flag); 801 802 gen_set_NZ64(result); 803 804 tcg_gen_xor_i64(flag, result, t0); 805 tcg_gen_xor_i64(tmp, t0, t1); 806 tcg_gen_andc_i64(flag, flag, tmp); 807 tcg_gen_extrh_i64_i32(cpu_VF, flag); 808 809 tcg_gen_mov_i64(dest, result); 810 } 811 812 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 813 { 814 TCGv_i32 t0_32 = tcg_temp_new_i32(); 815 TCGv_i32 t1_32 = tcg_temp_new_i32(); 816 TCGv_i32 tmp = tcg_temp_new_i32(); 817 818 tcg_gen_movi_i32(tmp, 0); 819 tcg_gen_extrl_i64_i32(t0_32, t0); 820 tcg_gen_extrl_i64_i32(t1_32, t1); 821 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp); 822 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 823 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 824 tcg_gen_xor_i32(tmp, t0_32, t1_32); 825 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 826 tcg_gen_extu_i32_i64(dest, cpu_NF); 827 } 828 829 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 830 { 831 if (sf) { 832 gen_add64_CC(dest, t0, t1); 833 } else { 834 gen_add32_CC(dest, t0, t1); 835 } 836 } 837 838 /* dest = T0 - T1; compute C, N, V and Z flags */ 839 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 840 { 841 /* 64 bit arithmetic */ 842 TCGv_i64 result, flag, tmp; 843 844 result = tcg_temp_new_i64(); 845 flag = tcg_temp_new_i64(); 846 tcg_gen_sub_i64(result, t0, t1); 847 848 gen_set_NZ64(result); 849 850 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1); 851 tcg_gen_extrl_i64_i32(cpu_CF, flag); 852 853 tcg_gen_xor_i64(flag, result, t0); 854 tmp = tcg_temp_new_i64(); 855 tcg_gen_xor_i64(tmp, t0, t1); 856 tcg_gen_and_i64(flag, flag, tmp); 857 tcg_gen_extrh_i64_i32(cpu_VF, flag); 858 tcg_gen_mov_i64(dest, result); 859 } 860 861 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 862 { 863 /* 32 bit arithmetic */ 864 TCGv_i32 t0_32 = tcg_temp_new_i32(); 865 TCGv_i32 t1_32 = tcg_temp_new_i32(); 866 TCGv_i32 tmp; 867 868 tcg_gen_extrl_i64_i32(t0_32, t0); 869 tcg_gen_extrl_i64_i32(t1_32, t1); 870 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32); 871 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 872 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32); 873 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 874 tmp = tcg_temp_new_i32(); 875 tcg_gen_xor_i32(tmp, t0_32, t1_32); 876 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp); 877 tcg_gen_extu_i32_i64(dest, cpu_NF); 878 } 879 880 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 881 { 882 if (sf) { 883 gen_sub64_CC(dest, t0, t1); 884 } else { 885 gen_sub32_CC(dest, t0, t1); 886 } 887 } 888 889 /* dest = T0 + T1 + CF; do not compute flags. 
*/ 890 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 891 { 892 TCGv_i64 flag = tcg_temp_new_i64(); 893 tcg_gen_extu_i32_i64(flag, cpu_CF); 894 tcg_gen_add_i64(dest, t0, t1); 895 tcg_gen_add_i64(dest, dest, flag); 896 897 if (!sf) { 898 tcg_gen_ext32u_i64(dest, dest); 899 } 900 } 901 902 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */ 903 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 904 { 905 if (sf) { 906 TCGv_i64 result = tcg_temp_new_i64(); 907 TCGv_i64 cf_64 = tcg_temp_new_i64(); 908 TCGv_i64 vf_64 = tcg_temp_new_i64(); 909 TCGv_i64 tmp = tcg_temp_new_i64(); 910 TCGv_i64 zero = tcg_constant_i64(0); 911 912 tcg_gen_extu_i32_i64(cf_64, cpu_CF); 913 tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero); 914 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero); 915 tcg_gen_extrl_i64_i32(cpu_CF, cf_64); 916 gen_set_NZ64(result); 917 918 tcg_gen_xor_i64(vf_64, result, t0); 919 tcg_gen_xor_i64(tmp, t0, t1); 920 tcg_gen_andc_i64(vf_64, vf_64, tmp); 921 tcg_gen_extrh_i64_i32(cpu_VF, vf_64); 922 923 tcg_gen_mov_i64(dest, result); 924 } else { 925 TCGv_i32 t0_32 = tcg_temp_new_i32(); 926 TCGv_i32 t1_32 = tcg_temp_new_i32(); 927 TCGv_i32 tmp = tcg_temp_new_i32(); 928 TCGv_i32 zero = tcg_constant_i32(0); 929 930 tcg_gen_extrl_i64_i32(t0_32, t0); 931 tcg_gen_extrl_i64_i32(t1_32, t1); 932 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero); 933 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero); 934 935 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 936 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 937 tcg_gen_xor_i32(tmp, t0_32, t1_32); 938 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 939 tcg_gen_extu_i32_i64(dest, cpu_NF); 940 } 941 } 942 943 /* 944 * Load/Store generators 945 */ 946 947 /* 948 * Store from GPR register to memory. 
949 */ 950 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, 951 TCGv_i64 tcg_addr, MemOp memop, int memidx, 952 bool iss_valid, 953 unsigned int iss_srt, 954 bool iss_sf, bool iss_ar) 955 { 956 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop); 957 958 if (iss_valid) { 959 uint32_t syn; 960 961 syn = syn_data_abort_with_iss(0, 962 (memop & MO_SIZE), 963 false, 964 iss_srt, 965 iss_sf, 966 iss_ar, 967 0, 0, 0, 0, 0, false); 968 disas_set_insn_syndrome(s, syn); 969 } 970 } 971 972 static void do_gpr_st(DisasContext *s, TCGv_i64 source, 973 TCGv_i64 tcg_addr, MemOp memop, 974 bool iss_valid, 975 unsigned int iss_srt, 976 bool iss_sf, bool iss_ar) 977 { 978 do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s), 979 iss_valid, iss_srt, iss_sf, iss_ar); 980 } 981 982 /* 983 * Load from memory to GPR register 984 */ 985 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 986 MemOp memop, bool extend, int memidx, 987 bool iss_valid, unsigned int iss_srt, 988 bool iss_sf, bool iss_ar) 989 { 990 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop); 991 992 if (extend && (memop & MO_SIGN)) { 993 g_assert((memop & MO_SIZE) <= MO_32); 994 tcg_gen_ext32u_i64(dest, dest); 995 } 996 997 if (iss_valid) { 998 uint32_t syn; 999 1000 syn = syn_data_abort_with_iss(0, 1001 (memop & MO_SIZE), 1002 (memop & MO_SIGN) != 0, 1003 iss_srt, 1004 iss_sf, 1005 iss_ar, 1006 0, 0, 0, 0, 0, false); 1007 disas_set_insn_syndrome(s, syn); 1008 } 1009 } 1010 1011 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 1012 MemOp memop, bool extend, 1013 bool iss_valid, unsigned int iss_srt, 1014 bool iss_sf, bool iss_ar) 1015 { 1016 do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s), 1017 iss_valid, iss_srt, iss_sf, iss_ar); 1018 } 1019 1020 /* 1021 * Store from FP register to memory 1022 */ 1023 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop) 1024 { 1025 /* This writes the bottom N bits of a 128 bit wide vector to memory */ 1026 TCGv_i64 tmplo = tcg_temp_new_i64(); 1027 1028 tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64)); 1029 1030 if ((mop & MO_SIZE) < MO_128) { 1031 tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1032 } else { 1033 TCGv_i64 tmphi = tcg_temp_new_i64(); 1034 TCGv_i128 t16 = tcg_temp_new_i128(); 1035 1036 tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx)); 1037 tcg_gen_concat_i64_i128(t16, tmplo, tmphi); 1038 1039 tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop); 1040 } 1041 } 1042 1043 /* 1044 * Load from memory to FP register 1045 */ 1046 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop) 1047 { 1048 /* This always zero-extends and writes to a full 128 bit wide vector */ 1049 TCGv_i64 tmplo = tcg_temp_new_i64(); 1050 TCGv_i64 tmphi = NULL; 1051 1052 if ((mop & MO_SIZE) < MO_128) { 1053 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1054 } else { 1055 TCGv_i128 t16 = tcg_temp_new_i128(); 1056 1057 tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop); 1058 1059 tmphi = tcg_temp_new_i64(); 1060 tcg_gen_extr_i128_i64(tmplo, tmphi, t16); 1061 } 1062 1063 tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64)); 1064 1065 if (tmphi) { 1066 tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx)); 1067 } 1068 clear_vec_high(s, tmphi != NULL, destidx); 1069 } 1070 1071 /* 1072 * Vector load/store helpers. 
1073 * 1074 * The principal difference between this and a FP load is that we don't 1075 * zero extend as we are filling a partial chunk of the vector register. 1076 * These functions don't support 128 bit loads/stores, which would be 1077 * normal load/store operations. 1078 * 1079 * The _i32 versions are useful when operating on 32 bit quantities 1080 * (eg for floating point single or using Neon helper functions). 1081 */ 1082 1083 /* Get value of an element within a vector register */ 1084 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx, 1085 int element, MemOp memop) 1086 { 1087 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1088 switch ((unsigned)memop) { 1089 case MO_8: 1090 tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off); 1091 break; 1092 case MO_16: 1093 tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off); 1094 break; 1095 case MO_32: 1096 tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off); 1097 break; 1098 case MO_8|MO_SIGN: 1099 tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off); 1100 break; 1101 case MO_16|MO_SIGN: 1102 tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off); 1103 break; 1104 case MO_32|MO_SIGN: 1105 tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off); 1106 break; 1107 case MO_64: 1108 case MO_64|MO_SIGN: 1109 tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off); 1110 break; 1111 default: 1112 g_assert_not_reached(); 1113 } 1114 } 1115 1116 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx, 1117 int element, MemOp memop) 1118 { 1119 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1120 switch (memop) { 1121 case MO_8: 1122 tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off); 1123 break; 1124 case MO_16: 1125 tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off); 1126 break; 1127 case MO_8|MO_SIGN: 1128 tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off); 1129 break; 1130 case MO_16|MO_SIGN: 1131 tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off); 1132 break; 1133 case MO_32: 1134 case MO_32|MO_SIGN: 1135 tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off); 1136 break; 1137 default: 1138 g_assert_not_reached(); 1139 } 1140 } 1141 1142 /* Set value of an element within a vector register */ 1143 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx, 1144 int element, MemOp memop) 1145 { 1146 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1147 switch (memop) { 1148 case MO_8: 1149 tcg_gen_st8_i64(tcg_src, tcg_env, vect_off); 1150 break; 1151 case MO_16: 1152 tcg_gen_st16_i64(tcg_src, tcg_env, vect_off); 1153 break; 1154 case MO_32: 1155 tcg_gen_st32_i64(tcg_src, tcg_env, vect_off); 1156 break; 1157 case MO_64: 1158 tcg_gen_st_i64(tcg_src, tcg_env, vect_off); 1159 break; 1160 default: 1161 g_assert_not_reached(); 1162 } 1163 } 1164 1165 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, 1166 int destidx, int element, MemOp memop) 1167 { 1168 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1169 switch (memop) { 1170 case MO_8: 1171 tcg_gen_st8_i32(tcg_src, tcg_env, vect_off); 1172 break; 1173 case MO_16: 1174 tcg_gen_st16_i32(tcg_src, tcg_env, vect_off); 1175 break; 1176 case MO_32: 1177 tcg_gen_st_i32(tcg_src, tcg_env, vect_off); 1178 break; 1179 default: 1180 g_assert_not_reached(); 1181 } 1182 } 1183 1184 /* Store from vector register to memory */ 1185 static void do_vec_st(DisasContext *s, int srcidx, int element, 1186 TCGv_i64 tcg_addr, MemOp mop) 1187 { 1188 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1189 1190 read_vec_element(s, tcg_tmp, srcidx, element, 
mop & MO_SIZE); 1191 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1192 } 1193 1194 /* Load from memory to vector register */ 1195 static void do_vec_ld(DisasContext *s, int destidx, int element, 1196 TCGv_i64 tcg_addr, MemOp mop) 1197 { 1198 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1199 1200 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1201 write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE); 1202 } 1203 1204 /* Check that FP/Neon access is enabled. If it is, return 1205 * true. If not, emit code to generate an appropriate exception, 1206 * and return false; the caller should not emit any code for 1207 * the instruction. Note that this check must happen after all 1208 * unallocated-encoding checks (otherwise the syndrome information 1209 * for the resulting exception will be incorrect). 1210 */ 1211 static bool fp_access_check_only(DisasContext *s) 1212 { 1213 if (s->fp_excp_el) { 1214 assert(!s->fp_access_checked); 1215 s->fp_access_checked = true; 1216 1217 gen_exception_insn_el(s, 0, EXCP_UDEF, 1218 syn_fp_access_trap(1, 0xe, false, 0), 1219 s->fp_excp_el); 1220 return false; 1221 } 1222 s->fp_access_checked = true; 1223 return true; 1224 } 1225 1226 static bool fp_access_check(DisasContext *s) 1227 { 1228 if (!fp_access_check_only(s)) { 1229 return false; 1230 } 1231 if (s->sme_trap_nonstreaming && s->is_nonstreaming) { 1232 gen_exception_insn(s, 0, EXCP_UDEF, 1233 syn_smetrap(SME_ET_Streaming, false)); 1234 return false; 1235 } 1236 return true; 1237 } 1238 1239 /* 1240 * Check that SVE access is enabled. If it is, return true. 1241 * If not, emit code to generate an appropriate exception and return false. 1242 * This function corresponds to CheckSVEEnabled(). 1243 */ 1244 bool sve_access_check(DisasContext *s) 1245 { 1246 if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) { 1247 assert(dc_isar_feature(aa64_sme, s)); 1248 if (!sme_sm_enabled_check(s)) { 1249 goto fail_exit; 1250 } 1251 } else if (s->sve_excp_el) { 1252 gen_exception_insn_el(s, 0, EXCP_UDEF, 1253 syn_sve_access_trap(), s->sve_excp_el); 1254 goto fail_exit; 1255 } 1256 s->sve_access_checked = true; 1257 return fp_access_check(s); 1258 1259 fail_exit: 1260 /* Assert that we only raise one exception per instruction. */ 1261 assert(!s->sve_access_checked); 1262 s->sve_access_checked = true; 1263 return false; 1264 } 1265 1266 /* 1267 * Check that SME access is enabled, raise an exception if not. 1268 * Note that this function corresponds to CheckSMEAccess and is 1269 * only used directly for cpregs. 1270 */ 1271 static bool sme_access_check(DisasContext *s) 1272 { 1273 if (s->sme_excp_el) { 1274 gen_exception_insn_el(s, 0, EXCP_UDEF, 1275 syn_smetrap(SME_ET_AccessTrap, false), 1276 s->sme_excp_el); 1277 return false; 1278 } 1279 return true; 1280 } 1281 1282 /* This function corresponds to CheckSMEEnabled. */ 1283 bool sme_enabled_check(DisasContext *s) 1284 { 1285 /* 1286 * Note that unlike sve_excp_el, we have not constrained sme_excp_el 1287 * to be zero when fp_excp_el has priority. This is because we need 1288 * sme_excp_el by itself for cpregs access checks. 1289 */ 1290 if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) { 1291 s->fp_access_checked = true; 1292 return sme_access_check(s); 1293 } 1294 return fp_access_check_only(s); 1295 } 1296 1297 /* Common subroutine for CheckSMEAnd*Enabled. 
*/ 1298 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req) 1299 { 1300 if (!sme_enabled_check(s)) { 1301 return false; 1302 } 1303 if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) { 1304 gen_exception_insn(s, 0, EXCP_UDEF, 1305 syn_smetrap(SME_ET_NotStreaming, false)); 1306 return false; 1307 } 1308 if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) { 1309 gen_exception_insn(s, 0, EXCP_UDEF, 1310 syn_smetrap(SME_ET_InactiveZA, false)); 1311 return false; 1312 } 1313 return true; 1314 } 1315 1316 /* 1317 * This utility function is for doing register extension with an 1318 * optional shift. You will likely want to pass a temporary for the 1319 * destination register. See DecodeRegExtend() in the ARM ARM. 1320 */ 1321 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in, 1322 int option, unsigned int shift) 1323 { 1324 int extsize = extract32(option, 0, 2); 1325 bool is_signed = extract32(option, 2, 1); 1326 1327 tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0)); 1328 tcg_gen_shli_i64(tcg_out, tcg_out, shift); 1329 } 1330 1331 static inline void gen_check_sp_alignment(DisasContext *s) 1332 { 1333 /* The AArch64 architecture mandates that (if enabled via PSTATE 1334 * or SCTLR bits) there is a check that SP is 16-aligned on every 1335 * SP-relative load or store (with an exception generated if it is not). 1336 * In line with general QEMU practice regarding misaligned accesses, 1337 * we omit these checks for the sake of guest program performance. 1338 * This function is provided as a hook so we can more easily add these 1339 * checks in future (possibly as a "favour catching guest program bugs 1340 * over speed" user selectable option). 1341 */ 1342 } 1343 1344 /* 1345 * This provides a simple table based table lookup decoder. It is 1346 * intended to be used when the relevant bits for decode are too 1347 * awkwardly placed and switch/if based logic would be confusing and 1348 * deeply nested. Since it's a linear search through the table, tables 1349 * should be kept small. 1350 * 1351 * It returns the first handler where insn & mask == pattern, or 1352 * NULL if there is no match. 1353 * The table is terminated by an empty mask (i.e. 0) 1354 */ 1355 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table, 1356 uint32_t insn) 1357 { 1358 const AArch64DecodeTable *tptr = table; 1359 1360 while (tptr->mask) { 1361 if ((insn & tptr->mask) == tptr->pattern) { 1362 return tptr->disas_fn; 1363 } 1364 tptr++; 1365 } 1366 return NULL; 1367 } 1368 1369 /* 1370 * The instruction disassembly implemented here matches 1371 * the instruction encoding classifications in chapter C4 1372 * of the ARM Architecture Reference Manual (DDI0487B_a); 1373 * classification names and decode diagrams here should generally 1374 * match up with those in the manual. 1375 */ 1376 1377 static bool trans_B(DisasContext *s, arg_i *a) 1378 { 1379 reset_btype(s); 1380 gen_goto_tb(s, 0, a->imm); 1381 return true; 1382 } 1383 1384 static bool trans_BL(DisasContext *s, arg_i *a) 1385 { 1386 gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s)); 1387 reset_btype(s); 1388 gen_goto_tb(s, 0, a->imm); 1389 return true; 1390 } 1391 1392 1393 static bool trans_CBZ(DisasContext *s, arg_cbz *a) 1394 { 1395 DisasLabel match; 1396 TCGv_i64 tcg_cmp; 1397 1398 tcg_cmp = read_cpu_reg(s, a->rt, a->sf); 1399 reset_btype(s); 1400 1401 match = gen_disas_label(s); 1402 tcg_gen_brcondi_i64(a->nz ? 
TCG_COND_NE : TCG_COND_EQ, 1403 tcg_cmp, 0, match.label); 1404 gen_goto_tb(s, 0, 4); 1405 set_disas_label(s, match); 1406 gen_goto_tb(s, 1, a->imm); 1407 return true; 1408 } 1409 1410 static bool trans_TBZ(DisasContext *s, arg_tbz *a) 1411 { 1412 DisasLabel match; 1413 TCGv_i64 tcg_cmp; 1414 1415 tcg_cmp = tcg_temp_new_i64(); 1416 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos); 1417 1418 reset_btype(s); 1419 1420 match = gen_disas_label(s); 1421 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1422 tcg_cmp, 0, match.label); 1423 gen_goto_tb(s, 0, 4); 1424 set_disas_label(s, match); 1425 gen_goto_tb(s, 1, a->imm); 1426 return true; 1427 } 1428 1429 static bool trans_B_cond(DisasContext *s, arg_B_cond *a) 1430 { 1431 /* BC.cond is only present with FEAT_HBC */ 1432 if (a->c && !dc_isar_feature(aa64_hbc, s)) { 1433 return false; 1434 } 1435 reset_btype(s); 1436 if (a->cond < 0x0e) { 1437 /* genuinely conditional branches */ 1438 DisasLabel match = gen_disas_label(s); 1439 arm_gen_test_cc(a->cond, match.label); 1440 gen_goto_tb(s, 0, 4); 1441 set_disas_label(s, match); 1442 gen_goto_tb(s, 1, a->imm); 1443 } else { 1444 /* 0xe and 0xf are both "always" conditions */ 1445 gen_goto_tb(s, 0, a->imm); 1446 } 1447 return true; 1448 } 1449 1450 static void set_btype_for_br(DisasContext *s, int rn) 1451 { 1452 if (dc_isar_feature(aa64_bti, s)) { 1453 /* BR to {x16,x17} or !guard -> 1, else 3. */ 1454 set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3); 1455 } 1456 } 1457 1458 static void set_btype_for_blr(DisasContext *s) 1459 { 1460 if (dc_isar_feature(aa64_bti, s)) { 1461 /* BLR sets BTYPE to 2, regardless of source guarded page. */ 1462 set_btype(s, 2); 1463 } 1464 } 1465 1466 static bool trans_BR(DisasContext *s, arg_r *a) 1467 { 1468 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1469 set_btype_for_br(s, a->rn); 1470 s->base.is_jmp = DISAS_JUMP; 1471 return true; 1472 } 1473 1474 static bool trans_BLR(DisasContext *s, arg_r *a) 1475 { 1476 TCGv_i64 dst = cpu_reg(s, a->rn); 1477 TCGv_i64 lr = cpu_reg(s, 30); 1478 if (dst == lr) { 1479 TCGv_i64 tmp = tcg_temp_new_i64(); 1480 tcg_gen_mov_i64(tmp, dst); 1481 dst = tmp; 1482 } 1483 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1484 gen_a64_set_pc(s, dst); 1485 set_btype_for_blr(s); 1486 s->base.is_jmp = DISAS_JUMP; 1487 return true; 1488 } 1489 1490 static bool trans_RET(DisasContext *s, arg_r *a) 1491 { 1492 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1493 s->base.is_jmp = DISAS_JUMP; 1494 return true; 1495 } 1496 1497 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst, 1498 TCGv_i64 modifier, bool use_key_a) 1499 { 1500 TCGv_i64 truedst; 1501 /* 1502 * Return the branch target for a BRAA/RETA/etc, which is either 1503 * just the destination dst, or that value with the pauth check 1504 * done and the code removed from the high bits. 
1505 */ 1506 if (!s->pauth_active) { 1507 return dst; 1508 } 1509 1510 truedst = tcg_temp_new_i64(); 1511 if (use_key_a) { 1512 gen_helper_autia_combined(truedst, tcg_env, dst, modifier); 1513 } else { 1514 gen_helper_autib_combined(truedst, tcg_env, dst, modifier); 1515 } 1516 return truedst; 1517 } 1518 1519 static bool trans_BRAZ(DisasContext *s, arg_braz *a) 1520 { 1521 TCGv_i64 dst; 1522 1523 if (!dc_isar_feature(aa64_pauth, s)) { 1524 return false; 1525 } 1526 1527 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1528 gen_a64_set_pc(s, dst); 1529 set_btype_for_br(s, a->rn); 1530 s->base.is_jmp = DISAS_JUMP; 1531 return true; 1532 } 1533 1534 static bool trans_BLRAZ(DisasContext *s, arg_braz *a) 1535 { 1536 TCGv_i64 dst, lr; 1537 1538 if (!dc_isar_feature(aa64_pauth, s)) { 1539 return false; 1540 } 1541 1542 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1543 lr = cpu_reg(s, 30); 1544 if (dst == lr) { 1545 TCGv_i64 tmp = tcg_temp_new_i64(); 1546 tcg_gen_mov_i64(tmp, dst); 1547 dst = tmp; 1548 } 1549 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1550 gen_a64_set_pc(s, dst); 1551 set_btype_for_blr(s); 1552 s->base.is_jmp = DISAS_JUMP; 1553 return true; 1554 } 1555 1556 static bool trans_RETA(DisasContext *s, arg_reta *a) 1557 { 1558 TCGv_i64 dst; 1559 1560 dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m); 1561 gen_a64_set_pc(s, dst); 1562 s->base.is_jmp = DISAS_JUMP; 1563 return true; 1564 } 1565 1566 static bool trans_BRA(DisasContext *s, arg_bra *a) 1567 { 1568 TCGv_i64 dst; 1569 1570 if (!dc_isar_feature(aa64_pauth, s)) { 1571 return false; 1572 } 1573 dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m); 1574 gen_a64_set_pc(s, dst); 1575 set_btype_for_br(s, a->rn); 1576 s->base.is_jmp = DISAS_JUMP; 1577 return true; 1578 } 1579 1580 static bool trans_BLRA(DisasContext *s, arg_bra *a) 1581 { 1582 TCGv_i64 dst, lr; 1583 1584 if (!dc_isar_feature(aa64_pauth, s)) { 1585 return false; 1586 } 1587 dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m); 1588 lr = cpu_reg(s, 30); 1589 if (dst == lr) { 1590 TCGv_i64 tmp = tcg_temp_new_i64(); 1591 tcg_gen_mov_i64(tmp, dst); 1592 dst = tmp; 1593 } 1594 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1595 gen_a64_set_pc(s, dst); 1596 set_btype_for_blr(s); 1597 s->base.is_jmp = DISAS_JUMP; 1598 return true; 1599 } 1600 1601 static bool trans_ERET(DisasContext *s, arg_ERET *a) 1602 { 1603 TCGv_i64 dst; 1604 1605 if (s->current_el == 0) { 1606 return false; 1607 } 1608 if (s->fgt_eret) { 1609 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2); 1610 return true; 1611 } 1612 dst = tcg_temp_new_i64(); 1613 tcg_gen_ld_i64(dst, tcg_env, 1614 offsetof(CPUARMState, elr_el[s->current_el])); 1615 1616 translator_io_start(&s->base); 1617 1618 gen_helper_exception_return(tcg_env, dst); 1619 /* Must exit loop to check un-masked IRQs */ 1620 s->base.is_jmp = DISAS_EXIT; 1621 return true; 1622 } 1623 1624 static bool trans_ERETA(DisasContext *s, arg_reta *a) 1625 { 1626 TCGv_i64 dst; 1627 1628 if (!dc_isar_feature(aa64_pauth, s)) { 1629 return false; 1630 } 1631 if (s->current_el == 0) { 1632 return false; 1633 } 1634 /* The FGT trap takes precedence over an auth trap. */ 1635 if (s->fgt_eret) { 1636 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 
3 : 2), 2); 1637 return true; 1638 } 1639 dst = tcg_temp_new_i64(); 1640 tcg_gen_ld_i64(dst, tcg_env, 1641 offsetof(CPUARMState, elr_el[s->current_el])); 1642 1643 dst = auth_branch_target(s, dst, cpu_X[31], !a->m); 1644 1645 translator_io_start(&s->base); 1646 1647 gen_helper_exception_return(tcg_env, dst); 1648 /* Must exit loop to check un-masked IRQs */ 1649 s->base.is_jmp = DISAS_EXIT; 1650 return true; 1651 } 1652 1653 static bool trans_NOP(DisasContext *s, arg_NOP *a) 1654 { 1655 return true; 1656 } 1657 1658 static bool trans_YIELD(DisasContext *s, arg_YIELD *a) 1659 { 1660 /* 1661 * When running in MTTCG we don't generate jumps to the yield and 1662 * WFE helpers as it won't affect the scheduling of other vCPUs. 1663 * If we wanted to more completely model WFE/SEV so we don't busy 1664 * spin unnecessarily we would need to do something more involved. 1665 */ 1666 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1667 s->base.is_jmp = DISAS_YIELD; 1668 } 1669 return true; 1670 } 1671 1672 static bool trans_WFI(DisasContext *s, arg_WFI *a) 1673 { 1674 s->base.is_jmp = DISAS_WFI; 1675 return true; 1676 } 1677 1678 static bool trans_WFE(DisasContext *s, arg_WFI *a) 1679 { 1680 /* 1681 * When running in MTTCG we don't generate jumps to the yield and 1682 * WFE helpers as it won't affect the scheduling of other vCPUs. 1683 * If we wanted to more completely model WFE/SEV so we don't busy 1684 * spin unnecessarily we would need to do something more involved. 1685 */ 1686 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1687 s->base.is_jmp = DISAS_WFE; 1688 } 1689 return true; 1690 } 1691 1692 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) 1693 { 1694 if (s->pauth_active) { 1695 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]); 1696 } 1697 return true; 1698 } 1699 1700 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) 1701 { 1702 if (s->pauth_active) { 1703 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1704 } 1705 return true; 1706 } 1707 1708 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) 1709 { 1710 if (s->pauth_active) { 1711 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1712 } 1713 return true; 1714 } 1715 1716 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) 1717 { 1718 if (s->pauth_active) { 1719 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1720 } 1721 return true; 1722 } 1723 1724 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) 1725 { 1726 if (s->pauth_active) { 1727 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1728 } 1729 return true; 1730 } 1731 1732 static bool trans_ESB(DisasContext *s, arg_ESB *a) 1733 { 1734 /* Without RAS, we must implement this as NOP. */ 1735 if (dc_isar_feature(aa64_ras, s)) { 1736 /* 1737 * QEMU does not have a source of physical SErrors, 1738 * so we are only concerned with virtual SErrors. 1739 * The pseudocode in the ARM for this case is 1740 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then 1741 * AArch64.vESBOperation(); 1742 * Most of the condition can be evaluated at translation time. 1743 * Test for EL2 present, and defer test for SEL2 to runtime. 
1744 */ 1745 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { 1746 gen_helper_vesb(tcg_env); 1747 } 1748 } 1749 return true; 1750 } 1751 1752 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) 1753 { 1754 if (s->pauth_active) { 1755 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1756 } 1757 return true; 1758 } 1759 1760 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) 1761 { 1762 if (s->pauth_active) { 1763 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1764 } 1765 return true; 1766 } 1767 1768 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) 1769 { 1770 if (s->pauth_active) { 1771 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1772 } 1773 return true; 1774 } 1775 1776 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) 1777 { 1778 if (s->pauth_active) { 1779 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1780 } 1781 return true; 1782 } 1783 1784 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) 1785 { 1786 if (s->pauth_active) { 1787 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1788 } 1789 return true; 1790 } 1791 1792 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) 1793 { 1794 if (s->pauth_active) { 1795 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1796 } 1797 return true; 1798 } 1799 1800 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) 1801 { 1802 if (s->pauth_active) { 1803 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1804 } 1805 return true; 1806 } 1807 1808 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) 1809 { 1810 if (s->pauth_active) { 1811 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1812 } 1813 return true; 1814 } 1815 1816 static bool trans_CLREX(DisasContext *s, arg_CLREX *a) 1817 { 1818 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 1819 return true; 1820 } 1821 1822 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) 1823 { 1824 /* We handle DSB and DMB the same way */ 1825 TCGBar bar; 1826 1827 switch (a->types) { 1828 case 1: /* MBReqTypes_Reads */ 1829 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; 1830 break; 1831 case 2: /* MBReqTypes_Writes */ 1832 bar = TCG_BAR_SC | TCG_MO_ST_ST; 1833 break; 1834 default: /* MBReqTypes_All */ 1835 bar = TCG_BAR_SC | TCG_MO_ALL; 1836 break; 1837 } 1838 tcg_gen_mb(bar); 1839 return true; 1840 } 1841 1842 static bool trans_ISB(DisasContext *s, arg_ISB *a) 1843 { 1844 /* 1845 * We need to break the TB after this insn to execute 1846 * self-modifying code correctly and also to take 1847 * any pending interrupts immediately. 1848 */ 1849 reset_btype(s); 1850 gen_goto_tb(s, 0, 4); 1851 return true; 1852 } 1853 1854 static bool trans_SB(DisasContext *s, arg_SB *a) 1855 { 1856 if (!dc_isar_feature(aa64_sb, s)) { 1857 return false; 1858 } 1859 /* 1860 * TODO: There is no speculation barrier opcode for TCG; 1861 * MB and end the TB instead. 
1862 */ 1863 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); 1864 gen_goto_tb(s, 0, 4); 1865 return true; 1866 } 1867 1868 static bool trans_CFINV(DisasContext *s, arg_CFINV *a) 1869 { 1870 if (!dc_isar_feature(aa64_condm_4, s)) { 1871 return false; 1872 } 1873 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); 1874 return true; 1875 } 1876 1877 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) 1878 { 1879 TCGv_i32 z; 1880 1881 if (!dc_isar_feature(aa64_condm_5, s)) { 1882 return false; 1883 } 1884 1885 z = tcg_temp_new_i32(); 1886 1887 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); 1888 1889 /* 1890 * (!C & !Z) << 31 1891 * (!(C | Z)) << 31 1892 * ~((C | Z) << 31) 1893 * ~-(C | Z) 1894 * (C | Z) - 1 1895 */ 1896 tcg_gen_or_i32(cpu_NF, cpu_CF, z); 1897 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1); 1898 1899 /* !(Z & C) */ 1900 tcg_gen_and_i32(cpu_ZF, z, cpu_CF); 1901 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1); 1902 1903 /* (!C & Z) << 31 -> -(Z & ~C) */ 1904 tcg_gen_andc_i32(cpu_VF, z, cpu_CF); 1905 tcg_gen_neg_i32(cpu_VF, cpu_VF); 1906 1907 /* C | Z */ 1908 tcg_gen_or_i32(cpu_CF, cpu_CF, z); 1909 1910 return true; 1911 } 1912 1913 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) 1914 { 1915 if (!dc_isar_feature(aa64_condm_5, s)) { 1916 return false; 1917 } 1918 1919 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */ 1920 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ 1921 1922 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ 1923 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF); 1924 1925 tcg_gen_movi_i32(cpu_NF, 0); 1926 tcg_gen_movi_i32(cpu_VF, 0); 1927 1928 return true; 1929 } 1930 1931 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) 1932 { 1933 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { 1934 return false; 1935 } 1936 if (a->imm & 1) { 1937 set_pstate_bits(PSTATE_UAO); 1938 } else { 1939 clear_pstate_bits(PSTATE_UAO); 1940 } 1941 gen_rebuild_hflags(s); 1942 s->base.is_jmp = DISAS_TOO_MANY; 1943 return true; 1944 } 1945 1946 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) 1947 { 1948 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { 1949 return false; 1950 } 1951 if (a->imm & 1) { 1952 set_pstate_bits(PSTATE_PAN); 1953 } else { 1954 clear_pstate_bits(PSTATE_PAN); 1955 } 1956 gen_rebuild_hflags(s); 1957 s->base.is_jmp = DISAS_TOO_MANY; 1958 return true; 1959 } 1960 1961 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) 1962 { 1963 if (s->current_el == 0) { 1964 return false; 1965 } 1966 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP)); 1967 s->base.is_jmp = DISAS_TOO_MANY; 1968 return true; 1969 } 1970 1971 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) 1972 { 1973 if (!dc_isar_feature(aa64_ssbs, s)) { 1974 return false; 1975 } 1976 if (a->imm & 1) { 1977 set_pstate_bits(PSTATE_SSBS); 1978 } else { 1979 clear_pstate_bits(PSTATE_SSBS); 1980 } 1981 /* Don't need to rebuild hflags since SSBS is a nop */ 1982 s->base.is_jmp = DISAS_TOO_MANY; 1983 return true; 1984 } 1985 1986 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) 1987 { 1988 if (!dc_isar_feature(aa64_dit, s)) { 1989 return false; 1990 } 1991 if (a->imm & 1) { 1992 set_pstate_bits(PSTATE_DIT); 1993 } else { 1994 clear_pstate_bits(PSTATE_DIT); 1995 } 1996 /* There's no need to rebuild hflags because DIT is a nop */ 1997 s->base.is_jmp = DISAS_TOO_MANY; 1998 return true; 1999 } 2000 2001 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a) 2002 { 2003 if (dc_isar_feature(aa64_mte, s)) { 2004 /* Full MTE is enabled -- set the TCO bit as directed. 
*/ 2005 if (a->imm & 1) { 2006 set_pstate_bits(PSTATE_TCO); 2007 } else { 2008 clear_pstate_bits(PSTATE_TCO); 2009 } 2010 gen_rebuild_hflags(s); 2011 /* Many factors, including TCO, go into MTE_ACTIVE. */ 2012 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2013 return true; 2014 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { 2015 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ 2016 return true; 2017 } else { 2018 /* Insn not present */ 2019 return false; 2020 } 2021 } 2022 2023 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) 2024 { 2025 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm)); 2026 s->base.is_jmp = DISAS_TOO_MANY; 2027 return true; 2028 } 2029 2030 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) 2031 { 2032 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm)); 2033 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2034 s->base.is_jmp = DISAS_UPDATE_EXIT; 2035 return true; 2036 } 2037 2038 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) 2039 { 2040 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { 2041 return false; 2042 } 2043 if (sme_access_check(s)) { 2044 int old = s->pstate_sm | (s->pstate_za << 1); 2045 int new = a->imm * 3; 2046 2047 if ((old ^ new) & a->mask) { 2048 /* At least one bit changes. */ 2049 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new), 2050 tcg_constant_i32(a->mask)); 2051 s->base.is_jmp = DISAS_TOO_MANY; 2052 } 2053 } 2054 return true; 2055 } 2056 2057 static void gen_get_nzcv(TCGv_i64 tcg_rt) 2058 { 2059 TCGv_i32 tmp = tcg_temp_new_i32(); 2060 TCGv_i32 nzcv = tcg_temp_new_i32(); 2061 2062 /* build bit 31, N */ 2063 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31)); 2064 /* build bit 30, Z */ 2065 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0); 2066 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1); 2067 /* build bit 29, C */ 2068 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1); 2069 /* build bit 28, V */ 2070 tcg_gen_shri_i32(tmp, cpu_VF, 31); 2071 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); 2072 /* generate result */ 2073 tcg_gen_extu_i32_i64(tcg_rt, nzcv); 2074 } 2075 2076 static void gen_set_nzcv(TCGv_i64 tcg_rt) 2077 { 2078 TCGv_i32 nzcv = tcg_temp_new_i32(); 2079 2080 /* take NZCV from R[t] */ 2081 tcg_gen_extrl_i64_i32(nzcv, tcg_rt); 2082 2083 /* bit 31, N */ 2084 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); 2085 /* bit 30, Z */ 2086 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30)); 2087 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0); 2088 /* bit 29, C */ 2089 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29)); 2090 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29); 2091 /* bit 28, V */ 2092 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); 2093 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); 2094 } 2095 2096 static void gen_sysreg_undef(DisasContext *s, bool isread, 2097 uint8_t op0, uint8_t op1, uint8_t op2, 2098 uint8_t crn, uint8_t crm, uint8_t rt) 2099 { 2100 /* 2101 * Generate code to emit an UNDEF with correct syndrome 2102 * information for a failed system register access. 2103 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases, 2104 * but if FEAT_IDST is implemented then read accesses to registers 2105 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP 2106 * syndrome. 
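 * (E.g., assuming FEAT_IDST, a read of an unallocated encoding in the
 * feature ID space -- roughly the op0 == 3, op1 == 0, CRn == 0 block --
 * is reported as EC_SYSTEMREGISTERTRAP rather than EC_UNCATEGORIZED.)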
2107 */ 2108 uint32_t syndrome; 2109 2110 if (isread && dc_isar_feature(aa64_ids, s) && 2111 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { 2112 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2113 } else { 2114 syndrome = syn_uncategorized(); 2115 } 2116 gen_exception_insn(s, 0, EXCP_UDEF, syndrome); 2117 } 2118 2119 /* MRS - move from system register 2120 * MSR (register) - move to system register 2121 * SYS 2122 * SYSL 2123 * These are all essentially the same insn in 'read' and 'write' 2124 * versions, with varying op0 fields. 2125 */ 2126 static void handle_sys(DisasContext *s, bool isread, 2127 unsigned int op0, unsigned int op1, unsigned int op2, 2128 unsigned int crn, unsigned int crm, unsigned int rt) 2129 { 2130 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2131 crn, crm, op0, op1, op2); 2132 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 2133 bool need_exit_tb = false; 2134 TCGv_ptr tcg_ri = NULL; 2135 TCGv_i64 tcg_rt; 2136 uint32_t syndrome; 2137 2138 if (crn == 11 || crn == 15) { 2139 /* 2140 * Check for TIDCP trap, which must take precedence over 2141 * the UNDEF for "no such register" etc. 2142 */ 2143 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2144 switch (s->current_el) { 2145 case 0: 2146 if (dc_isar_feature(aa64_tidcp1, s)) { 2147 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome)); 2148 } 2149 break; 2150 case 1: 2151 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome)); 2152 break; 2153 } 2154 } 2155 2156 if (!ri) { 2157 /* Unknown register; this might be a guest error or a QEMU 2158 * unimplemented feature. 2159 */ 2160 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " 2161 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", 2162 isread ? "read" : "write", op0, op1, crn, crm, op2); 2163 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2164 return; 2165 } 2166 2167 /* Check access permissions */ 2168 if (!cp_access_ok(s->current_el, ri, isread)) { 2169 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2170 return; 2171 } 2172 2173 if (ri->accessfn || (ri->fgt && s->fgt_active)) { 2174 /* Emit code to perform further access permissions checks at 2175 * runtime; this may result in an exception. 2176 */ 2177 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2178 gen_a64_update_pc(s, 0); 2179 tcg_ri = tcg_temp_new_ptr(); 2180 gen_helper_access_check_cp_reg(tcg_ri, tcg_env, 2181 tcg_constant_i32(key), 2182 tcg_constant_i32(syndrome), 2183 tcg_constant_i32(isread)); 2184 } else if (ri->type & ARM_CP_RAISES_EXC) { 2185 /* 2186 * The readfn or writefn might raise an exception; 2187 * synchronize the CPU state in case it does. 2188 */ 2189 gen_a64_update_pc(s, 0); 2190 } 2191 2192 /* Handle special cases first */ 2193 switch (ri->type & ARM_CP_SPECIAL_MASK) { 2194 case 0: 2195 break; 2196 case ARM_CP_NOP: 2197 return; 2198 case ARM_CP_NZCV: 2199 tcg_rt = cpu_reg(s, rt); 2200 if (isread) { 2201 gen_get_nzcv(tcg_rt); 2202 } else { 2203 gen_set_nzcv(tcg_rt); 2204 } 2205 return; 2206 case ARM_CP_CURRENTEL: 2207 /* Reads as current EL value from pstate, which is 2208 * guaranteed to be constant by the tb flags. 2209 */ 2210 tcg_rt = cpu_reg(s, rt); 2211 tcg_gen_movi_i64(tcg_rt, s->current_el << 2); 2212 return; 2213 case ARM_CP_DC_ZVA: 2214 /* Writes clear the aligned block of memory which rt points into. 
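 * When MTE is active the block must additionally pass a tag check, which
 * is what the mte_check_zva helper below performs before the actual
 * dc_zva store.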
*/ 2215 if (s->mte_active[0]) { 2216 int desc = 0; 2217 2218 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 2219 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 2220 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 2221 2222 tcg_rt = tcg_temp_new_i64(); 2223 gen_helper_mte_check_zva(tcg_rt, tcg_env, 2224 tcg_constant_i32(desc), cpu_reg(s, rt)); 2225 } else { 2226 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 2227 } 2228 gen_helper_dc_zva(tcg_env, tcg_rt); 2229 return; 2230 case ARM_CP_DC_GVA: 2231 { 2232 TCGv_i64 clean_addr, tag; 2233 2234 /* 2235 * DC_GVA, like DC_ZVA, requires that we supply the original 2236 * pointer for an invalid page. Probe that address first. 2237 */ 2238 tcg_rt = cpu_reg(s, rt); 2239 clean_addr = clean_data_tbi(s, tcg_rt); 2240 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 2241 2242 if (s->ata[0]) { 2243 /* Extract the tag from the register to match STZGM. */ 2244 tag = tcg_temp_new_i64(); 2245 tcg_gen_shri_i64(tag, tcg_rt, 56); 2246 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2247 } 2248 } 2249 return; 2250 case ARM_CP_DC_GZVA: 2251 { 2252 TCGv_i64 clean_addr, tag; 2253 2254 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 2255 tcg_rt = cpu_reg(s, rt); 2256 clean_addr = clean_data_tbi(s, tcg_rt); 2257 gen_helper_dc_zva(tcg_env, clean_addr); 2258 2259 if (s->ata[0]) { 2260 /* Extract the tag from the register to match STZGM. */ 2261 tag = tcg_temp_new_i64(); 2262 tcg_gen_shri_i64(tag, tcg_rt, 56); 2263 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2264 } 2265 } 2266 return; 2267 default: 2268 g_assert_not_reached(); 2269 } 2270 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 2271 return; 2272 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 2273 return; 2274 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 2275 return; 2276 } 2277 2278 if (ri->type & ARM_CP_IO) { 2279 /* I/O operations must end the TB here (whether read or write) */ 2280 need_exit_tb = translator_io_start(&s->base); 2281 } 2282 2283 tcg_rt = cpu_reg(s, rt); 2284 2285 if (isread) { 2286 if (ri->type & ARM_CP_CONST) { 2287 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2288 } else if (ri->readfn) { 2289 if (!tcg_ri) { 2290 tcg_ri = gen_lookup_cp_reg(key); 2291 } 2292 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri); 2293 } else { 2294 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset); 2295 } 2296 } else { 2297 if (ri->type & ARM_CP_CONST) { 2298 /* If not forbidden by access permissions, treat as WI */ 2299 return; 2300 } else if (ri->writefn) { 2301 if (!tcg_ri) { 2302 tcg_ri = gen_lookup_cp_reg(key); 2303 } 2304 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt); 2305 } else { 2306 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset); 2307 } 2308 } 2309 2310 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2311 /* 2312 * A write to any coprocessor register that ends a TB 2313 * must rebuild the hflags for the next TB. 2314 */ 2315 gen_rebuild_hflags(s); 2316 /* 2317 * We default to ending the TB on a coprocessor register write, 2318 * but allow this to be suppressed by the register definition 2319 * (usually only necessary to work around guest bugs). 
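 * (A register definition opts out by setting ARM_CP_SUPPRESS_TB_END in
 * its type field.)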
2320 */ 2321 need_exit_tb = true; 2322 } 2323 if (need_exit_tb) { 2324 s->base.is_jmp = DISAS_UPDATE_EXIT; 2325 } 2326 } 2327 2328 static bool trans_SYS(DisasContext *s, arg_SYS *a) 2329 { 2330 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); 2331 return true; 2332 } 2333 2334 static bool trans_SVC(DisasContext *s, arg_i *a) 2335 { 2336 /* 2337 * For SVC, HVC and SMC we advance the single-step state 2338 * machine before taking the exception. This is architecturally 2339 * mandated, to ensure that single-stepping a system call 2340 * instruction works properly. 2341 */ 2342 uint32_t syndrome = syn_aa64_svc(a->imm); 2343 if (s->fgt_svc) { 2344 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2345 return true; 2346 } 2347 gen_ss_advance(s); 2348 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2349 return true; 2350 } 2351 2352 static bool trans_HVC(DisasContext *s, arg_i *a) 2353 { 2354 if (s->current_el == 0) { 2355 unallocated_encoding(s); 2356 return true; 2357 } 2358 /* 2359 * The pre HVC helper handles cases when HVC gets trapped 2360 * as an undefined insn by runtime configuration. 2361 */ 2362 gen_a64_update_pc(s, 0); 2363 gen_helper_pre_hvc(tcg_env); 2364 /* Architecture requires ss advance before we do the actual work */ 2365 gen_ss_advance(s); 2366 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), 2); 2367 return true; 2368 } 2369 2370 static bool trans_SMC(DisasContext *s, arg_i *a) 2371 { 2372 if (s->current_el == 0) { 2373 unallocated_encoding(s); 2374 return true; 2375 } 2376 gen_a64_update_pc(s, 0); 2377 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm))); 2378 /* Architecture requires ss advance before we do the actual work */ 2379 gen_ss_advance(s); 2380 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); 2381 return true; 2382 } 2383 2384 static bool trans_BRK(DisasContext *s, arg_i *a) 2385 { 2386 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); 2387 return true; 2388 } 2389 2390 static bool trans_HLT(DisasContext *s, arg_i *a) 2391 { 2392 /* 2393 * HLT. This has two purposes. 2394 * Architecturally, it is an external halting debug instruction. 2395 * Since QEMU doesn't implement external debug, we treat this as 2396 * it is required for halting debug disabled: it will UNDEF. 2397 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2398 */ 2399 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { 2400 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2401 } else { 2402 unallocated_encoding(s); 2403 } 2404 return true; 2405 } 2406 2407 /* 2408 * Load/Store exclusive instructions are implemented by remembering 2409 * the value/address loaded, and seeing if these are the same 2410 * when the store is performed. This is not actually the architecturally 2411 * mandated semantics, but it works for typical guest code sequences 2412 * and avoids having to monitor regular stores. 2413 * 2414 * The store exclusive uses the atomic cmpxchg primitives to avoid 2415 * races in multi-threaded linux-user and when MTTCG softmmu is 2416 * enabled. 
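 * As a rough illustration, a typical guest retry loop looks like:
 *
 *   retry:  ldaxr   x0, [x1]
 *           add     x0, x0, #1
 *           stlxr   w2, x0, [x1]
 *           cbnz    w2, retry
 *
 * i.e. the same address register normally feeds both halves, which is why
 * recording just the address and loaded value is good enough in practice.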
2417 */ 2418 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, 2419 int size, bool is_pair) 2420 { 2421 int idx = get_mem_index(s); 2422 TCGv_i64 dirty_addr, clean_addr; 2423 MemOp memop = check_atomic_align(s, rn, size + is_pair); 2424 2425 s->is_ldex = true; 2426 dirty_addr = cpu_reg_sp(s, rn); 2427 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); 2428 2429 g_assert(size <= 3); 2430 if (is_pair) { 2431 g_assert(size >= 2); 2432 if (size == 2) { 2433 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2434 if (s->be_data == MO_LE) { 2435 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2436 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2437 } else { 2438 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2439 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2440 } 2441 } else { 2442 TCGv_i128 t16 = tcg_temp_new_i128(); 2443 2444 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); 2445 2446 if (s->be_data == MO_LE) { 2447 tcg_gen_extr_i128_i64(cpu_exclusive_val, 2448 cpu_exclusive_high, t16); 2449 } else { 2450 tcg_gen_extr_i128_i64(cpu_exclusive_high, 2451 cpu_exclusive_val, t16); 2452 } 2453 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2454 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2455 } 2456 } else { 2457 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2458 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2459 } 2460 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); 2461 } 2462 2463 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2464 int rn, int size, int is_pair) 2465 { 2466 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2467 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2468 * [addr] = {Rt}; 2469 * if (is_pair) { 2470 * [addr + datasize] = {Rt2}; 2471 * } 2472 * {Rd} = 0; 2473 * } else { 2474 * {Rd} = 1; 2475 * } 2476 * env->exclusive_addr = -1; 2477 */ 2478 TCGLabel *fail_label = gen_new_label(); 2479 TCGLabel *done_label = gen_new_label(); 2480 TCGv_i64 tmp, clean_addr; 2481 MemOp memop; 2482 2483 /* 2484 * FIXME: We are out of spec here. We have recorded only the address 2485 * from load_exclusive, not the entire range, and we assume that the 2486 * size of the access on both sides match. The architecture allows the 2487 * store to be smaller than the load, so long as the stored bytes are 2488 * within the range recorded by the load. 2489 */ 2490 2491 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */ 2492 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); 2493 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); 2494 2495 /* 2496 * The write, and any associated faults, only happen if the virtual 2497 * and physical addresses pass the exclusive monitor check. These 2498 * faults are exceedingly unlikely, because normally the guest uses 2499 * the exact same address register for the load_exclusive, and we 2500 * would have recognized these faults there. 2501 * 2502 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an 2503 * unaligned 4-byte write within the range of an aligned 8-byte load. 2504 * With LSE2, the store would need to cross a 16-byte boundary when the 2505 * load did not, which would mean the store is outside the range 2506 * recorded for the monitor, which would have failed a corrected monitor 2507 * check above. 
For now, we assume no size change and retain the 2508 * MO_ALIGN to let tcg know what we checked in the load_exclusive. 2509 * 2510 * It is possible to trigger an MTE fault, by performing the load with 2511 * a virtual address with a valid tag and performing the store with the 2512 * same virtual address and a different invalid tag. 2513 */ 2514 memop = size + is_pair; 2515 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { 2516 memop |= MO_ALIGN; 2517 } 2518 memop = finalize_memop(s, memop); 2519 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2520 2521 tmp = tcg_temp_new_i64(); 2522 if (is_pair) { 2523 if (size == 2) { 2524 if (s->be_data == MO_LE) { 2525 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2526 } else { 2527 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2528 } 2529 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2530 cpu_exclusive_val, tmp, 2531 get_mem_index(s), memop); 2532 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2533 } else { 2534 TCGv_i128 t16 = tcg_temp_new_i128(); 2535 TCGv_i128 c16 = tcg_temp_new_i128(); 2536 TCGv_i64 a, b; 2537 2538 if (s->be_data == MO_LE) { 2539 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2540 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2541 cpu_exclusive_high); 2542 } else { 2543 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 2544 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 2545 cpu_exclusive_val); 2546 } 2547 2548 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 2549 get_mem_index(s), memop); 2550 2551 a = tcg_temp_new_i64(); 2552 b = tcg_temp_new_i64(); 2553 if (s->be_data == MO_LE) { 2554 tcg_gen_extr_i128_i64(a, b, t16); 2555 } else { 2556 tcg_gen_extr_i128_i64(b, a, t16); 2557 } 2558 2559 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 2560 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 2561 tcg_gen_or_i64(tmp, a, b); 2562 2563 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 2564 } 2565 } else { 2566 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 2567 cpu_reg(s, rt), get_mem_index(s), memop); 2568 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2569 } 2570 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 2571 tcg_gen_br(done_label); 2572 2573 gen_set_label(fail_label); 2574 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 2575 gen_set_label(done_label); 2576 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2577 } 2578 2579 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 2580 int rn, int size) 2581 { 2582 TCGv_i64 tcg_rs = cpu_reg(s, rs); 2583 TCGv_i64 tcg_rt = cpu_reg(s, rt); 2584 int memidx = get_mem_index(s); 2585 TCGv_i64 clean_addr; 2586 MemOp memop; 2587 2588 if (rn == 31) { 2589 gen_check_sp_alignment(s); 2590 } 2591 memop = check_atomic_align(s, rn, size); 2592 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2593 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, 2594 memidx, memop); 2595 } 2596 2597 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 2598 int rn, int size) 2599 { 2600 TCGv_i64 s1 = cpu_reg(s, rs); 2601 TCGv_i64 s2 = cpu_reg(s, rs + 1); 2602 TCGv_i64 t1 = cpu_reg(s, rt); 2603 TCGv_i64 t2 = cpu_reg(s, rt + 1); 2604 TCGv_i64 clean_addr; 2605 int memidx = get_mem_index(s); 2606 MemOp memop; 2607 2608 if (rn == 31) { 2609 gen_check_sp_alignment(s); 2610 } 2611 2612 /* This is a single atomic access, despite the "pair". 
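 * E.g. for the 64-bit form (size == 3) the two registers are glued into a
 * single aligned 128-bit cmpxchg below, hence check_atomic_align with
 * size + 1.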
*/ 2613 memop = check_atomic_align(s, rn, size + 1); 2614 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2615 2616 if (size == 2) { 2617 TCGv_i64 cmp = tcg_temp_new_i64(); 2618 TCGv_i64 val = tcg_temp_new_i64(); 2619 2620 if (s->be_data == MO_LE) { 2621 tcg_gen_concat32_i64(val, t1, t2); 2622 tcg_gen_concat32_i64(cmp, s1, s2); 2623 } else { 2624 tcg_gen_concat32_i64(val, t2, t1); 2625 tcg_gen_concat32_i64(cmp, s2, s1); 2626 } 2627 2628 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); 2629 2630 if (s->be_data == MO_LE) { 2631 tcg_gen_extr32_i64(s1, s2, cmp); 2632 } else { 2633 tcg_gen_extr32_i64(s2, s1, cmp); 2634 } 2635 } else { 2636 TCGv_i128 cmp = tcg_temp_new_i128(); 2637 TCGv_i128 val = tcg_temp_new_i128(); 2638 2639 if (s->be_data == MO_LE) { 2640 tcg_gen_concat_i64_i128(val, t1, t2); 2641 tcg_gen_concat_i64_i128(cmp, s1, s2); 2642 } else { 2643 tcg_gen_concat_i64_i128(val, t2, t1); 2644 tcg_gen_concat_i64_i128(cmp, s2, s1); 2645 } 2646 2647 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); 2648 2649 if (s->be_data == MO_LE) { 2650 tcg_gen_extr_i128_i64(s1, s2, cmp); 2651 } else { 2652 tcg_gen_extr_i128_i64(s2, s1, cmp); 2653 } 2654 } 2655 } 2656 2657 /* 2658 * Compute the ISS.SF bit for syndrome information if an exception 2659 * is taken on a load or store. This indicates whether the instruction 2660 * is accessing a 32-bit or 64-bit register. This logic is derived 2661 * from the ARMv8 specs for LDR (Shared decode for all encodings). 2662 */ 2663 static bool ldst_iss_sf(int size, bool sign, bool ext) 2664 { 2665 2666 if (sign) { 2667 /* 2668 * Signed loads are 64 bit results if we are not going to 2669 * do a zero-extend from 32 to 64 after the load. 2670 * (For a store, sign and ext are always false.) 2671 */ 2672 return !ext; 2673 } else { 2674 /* Unsigned loads/stores work at the specified size */ 2675 return size == MO_64; 2676 } 2677 } 2678 2679 static bool trans_STXR(DisasContext *s, arg_stxr *a) 2680 { 2681 if (a->rn == 31) { 2682 gen_check_sp_alignment(s); 2683 } 2684 if (a->lasr) { 2685 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2686 } 2687 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); 2688 return true; 2689 } 2690 2691 static bool trans_LDXR(DisasContext *s, arg_stxr *a) 2692 { 2693 if (a->rn == 31) { 2694 gen_check_sp_alignment(s); 2695 } 2696 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); 2697 if (a->lasr) { 2698 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2699 } 2700 return true; 2701 } 2702 2703 static bool trans_STLR(DisasContext *s, arg_stlr *a) 2704 { 2705 TCGv_i64 clean_addr; 2706 MemOp memop; 2707 bool iss_sf = ldst_iss_sf(a->sz, false, false); 2708 2709 /* 2710 * StoreLORelease is the same as Store-Release for QEMU, but 2711 * needs the feature-test. 2712 */ 2713 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 2714 return false; 2715 } 2716 /* Generate ISS for non-exclusive accesses including LASR. 
*/ 2717 if (a->rn == 31) { 2718 gen_check_sp_alignment(s); 2719 } 2720 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2721 memop = check_ordered_align(s, a->rn, 0, true, a->sz); 2722 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 2723 true, a->rn != 31, memop); 2724 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, 2725 iss_sf, a->lasr); 2726 return true; 2727 } 2728 2729 static bool trans_LDAR(DisasContext *s, arg_stlr *a) 2730 { 2731 TCGv_i64 clean_addr; 2732 MemOp memop; 2733 bool iss_sf = ldst_iss_sf(a->sz, false, false); 2734 2735 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 2736 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 2737 return false; 2738 } 2739 /* Generate ISS for non-exclusive accesses including LASR. */ 2740 if (a->rn == 31) { 2741 gen_check_sp_alignment(s); 2742 } 2743 memop = check_ordered_align(s, a->rn, 0, false, a->sz); 2744 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 2745 false, a->rn != 31, memop); 2746 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, 2747 a->rt, iss_sf, a->lasr); 2748 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2749 return true; 2750 } 2751 2752 static bool trans_STXP(DisasContext *s, arg_stxr *a) 2753 { 2754 if (a->rn == 31) { 2755 gen_check_sp_alignment(s); 2756 } 2757 if (a->lasr) { 2758 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2759 } 2760 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); 2761 return true; 2762 } 2763 2764 static bool trans_LDXP(DisasContext *s, arg_stxr *a) 2765 { 2766 if (a->rn == 31) { 2767 gen_check_sp_alignment(s); 2768 } 2769 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); 2770 if (a->lasr) { 2771 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2772 } 2773 return true; 2774 } 2775 2776 static bool trans_CASP(DisasContext *s, arg_CASP *a) 2777 { 2778 if (!dc_isar_feature(aa64_atomics, s)) { 2779 return false; 2780 } 2781 if (((a->rt | a->rs) & 1) != 0) { 2782 return false; 2783 } 2784 2785 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); 2786 return true; 2787 } 2788 2789 static bool trans_CAS(DisasContext *s, arg_CAS *a) 2790 { 2791 if (!dc_isar_feature(aa64_atomics, s)) { 2792 return false; 2793 } 2794 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); 2795 return true; 2796 } 2797 2798 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) 2799 { 2800 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); 2801 TCGv_i64 tcg_rt = cpu_reg(s, a->rt); 2802 TCGv_i64 clean_addr = tcg_temp_new_i64(); 2803 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 2804 2805 gen_pc_plus_diff(s, clean_addr, a->imm); 2806 do_gpr_ld(s, tcg_rt, clean_addr, memop, 2807 false, true, a->rt, iss_sf, false); 2808 return true; 2809 } 2810 2811 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) 2812 { 2813 /* Load register (literal), vector version */ 2814 TCGv_i64 clean_addr; 2815 MemOp memop; 2816 2817 if (!fp_access_check(s)) { 2818 return true; 2819 } 2820 memop = finalize_memop_asimd(s, a->sz); 2821 clean_addr = tcg_temp_new_i64(); 2822 gen_pc_plus_diff(s, clean_addr, a->imm); 2823 do_fp_ld(s, a->rt, clean_addr, memop); 2824 return true; 2825 } 2826 2827 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, 2828 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 2829 uint64_t offset, bool is_store, MemOp mop) 2830 { 2831 if (a->rn == 31) { 2832 gen_check_sp_alignment(s); 2833 } 2834 2835 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 2836 if (!a->p) { 2837 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 2838 } 2839 2840 *clean_addr = gen_mte_checkN(s, 
*dirty_addr, is_store, 2841 (a->w || a->rn != 31), 2 << a->sz, mop); 2842 } 2843 2844 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, 2845 TCGv_i64 dirty_addr, uint64_t offset) 2846 { 2847 if (a->w) { 2848 if (a->p) { 2849 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 2850 } 2851 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 2852 } 2853 } 2854 2855 static bool trans_STP(DisasContext *s, arg_ldstpair *a) 2856 { 2857 uint64_t offset = a->imm << a->sz; 2858 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 2859 MemOp mop = finalize_memop(s, a->sz); 2860 2861 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 2862 tcg_rt = cpu_reg(s, a->rt); 2863 tcg_rt2 = cpu_reg(s, a->rt2); 2864 /* 2865 * We built mop above for the single logical access -- rebuild it 2866 * now for the paired operation. 2867 * 2868 * With LSE2, non-sign-extending pairs are treated atomically if 2869 * aligned, and if unaligned one of the pair will be completely 2870 * within a 16-byte block and that element will be atomic. 2871 * Otherwise each element is separately atomic. 2872 * In all cases, issue one operation with the correct atomicity. 2873 */ 2874 mop = a->sz + 1; 2875 if (s->align_mem) { 2876 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 2877 } 2878 mop = finalize_memop_pair(s, mop); 2879 if (a->sz == 2) { 2880 TCGv_i64 tmp = tcg_temp_new_i64(); 2881 2882 if (s->be_data == MO_LE) { 2883 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); 2884 } else { 2885 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); 2886 } 2887 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); 2888 } else { 2889 TCGv_i128 tmp = tcg_temp_new_i128(); 2890 2891 if (s->be_data == MO_LE) { 2892 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 2893 } else { 2894 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 2895 } 2896 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 2897 } 2898 op_addr_ldstpair_post(s, a, dirty_addr, offset); 2899 return true; 2900 } 2901 2902 static bool trans_LDP(DisasContext *s, arg_ldstpair *a) 2903 { 2904 uint64_t offset = a->imm << a->sz; 2905 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 2906 MemOp mop = finalize_memop(s, a->sz); 2907 2908 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 2909 tcg_rt = cpu_reg(s, a->rt); 2910 tcg_rt2 = cpu_reg(s, a->rt2); 2911 2912 /* 2913 * We built mop above for the single logical access -- rebuild it 2914 * now for the paired operation. 2915 * 2916 * With LSE2, non-sign-extending pairs are treated atomically if 2917 * aligned, and if unaligned one of the pair will be completely 2918 * within a 16-byte block and that element will be atomic. 2919 * Otherwise each element is separately atomic. 2920 * In all cases, issue one operation with the correct atomicity. 2921 * 2922 * This treats sign-extending loads like zero-extending loads, 2923 * since that reuses the most code below. 2924 */ 2925 mop = a->sz + 1; 2926 if (s->align_mem) { 2927 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 2928 } 2929 mop = finalize_memop_pair(s, mop); 2930 if (a->sz == 2) { 2931 int o2 = s->be_data == MO_LE ? 
32 : 0; 2932 int o1 = o2 ^ 32; 2933 2934 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); 2935 if (a->sign) { 2936 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); 2937 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); 2938 } else { 2939 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); 2940 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); 2941 } 2942 } else { 2943 TCGv_i128 tmp = tcg_temp_new_i128(); 2944 2945 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); 2946 if (s->be_data == MO_LE) { 2947 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); 2948 } else { 2949 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); 2950 } 2951 } 2952 op_addr_ldstpair_post(s, a, dirty_addr, offset); 2953 return true; 2954 } 2955 2956 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) 2957 { 2958 uint64_t offset = a->imm << a->sz; 2959 TCGv_i64 clean_addr, dirty_addr; 2960 MemOp mop; 2961 2962 if (!fp_access_check(s)) { 2963 return true; 2964 } 2965 2966 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 2967 mop = finalize_memop_asimd(s, a->sz); 2968 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 2969 do_fp_st(s, a->rt, clean_addr, mop); 2970 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 2971 do_fp_st(s, a->rt2, clean_addr, mop); 2972 op_addr_ldstpair_post(s, a, dirty_addr, offset); 2973 return true; 2974 } 2975 2976 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) 2977 { 2978 uint64_t offset = a->imm << a->sz; 2979 TCGv_i64 clean_addr, dirty_addr; 2980 MemOp mop; 2981 2982 if (!fp_access_check(s)) { 2983 return true; 2984 } 2985 2986 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 2987 mop = finalize_memop_asimd(s, a->sz); 2988 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 2989 do_fp_ld(s, a->rt, clean_addr, mop); 2990 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 2991 do_fp_ld(s, a->rt2, clean_addr, mop); 2992 op_addr_ldstpair_post(s, a, dirty_addr, offset); 2993 return true; 2994 } 2995 2996 static bool trans_STGP(DisasContext *s, arg_ldstpair *a) 2997 { 2998 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 2999 uint64_t offset = a->imm << LOG2_TAG_GRANULE; 3000 MemOp mop; 3001 TCGv_i128 tmp; 3002 3003 /* STGP only comes in one size. */ 3004 tcg_debug_assert(a->sz == MO_64); 3005 3006 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3007 return false; 3008 } 3009 3010 if (a->rn == 31) { 3011 gen_check_sp_alignment(s); 3012 } 3013 3014 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3015 if (!a->p) { 3016 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3017 } 3018 3019 clean_addr = clean_data_tbi(s, dirty_addr); 3020 tcg_rt = cpu_reg(s, a->rt); 3021 tcg_rt2 = cpu_reg(s, a->rt2); 3022 3023 /* 3024 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, 3025 * and one tag operation. We implement it as one single aligned 16-byte 3026 * memory operation for convenience. Note that the alignment ensures 3027 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. 3028 */ 3029 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); 3030 3031 tmp = tcg_temp_new_i128(); 3032 if (s->be_data == MO_LE) { 3033 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3034 } else { 3035 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3036 } 3037 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3038 3039 /* Perform the tag store, if tag access enabled. 
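 * Under MTTCG (CF_PARALLEL) we use the parallel helper so that the tag
 * update is performed atomically with respect to other vCPUs.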
*/ 3040 if (s->ata[0]) { 3041 if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3042 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr); 3043 } else { 3044 gen_helper_stg(tcg_env, dirty_addr, dirty_addr); 3045 } 3046 } 3047 3048 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3049 return true; 3050 } 3051 3052 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, 3053 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3054 uint64_t offset, bool is_store, MemOp mop) 3055 { 3056 int memidx; 3057 3058 if (a->rn == 31) { 3059 gen_check_sp_alignment(s); 3060 } 3061 3062 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3063 if (!a->p) { 3064 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3065 } 3066 memidx = get_a64_user_mem_index(s, a->unpriv); 3067 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, 3068 a->w || a->rn != 31, 3069 mop, a->unpriv, memidx); 3070 } 3071 3072 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, 3073 TCGv_i64 dirty_addr, uint64_t offset) 3074 { 3075 if (a->w) { 3076 if (a->p) { 3077 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3078 } 3079 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3080 } 3081 } 3082 3083 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) 3084 { 3085 bool iss_sf, iss_valid = !a->w; 3086 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3087 int memidx = get_a64_user_mem_index(s, a->unpriv); 3088 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3089 3090 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3091 3092 tcg_rt = cpu_reg(s, a->rt); 3093 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3094 3095 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, 3096 iss_valid, a->rt, iss_sf, false); 3097 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3098 return true; 3099 } 3100 3101 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) 3102 { 3103 bool iss_sf, iss_valid = !a->w; 3104 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3105 int memidx = get_a64_user_mem_index(s, a->unpriv); 3106 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3107 3108 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3109 3110 tcg_rt = cpu_reg(s, a->rt); 3111 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3112 3113 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, 3114 a->ext, memidx, iss_valid, a->rt, iss_sf, false); 3115 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3116 return true; 3117 } 3118 3119 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) 3120 { 3121 TCGv_i64 clean_addr, dirty_addr; 3122 MemOp mop; 3123 3124 if (!fp_access_check(s)) { 3125 return true; 3126 } 3127 mop = finalize_memop_asimd(s, a->sz); 3128 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3129 do_fp_st(s, a->rt, clean_addr, mop); 3130 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3131 return true; 3132 } 3133 3134 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) 3135 { 3136 TCGv_i64 clean_addr, dirty_addr; 3137 MemOp mop; 3138 3139 if (!fp_access_check(s)) { 3140 return true; 3141 } 3142 mop = finalize_memop_asimd(s, a->sz); 3143 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3144 do_fp_ld(s, a->rt, clean_addr, mop); 3145 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3146 return true; 3147 } 3148 3149 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, 3150 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3151 bool is_store, MemOp memop) 3152 { 3153 TCGv_i64 tcg_rm; 3154 3155 if (a->rn == 31) { 3156 
gen_check_sp_alignment(s); 3157 } 3158 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3159 3160 tcg_rm = read_cpu_reg(s, a->rm, 1); 3161 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); 3162 3163 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); 3164 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); 3165 } 3166 3167 static bool trans_LDR(DisasContext *s, arg_ldst *a) 3168 { 3169 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3170 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3171 MemOp memop; 3172 3173 if (extract32(a->opt, 1, 1) == 0) { 3174 return false; 3175 } 3176 3177 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3178 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3179 tcg_rt = cpu_reg(s, a->rt); 3180 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3181 a->ext, true, a->rt, iss_sf, false); 3182 return true; 3183 } 3184 3185 static bool trans_STR(DisasContext *s, arg_ldst *a) 3186 { 3187 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3188 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3189 MemOp memop; 3190 3191 if (extract32(a->opt, 1, 1) == 0) { 3192 return false; 3193 } 3194 3195 memop = finalize_memop(s, a->sz); 3196 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3197 tcg_rt = cpu_reg(s, a->rt); 3198 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); 3199 return true; 3200 } 3201 3202 static bool trans_LDR_v(DisasContext *s, arg_ldst *a) 3203 { 3204 TCGv_i64 clean_addr, dirty_addr; 3205 MemOp memop; 3206 3207 if (extract32(a->opt, 1, 1) == 0) { 3208 return false; 3209 } 3210 3211 if (!fp_access_check(s)) { 3212 return true; 3213 } 3214 3215 memop = finalize_memop_asimd(s, a->sz); 3216 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3217 do_fp_ld(s, a->rt, clean_addr, memop); 3218 return true; 3219 } 3220 3221 static bool trans_STR_v(DisasContext *s, arg_ldst *a) 3222 { 3223 TCGv_i64 clean_addr, dirty_addr; 3224 MemOp memop; 3225 3226 if (extract32(a->opt, 1, 1) == 0) { 3227 return false; 3228 } 3229 3230 if (!fp_access_check(s)) { 3231 return true; 3232 } 3233 3234 memop = finalize_memop_asimd(s, a->sz); 3235 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3236 do_fp_st(s, a->rt, clean_addr, memop); 3237 return true; 3238 } 3239 3240 3241 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, 3242 int sign, bool invert) 3243 { 3244 MemOp mop = a->sz | sign; 3245 TCGv_i64 clean_addr, tcg_rs, tcg_rt; 3246 3247 if (a->rn == 31) { 3248 gen_check_sp_alignment(s); 3249 } 3250 mop = check_atomic_align(s, a->rn, mop); 3251 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3252 a->rn != 31, mop); 3253 tcg_rs = read_cpu_reg(s, a->rs, true); 3254 tcg_rt = cpu_reg(s, a->rt); 3255 if (invert) { 3256 tcg_gen_not_i64(tcg_rs, tcg_rs); 3257 } 3258 /* 3259 * The tcg atomic primitives are all full barriers. Therefore we 3260 * can ignore the Acquire and Release bits of this instruction. 
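 * For the signed min/max ops the value comes back sign-extended for the
 * comparison, so the switch below re-zero-extends the register result to
 * the access size, matching the architected zero-extension of the loaded
 * value.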
3261 */ 3262 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3263 3264 if (mop & MO_SIGN) { 3265 switch (a->sz) { 3266 case MO_8: 3267 tcg_gen_ext8u_i64(tcg_rt, tcg_rt); 3268 break; 3269 case MO_16: 3270 tcg_gen_ext16u_i64(tcg_rt, tcg_rt); 3271 break; 3272 case MO_32: 3273 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3274 break; 3275 case MO_64: 3276 break; 3277 default: 3278 g_assert_not_reached(); 3279 } 3280 } 3281 return true; 3282 } 3283 3284 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) 3285 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) 3286 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) 3287 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) 3288 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) 3289 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) 3290 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 3291 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) 3292 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) 3293 3294 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) 3295 { 3296 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3297 TCGv_i64 clean_addr; 3298 MemOp mop; 3299 3300 if (!dc_isar_feature(aa64_atomics, s) || 3301 !dc_isar_feature(aa64_rcpc_8_3, s)) { 3302 return false; 3303 } 3304 if (a->rn == 31) { 3305 gen_check_sp_alignment(s); 3306 } 3307 mop = check_atomic_align(s, a->rn, a->sz); 3308 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3309 a->rn != 31, mop); 3310 /* 3311 * LDAPR* are a special case because they are a simple load, not a 3312 * fetch-and-do-something op. 3313 * The architectural consistency requirements here are weaker than 3314 * full load-acquire (we only need "load-acquire processor consistent"), 3315 * but we choose to implement them as full LDAQ. 3316 */ 3317 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, 3318 true, a->rt, iss_sf, true); 3319 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3320 return true; 3321 } 3322 3323 static bool trans_LDRA(DisasContext *s, arg_LDRA *a) 3324 { 3325 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3326 MemOp memop; 3327 3328 /* Load with pointer authentication */ 3329 if (!dc_isar_feature(aa64_pauth, s)) { 3330 return false; 3331 } 3332 3333 if (a->rn == 31) { 3334 gen_check_sp_alignment(s); 3335 } 3336 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3337 3338 if (s->pauth_active) { 3339 if (!a->m) { 3340 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr, 3341 tcg_constant_i64(0)); 3342 } else { 3343 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr, 3344 tcg_constant_i64(0)); 3345 } 3346 } 3347 3348 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3349 3350 memop = finalize_memop(s, MO_64); 3351 3352 /* Note that "clean" and "dirty" here refer to TBI not PAC. 
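 * i.e. even after authentication the pointer may still carry a top-byte
 * tag, which gen_mte_check1 strips to produce clean_addr.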
*/ 3353 clean_addr = gen_mte_check1(s, dirty_addr, false, 3354 a->w || a->rn != 31, memop); 3355 3356 tcg_rt = cpu_reg(s, a->rt); 3357 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3358 /* extend */ false, /* iss_valid */ !a->w, 3359 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); 3360 3361 if (a->w) { 3362 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3363 } 3364 return true; 3365 } 3366 3367 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3368 { 3369 TCGv_i64 clean_addr, dirty_addr; 3370 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); 3371 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3372 3373 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3374 return false; 3375 } 3376 3377 if (a->rn == 31) { 3378 gen_check_sp_alignment(s); 3379 } 3380 3381 mop = check_ordered_align(s, a->rn, a->imm, false, mop); 3382 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3383 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3384 clean_addr = clean_data_tbi(s, dirty_addr); 3385 3386 /* 3387 * Load-AcquirePC semantics; we implement as the slightly more 3388 * restrictive Load-Acquire. 3389 */ 3390 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, 3391 a->rt, iss_sf, true); 3392 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3393 return true; 3394 } 3395 3396 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3397 { 3398 TCGv_i64 clean_addr, dirty_addr; 3399 MemOp mop = a->sz; 3400 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3401 3402 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3403 return false; 3404 } 3405 3406 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3407 3408 if (a->rn == 31) { 3409 gen_check_sp_alignment(s); 3410 } 3411 3412 mop = check_ordered_align(s, a->rn, a->imm, true, mop); 3413 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3414 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3415 clean_addr = clean_data_tbi(s, dirty_addr); 3416 3417 /* Store-Release semantics */ 3418 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3419 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); 3420 return true; 3421 } 3422 3423 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) 3424 { 3425 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3426 MemOp endian, align, mop; 3427 3428 int total; /* total bytes */ 3429 int elements; /* elements per vector */ 3430 int r; 3431 int size = a->sz; 3432 3433 if (!a->p && a->rm != 0) { 3434 /* For non-postindexed accesses the Rm field must be 0 */ 3435 return false; 3436 } 3437 if (size == 3 && !a->q && a->selem != 1) { 3438 return false; 3439 } 3440 if (!fp_access_check(s)) { 3441 return true; 3442 } 3443 3444 if (a->rn == 31) { 3445 gen_check_sp_alignment(s); 3446 } 3447 3448 /* For our purposes, bytes are always little-endian. */ 3449 endian = s->be_data; 3450 if (size == 0) { 3451 endian = MO_LE; 3452 } 3453 3454 total = a->rpt * a->selem * (a->q ? 16 : 8); 3455 tcg_rn = cpu_reg_sp(s, a->rn); 3456 3457 /* 3458 * Issue the MTE check vs the logical repeat count, before we 3459 * promote consecutive little-endian elements below. 3460 */ 3461 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, 3462 finalize_memop_asimd(s, size)); 3463 3464 /* 3465 * Consecutive little-endian elements from a single register 3466 * can be promoted to a larger little-endian operation. 3467 */ 3468 align = MO_ALIGN; 3469 if (a->selem == 1 && endian == MO_LE) { 3470 align = pow2_align(size); 3471 size = 3; 3472 } 3473 if (!s->align_mem) { 3474 align = 0; 3475 } 3476 mop = endian | size | align; 3477 3478 elements = (a->q ? 
16 : 8) >> size; 3479 tcg_ebytes = tcg_constant_i64(1 << size); 3480 for (r = 0; r < a->rpt; r++) { 3481 int e; 3482 for (e = 0; e < elements; e++) { 3483 int xs; 3484 for (xs = 0; xs < a->selem; xs++) { 3485 int tt = (a->rt + r + xs) % 32; 3486 do_vec_ld(s, tt, e, clean_addr, mop); 3487 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3488 } 3489 } 3490 } 3491 3492 /* 3493 * For non-quad operations, setting a slice of the low 64 bits of 3494 * the register clears the high 64 bits (in the ARM ARM pseudocode 3495 * this is implicit in the fact that 'rval' is a 64 bit wide 3496 * variable). For quad operations, we might still need to zero 3497 * the high bits of SVE. 3498 */ 3499 for (r = 0; r < a->rpt * a->selem; r++) { 3500 int tt = (a->rt + r) % 32; 3501 clear_vec_high(s, a->q, tt); 3502 } 3503 3504 if (a->p) { 3505 if (a->rm == 31) { 3506 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3507 } else { 3508 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3509 } 3510 } 3511 return true; 3512 } 3513 3514 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) 3515 { 3516 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3517 MemOp endian, align, mop; 3518 3519 int total; /* total bytes */ 3520 int elements; /* elements per vector */ 3521 int r; 3522 int size = a->sz; 3523 3524 if (!a->p && a->rm != 0) { 3525 /* For non-postindexed accesses the Rm field must be 0 */ 3526 return false; 3527 } 3528 if (size == 3 && !a->q && a->selem != 1) { 3529 return false; 3530 } 3531 if (!fp_access_check(s)) { 3532 return true; 3533 } 3534 3535 if (a->rn == 31) { 3536 gen_check_sp_alignment(s); 3537 } 3538 3539 /* For our purposes, bytes are always little-endian. */ 3540 endian = s->be_data; 3541 if (size == 0) { 3542 endian = MO_LE; 3543 } 3544 3545 total = a->rpt * a->selem * (a->q ? 16 : 8); 3546 tcg_rn = cpu_reg_sp(s, a->rn); 3547 3548 /* 3549 * Issue the MTE check vs the logical repeat count, before we 3550 * promote consecutive little-endian elements below. 3551 */ 3552 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, 3553 finalize_memop_asimd(s, size)); 3554 3555 /* 3556 * Consecutive little-endian elements from a single register 3557 * can be promoted to a larger little-endian operation. 3558 */ 3559 align = MO_ALIGN; 3560 if (a->selem == 1 && endian == MO_LE) { 3561 align = pow2_align(size); 3562 size = 3; 3563 } 3564 if (!s->align_mem) { 3565 align = 0; 3566 } 3567 mop = endian | size | align; 3568 3569 elements = (a->q ? 
16 : 8) >> size; 3570 tcg_ebytes = tcg_constant_i64(1 << size); 3571 for (r = 0; r < a->rpt; r++) { 3572 int e; 3573 for (e = 0; e < elements; e++) { 3574 int xs; 3575 for (xs = 0; xs < a->selem; xs++) { 3576 int tt = (a->rt + r + xs) % 32; 3577 do_vec_st(s, tt, e, clean_addr, mop); 3578 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3579 } 3580 } 3581 } 3582 3583 if (a->p) { 3584 if (a->rm == 31) { 3585 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3586 } else { 3587 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3588 } 3589 } 3590 return true; 3591 } 3592 3593 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) 3594 { 3595 int xs, total, rt; 3596 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3597 MemOp mop; 3598 3599 if (!a->p && a->rm != 0) { 3600 return false; 3601 } 3602 if (!fp_access_check(s)) { 3603 return true; 3604 } 3605 3606 if (a->rn == 31) { 3607 gen_check_sp_alignment(s); 3608 } 3609 3610 total = a->selem << a->scale; 3611 tcg_rn = cpu_reg_sp(s, a->rn); 3612 3613 mop = finalize_memop_asimd(s, a->scale); 3614 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, 3615 total, mop); 3616 3617 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3618 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3619 do_vec_st(s, rt, a->index, clean_addr, mop); 3620 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3621 } 3622 3623 if (a->p) { 3624 if (a->rm == 31) { 3625 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3626 } else { 3627 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3628 } 3629 } 3630 return true; 3631 } 3632 3633 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) 3634 { 3635 int xs, total, rt; 3636 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3637 MemOp mop; 3638 3639 if (!a->p && a->rm != 0) { 3640 return false; 3641 } 3642 if (!fp_access_check(s)) { 3643 return true; 3644 } 3645 3646 if (a->rn == 31) { 3647 gen_check_sp_alignment(s); 3648 } 3649 3650 total = a->selem << a->scale; 3651 tcg_rn = cpu_reg_sp(s, a->rn); 3652 3653 mop = finalize_memop_asimd(s, a->scale); 3654 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3655 total, mop); 3656 3657 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3658 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3659 do_vec_ld(s, rt, a->index, clean_addr, mop); 3660 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3661 } 3662 3663 if (a->p) { 3664 if (a->rm == 31) { 3665 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3666 } else { 3667 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3668 } 3669 } 3670 return true; 3671 } 3672 3673 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) 3674 { 3675 int xs, total, rt; 3676 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3677 MemOp mop; 3678 3679 if (!a->p && a->rm != 0) { 3680 return false; 3681 } 3682 if (!fp_access_check(s)) { 3683 return true; 3684 } 3685 3686 if (a->rn == 31) { 3687 gen_check_sp_alignment(s); 3688 } 3689 3690 total = a->selem << a->scale; 3691 tcg_rn = cpu_reg_sp(s, a->rn); 3692 3693 mop = finalize_memop_asimd(s, a->scale); 3694 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3695 total, mop); 3696 3697 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3698 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3699 /* Load and replicate to all elements */ 3700 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 3701 3702 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 3703 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), 3704 (a->q + 
1) * 8, vec_full_reg_size(s), tcg_tmp); 3705 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3706 } 3707 3708 if (a->p) { 3709 if (a->rm == 31) { 3710 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3711 } else { 3712 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3713 } 3714 } 3715 return true; 3716 } 3717 3718 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) 3719 { 3720 TCGv_i64 addr, clean_addr, tcg_rt; 3721 int size = 4 << s->dcz_blocksize; 3722 3723 if (!dc_isar_feature(aa64_mte, s)) { 3724 return false; 3725 } 3726 if (s->current_el == 0) { 3727 return false; 3728 } 3729 3730 if (a->rn == 31) { 3731 gen_check_sp_alignment(s); 3732 } 3733 3734 addr = read_cpu_reg_sp(s, a->rn, true); 3735 tcg_gen_addi_i64(addr, addr, a->imm); 3736 tcg_rt = cpu_reg(s, a->rt); 3737 3738 if (s->ata[0]) { 3739 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt); 3740 } 3741 /* 3742 * The non-tags portion of STZGM is mostly like DC_ZVA, 3743 * except the alignment happens before the access. 3744 */ 3745 clean_addr = clean_data_tbi(s, addr); 3746 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 3747 gen_helper_dc_zva(tcg_env, clean_addr); 3748 return true; 3749 } 3750 3751 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) 3752 { 3753 TCGv_i64 addr, clean_addr, tcg_rt; 3754 3755 if (!dc_isar_feature(aa64_mte, s)) { 3756 return false; 3757 } 3758 if (s->current_el == 0) { 3759 return false; 3760 } 3761 3762 if (a->rn == 31) { 3763 gen_check_sp_alignment(s); 3764 } 3765 3766 addr = read_cpu_reg_sp(s, a->rn, true); 3767 tcg_gen_addi_i64(addr, addr, a->imm); 3768 tcg_rt = cpu_reg(s, a->rt); 3769 3770 if (s->ata[0]) { 3771 gen_helper_stgm(tcg_env, addr, tcg_rt); 3772 } else { 3773 MMUAccessType acc = MMU_DATA_STORE; 3774 int size = 4 << s->gm_blocksize; 3775 3776 clean_addr = clean_data_tbi(s, addr); 3777 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 3778 gen_probe_access(s, clean_addr, acc, size); 3779 } 3780 return true; 3781 } 3782 3783 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) 3784 { 3785 TCGv_i64 addr, clean_addr, tcg_rt; 3786 3787 if (!dc_isar_feature(aa64_mte, s)) { 3788 return false; 3789 } 3790 if (s->current_el == 0) { 3791 return false; 3792 } 3793 3794 if (a->rn == 31) { 3795 gen_check_sp_alignment(s); 3796 } 3797 3798 addr = read_cpu_reg_sp(s, a->rn, true); 3799 tcg_gen_addi_i64(addr, addr, a->imm); 3800 tcg_rt = cpu_reg(s, a->rt); 3801 3802 if (s->ata[0]) { 3803 gen_helper_ldgm(tcg_rt, tcg_env, addr); 3804 } else { 3805 MMUAccessType acc = MMU_DATA_LOAD; 3806 int size = 4 << s->gm_blocksize; 3807 3808 clean_addr = clean_data_tbi(s, addr); 3809 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 3810 gen_probe_access(s, clean_addr, acc, size); 3811 /* The result tags are zeros. */ 3812 tcg_gen_movi_i64(tcg_rt, 0); 3813 } 3814 return true; 3815 } 3816 3817 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) 3818 { 3819 TCGv_i64 addr, clean_addr, tcg_rt; 3820 3821 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3822 return false; 3823 } 3824 3825 if (a->rn == 31) { 3826 gen_check_sp_alignment(s); 3827 } 3828 3829 addr = read_cpu_reg_sp(s, a->rn, true); 3830 if (!a->p) { 3831 /* pre-index or signed offset */ 3832 tcg_gen_addi_i64(addr, addr, a->imm); 3833 } 3834 3835 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); 3836 tcg_rt = cpu_reg(s, a->rt); 3837 if (s->ata[0]) { 3838 gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt); 3839 } else { 3840 /* 3841 * Tag access disabled: we must check for aborts on the load 3842 * load from [rn+offset], and then insert a 0 tag into rt. 
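 * (gen_probe_access only checks that the granule is accessible and raises
 * any fault; no data is actually loaded.)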
3843 */ 3844 clean_addr = clean_data_tbi(s, addr); 3845 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 3846 gen_address_with_allocation_tag0(tcg_rt, tcg_rt); 3847 } 3848 3849 if (a->w) { 3850 /* pre-index or post-index */ 3851 if (a->p) { 3852 /* post-index */ 3853 tcg_gen_addi_i64(addr, addr, a->imm); 3854 } 3855 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 3856 } 3857 return true; 3858 } 3859 3860 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) 3861 { 3862 TCGv_i64 addr, tcg_rt; 3863 3864 if (a->rn == 31) { 3865 gen_check_sp_alignment(s); 3866 } 3867 3868 addr = read_cpu_reg_sp(s, a->rn, true); 3869 if (!a->p) { 3870 /* pre-index or signed offset */ 3871 tcg_gen_addi_i64(addr, addr, a->imm); 3872 } 3873 tcg_rt = cpu_reg_sp(s, a->rt); 3874 if (!s->ata[0]) { 3875 /* 3876 * For STG and ST2G, we need to check alignment and probe memory. 3877 * TODO: For STZG and STZ2G, we could rely on the stores below, 3878 * at least for system mode; user-only won't enforce alignment. 3879 */ 3880 if (is_pair) { 3881 gen_helper_st2g_stub(tcg_env, addr); 3882 } else { 3883 gen_helper_stg_stub(tcg_env, addr); 3884 } 3885 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3886 if (is_pair) { 3887 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt); 3888 } else { 3889 gen_helper_stg_parallel(tcg_env, addr, tcg_rt); 3890 } 3891 } else { 3892 if (is_pair) { 3893 gen_helper_st2g(tcg_env, addr, tcg_rt); 3894 } else { 3895 gen_helper_stg(tcg_env, addr, tcg_rt); 3896 } 3897 } 3898 3899 if (is_zero) { 3900 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 3901 TCGv_i64 zero64 = tcg_constant_i64(0); 3902 TCGv_i128 zero128 = tcg_temp_new_i128(); 3903 int mem_index = get_mem_index(s); 3904 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN); 3905 3906 tcg_gen_concat_i64_i128(zero128, zero64, zero64); 3907 3908 /* This is 1 or 2 atomic 16-byte operations. */ 3909 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 3910 if (is_pair) { 3911 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 3912 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 3913 } 3914 } 3915 3916 if (a->w) { 3917 /* pre-index or post-index */ 3918 if (a->p) { 3919 /* post-index */ 3920 tcg_gen_addi_i64(addr, addr, a->imm); 3921 } 3922 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 3923 } 3924 return true; 3925 } 3926 3927 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) 3928 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) 3929 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) 3930 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) 3931 3932 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32); 3933 3934 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, 3935 bool is_setg, SetFn fn) 3936 { 3937 int memidx; 3938 uint32_t syndrome, desc = 0; 3939 3940 if (is_setg && !dc_isar_feature(aa64_mte, s)) { 3941 return false; 3942 } 3943 3944 /* 3945 * UNPREDICTABLE cases: we choose to UNDEF, which allows 3946 * us to pull this check before the CheckMOPSEnabled() test 3947 * (which we do in the helper function) 3948 */ 3949 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 3950 a->rd == 31 || a->rn == 31) { 3951 return false; 3952 } 3953 3954 memidx = get_a64_user_mem_index(s, a->unpriv); 3955 3956 /* 3957 * We pass option_a == true, matching our implementation; 3958 * we pass wrong_option == false: helper function may set that bit. 
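 * ("Option A", "wrong option" and "from epilogue" are FEAT_MOPS syndrome
 * fields that tell the exception handler which algorithm the implementation
 * used; see the syn_mop() encoding.)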
3959 */ 3960 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv, 3961 is_epilogue, false, true, a->rd, a->rs, a->rn); 3962 3963 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) { 3964 /* We may need to do MTE tag checking, so assemble the descriptor */ 3965 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 3966 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 3967 desc = FIELD_DP32(desc, MTEDESC, WRITE, true); 3968 /* SIZEM1 and ALIGN we leave 0 (byte write) */ 3969 } 3970 /* The helper function always needs the memidx even with MTE disabled */ 3971 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx); 3972 3973 /* 3974 * The helper needs the register numbers, but since they're in 3975 * the syndrome anyway, we let it extract them from there rather 3976 * than passing in an extra three integer arguments. 3977 */ 3978 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc)); 3979 return true; 3980 } 3981 3982 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp) 3983 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm) 3984 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete) 3985 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp) 3986 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm) 3987 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge) 3988 3989 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32); 3990 3991 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) 3992 { 3993 int rmemidx, wmemidx; 3994 uint32_t syndrome, rdesc = 0, wdesc = 0; 3995 bool wunpriv = extract32(a->options, 0, 1); 3996 bool runpriv = extract32(a->options, 1, 1); 3997 3998 /* 3999 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4000 * us to pull this check before the CheckMOPSEnabled() test 4001 * (which we do in the helper function) 4002 */ 4003 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4004 a->rd == 31 || a->rs == 31 || a->rn == 31) { 4005 return false; 4006 } 4007 4008 rmemidx = get_a64_user_mem_index(s, runpriv); 4009 wmemidx = get_a64_user_mem_index(s, wunpriv); 4010 4011 /* 4012 * We pass option_a == true, matching our implementation; 4013 * we pass wrong_option == false: helper function may set that bit. 4014 */ 4015 syndrome = syn_mop(false, false, a->options, is_epilogue, 4016 false, true, a->rd, a->rs, a->rn); 4017 4018 /* If we need to do MTE tag checking, assemble the descriptors */ 4019 if (s->mte_active[runpriv]) { 4020 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid); 4021 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma); 4022 } 4023 if (s->mte_active[wunpriv]) { 4024 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid); 4025 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma); 4026 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true); 4027 } 4028 /* The helper function needs these parts of the descriptor regardless */ 4029 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx); 4030 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx); 4031 4032 /* 4033 * The helper needs the register numbers, but since they're in 4034 * the syndrome anyway, we let it extract them from there rather 4035 * than passing in an extra three integer arguments. 
4036 */ 4037 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc), 4038 tcg_constant_i32(rdesc)); 4039 return true; 4040 } 4041 4042 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp) 4043 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym) 4044 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye) 4045 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp) 4046 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm) 4047 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe) 4048 4049 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); 4050 4051 static bool gen_rri(DisasContext *s, arg_rri_sf *a, 4052 bool rd_sp, bool rn_sp, ArithTwoOp *fn) 4053 { 4054 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn); 4055 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd); 4056 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm); 4057 4058 fn(tcg_rd, tcg_rn, tcg_imm); 4059 if (!a->sf) { 4060 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4061 } 4062 return true; 4063 } 4064 4065 /* 4066 * PC-rel. addressing 4067 */ 4068 4069 static bool trans_ADR(DisasContext *s, arg_ri *a) 4070 { 4071 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm); 4072 return true; 4073 } 4074 4075 static bool trans_ADRP(DisasContext *s, arg_ri *a) 4076 { 4077 int64_t offset = (int64_t)a->imm << 12; 4078 4079 /* The page offset is ok for CF_PCREL. */ 4080 offset -= s->pc_curr & 0xfff; 4081 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset); 4082 return true; 4083 } 4084 4085 /* 4086 * Add/subtract (immediate) 4087 */ 4088 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64) 4089 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64) 4090 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) 4091 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) 4092 4093 /* 4094 * Add/subtract (immediate, with tags) 4095 */ 4096 4097 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, 4098 bool sub_op) 4099 { 4100 TCGv_i64 tcg_rn, tcg_rd; 4101 int imm; 4102 4103 imm = a->uimm6 << LOG2_TAG_GRANULE; 4104 if (sub_op) { 4105 imm = -imm; 4106 } 4107 4108 tcg_rn = cpu_reg_sp(s, a->rn); 4109 tcg_rd = cpu_reg_sp(s, a->rd); 4110 4111 if (s->ata[0]) { 4112 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn, 4113 tcg_constant_i32(imm), 4114 tcg_constant_i32(a->uimm4)); 4115 } else { 4116 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 4117 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 4118 } 4119 return true; 4120 } 4121 4122 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false) 4123 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true) 4124 4125 /* The input should be a value in the bottom e bits (with higher 4126 * bits zero); returns that value replicated into every element 4127 * of size e in a 64 bit integer. 4128 */ 4129 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 4130 { 4131 assert(e != 0); 4132 while (e < 64) { 4133 mask |= mask << e; 4134 e *= 2; 4135 } 4136 return mask; 4137 } 4138 4139 /* 4140 * Logical (immediate) 4141 */ 4142 4143 /* 4144 * Simplified variant of pseudocode DecodeBitMasks() for the case where we 4145 * only require the wmask. Returns false if the imms/immr/immn are a reserved 4146 * value (ie should cause a guest UNDEF exception), and true if they are 4147 * valid, in which case the decoded bit pattern is written to result. 
4148 */ 4149 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, 4150 unsigned int imms, unsigned int immr) 4151 { 4152 uint64_t mask; 4153 unsigned e, levels, s, r; 4154 int len; 4155 4156 assert(immn < 2 && imms < 64 && immr < 64); 4157 4158 /* The bit patterns we create here are 64 bit patterns which 4159 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or 4160 * 64 bits each. Each element contains the same value: a run 4161 * of between 1 and e-1 non-zero bits, rotated within the 4162 * element by between 0 and e-1 bits. 4163 * 4164 * The element size and run length are encoded into immn (1 bit) 4165 * and imms (6 bits) as follows: 4166 * 64 bit elements: immn = 1, imms = <length of run - 1> 4167 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1> 4168 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1> 4169 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1> 4170 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1> 4171 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1> 4172 * Notice that immn = 0, imms = 11111x is the only combination 4173 * not covered by one of the above options; this is reserved. 4174 * Further, <length of run - 1> all-ones is a reserved pattern. 4175 * 4176 * In all cases the rotation is by immr % e (and immr is 6 bits). 4177 */ 4178 4179 /* First determine the element size */ 4180 len = 31 - clz32((immn << 6) | (~imms & 0x3f)); 4181 if (len < 1) { 4182 /* This is the immn == 0, imms == 0x11111x case */ 4183 return false; 4184 } 4185 e = 1 << len; 4186 4187 levels = e - 1; 4188 s = imms & levels; 4189 r = immr & levels; 4190 4191 if (s == levels) { 4192 /* <length of run - 1> mustn't be all-ones. */ 4193 return false; 4194 } 4195 4196 /* Create the value of one element: s+1 set bits rotated 4197 * by r within the element (which is e bits wide)... 4198 */ 4199 mask = MAKE_64BIT_MASK(0, s + 1); 4200 if (r) { 4201 mask = (mask >> r) | (mask << (e - r)); 4202 mask &= MAKE_64BIT_MASK(0, e); 4203 } 4204 /* ...then replicate the element over the whole 64 bit value */ 4205 mask = bitfield_replicate(mask, e); 4206 *result = mask; 4207 return true; 4208 } 4209 4210 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc, 4211 void (*fn)(TCGv_i64, TCGv_i64, int64_t)) 4212 { 4213 TCGv_i64 tcg_rd, tcg_rn; 4214 uint64_t imm; 4215 4216 /* Some immediate field values are reserved. */ 4217 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 4218 extract32(a->dbm, 0, 6), 4219 extract32(a->dbm, 6, 6))) { 4220 return false; 4221 } 4222 if (!a->sf) { 4223 imm &= 0xffffffffull; 4224 } 4225 4226 tcg_rd = set_cc ? 
cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd); 4227 tcg_rn = cpu_reg(s, a->rn); 4228 4229 fn(tcg_rd, tcg_rn, imm); 4230 if (set_cc) { 4231 gen_logic_CC(a->sf, tcg_rd); 4232 } 4233 if (!a->sf) { 4234 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4235 } 4236 return true; 4237 } 4238 4239 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64) 4240 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64) 4241 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64) 4242 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64) 4243 4244 /* 4245 * Move wide (immediate) 4246 */ 4247 4248 static bool trans_MOVZ(DisasContext *s, arg_movw *a) 4249 { 4250 int pos = a->hw << 4; 4251 tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos); 4252 return true; 4253 } 4254 4255 static bool trans_MOVN(DisasContext *s, arg_movw *a) 4256 { 4257 int pos = a->hw << 4; 4258 uint64_t imm = a->imm; 4259 4260 imm = ~(imm << pos); 4261 if (!a->sf) { 4262 imm = (uint32_t)imm; 4263 } 4264 tcg_gen_movi_i64(cpu_reg(s, a->rd), imm); 4265 return true; 4266 } 4267 4268 static bool trans_MOVK(DisasContext *s, arg_movw *a) 4269 { 4270 int pos = a->hw << 4; 4271 TCGv_i64 tcg_rd, tcg_im; 4272 4273 tcg_rd = cpu_reg(s, a->rd); 4274 tcg_im = tcg_constant_i64(a->imm); 4275 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16); 4276 if (!a->sf) { 4277 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4278 } 4279 return true; 4280 } 4281 4282 /* 4283 * Bitfield 4284 */ 4285 4286 static bool trans_SBFM(DisasContext *s, arg_SBFM *a) 4287 { 4288 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4289 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4290 unsigned int bitsize = a->sf ? 64 : 32; 4291 unsigned int ri = a->immr; 4292 unsigned int si = a->imms; 4293 unsigned int pos, len; 4294 4295 if (si >= ri) { 4296 /* Wd<s-r:0> = Wn<s:r> */ 4297 len = (si - ri) + 1; 4298 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len); 4299 if (!a->sf) { 4300 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4301 } 4302 } else { 4303 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4304 len = si + 1; 4305 pos = (bitsize - ri) & (bitsize - 1); 4306 4307 if (len < ri) { 4308 /* 4309 * Sign extend the destination field from len to fill the 4310 * balance of the word. Let the deposit below insert all 4311 * of those sign bits. 4312 */ 4313 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len); 4314 len = ri; 4315 } 4316 4317 /* 4318 * We start with zero, and we haven't modified any bits outside 4319 * bitsize, therefore no final zero-extension is unneeded for !sf. 4320 */ 4321 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4322 } 4323 return true; 4324 } 4325 4326 static bool trans_UBFM(DisasContext *s, arg_UBFM *a) 4327 { 4328 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4329 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4330 unsigned int bitsize = a->sf ? 64 : 32; 4331 unsigned int ri = a->immr; 4332 unsigned int si = a->imms; 4333 unsigned int pos, len; 4334 4335 tcg_rd = cpu_reg(s, a->rd); 4336 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4337 4338 if (si >= ri) { 4339 /* Wd<s-r:0> = Wn<s:r> */ 4340 len = (si - ri) + 1; 4341 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); 4342 } else { 4343 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4344 len = si + 1; 4345 pos = (bitsize - ri) & (bitsize - 1); 4346 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4347 } 4348 return true; 4349 } 4350 4351 static bool trans_BFM(DisasContext *s, arg_BFM *a) 4352 { 4353 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4354 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4355 unsigned int bitsize = a->sf ? 
64 : 32; 4356 unsigned int ri = a->immr; 4357 unsigned int si = a->imms; 4358 unsigned int pos, len; 4359 4360 tcg_rd = cpu_reg(s, a->rd); 4361 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4362 4363 if (si >= ri) { 4364 /* Wd<s-r:0> = Wn<s:r> */ 4365 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); 4366 len = (si - ri) + 1; 4367 pos = 0; 4368 } else { 4369 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4370 len = si + 1; 4371 pos = (bitsize - ri) & (bitsize - 1); 4372 } 4373 4374 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 4375 if (!a->sf) { 4376 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4377 } 4378 return true; 4379 } 4380 4381 static bool trans_EXTR(DisasContext *s, arg_extract *a) 4382 { 4383 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 4384 4385 tcg_rd = cpu_reg(s, a->rd); 4386 4387 if (unlikely(a->imm == 0)) { 4388 /* 4389 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 4390 * so an extract from bit 0 is a special case. 4391 */ 4392 if (a->sf) { 4393 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm)); 4394 } else { 4395 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm)); 4396 } 4397 } else { 4398 tcg_rm = cpu_reg(s, a->rm); 4399 tcg_rn = cpu_reg(s, a->rn); 4400 4401 if (a->sf) { 4402 /* Specialization to ROR happens in EXTRACT2. */ 4403 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm); 4404 } else { 4405 TCGv_i32 t0 = tcg_temp_new_i32(); 4406 4407 tcg_gen_extrl_i64_i32(t0, tcg_rm); 4408 if (a->rm == a->rn) { 4409 tcg_gen_rotri_i32(t0, t0, a->imm); 4410 } else { 4411 TCGv_i32 t1 = tcg_temp_new_i32(); 4412 tcg_gen_extrl_i64_i32(t1, tcg_rn); 4413 tcg_gen_extract2_i32(t0, t0, t1, a->imm); 4414 } 4415 tcg_gen_extu_i32_i64(tcg_rd, t0); 4416 } 4417 } 4418 return true; 4419 } 4420 4421 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 4422 * Note that it is the caller's responsibility to ensure that the 4423 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 4424 * mandated semantics for out of range shifts. 4425 */ 4426 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 4427 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 4428 { 4429 switch (shift_type) { 4430 case A64_SHIFT_TYPE_LSL: 4431 tcg_gen_shl_i64(dst, src, shift_amount); 4432 break; 4433 case A64_SHIFT_TYPE_LSR: 4434 tcg_gen_shr_i64(dst, src, shift_amount); 4435 break; 4436 case A64_SHIFT_TYPE_ASR: 4437 if (!sf) { 4438 tcg_gen_ext32s_i64(dst, src); 4439 } 4440 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); 4441 break; 4442 case A64_SHIFT_TYPE_ROR: 4443 if (sf) { 4444 tcg_gen_rotr_i64(dst, src, shift_amount); 4445 } else { 4446 TCGv_i32 t0, t1; 4447 t0 = tcg_temp_new_i32(); 4448 t1 = tcg_temp_new_i32(); 4449 tcg_gen_extrl_i64_i32(t0, src); 4450 tcg_gen_extrl_i64_i32(t1, shift_amount); 4451 tcg_gen_rotr_i32(t0, t0, t1); 4452 tcg_gen_extu_i32_i64(dst, t0); 4453 } 4454 break; 4455 default: 4456 assert(FALSE); /* all shift types should be handled */ 4457 break; 4458 } 4459 4460 if (!sf) { /* zero extend final result */ 4461 tcg_gen_ext32u_i64(dst, dst); 4462 } 4463 } 4464 4465 /* Shift a TCGv src by immediate, put result in dst. 4466 * The shift amount must be in range (this should always be true as the 4467 * relevant instructions will UNDEF on bad shift immediates). 4468 */ 4469 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, 4470 enum a64_shift_type shift_type, unsigned int shift_i) 4471 { 4472 assert(shift_i < (sf ? 
64 : 32)); 4473 4474 if (shift_i == 0) { 4475 tcg_gen_mov_i64(dst, src); 4476 } else { 4477 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); 4478 } 4479 } 4480 4481 /* Logical (shifted register) 4482 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 4483 * +----+-----+-----------+-------+---+------+--------+------+------+ 4484 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd | 4485 * +----+-----+-----------+-------+---+------+--------+------+------+ 4486 */ 4487 static void disas_logic_reg(DisasContext *s, uint32_t insn) 4488 { 4489 TCGv_i64 tcg_rd, tcg_rn, tcg_rm; 4490 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd; 4491 4492 sf = extract32(insn, 31, 1); 4493 opc = extract32(insn, 29, 2); 4494 shift_type = extract32(insn, 22, 2); 4495 invert = extract32(insn, 21, 1); 4496 rm = extract32(insn, 16, 5); 4497 shift_amount = extract32(insn, 10, 6); 4498 rn = extract32(insn, 5, 5); 4499 rd = extract32(insn, 0, 5); 4500 4501 if (!sf && (shift_amount & (1 << 5))) { 4502 unallocated_encoding(s); 4503 return; 4504 } 4505 4506 tcg_rd = cpu_reg(s, rd); 4507 4508 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) { 4509 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for 4510 * register-register MOV and MVN, so it is worth special casing. 4511 */ 4512 tcg_rm = cpu_reg(s, rm); 4513 if (invert) { 4514 tcg_gen_not_i64(tcg_rd, tcg_rm); 4515 if (!sf) { 4516 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4517 } 4518 } else { 4519 if (sf) { 4520 tcg_gen_mov_i64(tcg_rd, tcg_rm); 4521 } else { 4522 tcg_gen_ext32u_i64(tcg_rd, tcg_rm); 4523 } 4524 } 4525 return; 4526 } 4527 4528 tcg_rm = read_cpu_reg(s, rm, sf); 4529 4530 if (shift_amount) { 4531 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount); 4532 } 4533 4534 tcg_rn = cpu_reg(s, rn); 4535 4536 switch (opc | (invert << 2)) { 4537 case 0: /* AND */ 4538 case 3: /* ANDS */ 4539 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm); 4540 break; 4541 case 1: /* ORR */ 4542 tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm); 4543 break; 4544 case 2: /* EOR */ 4545 tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm); 4546 break; 4547 case 4: /* BIC */ 4548 case 7: /* BICS */ 4549 tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm); 4550 break; 4551 case 5: /* ORN */ 4552 tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm); 4553 break; 4554 case 6: /* EON */ 4555 tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm); 4556 break; 4557 default: 4558 assert(FALSE); 4559 break; 4560 } 4561 4562 if (!sf) { 4563 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4564 } 4565 4566 if (opc == 3) { 4567 gen_logic_CC(sf, tcg_rd); 4568 } 4569 } 4570 4571 /* 4572 * Add/subtract (extended register) 4573 * 4574 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0| 4575 * +--+--+--+-----------+-----+--+-------+------+------+----+----+ 4576 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd | 4577 * +--+--+--+-----------+-----+--+-------+------+------+----+----+ 4578 * 4579 * sf: 0 -> 32bit, 1 -> 64bit 4580 * op: 0 -> add , 1 -> sub 4581 * S: 1 -> set flags 4582 * opt: 00 4583 * option: extension type (see DecodeRegExtend) 4584 * imm3: optional shift to Rm 4585 * 4586 * Rd = Rn + LSL(extend(Rm), amount) 4587 */ 4588 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn) 4589 { 4590 int rd = extract32(insn, 0, 5); 4591 int rn = extract32(insn, 5, 5); 4592 int imm3 = extract32(insn, 10, 3); 4593 int option = extract32(insn, 13, 3); 4594 int rm = extract32(insn, 16, 5); 4595 int opt = extract32(insn, 22, 2); 4596 bool setflags = extract32(insn, 29, 1); 4597 bool sub_op = 
extract32(insn, 30, 1); 4598 bool sf = extract32(insn, 31, 1); 4599 4600 TCGv_i64 tcg_rm, tcg_rn; /* temps */ 4601 TCGv_i64 tcg_rd; 4602 TCGv_i64 tcg_result; 4603 4604 if (imm3 > 4 || opt != 0) { 4605 unallocated_encoding(s); 4606 return; 4607 } 4608 4609 /* non-flag setting ops may use SP */ 4610 if (!setflags) { 4611 tcg_rd = cpu_reg_sp(s, rd); 4612 } else { 4613 tcg_rd = cpu_reg(s, rd); 4614 } 4615 tcg_rn = read_cpu_reg_sp(s, rn, sf); 4616 4617 tcg_rm = read_cpu_reg(s, rm, sf); 4618 ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3); 4619 4620 tcg_result = tcg_temp_new_i64(); 4621 4622 if (!setflags) { 4623 if (sub_op) { 4624 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 4625 } else { 4626 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 4627 } 4628 } else { 4629 if (sub_op) { 4630 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 4631 } else { 4632 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 4633 } 4634 } 4635 4636 if (sf) { 4637 tcg_gen_mov_i64(tcg_rd, tcg_result); 4638 } else { 4639 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 4640 } 4641 } 4642 4643 /* 4644 * Add/subtract (shifted register) 4645 * 4646 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 4647 * +--+--+--+-----------+-----+--+-------+---------+------+------+ 4648 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd | 4649 * +--+--+--+-----------+-----+--+-------+---------+------+------+ 4650 * 4651 * sf: 0 -> 32bit, 1 -> 64bit 4652 * op: 0 -> add , 1 -> sub 4653 * S: 1 -> set flags 4654 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED 4655 * imm6: Shift amount to apply to Rm before the add/sub 4656 */ 4657 static void disas_add_sub_reg(DisasContext *s, uint32_t insn) 4658 { 4659 int rd = extract32(insn, 0, 5); 4660 int rn = extract32(insn, 5, 5); 4661 int imm6 = extract32(insn, 10, 6); 4662 int rm = extract32(insn, 16, 5); 4663 int shift_type = extract32(insn, 22, 2); 4664 bool setflags = extract32(insn, 29, 1); 4665 bool sub_op = extract32(insn, 30, 1); 4666 bool sf = extract32(insn, 31, 1); 4667 4668 TCGv_i64 tcg_rd = cpu_reg(s, rd); 4669 TCGv_i64 tcg_rn, tcg_rm; 4670 TCGv_i64 tcg_result; 4671 4672 if ((shift_type == 3) || (!sf && (imm6 > 31))) { 4673 unallocated_encoding(s); 4674 return; 4675 } 4676 4677 tcg_rn = read_cpu_reg(s, rn, sf); 4678 tcg_rm = read_cpu_reg(s, rm, sf); 4679 4680 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6); 4681 4682 tcg_result = tcg_temp_new_i64(); 4683 4684 if (!setflags) { 4685 if (sub_op) { 4686 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 4687 } else { 4688 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 4689 } 4690 } else { 4691 if (sub_op) { 4692 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 4693 } else { 4694 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 4695 } 4696 } 4697 4698 if (sf) { 4699 tcg_gen_mov_i64(tcg_rd, tcg_result); 4700 } else { 4701 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 4702 } 4703 } 4704 4705 /* Data-processing (3 source) 4706 * 4707 * 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0 4708 * +--+------+-----------+------+------+----+------+------+------+ 4709 * |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd | 4710 * +--+------+-----------+------+------+----+------+------+------+ 4711 */ 4712 static void disas_data_proc_3src(DisasContext *s, uint32_t insn) 4713 { 4714 int rd = extract32(insn, 0, 5); 4715 int rn = extract32(insn, 5, 5); 4716 int ra = extract32(insn, 10, 5); 4717 int rm = extract32(insn, 16, 5); 4718 int op_id = (extract32(insn, 29, 3) << 4) | 4719 (extract32(insn, 21, 3) << 1) | 4720 extract32(insn, 15, 1); 4721 bool sf = extract32(insn, 31, 1); 4722 bool is_sub = 
extract32(op_id, 0, 1); 4723 bool is_high = extract32(op_id, 2, 1); 4724 bool is_signed = false; 4725 TCGv_i64 tcg_op1; 4726 TCGv_i64 tcg_op2; 4727 TCGv_i64 tcg_tmp; 4728 4729 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */ 4730 switch (op_id) { 4731 case 0x42: /* SMADDL */ 4732 case 0x43: /* SMSUBL */ 4733 case 0x44: /* SMULH */ 4734 is_signed = true; 4735 break; 4736 case 0x0: /* MADD (32bit) */ 4737 case 0x1: /* MSUB (32bit) */ 4738 case 0x40: /* MADD (64bit) */ 4739 case 0x41: /* MSUB (64bit) */ 4740 case 0x4a: /* UMADDL */ 4741 case 0x4b: /* UMSUBL */ 4742 case 0x4c: /* UMULH */ 4743 break; 4744 default: 4745 unallocated_encoding(s); 4746 return; 4747 } 4748 4749 if (is_high) { 4750 TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */ 4751 TCGv_i64 tcg_rd = cpu_reg(s, rd); 4752 TCGv_i64 tcg_rn = cpu_reg(s, rn); 4753 TCGv_i64 tcg_rm = cpu_reg(s, rm); 4754 4755 if (is_signed) { 4756 tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 4757 } else { 4758 tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 4759 } 4760 return; 4761 } 4762 4763 tcg_op1 = tcg_temp_new_i64(); 4764 tcg_op2 = tcg_temp_new_i64(); 4765 tcg_tmp = tcg_temp_new_i64(); 4766 4767 if (op_id < 0x42) { 4768 tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn)); 4769 tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm)); 4770 } else { 4771 if (is_signed) { 4772 tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn)); 4773 tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm)); 4774 } else { 4775 tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn)); 4776 tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm)); 4777 } 4778 } 4779 4780 if (ra == 31 && !is_sub) { 4781 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 4782 tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2); 4783 } else { 4784 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 4785 if (is_sub) { 4786 tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 4787 } else { 4788 tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 4789 } 4790 } 4791 4792 if (!sf) { 4793 tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd)); 4794 } 4795 } 4796 4797 /* Add/subtract (with carry) 4798 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0 4799 * +--+--+--+------------------------+------+-------------+------+-----+ 4800 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd | 4801 * +--+--+--+------------------------+------+-------------+------+-----+ 4802 */ 4803 4804 static void disas_adc_sbc(DisasContext *s, uint32_t insn) 4805 { 4806 unsigned int sf, op, setflags, rm, rn, rd; 4807 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 4808 4809 sf = extract32(insn, 31, 1); 4810 op = extract32(insn, 30, 1); 4811 setflags = extract32(insn, 29, 1); 4812 rm = extract32(insn, 16, 5); 4813 rn = extract32(insn, 5, 5); 4814 rd = extract32(insn, 0, 5); 4815 4816 tcg_rd = cpu_reg(s, rd); 4817 tcg_rn = cpu_reg(s, rn); 4818 4819 if (op) { 4820 tcg_y = tcg_temp_new_i64(); 4821 tcg_gen_not_i64(tcg_y, cpu_reg(s, rm)); 4822 } else { 4823 tcg_y = cpu_reg(s, rm); 4824 } 4825 4826 if (setflags) { 4827 gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y); 4828 } else { 4829 gen_adc(sf, tcg_rd, tcg_rn, tcg_y); 4830 } 4831 } 4832 4833 /* 4834 * Rotate right into flags 4835 * 31 30 29 21 15 10 5 4 0 4836 * +--+--+--+-----------------+--------+-----------+------+--+------+ 4837 * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask | 4838 * +--+--+--+-----------------+--------+-----------+------+--+------+ 4839 */ 4840 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn) 4841 { 4842 int mask = extract32(insn, 
0, 4); 4843 int o2 = extract32(insn, 4, 1); 4844 int rn = extract32(insn, 5, 5); 4845 int imm6 = extract32(insn, 15, 6); 4846 int sf_op_s = extract32(insn, 29, 3); 4847 TCGv_i64 tcg_rn; 4848 TCGv_i32 nzcv; 4849 4850 if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) { 4851 unallocated_encoding(s); 4852 return; 4853 } 4854 4855 tcg_rn = read_cpu_reg(s, rn, 1); 4856 tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6); 4857 4858 nzcv = tcg_temp_new_i32(); 4859 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 4860 4861 if (mask & 8) { /* N */ 4862 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 4863 } 4864 if (mask & 4) { /* Z */ 4865 tcg_gen_not_i32(cpu_ZF, nzcv); 4866 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 4867 } 4868 if (mask & 2) { /* C */ 4869 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 4870 } 4871 if (mask & 1) { /* V */ 4872 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 4873 } 4874 } 4875 4876 /* 4877 * Evaluate into flags 4878 * 31 30 29 21 15 14 10 5 4 0 4879 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 4880 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 | Rn |o3| mask | 4881 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 4882 */ 4883 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn) 4884 { 4885 int o3_mask = extract32(insn, 0, 5); 4886 int rn = extract32(insn, 5, 5); 4887 int o2 = extract32(insn, 15, 6); 4888 int sz = extract32(insn, 14, 1); 4889 int sf_op_s = extract32(insn, 29, 3); 4890 TCGv_i32 tmp; 4891 int shift; 4892 4893 if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd || 4894 !dc_isar_feature(aa64_condm_4, s)) { 4895 unallocated_encoding(s); 4896 return; 4897 } 4898 shift = sz ? 16 : 24; /* SETF16 or SETF8 */ 4899 4900 tmp = tcg_temp_new_i32(); 4901 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 4902 tcg_gen_shli_i32(cpu_NF, tmp, shift); 4903 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 4904 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 4905 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 4906 } 4907 4908 /* Conditional compare (immediate / register) 4909 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 4910 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 4911 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv | 4912 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 4913 * [1] y [0] [0] 4914 */ 4915 static void disas_cc(DisasContext *s, uint32_t insn) 4916 { 4917 unsigned int sf, op, y, cond, rn, nzcv, is_imm; 4918 TCGv_i32 tcg_t0, tcg_t1, tcg_t2; 4919 TCGv_i64 tcg_tmp, tcg_y, tcg_rn; 4920 DisasCompare c; 4921 4922 if (!extract32(insn, 29, 1)) { 4923 unallocated_encoding(s); 4924 return; 4925 } 4926 if (insn & (1 << 10 | 1 << 4)) { 4927 unallocated_encoding(s); 4928 return; 4929 } 4930 sf = extract32(insn, 31, 1); 4931 op = extract32(insn, 30, 1); 4932 is_imm = extract32(insn, 11, 1); 4933 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */ 4934 cond = extract32(insn, 12, 4); 4935 rn = extract32(insn, 5, 5); 4936 nzcv = extract32(insn, 0, 4); 4937 4938 /* Set T0 = !COND. */ 4939 tcg_t0 = tcg_temp_new_i32(); 4940 arm_test_cc(&c, cond); 4941 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 4942 4943 /* Load the arguments for the new comparison. */ 4944 if (is_imm) { 4945 tcg_y = tcg_temp_new_i64(); 4946 tcg_gen_movi_i64(tcg_y, y); 4947 } else { 4948 tcg_y = cpu_reg(s, y); 4949 } 4950 tcg_rn = cpu_reg(s, rn); 4951 4952 /* Set the flags for the new comparison. 
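 * That is, for CCMP (op == 1) the flags are those of Rn - y, and for
 * CCMN (op == 0) those of Rn + y; if <cond> did not hold, they are
 * instead forced to the literal #nzcv by the masking further down.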
*/ 4953 tcg_tmp = tcg_temp_new_i64(); 4954 if (op) { 4955 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y); 4956 } else { 4957 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y); 4958 } 4959 4960 /* If COND was false, force the flags to #nzcv. Compute two masks 4961 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 4962 * For tcg hosts that support ANDC, we can make do with just T1. 4963 * In either case, allow the tcg optimizer to delete any unused mask. 4964 */ 4965 tcg_t1 = tcg_temp_new_i32(); 4966 tcg_t2 = tcg_temp_new_i32(); 4967 tcg_gen_neg_i32(tcg_t1, tcg_t0); 4968 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 4969 4970 if (nzcv & 8) { /* N */ 4971 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 4972 } else { 4973 if (TCG_TARGET_HAS_andc_i32) { 4974 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 4975 } else { 4976 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 4977 } 4978 } 4979 if (nzcv & 4) { /* Z */ 4980 if (TCG_TARGET_HAS_andc_i32) { 4981 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 4982 } else { 4983 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 4984 } 4985 } else { 4986 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 4987 } 4988 if (nzcv & 2) { /* C */ 4989 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 4990 } else { 4991 if (TCG_TARGET_HAS_andc_i32) { 4992 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 4993 } else { 4994 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 4995 } 4996 } 4997 if (nzcv & 1) { /* V */ 4998 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 4999 } else { 5000 if (TCG_TARGET_HAS_andc_i32) { 5001 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 5002 } else { 5003 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 5004 } 5005 } 5006 } 5007 5008 /* Conditional select 5009 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0 5010 * +----+----+---+-----------------+------+------+-----+------+------+ 5011 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd | 5012 * +----+----+---+-----------------+------+------+-----+------+------+ 5013 */ 5014 static void disas_cond_select(DisasContext *s, uint32_t insn) 5015 { 5016 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd; 5017 TCGv_i64 tcg_rd, zero; 5018 DisasCompare64 c; 5019 5020 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) { 5021 /* S == 1 or op2<1> == 1 */ 5022 unallocated_encoding(s); 5023 return; 5024 } 5025 sf = extract32(insn, 31, 1); 5026 else_inv = extract32(insn, 30, 1); 5027 rm = extract32(insn, 16, 5); 5028 cond = extract32(insn, 12, 4); 5029 else_inc = extract32(insn, 10, 1); 5030 rn = extract32(insn, 5, 5); 5031 rd = extract32(insn, 0, 5); 5032 5033 tcg_rd = cpu_reg(s, rd); 5034 5035 a64_test_cc(&c, cond); 5036 zero = tcg_constant_i64(0); 5037 5038 if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) { 5039 /* CSET & CSETM. 
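 * CSET Rd, <cond> yields 1 when <cond> holds and 0 otherwise, while
 * CSETM yields all-ones instead of 1.  The aliases encode the inverse
 * of <cond>, hence the tcg_invert_cond() in the setcond/negsetcond
 * generated below.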
*/ 5040 if (else_inv) { 5041 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), 5042 tcg_rd, c.value, zero); 5043 } else { 5044 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), 5045 tcg_rd, c.value, zero); 5046 } 5047 } else { 5048 TCGv_i64 t_true = cpu_reg(s, rn); 5049 TCGv_i64 t_false = read_cpu_reg(s, rm, 1); 5050 if (else_inv && else_inc) { 5051 tcg_gen_neg_i64(t_false, t_false); 5052 } else if (else_inv) { 5053 tcg_gen_not_i64(t_false, t_false); 5054 } else if (else_inc) { 5055 tcg_gen_addi_i64(t_false, t_false, 1); 5056 } 5057 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 5058 } 5059 5060 if (!sf) { 5061 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5062 } 5063 } 5064 5065 static void handle_clz(DisasContext *s, unsigned int sf, 5066 unsigned int rn, unsigned int rd) 5067 { 5068 TCGv_i64 tcg_rd, tcg_rn; 5069 tcg_rd = cpu_reg(s, rd); 5070 tcg_rn = cpu_reg(s, rn); 5071 5072 if (sf) { 5073 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 5074 } else { 5075 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 5076 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 5077 tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32); 5078 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5079 } 5080 } 5081 5082 static void handle_cls(DisasContext *s, unsigned int sf, 5083 unsigned int rn, unsigned int rd) 5084 { 5085 TCGv_i64 tcg_rd, tcg_rn; 5086 tcg_rd = cpu_reg(s, rd); 5087 tcg_rn = cpu_reg(s, rn); 5088 5089 if (sf) { 5090 tcg_gen_clrsb_i64(tcg_rd, tcg_rn); 5091 } else { 5092 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 5093 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 5094 tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32); 5095 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5096 } 5097 } 5098 5099 static void handle_rbit(DisasContext *s, unsigned int sf, 5100 unsigned int rn, unsigned int rd) 5101 { 5102 TCGv_i64 tcg_rd, tcg_rn; 5103 tcg_rd = cpu_reg(s, rd); 5104 tcg_rn = cpu_reg(s, rn); 5105 5106 if (sf) { 5107 gen_helper_rbit64(tcg_rd, tcg_rn); 5108 } else { 5109 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 5110 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 5111 gen_helper_rbit(tcg_tmp32, tcg_tmp32); 5112 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5113 } 5114 } 5115 5116 /* REV with sf==1, opcode==3 ("REV64") */ 5117 static void handle_rev64(DisasContext *s, unsigned int sf, 5118 unsigned int rn, unsigned int rd) 5119 { 5120 if (!sf) { 5121 unallocated_encoding(s); 5122 return; 5123 } 5124 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn)); 5125 } 5126 5127 /* REV with sf==0, opcode==2 5128 * REV32 (sf==1, opcode==2) 5129 */ 5130 static void handle_rev32(DisasContext *s, unsigned int sf, 5131 unsigned int rn, unsigned int rd) 5132 { 5133 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5134 TCGv_i64 tcg_rn = cpu_reg(s, rn); 5135 5136 if (sf) { 5137 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 5138 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 5139 } else { 5140 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 5141 } 5142 } 5143 5144 /* REV16 (opcode==1) */ 5145 static void handle_rev16(DisasContext *s, unsigned int sf, 5146 unsigned int rn, unsigned int rd) 5147 { 5148 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5149 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 5150 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 5151 TCGv_i64 mask = tcg_constant_i64(sf ? 
0x00ff00ff00ff00ffull : 0x00ff00ff); 5152 5153 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 5154 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 5155 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 5156 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 5157 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 5158 } 5159 5160 /* Data-processing (1 source) 5161 * 31 30 29 28 21 20 16 15 10 9 5 4 0 5162 * +----+---+---+-----------------+---------+--------+------+------+ 5163 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd | 5164 * +----+---+---+-----------------+---------+--------+------+------+ 5165 */ 5166 static void disas_data_proc_1src(DisasContext *s, uint32_t insn) 5167 { 5168 unsigned int sf, opcode, opcode2, rn, rd; 5169 TCGv_i64 tcg_rd; 5170 5171 if (extract32(insn, 29, 1)) { 5172 unallocated_encoding(s); 5173 return; 5174 } 5175 5176 sf = extract32(insn, 31, 1); 5177 opcode = extract32(insn, 10, 6); 5178 opcode2 = extract32(insn, 16, 5); 5179 rn = extract32(insn, 5, 5); 5180 rd = extract32(insn, 0, 5); 5181 5182 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7)) 5183 5184 switch (MAP(sf, opcode2, opcode)) { 5185 case MAP(0, 0x00, 0x00): /* RBIT */ 5186 case MAP(1, 0x00, 0x00): 5187 handle_rbit(s, sf, rn, rd); 5188 break; 5189 case MAP(0, 0x00, 0x01): /* REV16 */ 5190 case MAP(1, 0x00, 0x01): 5191 handle_rev16(s, sf, rn, rd); 5192 break; 5193 case MAP(0, 0x00, 0x02): /* REV/REV32 */ 5194 case MAP(1, 0x00, 0x02): 5195 handle_rev32(s, sf, rn, rd); 5196 break; 5197 case MAP(1, 0x00, 0x03): /* REV64 */ 5198 handle_rev64(s, sf, rn, rd); 5199 break; 5200 case MAP(0, 0x00, 0x04): /* CLZ */ 5201 case MAP(1, 0x00, 0x04): 5202 handle_clz(s, sf, rn, rd); 5203 break; 5204 case MAP(0, 0x00, 0x05): /* CLS */ 5205 case MAP(1, 0x00, 0x05): 5206 handle_cls(s, sf, rn, rd); 5207 break; 5208 case MAP(1, 0x01, 0x00): /* PACIA */ 5209 if (s->pauth_active) { 5210 tcg_rd = cpu_reg(s, rd); 5211 gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 5212 } else if (!dc_isar_feature(aa64_pauth, s)) { 5213 goto do_unallocated; 5214 } 5215 break; 5216 case MAP(1, 0x01, 0x01): /* PACIB */ 5217 if (s->pauth_active) { 5218 tcg_rd = cpu_reg(s, rd); 5219 gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 5220 } else if (!dc_isar_feature(aa64_pauth, s)) { 5221 goto do_unallocated; 5222 } 5223 break; 5224 case MAP(1, 0x01, 0x02): /* PACDA */ 5225 if (s->pauth_active) { 5226 tcg_rd = cpu_reg(s, rd); 5227 gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 5228 } else if (!dc_isar_feature(aa64_pauth, s)) { 5229 goto do_unallocated; 5230 } 5231 break; 5232 case MAP(1, 0x01, 0x03): /* PACDB */ 5233 if (s->pauth_active) { 5234 tcg_rd = cpu_reg(s, rd); 5235 gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 5236 } else if (!dc_isar_feature(aa64_pauth, s)) { 5237 goto do_unallocated; 5238 } 5239 break; 5240 case MAP(1, 0x01, 0x04): /* AUTIA */ 5241 if (s->pauth_active) { 5242 tcg_rd = cpu_reg(s, rd); 5243 gen_helper_autia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 5244 } else if (!dc_isar_feature(aa64_pauth, s)) { 5245 goto do_unallocated; 5246 } 5247 break; 5248 case MAP(1, 0x01, 0x05): /* AUTIB */ 5249 if (s->pauth_active) { 5250 tcg_rd = cpu_reg(s, rd); 5251 gen_helper_autib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 5252 } else if (!dc_isar_feature(aa64_pauth, s)) { 5253 goto do_unallocated; 5254 } 5255 break; 5256 case MAP(1, 0x01, 0x06): /* AUTDA */ 5257 if (s->pauth_active) { 5258 tcg_rd = cpu_reg(s, rd); 5259 gen_helper_autda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 5260 } else if 
(!dc_isar_feature(aa64_pauth, s)) { 5261 goto do_unallocated; 5262 } 5263 break; 5264 case MAP(1, 0x01, 0x07): /* AUTDB */ 5265 if (s->pauth_active) { 5266 tcg_rd = cpu_reg(s, rd); 5267 gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 5268 } else if (!dc_isar_feature(aa64_pauth, s)) { 5269 goto do_unallocated; 5270 } 5271 break; 5272 case MAP(1, 0x01, 0x08): /* PACIZA */ 5273 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5274 goto do_unallocated; 5275 } else if (s->pauth_active) { 5276 tcg_rd = cpu_reg(s, rd); 5277 gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 5278 } 5279 break; 5280 case MAP(1, 0x01, 0x09): /* PACIZB */ 5281 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5282 goto do_unallocated; 5283 } else if (s->pauth_active) { 5284 tcg_rd = cpu_reg(s, rd); 5285 gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 5286 } 5287 break; 5288 case MAP(1, 0x01, 0x0a): /* PACDZA */ 5289 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5290 goto do_unallocated; 5291 } else if (s->pauth_active) { 5292 tcg_rd = cpu_reg(s, rd); 5293 gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 5294 } 5295 break; 5296 case MAP(1, 0x01, 0x0b): /* PACDZB */ 5297 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5298 goto do_unallocated; 5299 } else if (s->pauth_active) { 5300 tcg_rd = cpu_reg(s, rd); 5301 gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 5302 } 5303 break; 5304 case MAP(1, 0x01, 0x0c): /* AUTIZA */ 5305 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5306 goto do_unallocated; 5307 } else if (s->pauth_active) { 5308 tcg_rd = cpu_reg(s, rd); 5309 gen_helper_autia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 5310 } 5311 break; 5312 case MAP(1, 0x01, 0x0d): /* AUTIZB */ 5313 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5314 goto do_unallocated; 5315 } else if (s->pauth_active) { 5316 tcg_rd = cpu_reg(s, rd); 5317 gen_helper_autib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 5318 } 5319 break; 5320 case MAP(1, 0x01, 0x0e): /* AUTDZA */ 5321 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5322 goto do_unallocated; 5323 } else if (s->pauth_active) { 5324 tcg_rd = cpu_reg(s, rd); 5325 gen_helper_autda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 5326 } 5327 break; 5328 case MAP(1, 0x01, 0x0f): /* AUTDZB */ 5329 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5330 goto do_unallocated; 5331 } else if (s->pauth_active) { 5332 tcg_rd = cpu_reg(s, rd); 5333 gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 5334 } 5335 break; 5336 case MAP(1, 0x01, 0x10): /* XPACI */ 5337 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5338 goto do_unallocated; 5339 } else if (s->pauth_active) { 5340 tcg_rd = cpu_reg(s, rd); 5341 gen_helper_xpaci(tcg_rd, tcg_env, tcg_rd); 5342 } 5343 break; 5344 case MAP(1, 0x01, 0x11): /* XPACD */ 5345 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5346 goto do_unallocated; 5347 } else if (s->pauth_active) { 5348 tcg_rd = cpu_reg(s, rd); 5349 gen_helper_xpacd(tcg_rd, tcg_env, tcg_rd); 5350 } 5351 break; 5352 default: 5353 do_unallocated: 5354 unallocated_encoding(s); 5355 break; 5356 } 5357 5358 #undef MAP 5359 } 5360 5361 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf, 5362 unsigned int rm, unsigned int rn, unsigned int rd) 5363 { 5364 TCGv_i64 tcg_n, tcg_m, tcg_rd; 5365 tcg_rd = cpu_reg(s, rd); 5366 5367 if (!sf && is_signed) { 5368 tcg_n = tcg_temp_new_i64(); 5369 tcg_m = tcg_temp_new_i64(); 5370 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn)); 
5371 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm)); 5372 } else { 5373 tcg_n = read_cpu_reg(s, rn, sf); 5374 tcg_m = read_cpu_reg(s, rm, sf); 5375 } 5376 5377 if (is_signed) { 5378 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 5379 } else { 5380 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 5381 } 5382 5383 if (!sf) { /* zero extend final result */ 5384 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5385 } 5386 } 5387 5388 /* LSLV, LSRV, ASRV, RORV */ 5389 static void handle_shift_reg(DisasContext *s, 5390 enum a64_shift_type shift_type, unsigned int sf, 5391 unsigned int rm, unsigned int rn, unsigned int rd) 5392 { 5393 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 5394 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5395 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 5396 5397 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31); 5398 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift); 5399 } 5400 5401 /* CRC32[BHWX], CRC32C[BHWX] */ 5402 static void handle_crc32(DisasContext *s, 5403 unsigned int sf, unsigned int sz, bool crc32c, 5404 unsigned int rm, unsigned int rn, unsigned int rd) 5405 { 5406 TCGv_i64 tcg_acc, tcg_val; 5407 TCGv_i32 tcg_bytes; 5408 5409 if (!dc_isar_feature(aa64_crc32, s) 5410 || (sf == 1 && sz != 3) 5411 || (sf == 0 && sz == 3)) { 5412 unallocated_encoding(s); 5413 return; 5414 } 5415 5416 if (sz == 3) { 5417 tcg_val = cpu_reg(s, rm); 5418 } else { 5419 uint64_t mask; 5420 switch (sz) { 5421 case 0: 5422 mask = 0xFF; 5423 break; 5424 case 1: 5425 mask = 0xFFFF; 5426 break; 5427 case 2: 5428 mask = 0xFFFFFFFF; 5429 break; 5430 default: 5431 g_assert_not_reached(); 5432 } 5433 tcg_val = tcg_temp_new_i64(); 5434 tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask); 5435 } 5436 5437 tcg_acc = cpu_reg(s, rn); 5438 tcg_bytes = tcg_constant_i32(1 << sz); 5439 5440 if (crc32c) { 5441 gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 5442 } else { 5443 gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 5444 } 5445 } 5446 5447 /* Data-processing (2 source) 5448 * 31 30 29 28 21 20 16 15 10 9 5 4 0 5449 * +----+---+---+-----------------+------+--------+------+------+ 5450 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd | 5451 * +----+---+---+-----------------+------+--------+------+------+ 5452 */ 5453 static void disas_data_proc_2src(DisasContext *s, uint32_t insn) 5454 { 5455 unsigned int sf, rm, opcode, rn, rd, setflag; 5456 sf = extract32(insn, 31, 1); 5457 setflag = extract32(insn, 29, 1); 5458 rm = extract32(insn, 16, 5); 5459 opcode = extract32(insn, 10, 6); 5460 rn = extract32(insn, 5, 5); 5461 rd = extract32(insn, 0, 5); 5462 5463 if (setflag && opcode != 0) { 5464 unallocated_encoding(s); 5465 return; 5466 } 5467 5468 switch (opcode) { 5469 case 0: /* SUBP(S) */ 5470 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5471 goto do_unallocated; 5472 } else { 5473 TCGv_i64 tcg_n, tcg_m, tcg_d; 5474 5475 tcg_n = read_cpu_reg_sp(s, rn, true); 5476 tcg_m = read_cpu_reg_sp(s, rm, true); 5477 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); 5478 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 5479 tcg_d = cpu_reg(s, rd); 5480 5481 if (setflag) { 5482 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 5483 } else { 5484 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 5485 } 5486 } 5487 break; 5488 case 2: /* UDIV */ 5489 handle_div(s, false, sf, rm, rn, rd); 5490 break; 5491 case 3: /* SDIV */ 5492 handle_div(s, true, sf, rm, rn, rd); 5493 break; 5494 case 4: /* IRG */ 5495 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5496 goto do_unallocated; 5497 } 5498 if (s->ata[0]) { 5499 
gen_helper_irg(cpu_reg_sp(s, rd), tcg_env, 5500 cpu_reg_sp(s, rn), cpu_reg(s, rm)); 5501 } else { 5502 gen_address_with_allocation_tag0(cpu_reg_sp(s, rd), 5503 cpu_reg_sp(s, rn)); 5504 } 5505 break; 5506 case 5: /* GMI */ 5507 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5508 goto do_unallocated; 5509 } else { 5510 TCGv_i64 t = tcg_temp_new_i64(); 5511 5512 tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4); 5513 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 5514 tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t); 5515 } 5516 break; 5517 case 8: /* LSLV */ 5518 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); 5519 break; 5520 case 9: /* LSRV */ 5521 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd); 5522 break; 5523 case 10: /* ASRV */ 5524 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd); 5525 break; 5526 case 11: /* RORV */ 5527 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd); 5528 break; 5529 case 12: /* PACGA */ 5530 if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) { 5531 goto do_unallocated; 5532 } 5533 gen_helper_pacga(cpu_reg(s, rd), tcg_env, 5534 cpu_reg(s, rn), cpu_reg_sp(s, rm)); 5535 break; 5536 case 16: 5537 case 17: 5538 case 18: 5539 case 19: 5540 case 20: 5541 case 21: 5542 case 22: 5543 case 23: /* CRC32 */ 5544 { 5545 int sz = extract32(opcode, 0, 2); 5546 bool crc32c = extract32(opcode, 2, 1); 5547 handle_crc32(s, sf, sz, crc32c, rm, rn, rd); 5548 break; 5549 } 5550 default: 5551 do_unallocated: 5552 unallocated_encoding(s); 5553 break; 5554 } 5555 } 5556 5557 /* 5558 * Data processing - register 5559 * 31 30 29 28 25 21 20 16 10 0 5560 * +--+---+--+---+-------+-----+-------+-------+---------+ 5561 * | |op0| |op1| 1 0 1 | op2 | | op3 | | 5562 * +--+---+--+---+-------+-----+-------+-------+---------+ 5563 */ 5564 static void disas_data_proc_reg(DisasContext *s, uint32_t insn) 5565 { 5566 int op0 = extract32(insn, 30, 1); 5567 int op1 = extract32(insn, 28, 1); 5568 int op2 = extract32(insn, 21, 4); 5569 int op3 = extract32(insn, 10, 6); 5570 5571 if (!op1) { 5572 if (op2 & 8) { 5573 if (op2 & 1) { 5574 /* Add/sub (extended register) */ 5575 disas_add_sub_ext_reg(s, insn); 5576 } else { 5577 /* Add/sub (shifted register) */ 5578 disas_add_sub_reg(s, insn); 5579 } 5580 } else { 5581 /* Logical (shifted register) */ 5582 disas_logic_reg(s, insn); 5583 } 5584 return; 5585 } 5586 5587 switch (op2) { 5588 case 0x0: 5589 switch (op3) { 5590 case 0x00: /* Add/subtract (with carry) */ 5591 disas_adc_sbc(s, insn); 5592 break; 5593 5594 case 0x01: /* Rotate right into flags */ 5595 case 0x21: 5596 disas_rotate_right_into_flags(s, insn); 5597 break; 5598 5599 case 0x02: /* Evaluate into flags */ 5600 case 0x12: 5601 case 0x22: 5602 case 0x32: 5603 disas_evaluate_into_flags(s, insn); 5604 break; 5605 5606 default: 5607 goto do_unallocated; 5608 } 5609 break; 5610 5611 case 0x2: /* Conditional compare */ 5612 disas_cc(s, insn); /* both imm and reg forms */ 5613 break; 5614 5615 case 0x4: /* Conditional select */ 5616 disas_cond_select(s, insn); 5617 break; 5618 5619 case 0x6: /* Data-processing */ 5620 if (op0) { /* (1 source) */ 5621 disas_data_proc_1src(s, insn); 5622 } else { /* (2 source) */ 5623 disas_data_proc_2src(s, insn); 5624 } 5625 break; 5626 case 0x8 ... 
0xf: /* (3 source) */ 5627 disas_data_proc_3src(s, insn); 5628 break; 5629 5630 default: 5631 do_unallocated: 5632 unallocated_encoding(s); 5633 break; 5634 } 5635 } 5636 5637 static void handle_fp_compare(DisasContext *s, int size, 5638 unsigned int rn, unsigned int rm, 5639 bool cmp_with_zero, bool signal_all_nans) 5640 { 5641 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 5642 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 5643 5644 if (size == MO_64) { 5645 TCGv_i64 tcg_vn, tcg_vm; 5646 5647 tcg_vn = read_fp_dreg(s, rn); 5648 if (cmp_with_zero) { 5649 tcg_vm = tcg_constant_i64(0); 5650 } else { 5651 tcg_vm = read_fp_dreg(s, rm); 5652 } 5653 if (signal_all_nans) { 5654 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5655 } else { 5656 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5657 } 5658 } else { 5659 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 5660 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 5661 5662 read_vec_element_i32(s, tcg_vn, rn, 0, size); 5663 if (cmp_with_zero) { 5664 tcg_gen_movi_i32(tcg_vm, 0); 5665 } else { 5666 read_vec_element_i32(s, tcg_vm, rm, 0, size); 5667 } 5668 5669 switch (size) { 5670 case MO_32: 5671 if (signal_all_nans) { 5672 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5673 } else { 5674 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5675 } 5676 break; 5677 case MO_16: 5678 if (signal_all_nans) { 5679 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5680 } else { 5681 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5682 } 5683 break; 5684 default: 5685 g_assert_not_reached(); 5686 } 5687 } 5688 5689 gen_set_nzcv(tcg_flags); 5690 } 5691 5692 /* Floating point compare 5693 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0 5694 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 5695 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 | 5696 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 5697 */ 5698 static void disas_fp_compare(DisasContext *s, uint32_t insn) 5699 { 5700 unsigned int mos, type, rm, op, rn, opc, op2r; 5701 int size; 5702 5703 mos = extract32(insn, 29, 3); 5704 type = extract32(insn, 22, 2); 5705 rm = extract32(insn, 16, 5); 5706 op = extract32(insn, 14, 2); 5707 rn = extract32(insn, 5, 5); 5708 opc = extract32(insn, 3, 2); 5709 op2r = extract32(insn, 0, 3); 5710 5711 if (mos || op || op2r) { 5712 unallocated_encoding(s); 5713 return; 5714 } 5715 5716 switch (type) { 5717 case 0: 5718 size = MO_32; 5719 break; 5720 case 1: 5721 size = MO_64; 5722 break; 5723 case 3: 5724 size = MO_16; 5725 if (dc_isar_feature(aa64_fp16, s)) { 5726 break; 5727 } 5728 /* fallthru */ 5729 default: 5730 unallocated_encoding(s); 5731 return; 5732 } 5733 5734 if (!fp_access_check(s)) { 5735 return; 5736 } 5737 5738 handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2); 5739 } 5740 5741 /* Floating point conditional compare 5742 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 5743 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 5744 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv | 5745 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 5746 */ 5747 static void disas_fp_ccomp(DisasContext *s, uint32_t insn) 5748 { 5749 unsigned int mos, type, rm, cond, rn, op, nzcv; 5750 TCGLabel *label_continue = NULL; 5751 int size; 5752 5753 mos = extract32(insn, 29, 3); 5754 type = extract32(insn, 22, 2); 5755 rm = extract32(insn, 16, 5); 5756 cond 
= extract32(insn, 12, 4); 5757 rn = extract32(insn, 5, 5); 5758 op = extract32(insn, 4, 1); 5759 nzcv = extract32(insn, 0, 4); 5760 5761 if (mos) { 5762 unallocated_encoding(s); 5763 return; 5764 } 5765 5766 switch (type) { 5767 case 0: 5768 size = MO_32; 5769 break; 5770 case 1: 5771 size = MO_64; 5772 break; 5773 case 3: 5774 size = MO_16; 5775 if (dc_isar_feature(aa64_fp16, s)) { 5776 break; 5777 } 5778 /* fallthru */ 5779 default: 5780 unallocated_encoding(s); 5781 return; 5782 } 5783 5784 if (!fp_access_check(s)) { 5785 return; 5786 } 5787 5788 if (cond < 0x0e) { /* not always */ 5789 TCGLabel *label_match = gen_new_label(); 5790 label_continue = gen_new_label(); 5791 arm_gen_test_cc(cond, label_match); 5792 /* nomatch: */ 5793 gen_set_nzcv(tcg_constant_i64(nzcv << 28)); 5794 tcg_gen_br(label_continue); 5795 gen_set_label(label_match); 5796 } 5797 5798 handle_fp_compare(s, size, rn, rm, false, op); 5799 5800 if (cond < 0x0e) { 5801 gen_set_label(label_continue); 5802 } 5803 } 5804 5805 /* Floating point conditional select 5806 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 5807 * +---+---+---+-----------+------+---+------+------+-----+------+------+ 5808 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd | 5809 * +---+---+---+-----------+------+---+------+------+-----+------+------+ 5810 */ 5811 static void disas_fp_csel(DisasContext *s, uint32_t insn) 5812 { 5813 unsigned int mos, type, rm, cond, rn, rd; 5814 TCGv_i64 t_true, t_false; 5815 DisasCompare64 c; 5816 MemOp sz; 5817 5818 mos = extract32(insn, 29, 3); 5819 type = extract32(insn, 22, 2); 5820 rm = extract32(insn, 16, 5); 5821 cond = extract32(insn, 12, 4); 5822 rn = extract32(insn, 5, 5); 5823 rd = extract32(insn, 0, 5); 5824 5825 if (mos) { 5826 unallocated_encoding(s); 5827 return; 5828 } 5829 5830 switch (type) { 5831 case 0: 5832 sz = MO_32; 5833 break; 5834 case 1: 5835 sz = MO_64; 5836 break; 5837 case 3: 5838 sz = MO_16; 5839 if (dc_isar_feature(aa64_fp16, s)) { 5840 break; 5841 } 5842 /* fallthru */ 5843 default: 5844 unallocated_encoding(s); 5845 return; 5846 } 5847 5848 if (!fp_access_check(s)) { 5849 return; 5850 } 5851 5852 /* Zero extend sreg & hreg inputs to 64 bits now. */ 5853 t_true = tcg_temp_new_i64(); 5854 t_false = tcg_temp_new_i64(); 5855 read_vec_element(s, t_true, rn, 0, sz); 5856 read_vec_element(s, t_false, rm, 0, sz); 5857 5858 a64_test_cc(&c, cond); 5859 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 5860 t_true, t_false); 5861 5862 /* Note that sregs & hregs write back zeros to the high bits, 5863 and we've already done the zero-extension. 
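 * In addition, write_fp_dreg() clears the upper half of the Q register,
 * so the single 64-bit write below is sufficient for all three element
 * sizes.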
*/ 5864 write_fp_dreg(s, rd, t_true); 5865 } 5866 5867 /* Floating-point data-processing (1 source) - half precision */ 5868 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) 5869 { 5870 TCGv_ptr fpst = NULL; 5871 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 5872 TCGv_i32 tcg_res = tcg_temp_new_i32(); 5873 5874 switch (opcode) { 5875 case 0x0: /* FMOV */ 5876 tcg_gen_mov_i32(tcg_res, tcg_op); 5877 break; 5878 case 0x1: /* FABS */ 5879 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); 5880 break; 5881 case 0x2: /* FNEG */ 5882 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 5883 break; 5884 case 0x3: /* FSQRT */ 5885 fpst = fpstatus_ptr(FPST_FPCR_F16); 5886 gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); 5887 break; 5888 case 0x8: /* FRINTN */ 5889 case 0x9: /* FRINTP */ 5890 case 0xa: /* FRINTM */ 5891 case 0xb: /* FRINTZ */ 5892 case 0xc: /* FRINTA */ 5893 { 5894 TCGv_i32 tcg_rmode; 5895 5896 fpst = fpstatus_ptr(FPST_FPCR_F16); 5897 tcg_rmode = gen_set_rmode(opcode & 7, fpst); 5898 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 5899 gen_restore_rmode(tcg_rmode, fpst); 5900 break; 5901 } 5902 case 0xe: /* FRINTX */ 5903 fpst = fpstatus_ptr(FPST_FPCR_F16); 5904 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst); 5905 break; 5906 case 0xf: /* FRINTI */ 5907 fpst = fpstatus_ptr(FPST_FPCR_F16); 5908 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 5909 break; 5910 default: 5911 g_assert_not_reached(); 5912 } 5913 5914 write_fp_sreg(s, rd, tcg_res); 5915 } 5916 5917 /* Floating-point data-processing (1 source) - single precision */ 5918 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) 5919 { 5920 void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr); 5921 TCGv_i32 tcg_op, tcg_res; 5922 TCGv_ptr fpst; 5923 int rmode = -1; 5924 5925 tcg_op = read_fp_sreg(s, rn); 5926 tcg_res = tcg_temp_new_i32(); 5927 5928 switch (opcode) { 5929 case 0x0: /* FMOV */ 5930 tcg_gen_mov_i32(tcg_res, tcg_op); 5931 goto done; 5932 case 0x1: /* FABS */ 5933 gen_helper_vfp_abss(tcg_res, tcg_op); 5934 goto done; 5935 case 0x2: /* FNEG */ 5936 gen_helper_vfp_negs(tcg_res, tcg_op); 5937 goto done; 5938 case 0x3: /* FSQRT */ 5939 gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); 5940 goto done; 5941 case 0x6: /* BFCVT */ 5942 gen_fpst = gen_helper_bfcvt; 5943 break; 5944 case 0x8: /* FRINTN */ 5945 case 0x9: /* FRINTP */ 5946 case 0xa: /* FRINTM */ 5947 case 0xb: /* FRINTZ */ 5948 case 0xc: /* FRINTA */ 5949 rmode = opcode & 7; 5950 gen_fpst = gen_helper_rints; 5951 break; 5952 case 0xe: /* FRINTX */ 5953 gen_fpst = gen_helper_rints_exact; 5954 break; 5955 case 0xf: /* FRINTI */ 5956 gen_fpst = gen_helper_rints; 5957 break; 5958 case 0x10: /* FRINT32Z */ 5959 rmode = FPROUNDING_ZERO; 5960 gen_fpst = gen_helper_frint32_s; 5961 break; 5962 case 0x11: /* FRINT32X */ 5963 gen_fpst = gen_helper_frint32_s; 5964 break; 5965 case 0x12: /* FRINT64Z */ 5966 rmode = FPROUNDING_ZERO; 5967 gen_fpst = gen_helper_frint64_s; 5968 break; 5969 case 0x13: /* FRINT64X */ 5970 gen_fpst = gen_helper_frint64_s; 5971 break; 5972 default: 5973 g_assert_not_reached(); 5974 } 5975 5976 fpst = fpstatus_ptr(FPST_FPCR); 5977 if (rmode >= 0) { 5978 TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); 5979 gen_fpst(tcg_res, tcg_op, fpst); 5980 gen_restore_rmode(tcg_rmode, fpst); 5981 } else { 5982 gen_fpst(tcg_res, tcg_op, fpst); 5983 } 5984 5985 done: 5986 write_fp_sreg(s, rd, tcg_res); 5987 } 5988 5989 /* Floating-point data-processing (1 source) - double precision */ 5990 static void handle_fp_1src_double(DisasContext *s, int 
opcode, int rd, int rn) 5991 { 5992 void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr); 5993 TCGv_i64 tcg_op, tcg_res; 5994 TCGv_ptr fpst; 5995 int rmode = -1; 5996 5997 switch (opcode) { 5998 case 0x0: /* FMOV */ 5999 gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0); 6000 return; 6001 } 6002 6003 tcg_op = read_fp_dreg(s, rn); 6004 tcg_res = tcg_temp_new_i64(); 6005 6006 switch (opcode) { 6007 case 0x1: /* FABS */ 6008 gen_helper_vfp_absd(tcg_res, tcg_op); 6009 goto done; 6010 case 0x2: /* FNEG */ 6011 gen_helper_vfp_negd(tcg_res, tcg_op); 6012 goto done; 6013 case 0x3: /* FSQRT */ 6014 gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env); 6015 goto done; 6016 case 0x8: /* FRINTN */ 6017 case 0x9: /* FRINTP */ 6018 case 0xa: /* FRINTM */ 6019 case 0xb: /* FRINTZ */ 6020 case 0xc: /* FRINTA */ 6021 rmode = opcode & 7; 6022 gen_fpst = gen_helper_rintd; 6023 break; 6024 case 0xe: /* FRINTX */ 6025 gen_fpst = gen_helper_rintd_exact; 6026 break; 6027 case 0xf: /* FRINTI */ 6028 gen_fpst = gen_helper_rintd; 6029 break; 6030 case 0x10: /* FRINT32Z */ 6031 rmode = FPROUNDING_ZERO; 6032 gen_fpst = gen_helper_frint32_d; 6033 break; 6034 case 0x11: /* FRINT32X */ 6035 gen_fpst = gen_helper_frint32_d; 6036 break; 6037 case 0x12: /* FRINT64Z */ 6038 rmode = FPROUNDING_ZERO; 6039 gen_fpst = gen_helper_frint64_d; 6040 break; 6041 case 0x13: /* FRINT64X */ 6042 gen_fpst = gen_helper_frint64_d; 6043 break; 6044 default: 6045 g_assert_not_reached(); 6046 } 6047 6048 fpst = fpstatus_ptr(FPST_FPCR); 6049 if (rmode >= 0) { 6050 TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); 6051 gen_fpst(tcg_res, tcg_op, fpst); 6052 gen_restore_rmode(tcg_rmode, fpst); 6053 } else { 6054 gen_fpst(tcg_res, tcg_op, fpst); 6055 } 6056 6057 done: 6058 write_fp_dreg(s, rd, tcg_res); 6059 } 6060 6061 static void handle_fp_fcvt(DisasContext *s, int opcode, 6062 int rd, int rn, int dtype, int ntype) 6063 { 6064 switch (ntype) { 6065 case 0x0: 6066 { 6067 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 6068 if (dtype == 1) { 6069 /* Single to double */ 6070 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 6071 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env); 6072 write_fp_dreg(s, rd, tcg_rd); 6073 } else { 6074 /* Single to half */ 6075 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 6076 TCGv_i32 ahp = get_ahp_flag(); 6077 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6078 6079 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp); 6080 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 6081 write_fp_sreg(s, rd, tcg_rd); 6082 } 6083 break; 6084 } 6085 case 0x1: 6086 { 6087 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 6088 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 6089 if (dtype == 0) { 6090 /* Double to single */ 6091 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env); 6092 } else { 6093 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6094 TCGv_i32 ahp = get_ahp_flag(); 6095 /* Double to half */ 6096 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 6097 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 6098 } 6099 write_fp_sreg(s, rd, tcg_rd); 6100 break; 6101 } 6102 case 0x3: 6103 { 6104 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 6105 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR); 6106 TCGv_i32 tcg_ahp = get_ahp_flag(); 6107 tcg_gen_ext16u_i32(tcg_rn, tcg_rn); 6108 if (dtype == 0) { 6109 /* Half to single */ 6110 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 6111 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 6112 write_fp_sreg(s, rd, tcg_rd); 6113 } else { 6114 /* Half to double */ 6115 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 6116 
gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 6117 write_fp_dreg(s, rd, tcg_rd); 6118 } 6119 break; 6120 } 6121 default: 6122 g_assert_not_reached(); 6123 } 6124 } 6125 6126 /* Floating point data-processing (1 source) 6127 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0 6128 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 6129 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd | 6130 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 6131 */ 6132 static void disas_fp_1src(DisasContext *s, uint32_t insn) 6133 { 6134 int mos = extract32(insn, 29, 3); 6135 int type = extract32(insn, 22, 2); 6136 int opcode = extract32(insn, 15, 6); 6137 int rn = extract32(insn, 5, 5); 6138 int rd = extract32(insn, 0, 5); 6139 6140 if (mos) { 6141 goto do_unallocated; 6142 } 6143 6144 switch (opcode) { 6145 case 0x4: case 0x5: case 0x7: 6146 { 6147 /* FCVT between half, single and double precision */ 6148 int dtype = extract32(opcode, 0, 2); 6149 if (type == 2 || dtype == type) { 6150 goto do_unallocated; 6151 } 6152 if (!fp_access_check(s)) { 6153 return; 6154 } 6155 6156 handle_fp_fcvt(s, opcode, rd, rn, dtype, type); 6157 break; 6158 } 6159 6160 case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */ 6161 if (type > 1 || !dc_isar_feature(aa64_frint, s)) { 6162 goto do_unallocated; 6163 } 6164 /* fall through */ 6165 case 0x0 ... 0x3: 6166 case 0x8 ... 0xc: 6167 case 0xe ... 0xf: 6168 /* 32-to-32 and 64-to-64 ops */ 6169 switch (type) { 6170 case 0: 6171 if (!fp_access_check(s)) { 6172 return; 6173 } 6174 handle_fp_1src_single(s, opcode, rd, rn); 6175 break; 6176 case 1: 6177 if (!fp_access_check(s)) { 6178 return; 6179 } 6180 handle_fp_1src_double(s, opcode, rd, rn); 6181 break; 6182 case 3: 6183 if (!dc_isar_feature(aa64_fp16, s)) { 6184 goto do_unallocated; 6185 } 6186 6187 if (!fp_access_check(s)) { 6188 return; 6189 } 6190 handle_fp_1src_half(s, opcode, rd, rn); 6191 break; 6192 default: 6193 goto do_unallocated; 6194 } 6195 break; 6196 6197 case 0x6: 6198 switch (type) { 6199 case 1: /* BFCVT */ 6200 if (!dc_isar_feature(aa64_bf16, s)) { 6201 goto do_unallocated; 6202 } 6203 if (!fp_access_check(s)) { 6204 return; 6205 } 6206 handle_fp_1src_single(s, opcode, rd, rn); 6207 break; 6208 default: 6209 goto do_unallocated; 6210 } 6211 break; 6212 6213 default: 6214 do_unallocated: 6215 unallocated_encoding(s); 6216 break; 6217 } 6218 } 6219 6220 /* Floating-point data-processing (2 source) - single precision */ 6221 static void handle_fp_2src_single(DisasContext *s, int opcode, 6222 int rd, int rn, int rm) 6223 { 6224 TCGv_i32 tcg_op1; 6225 TCGv_i32 tcg_op2; 6226 TCGv_i32 tcg_res; 6227 TCGv_ptr fpst; 6228 6229 tcg_res = tcg_temp_new_i32(); 6230 fpst = fpstatus_ptr(FPST_FPCR); 6231 tcg_op1 = read_fp_sreg(s, rn); 6232 tcg_op2 = read_fp_sreg(s, rm); 6233 6234 switch (opcode) { 6235 case 0x0: /* FMUL */ 6236 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 6237 break; 6238 case 0x1: /* FDIV */ 6239 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); 6240 break; 6241 case 0x2: /* FADD */ 6242 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); 6243 break; 6244 case 0x3: /* FSUB */ 6245 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 6246 break; 6247 case 0x4: /* FMAX */ 6248 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); 6249 break; 6250 case 0x5: /* FMIN */ 6251 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); 6252 break; 6253 case 0x6: /* FMAXNM */ 6254 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); 
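        /*
         * Note: the maxnum/minnum helpers used for FMAXNM/FMINNM follow
         * IEEE 754-2008 maxNum/minNum: when exactly one operand is a quiet
         * NaN the numeric operand is returned, whereas the FMAX/FMIN
         * helpers above propagate the NaN; hence the separate helpers.
         */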
6255 break; 6256 case 0x7: /* FMINNM */ 6257 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); 6258 break; 6259 case 0x8: /* FNMUL */ 6260 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 6261 gen_helper_vfp_negs(tcg_res, tcg_res); 6262 break; 6263 } 6264 6265 write_fp_sreg(s, rd, tcg_res); 6266 } 6267 6268 /* Floating-point data-processing (2 source) - double precision */ 6269 static void handle_fp_2src_double(DisasContext *s, int opcode, 6270 int rd, int rn, int rm) 6271 { 6272 TCGv_i64 tcg_op1; 6273 TCGv_i64 tcg_op2; 6274 TCGv_i64 tcg_res; 6275 TCGv_ptr fpst; 6276 6277 tcg_res = tcg_temp_new_i64(); 6278 fpst = fpstatus_ptr(FPST_FPCR); 6279 tcg_op1 = read_fp_dreg(s, rn); 6280 tcg_op2 = read_fp_dreg(s, rm); 6281 6282 switch (opcode) { 6283 case 0x0: /* FMUL */ 6284 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 6285 break; 6286 case 0x1: /* FDIV */ 6287 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); 6288 break; 6289 case 0x2: /* FADD */ 6290 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); 6291 break; 6292 case 0x3: /* FSUB */ 6293 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 6294 break; 6295 case 0x4: /* FMAX */ 6296 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); 6297 break; 6298 case 0x5: /* FMIN */ 6299 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); 6300 break; 6301 case 0x6: /* FMAXNM */ 6302 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); 6303 break; 6304 case 0x7: /* FMINNM */ 6305 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); 6306 break; 6307 case 0x8: /* FNMUL */ 6308 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 6309 gen_helper_vfp_negd(tcg_res, tcg_res); 6310 break; 6311 } 6312 6313 write_fp_dreg(s, rd, tcg_res); 6314 } 6315 6316 /* Floating-point data-processing (2 source) - half precision */ 6317 static void handle_fp_2src_half(DisasContext *s, int opcode, 6318 int rd, int rn, int rm) 6319 { 6320 TCGv_i32 tcg_op1; 6321 TCGv_i32 tcg_op2; 6322 TCGv_i32 tcg_res; 6323 TCGv_ptr fpst; 6324 6325 tcg_res = tcg_temp_new_i32(); 6326 fpst = fpstatus_ptr(FPST_FPCR_F16); 6327 tcg_op1 = read_fp_hreg(s, rn); 6328 tcg_op2 = read_fp_hreg(s, rm); 6329 6330 switch (opcode) { 6331 case 0x0: /* FMUL */ 6332 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 6333 break; 6334 case 0x1: /* FDIV */ 6335 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); 6336 break; 6337 case 0x2: /* FADD */ 6338 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); 6339 break; 6340 case 0x3: /* FSUB */ 6341 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 6342 break; 6343 case 0x4: /* FMAX */ 6344 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); 6345 break; 6346 case 0x5: /* FMIN */ 6347 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); 6348 break; 6349 case 0x6: /* FMAXNM */ 6350 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); 6351 break; 6352 case 0x7: /* FMINNM */ 6353 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); 6354 break; 6355 case 0x8: /* FNMUL */ 6356 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 6357 tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000); 6358 break; 6359 default: 6360 g_assert_not_reached(); 6361 } 6362 6363 write_fp_sreg(s, rd, tcg_res); 6364 } 6365 6366 /* Floating point data-processing (2 source) 6367 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 6368 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 6369 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd | 6370 * 
+---+---+---+-----------+------+---+------+--------+-----+------+------+ 6371 */ 6372 static void disas_fp_2src(DisasContext *s, uint32_t insn) 6373 { 6374 int mos = extract32(insn, 29, 3); 6375 int type = extract32(insn, 22, 2); 6376 int rd = extract32(insn, 0, 5); 6377 int rn = extract32(insn, 5, 5); 6378 int rm = extract32(insn, 16, 5); 6379 int opcode = extract32(insn, 12, 4); 6380 6381 if (opcode > 8 || mos) { 6382 unallocated_encoding(s); 6383 return; 6384 } 6385 6386 switch (type) { 6387 case 0: 6388 if (!fp_access_check(s)) { 6389 return; 6390 } 6391 handle_fp_2src_single(s, opcode, rd, rn, rm); 6392 break; 6393 case 1: 6394 if (!fp_access_check(s)) { 6395 return; 6396 } 6397 handle_fp_2src_double(s, opcode, rd, rn, rm); 6398 break; 6399 case 3: 6400 if (!dc_isar_feature(aa64_fp16, s)) { 6401 unallocated_encoding(s); 6402 return; 6403 } 6404 if (!fp_access_check(s)) { 6405 return; 6406 } 6407 handle_fp_2src_half(s, opcode, rd, rn, rm); 6408 break; 6409 default: 6410 unallocated_encoding(s); 6411 } 6412 } 6413 6414 /* Floating-point data-processing (3 source) - single precision */ 6415 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, 6416 int rd, int rn, int rm, int ra) 6417 { 6418 TCGv_i32 tcg_op1, tcg_op2, tcg_op3; 6419 TCGv_i32 tcg_res = tcg_temp_new_i32(); 6420 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6421 6422 tcg_op1 = read_fp_sreg(s, rn); 6423 tcg_op2 = read_fp_sreg(s, rm); 6424 tcg_op3 = read_fp_sreg(s, ra); 6425 6426 /* These are fused multiply-add, and must be done as one 6427 * floating point operation with no rounding between the 6428 * multiplication and addition steps. 6429 * NB that doing the negations here as separate steps is 6430 * correct : an input NaN should come out with its sign bit 6431 * flipped if it is a negated-input. 6432 */ 6433 if (o1 == true) { 6434 gen_helper_vfp_negs(tcg_op3, tcg_op3); 6435 } 6436 6437 if (o0 != o1) { 6438 gen_helper_vfp_negs(tcg_op1, tcg_op1); 6439 } 6440 6441 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6442 6443 write_fp_sreg(s, rd, tcg_res); 6444 } 6445 6446 /* Floating-point data-processing (3 source) - double precision */ 6447 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, 6448 int rd, int rn, int rm, int ra) 6449 { 6450 TCGv_i64 tcg_op1, tcg_op2, tcg_op3; 6451 TCGv_i64 tcg_res = tcg_temp_new_i64(); 6452 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6453 6454 tcg_op1 = read_fp_dreg(s, rn); 6455 tcg_op2 = read_fp_dreg(s, rm); 6456 tcg_op3 = read_fp_dreg(s, ra); 6457 6458 /* These are fused multiply-add, and must be done as one 6459 * floating point operation with no rounding between the 6460 * multiplication and addition steps. 6461 * NB that doing the negations here as separate steps is 6462 * correct : an input NaN should come out with its sign bit 6463 * flipped if it is a negated-input. 
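     *
     * For reference, the (o1, o0) pairs decoded by disas_fp_3src() map to
     * the usual mnemonics: 00 FMADD (Ra + Rn*Rm), 01 FMSUB (Ra - Rn*Rm),
     * 10 FNMADD (-Ra - Rn*Rm), 11 FNMSUB (-Ra + Rn*Rm); o1 negates the
     * addend Ra and (o0 != o1) negates the multiplicand Rn, as below.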
6464 */ 6465 if (o1 == true) { 6466 gen_helper_vfp_negd(tcg_op3, tcg_op3); 6467 } 6468 6469 if (o0 != o1) { 6470 gen_helper_vfp_negd(tcg_op1, tcg_op1); 6471 } 6472 6473 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6474 6475 write_fp_dreg(s, rd, tcg_res); 6476 } 6477 6478 /* Floating-point data-processing (3 source) - half precision */ 6479 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, 6480 int rd, int rn, int rm, int ra) 6481 { 6482 TCGv_i32 tcg_op1, tcg_op2, tcg_op3; 6483 TCGv_i32 tcg_res = tcg_temp_new_i32(); 6484 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16); 6485 6486 tcg_op1 = read_fp_hreg(s, rn); 6487 tcg_op2 = read_fp_hreg(s, rm); 6488 tcg_op3 = read_fp_hreg(s, ra); 6489 6490 /* These are fused multiply-add, and must be done as one 6491 * floating point operation with no rounding between the 6492 * multiplication and addition steps. 6493 * NB that doing the negations here as separate steps is 6494 * correct : an input NaN should come out with its sign bit 6495 * flipped if it is a negated-input. 6496 */ 6497 if (o1 == true) { 6498 tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000); 6499 } 6500 6501 if (o0 != o1) { 6502 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); 6503 } 6504 6505 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6506 6507 write_fp_sreg(s, rd, tcg_res); 6508 } 6509 6510 /* Floating point data-processing (3 source) 6511 * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0 6512 * +---+---+---+-----------+------+----+------+----+------+------+------+ 6513 * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd | 6514 * +---+---+---+-----------+------+----+------+----+------+------+------+ 6515 */ 6516 static void disas_fp_3src(DisasContext *s, uint32_t insn) 6517 { 6518 int mos = extract32(insn, 29, 3); 6519 int type = extract32(insn, 22, 2); 6520 int rd = extract32(insn, 0, 5); 6521 int rn = extract32(insn, 5, 5); 6522 int ra = extract32(insn, 10, 5); 6523 int rm = extract32(insn, 16, 5); 6524 bool o0 = extract32(insn, 15, 1); 6525 bool o1 = extract32(insn, 21, 1); 6526 6527 if (mos) { 6528 unallocated_encoding(s); 6529 return; 6530 } 6531 6532 switch (type) { 6533 case 0: 6534 if (!fp_access_check(s)) { 6535 return; 6536 } 6537 handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra); 6538 break; 6539 case 1: 6540 if (!fp_access_check(s)) { 6541 return; 6542 } 6543 handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra); 6544 break; 6545 case 3: 6546 if (!dc_isar_feature(aa64_fp16, s)) { 6547 unallocated_encoding(s); 6548 return; 6549 } 6550 if (!fp_access_check(s)) { 6551 return; 6552 } 6553 handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra); 6554 break; 6555 default: 6556 unallocated_encoding(s); 6557 } 6558 } 6559 6560 /* Floating point immediate 6561 * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0 6562 * +---+---+---+-----------+------+---+------------+-------+------+------+ 6563 * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd | 6564 * +---+---+---+-----------+------+---+------------+-------+------+------+ 6565 */ 6566 static void disas_fp_imm(DisasContext *s, uint32_t insn) 6567 { 6568 int rd = extract32(insn, 0, 5); 6569 int imm5 = extract32(insn, 5, 5); 6570 int imm8 = extract32(insn, 13, 8); 6571 int type = extract32(insn, 22, 2); 6572 int mos = extract32(insn, 29, 3); 6573 uint64_t imm; 6574 MemOp sz; 6575 6576 if (mos || imm5) { 6577 unallocated_encoding(s); 6578 return; 6579 } 6580 6581 switch (type) { 6582 case 0: 6583 sz = MO_32; 6584 break; 6585 case 1: 6586 sz = MO_64; 6587 break; 6588 case 3: 6589 sz = 
MO_16; 6590 if (dc_isar_feature(aa64_fp16, s)) { 6591 break; 6592 } 6593 /* fallthru */ 6594 default: 6595 unallocated_encoding(s); 6596 return; 6597 } 6598 6599 if (!fp_access_check(s)) { 6600 return; 6601 } 6602 6603 imm = vfp_expand_imm(sz, imm8); 6604 write_fp_dreg(s, rd, tcg_constant_i64(imm)); 6605 } 6606 6607 /* Handle floating point <=> fixed point conversions. Note that we can 6608 * also deal with fp <=> integer conversions as a special case (scale == 64) 6609 * OPTME: consider handling that special case specially or at least skipping 6610 * the call to scalbn in the helpers for zero shifts. 6611 */ 6612 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, 6613 bool itof, int rmode, int scale, int sf, int type) 6614 { 6615 bool is_signed = !(opcode & 1); 6616 TCGv_ptr tcg_fpstatus; 6617 TCGv_i32 tcg_shift, tcg_single; 6618 TCGv_i64 tcg_double; 6619 6620 tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR); 6621 6622 tcg_shift = tcg_constant_i32(64 - scale); 6623 6624 if (itof) { 6625 TCGv_i64 tcg_int = cpu_reg(s, rn); 6626 if (!sf) { 6627 TCGv_i64 tcg_extend = tcg_temp_new_i64(); 6628 6629 if (is_signed) { 6630 tcg_gen_ext32s_i64(tcg_extend, tcg_int); 6631 } else { 6632 tcg_gen_ext32u_i64(tcg_extend, tcg_int); 6633 } 6634 6635 tcg_int = tcg_extend; 6636 } 6637 6638 switch (type) { 6639 case 1: /* float64 */ 6640 tcg_double = tcg_temp_new_i64(); 6641 if (is_signed) { 6642 gen_helper_vfp_sqtod(tcg_double, tcg_int, 6643 tcg_shift, tcg_fpstatus); 6644 } else { 6645 gen_helper_vfp_uqtod(tcg_double, tcg_int, 6646 tcg_shift, tcg_fpstatus); 6647 } 6648 write_fp_dreg(s, rd, tcg_double); 6649 break; 6650 6651 case 0: /* float32 */ 6652 tcg_single = tcg_temp_new_i32(); 6653 if (is_signed) { 6654 gen_helper_vfp_sqtos(tcg_single, tcg_int, 6655 tcg_shift, tcg_fpstatus); 6656 } else { 6657 gen_helper_vfp_uqtos(tcg_single, tcg_int, 6658 tcg_shift, tcg_fpstatus); 6659 } 6660 write_fp_sreg(s, rd, tcg_single); 6661 break; 6662 6663 case 3: /* float16 */ 6664 tcg_single = tcg_temp_new_i32(); 6665 if (is_signed) { 6666 gen_helper_vfp_sqtoh(tcg_single, tcg_int, 6667 tcg_shift, tcg_fpstatus); 6668 } else { 6669 gen_helper_vfp_uqtoh(tcg_single, tcg_int, 6670 tcg_shift, tcg_fpstatus); 6671 } 6672 write_fp_sreg(s, rd, tcg_single); 6673 break; 6674 6675 default: 6676 g_assert_not_reached(); 6677 } 6678 } else { 6679 TCGv_i64 tcg_int = cpu_reg(s, rd); 6680 TCGv_i32 tcg_rmode; 6681 6682 if (extract32(opcode, 2, 1)) { 6683 /* There are too many rounding modes to all fit into rmode, 6684 * so FCVTA[US] is a special case. 
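         * (The 2-bit rmode field only encodes TIEEVEN, POSINF, NEGINF and
         * ZERO; FCVTA[US] needs round-to-nearest-with-ties-to-away, so it
         * is flagged via opcode<2> and forced to FPROUNDING_TIEAWAY here.)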
6685 */ 6686 rmode = FPROUNDING_TIEAWAY; 6687 } 6688 6689 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 6690 6691 switch (type) { 6692 case 1: /* float64 */ 6693 tcg_double = read_fp_dreg(s, rn); 6694 if (is_signed) { 6695 if (!sf) { 6696 gen_helper_vfp_tosld(tcg_int, tcg_double, 6697 tcg_shift, tcg_fpstatus); 6698 } else { 6699 gen_helper_vfp_tosqd(tcg_int, tcg_double, 6700 tcg_shift, tcg_fpstatus); 6701 } 6702 } else { 6703 if (!sf) { 6704 gen_helper_vfp_tould(tcg_int, tcg_double, 6705 tcg_shift, tcg_fpstatus); 6706 } else { 6707 gen_helper_vfp_touqd(tcg_int, tcg_double, 6708 tcg_shift, tcg_fpstatus); 6709 } 6710 } 6711 if (!sf) { 6712 tcg_gen_ext32u_i64(tcg_int, tcg_int); 6713 } 6714 break; 6715 6716 case 0: /* float32 */ 6717 tcg_single = read_fp_sreg(s, rn); 6718 if (sf) { 6719 if (is_signed) { 6720 gen_helper_vfp_tosqs(tcg_int, tcg_single, 6721 tcg_shift, tcg_fpstatus); 6722 } else { 6723 gen_helper_vfp_touqs(tcg_int, tcg_single, 6724 tcg_shift, tcg_fpstatus); 6725 } 6726 } else { 6727 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 6728 if (is_signed) { 6729 gen_helper_vfp_tosls(tcg_dest, tcg_single, 6730 tcg_shift, tcg_fpstatus); 6731 } else { 6732 gen_helper_vfp_touls(tcg_dest, tcg_single, 6733 tcg_shift, tcg_fpstatus); 6734 } 6735 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 6736 } 6737 break; 6738 6739 case 3: /* float16 */ 6740 tcg_single = read_fp_sreg(s, rn); 6741 if (sf) { 6742 if (is_signed) { 6743 gen_helper_vfp_tosqh(tcg_int, tcg_single, 6744 tcg_shift, tcg_fpstatus); 6745 } else { 6746 gen_helper_vfp_touqh(tcg_int, tcg_single, 6747 tcg_shift, tcg_fpstatus); 6748 } 6749 } else { 6750 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 6751 if (is_signed) { 6752 gen_helper_vfp_toslh(tcg_dest, tcg_single, 6753 tcg_shift, tcg_fpstatus); 6754 } else { 6755 gen_helper_vfp_toulh(tcg_dest, tcg_single, 6756 tcg_shift, tcg_fpstatus); 6757 } 6758 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 6759 } 6760 break; 6761 6762 default: 6763 g_assert_not_reached(); 6764 } 6765 6766 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 6767 } 6768 } 6769 6770 /* Floating point <-> fixed point conversions 6771 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 6772 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 6773 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd | 6774 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 6775 */ 6776 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn) 6777 { 6778 int rd = extract32(insn, 0, 5); 6779 int rn = extract32(insn, 5, 5); 6780 int scale = extract32(insn, 10, 6); 6781 int opcode = extract32(insn, 16, 3); 6782 int rmode = extract32(insn, 19, 2); 6783 int type = extract32(insn, 22, 2); 6784 bool sbit = extract32(insn, 29, 1); 6785 bool sf = extract32(insn, 31, 1); 6786 bool itof; 6787 6788 if (sbit || (!sf && scale < 32)) { 6789 unallocated_encoding(s); 6790 return; 6791 } 6792 6793 switch (type) { 6794 case 0: /* float32 */ 6795 case 1: /* float64 */ 6796 break; 6797 case 3: /* float16 */ 6798 if (dc_isar_feature(aa64_fp16, s)) { 6799 break; 6800 } 6801 /* fallthru */ 6802 default: 6803 unallocated_encoding(s); 6804 return; 6805 } 6806 6807 switch ((rmode << 3) | opcode) { 6808 case 0x2: /* SCVTF */ 6809 case 0x3: /* UCVTF */ 6810 itof = true; 6811 break; 6812 case 0x18: /* FCVTZS */ 6813 case 0x19: /* FCVTZU */ 6814 itof = false; 6815 break; 6816 default: 6817 unallocated_encoding(s); 6818 return; 6819 } 6820 6821 if (!fp_access_check(s)) { 6822 return; 6823 } 6824 6825 handle_fpfpcvt(s, rd, 
rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type); 6826 } 6827 6828 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) 6829 { 6830 /* FMOV: gpr to or from float, double, or top half of quad fp reg, 6831 * without conversion. 6832 */ 6833 6834 if (itof) { 6835 TCGv_i64 tcg_rn = cpu_reg(s, rn); 6836 TCGv_i64 tmp; 6837 6838 switch (type) { 6839 case 0: 6840 /* 32 bit */ 6841 tmp = tcg_temp_new_i64(); 6842 tcg_gen_ext32u_i64(tmp, tcg_rn); 6843 write_fp_dreg(s, rd, tmp); 6844 break; 6845 case 1: 6846 /* 64 bit */ 6847 write_fp_dreg(s, rd, tcg_rn); 6848 break; 6849 case 2: 6850 /* 64 bit to top half. */ 6851 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, rd)); 6852 clear_vec_high(s, true, rd); 6853 break; 6854 case 3: 6855 /* 16 bit */ 6856 tmp = tcg_temp_new_i64(); 6857 tcg_gen_ext16u_i64(tmp, tcg_rn); 6858 write_fp_dreg(s, rd, tmp); 6859 break; 6860 default: 6861 g_assert_not_reached(); 6862 } 6863 } else { 6864 TCGv_i64 tcg_rd = cpu_reg(s, rd); 6865 6866 switch (type) { 6867 case 0: 6868 /* 32 bit */ 6869 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_32)); 6870 break; 6871 case 1: 6872 /* 64 bit */ 6873 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_64)); 6874 break; 6875 case 2: 6876 /* 64 bits from top half */ 6877 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, rn)); 6878 break; 6879 case 3: 6880 /* 16 bit */ 6881 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_16)); 6882 break; 6883 default: 6884 g_assert_not_reached(); 6885 } 6886 } 6887 } 6888 6889 static void handle_fjcvtzs(DisasContext *s, int rd, int rn) 6890 { 6891 TCGv_i64 t = read_fp_dreg(s, rn); 6892 TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR); 6893 6894 gen_helper_fjcvtzs(t, t, fpstatus); 6895 6896 tcg_gen_ext32u_i64(cpu_reg(s, rd), t); 6897 tcg_gen_extrh_i64_i32(cpu_ZF, t); 6898 tcg_gen_movi_i32(cpu_CF, 0); 6899 tcg_gen_movi_i32(cpu_NF, 0); 6900 tcg_gen_movi_i32(cpu_VF, 0); 6901 } 6902 6903 /* Floating point <-> integer conversions 6904 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 6905 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 6906 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd | 6907 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 6908 */ 6909 static void disas_fp_int_conv(DisasContext *s, uint32_t insn) 6910 { 6911 int rd = extract32(insn, 0, 5); 6912 int rn = extract32(insn, 5, 5); 6913 int opcode = extract32(insn, 16, 3); 6914 int rmode = extract32(insn, 19, 2); 6915 int type = extract32(insn, 22, 2); 6916 bool sbit = extract32(insn, 29, 1); 6917 bool sf = extract32(insn, 31, 1); 6918 bool itof = false; 6919 6920 if (sbit) { 6921 goto do_unallocated; 6922 } 6923 6924 switch (opcode) { 6925 case 2: /* SCVTF */ 6926 case 3: /* UCVTF */ 6927 itof = true; 6928 /* fallthru */ 6929 case 4: /* FCVTAS */ 6930 case 5: /* FCVTAU */ 6931 if (rmode != 0) { 6932 goto do_unallocated; 6933 } 6934 /* fallthru */ 6935 case 0: /* FCVT[NPMZ]S */ 6936 case 1: /* FCVT[NPMZ]U */ 6937 switch (type) { 6938 case 0: /* float32 */ 6939 case 1: /* float64 */ 6940 break; 6941 case 3: /* float16 */ 6942 if (!dc_isar_feature(aa64_fp16, s)) { 6943 goto do_unallocated; 6944 } 6945 break; 6946 default: 6947 goto do_unallocated; 6948 } 6949 if (!fp_access_check(s)) { 6950 return; 6951 } 6952 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type); 6953 break; 6954 6955 default: 6956 switch (sf << 7 | type << 5 | rmode << 3 | opcode) { 6957 case 0b01100110: /* FMOV half <-> 32-bit int 
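         * (The case values pack sf:type:rmode:opcode as in the switch
         *  expression above; e.g. 0b01100110 is sf=0, type=3 (half
         *  precision), rmode=0, opcode=6.)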
*/ 6958 case 0b01100111: 6959 case 0b11100110: /* FMOV half <-> 64-bit int */ 6960 case 0b11100111: 6961 if (!dc_isar_feature(aa64_fp16, s)) { 6962 goto do_unallocated; 6963 } 6964 /* fallthru */ 6965 case 0b00000110: /* FMOV 32-bit */ 6966 case 0b00000111: 6967 case 0b10100110: /* FMOV 64-bit */ 6968 case 0b10100111: 6969 case 0b11001110: /* FMOV top half of 128-bit */ 6970 case 0b11001111: 6971 if (!fp_access_check(s)) { 6972 return; 6973 } 6974 itof = opcode & 1; 6975 handle_fmov(s, rd, rn, type, itof); 6976 break; 6977 6978 case 0b00111110: /* FJCVTZS */ 6979 if (!dc_isar_feature(aa64_jscvt, s)) { 6980 goto do_unallocated; 6981 } else if (fp_access_check(s)) { 6982 handle_fjcvtzs(s, rd, rn); 6983 } 6984 break; 6985 6986 default: 6987 do_unallocated: 6988 unallocated_encoding(s); 6989 return; 6990 } 6991 break; 6992 } 6993 } 6994 6995 /* FP-specific subcases of table C3-6 (SIMD and FP data processing) 6996 * 31 30 29 28 25 24 0 6997 * +---+---+---+---------+-----------------------------+ 6998 * | | 0 | | 1 1 1 1 | | 6999 * +---+---+---+---------+-----------------------------+ 7000 */ 7001 static void disas_data_proc_fp(DisasContext *s, uint32_t insn) 7002 { 7003 if (extract32(insn, 24, 1)) { 7004 /* Floating point data-processing (3 source) */ 7005 disas_fp_3src(s, insn); 7006 } else if (extract32(insn, 21, 1) == 0) { 7007 /* Floating point to fixed point conversions */ 7008 disas_fp_fixed_conv(s, insn); 7009 } else { 7010 switch (extract32(insn, 10, 2)) { 7011 case 1: 7012 /* Floating point conditional compare */ 7013 disas_fp_ccomp(s, insn); 7014 break; 7015 case 2: 7016 /* Floating point data-processing (2 source) */ 7017 disas_fp_2src(s, insn); 7018 break; 7019 case 3: 7020 /* Floating point conditional select */ 7021 disas_fp_csel(s, insn); 7022 break; 7023 case 0: 7024 switch (ctz32(extract32(insn, 12, 4))) { 7025 case 0: /* [15:12] == xxx1 */ 7026 /* Floating point immediate */ 7027 disas_fp_imm(s, insn); 7028 break; 7029 case 1: /* [15:12] == xx10 */ 7030 /* Floating point compare */ 7031 disas_fp_compare(s, insn); 7032 break; 7033 case 2: /* [15:12] == x100 */ 7034 /* Floating point data-processing (1 source) */ 7035 disas_fp_1src(s, insn); 7036 break; 7037 case 3: /* [15:12] == 1000 */ 7038 unallocated_encoding(s); 7039 break; 7040 default: /* [15:12] == 0000 */ 7041 /* Floating point <-> integer conversions */ 7042 disas_fp_int_conv(s, insn); 7043 break; 7044 } 7045 break; 7046 } 7047 } 7048 } 7049 7050 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right, 7051 int pos) 7052 { 7053 /* Extract 64 bits from the middle of two concatenated 64 bit 7054 * vector register slices left:right. The extracted bits start 7055 * at 'pos' bits into the right (least significant) side. 7056 * We return the result in tcg_right, and guarantee not to 7057 * trash tcg_left. 
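     * In other words tcg_right = (right >> pos) | (left << (64 - pos));
     * e.g. with pos == 24 the result holds right[63:24] in bits [39:0]
     * and left[23:0] in bits [63:40].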
7058 */ 7059 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 7060 assert(pos > 0 && pos < 64); 7061 7062 tcg_gen_shri_i64(tcg_right, tcg_right, pos); 7063 tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos); 7064 tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp); 7065 } 7066 7067 /* EXT 7068 * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0 7069 * +---+---+-------------+-----+---+------+---+------+---+------+------+ 7070 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd | 7071 * +---+---+-------------+-----+---+------+---+------+---+------+------+ 7072 */ 7073 static void disas_simd_ext(DisasContext *s, uint32_t insn) 7074 { 7075 int is_q = extract32(insn, 30, 1); 7076 int op2 = extract32(insn, 22, 2); 7077 int imm4 = extract32(insn, 11, 4); 7078 int rm = extract32(insn, 16, 5); 7079 int rn = extract32(insn, 5, 5); 7080 int rd = extract32(insn, 0, 5); 7081 int pos = imm4 << 3; 7082 TCGv_i64 tcg_resl, tcg_resh; 7083 7084 if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) { 7085 unallocated_encoding(s); 7086 return; 7087 } 7088 7089 if (!fp_access_check(s)) { 7090 return; 7091 } 7092 7093 tcg_resh = tcg_temp_new_i64(); 7094 tcg_resl = tcg_temp_new_i64(); 7095 7096 /* Vd gets bits starting at pos bits into Vm:Vn. This is 7097 * either extracting 128 bits from a 128:128 concatenation, or 7098 * extracting 64 bits from a 64:64 concatenation. 7099 */ 7100 if (!is_q) { 7101 read_vec_element(s, tcg_resl, rn, 0, MO_64); 7102 if (pos != 0) { 7103 read_vec_element(s, tcg_resh, rm, 0, MO_64); 7104 do_ext64(s, tcg_resh, tcg_resl, pos); 7105 } 7106 } else { 7107 TCGv_i64 tcg_hh; 7108 typedef struct { 7109 int reg; 7110 int elt; 7111 } EltPosns; 7112 EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} }; 7113 EltPosns *elt = eltposns; 7114 7115 if (pos >= 64) { 7116 elt++; 7117 pos -= 64; 7118 } 7119 7120 read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64); 7121 elt++; 7122 read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64); 7123 elt++; 7124 if (pos != 0) { 7125 do_ext64(s, tcg_resh, tcg_resl, pos); 7126 tcg_hh = tcg_temp_new_i64(); 7127 read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64); 7128 do_ext64(s, tcg_hh, tcg_resh, pos); 7129 } 7130 } 7131 7132 write_vec_element(s, tcg_resl, rd, 0, MO_64); 7133 if (is_q) { 7134 write_vec_element(s, tcg_resh, rd, 1, MO_64); 7135 } 7136 clear_vec_high(s, is_q, rd); 7137 } 7138 7139 /* TBL/TBX 7140 * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0 7141 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ 7142 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd | 7143 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ 7144 */ 7145 static void disas_simd_tb(DisasContext *s, uint32_t insn) 7146 { 7147 int op2 = extract32(insn, 22, 2); 7148 int is_q = extract32(insn, 30, 1); 7149 int rm = extract32(insn, 16, 5); 7150 int rn = extract32(insn, 5, 5); 7151 int rd = extract32(insn, 0, 5); 7152 int is_tbx = extract32(insn, 12, 1); 7153 int len = (extract32(insn, 13, 2) + 1) * 16; 7154 7155 if (op2 != 0) { 7156 unallocated_encoding(s); 7157 return; 7158 } 7159 7160 if (!fp_access_check(s)) { 7161 return; 7162 } 7163 7164 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 7165 vec_full_reg_offset(s, rm), tcg_env, 7166 is_q ? 
16 : 8, vec_full_reg_size(s), 7167 (len << 6) | (is_tbx << 5) | rn, 7168 gen_helper_simd_tblx); 7169 } 7170 7171 /* ZIP/UZP/TRN 7172 * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 7173 * +---+---+-------------+------+---+------+---+------------------+------+ 7174 * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd | 7175 * +---+---+-------------+------+---+------+---+------------------+------+ 7176 */ 7177 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) 7178 { 7179 int rd = extract32(insn, 0, 5); 7180 int rn = extract32(insn, 5, 5); 7181 int rm = extract32(insn, 16, 5); 7182 int size = extract32(insn, 22, 2); 7183 /* opc field bits [1:0] indicate ZIP/UZP/TRN; 7184 * bit 2 indicates 1 vs 2 variant of the insn. 7185 */ 7186 int opcode = extract32(insn, 12, 2); 7187 bool part = extract32(insn, 14, 1); 7188 bool is_q = extract32(insn, 30, 1); 7189 int esize = 8 << size; 7190 int i; 7191 int datasize = is_q ? 128 : 64; 7192 int elements = datasize / esize; 7193 TCGv_i64 tcg_res[2], tcg_ele; 7194 7195 if (opcode == 0 || (size == 3 && !is_q)) { 7196 unallocated_encoding(s); 7197 return; 7198 } 7199 7200 if (!fp_access_check(s)) { 7201 return; 7202 } 7203 7204 tcg_res[0] = tcg_temp_new_i64(); 7205 tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL; 7206 tcg_ele = tcg_temp_new_i64(); 7207 7208 for (i = 0; i < elements; i++) { 7209 int o, w; 7210 7211 switch (opcode) { 7212 case 1: /* UZP1/2 */ 7213 { 7214 int midpoint = elements / 2; 7215 if (i < midpoint) { 7216 read_vec_element(s, tcg_ele, rn, 2 * i + part, size); 7217 } else { 7218 read_vec_element(s, tcg_ele, rm, 7219 2 * (i - midpoint) + part, size); 7220 } 7221 break; 7222 } 7223 case 2: /* TRN1/2 */ 7224 if (i & 1) { 7225 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size); 7226 } else { 7227 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size); 7228 } 7229 break; 7230 case 3: /* ZIP1/2 */ 7231 { 7232 int base = part * elements / 2; 7233 if (i & 1) { 7234 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size); 7235 } else { 7236 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size); 7237 } 7238 break; 7239 } 7240 default: 7241 g_assert_not_reached(); 7242 } 7243 7244 w = (i * esize) / 64; 7245 o = (i * esize) % 64; 7246 if (o == 0) { 7247 tcg_gen_mov_i64(tcg_res[w], tcg_ele); 7248 } else { 7249 tcg_gen_shli_i64(tcg_ele, tcg_ele, o); 7250 tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele); 7251 } 7252 } 7253 7254 for (i = 0; i <= is_q; ++i) { 7255 write_vec_element(s, tcg_res[i], rd, i, MO_64); 7256 } 7257 clear_vec_high(s, is_q, rd); 7258 } 7259 7260 /* 7261 * do_reduction_op helper 7262 * 7263 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 7264 * important for correct NaN propagation that we do these 7265 * operations in exactly the order specified by the pseudocode. 7266 * 7267 * This is a recursive function, TCG temps should be freed by the 7268 * calling function once it is done with the values. 7269 */ 7270 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn, 7271 int esize, int size, int vmap, TCGv_ptr fpst) 7272 { 7273 if (esize == size) { 7274 int element; 7275 MemOp msize = esize == 16 ? 
MO_16 : MO_32; 7276 TCGv_i32 tcg_elem; 7277 7278 /* We should have one register left here */ 7279 assert(ctpop8(vmap) == 1); 7280 element = ctz32(vmap); 7281 assert(element < 8); 7282 7283 tcg_elem = tcg_temp_new_i32(); 7284 read_vec_element_i32(s, tcg_elem, rn, element, msize); 7285 return tcg_elem; 7286 } else { 7287 int bits = size / 2; 7288 int shift = ctpop8(vmap) / 2; 7289 int vmap_lo = (vmap >> shift) & vmap; 7290 int vmap_hi = (vmap & ~vmap_lo); 7291 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 7292 7293 tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst); 7294 tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst); 7295 tcg_res = tcg_temp_new_i32(); 7296 7297 switch (fpopcode) { 7298 case 0x0c: /* fmaxnmv half-precision */ 7299 gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst); 7300 break; 7301 case 0x0f: /* fmaxv half-precision */ 7302 gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst); 7303 break; 7304 case 0x1c: /* fminnmv half-precision */ 7305 gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst); 7306 break; 7307 case 0x1f: /* fminv half-precision */ 7308 gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst); 7309 break; 7310 case 0x2c: /* fmaxnmv */ 7311 gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst); 7312 break; 7313 case 0x2f: /* fmaxv */ 7314 gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst); 7315 break; 7316 case 0x3c: /* fminnmv */ 7317 gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst); 7318 break; 7319 case 0x3f: /* fminv */ 7320 gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst); 7321 break; 7322 default: 7323 g_assert_not_reached(); 7324 } 7325 return tcg_res; 7326 } 7327 } 7328 7329 /* AdvSIMD across lanes 7330 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 7331 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 7332 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | 7333 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 7334 */ 7335 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) 7336 { 7337 int rd = extract32(insn, 0, 5); 7338 int rn = extract32(insn, 5, 5); 7339 int size = extract32(insn, 22, 2); 7340 int opcode = extract32(insn, 12, 5); 7341 bool is_q = extract32(insn, 30, 1); 7342 bool is_u = extract32(insn, 29, 1); 7343 bool is_fp = false; 7344 bool is_min = false; 7345 int esize; 7346 int elements; 7347 int i; 7348 TCGv_i64 tcg_res, tcg_elt; 7349 7350 switch (opcode) { 7351 case 0x1b: /* ADDV */ 7352 if (is_u) { 7353 unallocated_encoding(s); 7354 return; 7355 } 7356 /* fall through */ 7357 case 0x3: /* SADDLV, UADDLV */ 7358 case 0xa: /* SMAXV, UMAXV */ 7359 case 0x1a: /* SMINV, UMINV */ 7360 if (size == 3 || (size == 2 && !is_q)) { 7361 unallocated_encoding(s); 7362 return; 7363 } 7364 break; 7365 case 0xc: /* FMAXNMV, FMINNMV */ 7366 case 0xf: /* FMAXV, FMINV */ 7367 /* Bit 1 of size field encodes min vs max and the actual size 7368 * depends on the encoding of the U bit. If not set (and FP16 7369 * enabled) then we do half-precision float instead of single 7370 * precision. 
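         * Concretely, as decoded below: U == 0 requires FEAT_FP16 and
         * gives 16-bit elements; U == 1 gives 32-bit elements and requires
         * Q == 1 and size<0> == 0. There is no 64-bit variant of these
         * reductions.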
7371 */ 7372 is_min = extract32(size, 1, 1); 7373 is_fp = true; 7374 if (!is_u && dc_isar_feature(aa64_fp16, s)) { 7375 size = 1; 7376 } else if (!is_u || !is_q || extract32(size, 0, 1)) { 7377 unallocated_encoding(s); 7378 return; 7379 } else { 7380 size = 2; 7381 } 7382 break; 7383 default: 7384 unallocated_encoding(s); 7385 return; 7386 } 7387 7388 if (!fp_access_check(s)) { 7389 return; 7390 } 7391 7392 esize = 8 << size; 7393 elements = (is_q ? 128 : 64) / esize; 7394 7395 tcg_res = tcg_temp_new_i64(); 7396 tcg_elt = tcg_temp_new_i64(); 7397 7398 /* These instructions operate across all lanes of a vector 7399 * to produce a single result. We can guarantee that a 64 7400 * bit intermediate is sufficient: 7401 * + for [US]ADDLV the maximum element size is 32 bits, and 7402 * the result type is 64 bits 7403 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the 7404 * same as the element size, which is 32 bits at most 7405 * For the integer operations we can choose to work at 64 7406 * or 32 bits and truncate at the end; for simplicity 7407 * we use 64 bits always. The floating point 7408 * ops do require 32 bit intermediates, though. 7409 */ 7410 if (!is_fp) { 7411 read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN)); 7412 7413 for (i = 1; i < elements; i++) { 7414 read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN)); 7415 7416 switch (opcode) { 7417 case 0x03: /* SADDLV / UADDLV */ 7418 case 0x1b: /* ADDV */ 7419 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt); 7420 break; 7421 case 0x0a: /* SMAXV / UMAXV */ 7422 if (is_u) { 7423 tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt); 7424 } else { 7425 tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt); 7426 } 7427 break; 7428 case 0x1a: /* SMINV / UMINV */ 7429 if (is_u) { 7430 tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt); 7431 } else { 7432 tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt); 7433 } 7434 break; 7435 default: 7436 g_assert_not_reached(); 7437 } 7438 7439 } 7440 } else { 7441 /* Floating point vector reduction ops which work across 32 7442 * bit (single) or 16 bit (half-precision) intermediates. 7443 * Note that correct NaN propagation requires that we do these 7444 * operations in exactly the order specified by the pseudocode. 7445 */ 7446 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 7447 int fpopcode = opcode | is_min << 4 | is_u << 5; 7448 int vmap = (1 << elements) - 1; 7449 TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize, 7450 (is_q ? 
128 : 64), vmap, fpst); 7451 tcg_gen_extu_i32_i64(tcg_res, tcg_res32); 7452 } 7453 7454 /* Now truncate the result to the width required for the final output */ 7455 if (opcode == 0x03) { 7456 /* SADDLV, UADDLV: result is 2*esize */ 7457 size++; 7458 } 7459 7460 switch (size) { 7461 case 0: 7462 tcg_gen_ext8u_i64(tcg_res, tcg_res); 7463 break; 7464 case 1: 7465 tcg_gen_ext16u_i64(tcg_res, tcg_res); 7466 break; 7467 case 2: 7468 tcg_gen_ext32u_i64(tcg_res, tcg_res); 7469 break; 7470 case 3: 7471 break; 7472 default: 7473 g_assert_not_reached(); 7474 } 7475 7476 write_fp_dreg(s, rd, tcg_res); 7477 } 7478 7479 /* DUP (Element, Vector) 7480 * 7481 * 31 30 29 21 20 16 15 10 9 5 4 0 7482 * +---+---+-------------------+--------+-------------+------+------+ 7483 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | 7484 * +---+---+-------------------+--------+-------------+------+------+ 7485 * 7486 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7487 */ 7488 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, 7489 int imm5) 7490 { 7491 int size = ctz32(imm5); 7492 int index; 7493 7494 if (size > 3 || (size == 3 && !is_q)) { 7495 unallocated_encoding(s); 7496 return; 7497 } 7498 7499 if (!fp_access_check(s)) { 7500 return; 7501 } 7502 7503 index = imm5 >> (size + 1); 7504 tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd), 7505 vec_reg_offset(s, rn, index, size), 7506 is_q ? 16 : 8, vec_full_reg_size(s)); 7507 } 7508 7509 /* DUP (element, scalar) 7510 * 31 21 20 16 15 10 9 5 4 0 7511 * +-----------------------+--------+-------------+------+------+ 7512 * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | 7513 * +-----------------------+--------+-------------+------+------+ 7514 */ 7515 static void handle_simd_dupes(DisasContext *s, int rd, int rn, 7516 int imm5) 7517 { 7518 int size = ctz32(imm5); 7519 int index; 7520 TCGv_i64 tmp; 7521 7522 if (size > 3) { 7523 unallocated_encoding(s); 7524 return; 7525 } 7526 7527 if (!fp_access_check(s)) { 7528 return; 7529 } 7530 7531 index = imm5 >> (size + 1); 7532 7533 /* This instruction just extracts the specified element and 7534 * zero-extends it into the bottom of the destination register. 7535 */ 7536 tmp = tcg_temp_new_i64(); 7537 read_vec_element(s, tmp, rn, index, size); 7538 write_fp_dreg(s, rd, tmp); 7539 } 7540 7541 /* DUP (General) 7542 * 7543 * 31 30 29 21 20 16 15 10 9 5 4 0 7544 * +---+---+-------------------+--------+-------------+------+------+ 7545 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd | 7546 * +---+---+-------------------+--------+-------------+------+------+ 7547 * 7548 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7549 */ 7550 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn, 7551 int imm5) 7552 { 7553 int size = ctz32(imm5); 7554 uint32_t dofs, oprsz, maxsz; 7555 7556 if (size > 3 || ((size == 3) && !is_q)) { 7557 unallocated_encoding(s); 7558 return; 7559 } 7560 7561 if (!fp_access_check(s)) { 7562 return; 7563 } 7564 7565 dofs = vec_full_reg_offset(s, rd); 7566 oprsz = is_q ? 
16 : 8; 7567 maxsz = vec_full_reg_size(s); 7568 7569 tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn)); 7570 } 7571 7572 /* INS (Element) 7573 * 7574 * 31 21 20 16 15 14 11 10 9 5 4 0 7575 * +-----------------------+--------+------------+---+------+------+ 7576 * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 7577 * +-----------------------+--------+------------+---+------+------+ 7578 * 7579 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7580 * index: encoded in imm5<4:size+1> 7581 */ 7582 static void handle_simd_inse(DisasContext *s, int rd, int rn, 7583 int imm4, int imm5) 7584 { 7585 int size = ctz32(imm5); 7586 int src_index, dst_index; 7587 TCGv_i64 tmp; 7588 7589 if (size > 3) { 7590 unallocated_encoding(s); 7591 return; 7592 } 7593 7594 if (!fp_access_check(s)) { 7595 return; 7596 } 7597 7598 dst_index = extract32(imm5, 1+size, 5); 7599 src_index = extract32(imm4, size, 4); 7600 7601 tmp = tcg_temp_new_i64(); 7602 7603 read_vec_element(s, tmp, rn, src_index, size); 7604 write_vec_element(s, tmp, rd, dst_index, size); 7605 7606 /* INS is considered a 128-bit write for SVE. */ 7607 clear_vec_high(s, true, rd); 7608 } 7609 7610 7611 /* INS (General) 7612 * 7613 * 31 21 20 16 15 10 9 5 4 0 7614 * +-----------------------+--------+-------------+------+------+ 7615 * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd | 7616 * +-----------------------+--------+-------------+------+------+ 7617 * 7618 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7619 * index: encoded in imm5<4:size+1> 7620 */ 7621 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5) 7622 { 7623 int size = ctz32(imm5); 7624 int idx; 7625 7626 if (size > 3) { 7627 unallocated_encoding(s); 7628 return; 7629 } 7630 7631 if (!fp_access_check(s)) { 7632 return; 7633 } 7634 7635 idx = extract32(imm5, 1 + size, 4 - size); 7636 write_vec_element(s, cpu_reg(s, rn), rd, idx, size); 7637 7638 /* INS is considered a 128-bit write for SVE. */ 7639 clear_vec_high(s, true, rd); 7640 } 7641 7642 /* 7643 * UMOV (General) 7644 * SMOV (General) 7645 * 7646 * 31 30 29 21 20 16 15 12 10 9 5 4 0 7647 * +---+---+-------------------+--------+-------------+------+------+ 7648 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd | 7649 * +---+---+-------------------+--------+-------------+------+------+ 7650 * 7651 * U: unsigned when set 7652 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7653 */ 7654 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, 7655 int rn, int rd, int imm5) 7656 { 7657 int size = ctz32(imm5); 7658 int element; 7659 TCGv_i64 tcg_rd; 7660 7661 /* Check for UnallocatedEncodings */ 7662 if (is_signed) { 7663 if (size > 2 || (size == 2 && !is_q)) { 7664 unallocated_encoding(s); 7665 return; 7666 } 7667 } else { 7668 if (size > 3 7669 || (size < 3 && is_q) 7670 || (size == 3 && !is_q)) { 7671 unallocated_encoding(s); 7672 return; 7673 } 7674 } 7675 7676 if (!fp_access_check(s)) { 7677 return; 7678 } 7679 7680 element = extract32(imm5, 1+size, 4); 7681 7682 tcg_rd = cpu_reg(s, rd); 7683 read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? 
MO_SIGN : 0)); 7684 if (is_signed && !is_q) { 7685 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7686 } 7687 } 7688 7689 /* AdvSIMD copy 7690 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 7691 * +---+---+----+-----------------+------+---+------+---+------+------+ 7692 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 7693 * +---+---+----+-----------------+------+---+------+---+------+------+ 7694 */ 7695 static void disas_simd_copy(DisasContext *s, uint32_t insn) 7696 { 7697 int rd = extract32(insn, 0, 5); 7698 int rn = extract32(insn, 5, 5); 7699 int imm4 = extract32(insn, 11, 4); 7700 int op = extract32(insn, 29, 1); 7701 int is_q = extract32(insn, 30, 1); 7702 int imm5 = extract32(insn, 16, 5); 7703 7704 if (op) { 7705 if (is_q) { 7706 /* INS (element) */ 7707 handle_simd_inse(s, rd, rn, imm4, imm5); 7708 } else { 7709 unallocated_encoding(s); 7710 } 7711 } else { 7712 switch (imm4) { 7713 case 0: 7714 /* DUP (element - vector) */ 7715 handle_simd_dupe(s, is_q, rd, rn, imm5); 7716 break; 7717 case 1: 7718 /* DUP (general) */ 7719 handle_simd_dupg(s, is_q, rd, rn, imm5); 7720 break; 7721 case 3: 7722 if (is_q) { 7723 /* INS (general) */ 7724 handle_simd_insg(s, rd, rn, imm5); 7725 } else { 7726 unallocated_encoding(s); 7727 } 7728 break; 7729 case 5: 7730 case 7: 7731 /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */ 7732 handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5); 7733 break; 7734 default: 7735 unallocated_encoding(s); 7736 break; 7737 } 7738 } 7739 } 7740 7741 /* AdvSIMD modified immediate 7742 * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0 7743 * +---+---+----+---------------------+-----+-------+----+---+-------+------+ 7744 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd | 7745 * +---+---+----+---------------------+-----+-------+----+---+-------+------+ 7746 * 7747 * There are a number of operations that can be carried out here: 7748 * MOVI - move (shifted) imm into register 7749 * MVNI - move inverted (shifted) imm into register 7750 * ORR - bitwise OR of (shifted) imm with register 7751 * BIC - bitwise clear of (shifted) imm with register 7752 * With ARMv8.2 we also have: 7753 * FMOV half-precision 7754 */ 7755 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) 7756 { 7757 int rd = extract32(insn, 0, 5); 7758 int cmode = extract32(insn, 12, 4); 7759 int o2 = extract32(insn, 11, 1); 7760 uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5); 7761 bool is_neg = extract32(insn, 29, 1); 7762 bool is_q = extract32(insn, 30, 1); 7763 uint64_t imm = 0; 7764 7765 if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) { 7766 /* Check for FMOV (vector, immediate) - half-precision */ 7767 if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) { 7768 unallocated_encoding(s); 7769 return; 7770 } 7771 } 7772 7773 if (!fp_access_check(s)) { 7774 return; 7775 } 7776 7777 if (cmode == 15 && o2 && !is_neg) { 7778 /* FMOV (vector, immediate) - half-precision */ 7779 imm = vfp_expand_imm(MO_16, abcdefgh); 7780 /* now duplicate across the lanes */ 7781 imm = dup_const(MO_16, imm); 7782 } else { 7783 imm = asimd_imm_const(abcdefgh, cmode, is_neg); 7784 } 7785 7786 if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) { 7787 /* MOVI or MVNI, with MVNI negation handled above. */ 7788 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8, 7789 vec_full_reg_size(s), imm); 7790 } else { 7791 /* ORR or BIC, with BIC negation to AND handled above. 
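         * (asimd_imm_const() has already inverted the constant for the
         * is_neg case, so BIC becomes a plain AND with imm; cmode 0xx1
         * selects the 32-bit shifted form and 10x1 the 16-bit form.)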
*/ 7792 if (is_neg) { 7793 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64); 7794 } else { 7795 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64); 7796 } 7797 } 7798 } 7799 7800 /* AdvSIMD scalar copy 7801 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 7802 * +-----+----+-----------------+------+---+------+---+------+------+ 7803 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 7804 * +-----+----+-----------------+------+---+------+---+------+------+ 7805 */ 7806 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn) 7807 { 7808 int rd = extract32(insn, 0, 5); 7809 int rn = extract32(insn, 5, 5); 7810 int imm4 = extract32(insn, 11, 4); 7811 int imm5 = extract32(insn, 16, 5); 7812 int op = extract32(insn, 29, 1); 7813 7814 if (op != 0 || imm4 != 0) { 7815 unallocated_encoding(s); 7816 return; 7817 } 7818 7819 /* DUP (element, scalar) */ 7820 handle_simd_dupes(s, rd, rn, imm5); 7821 } 7822 7823 /* AdvSIMD scalar pairwise 7824 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 7825 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 7826 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | 7827 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 7828 */ 7829 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) 7830 { 7831 int u = extract32(insn, 29, 1); 7832 int size = extract32(insn, 22, 2); 7833 int opcode = extract32(insn, 12, 5); 7834 int rn = extract32(insn, 5, 5); 7835 int rd = extract32(insn, 0, 5); 7836 TCGv_ptr fpst; 7837 7838 /* For some ops (the FP ones), size[1] is part of the encoding. 7839 * For ADDP strictly it is not but size[1] is always 1 for valid 7840 * encodings. 7841 */ 7842 opcode |= (extract32(size, 1, 1) << 5); 7843 7844 switch (opcode) { 7845 case 0x3b: /* ADDP */ 7846 if (u || size != 3) { 7847 unallocated_encoding(s); 7848 return; 7849 } 7850 if (!fp_access_check(s)) { 7851 return; 7852 } 7853 7854 fpst = NULL; 7855 break; 7856 case 0xc: /* FMAXNMP */ 7857 case 0xd: /* FADDP */ 7858 case 0xf: /* FMAXP */ 7859 case 0x2c: /* FMINNMP */ 7860 case 0x2f: /* FMINP */ 7861 /* FP op, size[0] is 32 or 64 bit*/ 7862 if (!u) { 7863 if (!dc_isar_feature(aa64_fp16, s)) { 7864 unallocated_encoding(s); 7865 return; 7866 } else { 7867 size = MO_16; 7868 } 7869 } else { 7870 size = extract32(size, 0, 1) ? MO_64 : MO_32; 7871 } 7872 7873 if (!fp_access_check(s)) { 7874 return; 7875 } 7876 7877 fpst = fpstatus_ptr(size == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 7878 break; 7879 default: 7880 unallocated_encoding(s); 7881 return; 7882 } 7883 7884 if (size == MO_64) { 7885 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 7886 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 7887 TCGv_i64 tcg_res = tcg_temp_new_i64(); 7888 7889 read_vec_element(s, tcg_op1, rn, 0, MO_64); 7890 read_vec_element(s, tcg_op2, rn, 1, MO_64); 7891 7892 switch (opcode) { 7893 case 0x3b: /* ADDP */ 7894 tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2); 7895 break; 7896 case 0xc: /* FMAXNMP */ 7897 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); 7898 break; 7899 case 0xd: /* FADDP */ 7900 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); 7901 break; 7902 case 0xf: /* FMAXP */ 7903 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); 7904 break; 7905 case 0x2c: /* FMINNMP */ 7906 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); 7907 break; 7908 case 0x2f: /* FMINP */ 7909 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); 7910 break; 7911 default: 7912 g_assert_not_reached(); 7913 } 7914 7915 write_fp_dreg(s, rd, tcg_res); 7916 } else { 7917 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 7918 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 7919 TCGv_i32 tcg_res = tcg_temp_new_i32(); 7920 7921 read_vec_element_i32(s, tcg_op1, rn, 0, size); 7922 read_vec_element_i32(s, tcg_op2, rn, 1, size); 7923 7924 if (size == MO_16) { 7925 switch (opcode) { 7926 case 0xc: /* FMAXNMP */ 7927 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); 7928 break; 7929 case 0xd: /* FADDP */ 7930 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); 7931 break; 7932 case 0xf: /* FMAXP */ 7933 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); 7934 break; 7935 case 0x2c: /* FMINNMP */ 7936 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); 7937 break; 7938 case 0x2f: /* FMINP */ 7939 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); 7940 break; 7941 default: 7942 g_assert_not_reached(); 7943 } 7944 } else { 7945 switch (opcode) { 7946 case 0xc: /* FMAXNMP */ 7947 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); 7948 break; 7949 case 0xd: /* FADDP */ 7950 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); 7951 break; 7952 case 0xf: /* FMAXP */ 7953 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); 7954 break; 7955 case 0x2c: /* FMINNMP */ 7956 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); 7957 break; 7958 case 0x2f: /* FMINP */ 7959 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); 7960 break; 7961 default: 7962 g_assert_not_reached(); 7963 } 7964 } 7965 7966 write_fp_sreg(s, rd, tcg_res); 7967 } 7968 } 7969 7970 /* 7971 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate) 7972 * 7973 * This code is handles the common shifting code and is used by both 7974 * the vector and scalar code. 
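 *
 * Rounding is done by adding 1 << (shift - 1) to the source before the
 * shift; for 64-bit elements that addition can carry out of 64 bits, so
 * a 128-bit intermediate (tcg_src:tcg_src_hi) is built with add2 and the
 * two halves are recombined after the shift.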
7975 */ 7976 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src, 7977 TCGv_i64 tcg_rnd, bool accumulate, 7978 bool is_u, int size, int shift) 7979 { 7980 bool extended_result = false; 7981 bool round = tcg_rnd != NULL; 7982 int ext_lshift = 0; 7983 TCGv_i64 tcg_src_hi; 7984 7985 if (round && size == 3) { 7986 extended_result = true; 7987 ext_lshift = 64 - shift; 7988 tcg_src_hi = tcg_temp_new_i64(); 7989 } else if (shift == 64) { 7990 if (!accumulate && is_u) { 7991 /* result is zero */ 7992 tcg_gen_movi_i64(tcg_res, 0); 7993 return; 7994 } 7995 } 7996 7997 /* Deal with the rounding step */ 7998 if (round) { 7999 if (extended_result) { 8000 TCGv_i64 tcg_zero = tcg_constant_i64(0); 8001 if (!is_u) { 8002 /* take care of sign extending tcg_res */ 8003 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63); 8004 tcg_gen_add2_i64(tcg_src, tcg_src_hi, 8005 tcg_src, tcg_src_hi, 8006 tcg_rnd, tcg_zero); 8007 } else { 8008 tcg_gen_add2_i64(tcg_src, tcg_src_hi, 8009 tcg_src, tcg_zero, 8010 tcg_rnd, tcg_zero); 8011 } 8012 } else { 8013 tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd); 8014 } 8015 } 8016 8017 /* Now do the shift right */ 8018 if (round && extended_result) { 8019 /* extended case, >64 bit precision required */ 8020 if (ext_lshift == 0) { 8021 /* special case, only high bits matter */ 8022 tcg_gen_mov_i64(tcg_src, tcg_src_hi); 8023 } else { 8024 tcg_gen_shri_i64(tcg_src, tcg_src, shift); 8025 tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift); 8026 tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi); 8027 } 8028 } else { 8029 if (is_u) { 8030 if (shift == 64) { 8031 /* essentially shifting in 64 zeros */ 8032 tcg_gen_movi_i64(tcg_src, 0); 8033 } else { 8034 tcg_gen_shri_i64(tcg_src, tcg_src, shift); 8035 } 8036 } else { 8037 if (shift == 64) { 8038 /* effectively extending the sign-bit */ 8039 tcg_gen_sari_i64(tcg_src, tcg_src, 63); 8040 } else { 8041 tcg_gen_sari_i64(tcg_src, tcg_src, shift); 8042 } 8043 } 8044 } 8045 8046 if (accumulate) { 8047 tcg_gen_add_i64(tcg_res, tcg_res, tcg_src); 8048 } else { 8049 tcg_gen_mov_i64(tcg_res, tcg_src); 8050 } 8051 } 8052 8053 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ 8054 static void handle_scalar_simd_shri(DisasContext *s, 8055 bool is_u, int immh, int immb, 8056 int opcode, int rn, int rd) 8057 { 8058 const int size = 3; 8059 int immhb = immh << 3 | immb; 8060 int shift = 2 * (8 << size) - immhb; 8061 bool accumulate = false; 8062 bool round = false; 8063 bool insert = false; 8064 TCGv_i64 tcg_rn; 8065 TCGv_i64 tcg_rd; 8066 TCGv_i64 tcg_round; 8067 8068 if (!extract32(immh, 3, 1)) { 8069 unallocated_encoding(s); 8070 return; 8071 } 8072 8073 if (!fp_access_check(s)) { 8074 return; 8075 } 8076 8077 switch (opcode) { 8078 case 0x02: /* SSRA / USRA (accumulate) */ 8079 accumulate = true; 8080 break; 8081 case 0x04: /* SRSHR / URSHR (rounding) */ 8082 round = true; 8083 break; 8084 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 8085 accumulate = round = true; 8086 break; 8087 case 0x08: /* SRI */ 8088 insert = true; 8089 break; 8090 } 8091 8092 if (round) { 8093 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 8094 } else { 8095 tcg_round = NULL; 8096 } 8097 8098 tcg_rn = read_fp_dreg(s, rn); 8099 tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); 8100 8101 if (insert) { 8102 /* shift count same as element size is valid but does nothing; 8103 * special case to avoid potential shift by 64. 
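         * (A shift by 64 would be out of range for tcg_gen_shri_i64 and
         * the deposit below would have zero width; skipping both simply
         * writes Rd back unchanged, which is the architectural result of
         * inserting no bits.)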
8104 */ 8105 int esize = 8 << size; 8106 if (shift != esize) { 8107 tcg_gen_shri_i64(tcg_rn, tcg_rn, shift); 8108 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift); 8109 } 8110 } else { 8111 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 8112 accumulate, is_u, size, shift); 8113 } 8114 8115 write_fp_dreg(s, rd, tcg_rd); 8116 } 8117 8118 /* SHL/SLI - Scalar shift left */ 8119 static void handle_scalar_simd_shli(DisasContext *s, bool insert, 8120 int immh, int immb, int opcode, 8121 int rn, int rd) 8122 { 8123 int size = 32 - clz32(immh) - 1; 8124 int immhb = immh << 3 | immb; 8125 int shift = immhb - (8 << size); 8126 TCGv_i64 tcg_rn; 8127 TCGv_i64 tcg_rd; 8128 8129 if (!extract32(immh, 3, 1)) { 8130 unallocated_encoding(s); 8131 return; 8132 } 8133 8134 if (!fp_access_check(s)) { 8135 return; 8136 } 8137 8138 tcg_rn = read_fp_dreg(s, rn); 8139 tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); 8140 8141 if (insert) { 8142 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift); 8143 } else { 8144 tcg_gen_shli_i64(tcg_rd, tcg_rn, shift); 8145 } 8146 8147 write_fp_dreg(s, rd, tcg_rd); 8148 } 8149 8150 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with 8151 * (signed/unsigned) narrowing */ 8152 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, 8153 bool is_u_shift, bool is_u_narrow, 8154 int immh, int immb, int opcode, 8155 int rn, int rd) 8156 { 8157 int immhb = immh << 3 | immb; 8158 int size = 32 - clz32(immh) - 1; 8159 int esize = 8 << size; 8160 int shift = (2 * esize) - immhb; 8161 int elements = is_scalar ? 1 : (64 / esize); 8162 bool round = extract32(opcode, 0, 1); 8163 MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN); 8164 TCGv_i64 tcg_rn, tcg_rd, tcg_round; 8165 TCGv_i32 tcg_rd_narrowed; 8166 TCGv_i64 tcg_final; 8167 8168 static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = { 8169 { gen_helper_neon_narrow_sat_s8, 8170 gen_helper_neon_unarrow_sat8 }, 8171 { gen_helper_neon_narrow_sat_s16, 8172 gen_helper_neon_unarrow_sat16 }, 8173 { gen_helper_neon_narrow_sat_s32, 8174 gen_helper_neon_unarrow_sat32 }, 8175 { NULL, NULL }, 8176 }; 8177 static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = { 8178 gen_helper_neon_narrow_sat_u8, 8179 gen_helper_neon_narrow_sat_u16, 8180 gen_helper_neon_narrow_sat_u32, 8181 NULL 8182 }; 8183 NeonGenNarrowEnvFn *narrowfn; 8184 8185 int i; 8186 8187 assert(size < 4); 8188 8189 if (extract32(immh, 3, 1)) { 8190 unallocated_encoding(s); 8191 return; 8192 } 8193 8194 if (!fp_access_check(s)) { 8195 return; 8196 } 8197 8198 if (is_u_shift) { 8199 narrowfn = unsigned_narrow_fns[size]; 8200 } else { 8201 narrowfn = signed_narrow_fns[size][is_u_narrow ? 
1 : 0]; 8202 } 8203 8204 tcg_rn = tcg_temp_new_i64(); 8205 tcg_rd = tcg_temp_new_i64(); 8206 tcg_rd_narrowed = tcg_temp_new_i32(); 8207 tcg_final = tcg_temp_new_i64(); 8208 8209 if (round) { 8210 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 8211 } else { 8212 tcg_round = NULL; 8213 } 8214 8215 for (i = 0; i < elements; i++) { 8216 read_vec_element(s, tcg_rn, rn, i, ldop); 8217 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 8218 false, is_u_shift, size+1, shift); 8219 narrowfn(tcg_rd_narrowed, tcg_env, tcg_rd); 8220 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed); 8221 if (i == 0) { 8222 tcg_gen_mov_i64(tcg_final, tcg_rd); 8223 } else { 8224 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); 8225 } 8226 } 8227 8228 if (!is_q) { 8229 write_vec_element(s, tcg_final, rd, 0, MO_64); 8230 } else { 8231 write_vec_element(s, tcg_final, rd, 1, MO_64); 8232 } 8233 clear_vec_high(s, is_q, rd); 8234 } 8235 8236 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */ 8237 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, 8238 bool src_unsigned, bool dst_unsigned, 8239 int immh, int immb, int rn, int rd) 8240 { 8241 int immhb = immh << 3 | immb; 8242 int size = 32 - clz32(immh) - 1; 8243 int shift = immhb - (8 << size); 8244 int pass; 8245 8246 assert(immh != 0); 8247 assert(!(scalar && is_q)); 8248 8249 if (!scalar) { 8250 if (!is_q && extract32(immh, 3, 1)) { 8251 unallocated_encoding(s); 8252 return; 8253 } 8254 8255 /* Since we use the variable-shift helpers we must 8256 * replicate the shift count into each element of 8257 * the tcg_shift value. 8258 */ 8259 switch (size) { 8260 case 0: 8261 shift |= shift << 8; 8262 /* fall through */ 8263 case 1: 8264 shift |= shift << 16; 8265 break; 8266 case 2: 8267 case 3: 8268 break; 8269 default: 8270 g_assert_not_reached(); 8271 } 8272 } 8273 8274 if (!fp_access_check(s)) { 8275 return; 8276 } 8277 8278 if (size == 3) { 8279 TCGv_i64 tcg_shift = tcg_constant_i64(shift); 8280 static NeonGenTwo64OpEnvFn * const fns[2][2] = { 8281 { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 }, 8282 { NULL, gen_helper_neon_qshl_u64 }, 8283 }; 8284 NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned]; 8285 int maxpass = is_q ? 2 : 1; 8286 8287 for (pass = 0; pass < maxpass; pass++) { 8288 TCGv_i64 tcg_op = tcg_temp_new_i64(); 8289 8290 read_vec_element(s, tcg_op, rn, pass, MO_64); 8291 genfn(tcg_op, tcg_env, tcg_op, tcg_shift); 8292 write_vec_element(s, tcg_op, rd, pass, MO_64); 8293 } 8294 clear_vec_high(s, is_q, rd); 8295 } else { 8296 TCGv_i32 tcg_shift = tcg_constant_i32(shift); 8297 static NeonGenTwoOpEnvFn * const fns[2][2][3] = { 8298 { 8299 { gen_helper_neon_qshl_s8, 8300 gen_helper_neon_qshl_s16, 8301 gen_helper_neon_qshl_s32 }, 8302 { gen_helper_neon_qshlu_s8, 8303 gen_helper_neon_qshlu_s16, 8304 gen_helper_neon_qshlu_s32 } 8305 }, { 8306 { NULL, NULL, NULL }, 8307 { gen_helper_neon_qshl_u8, 8308 gen_helper_neon_qshl_u16, 8309 gen_helper_neon_qshl_u32 } 8310 } 8311 }; 8312 NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size]; 8313 MemOp memop = scalar ? size : MO_32; 8314 int maxpass = scalar ? 1 : is_q ? 
4 : 2; 8315 8316 for (pass = 0; pass < maxpass; pass++) { 8317 TCGv_i32 tcg_op = tcg_temp_new_i32(); 8318 8319 read_vec_element_i32(s, tcg_op, rn, pass, memop); 8320 genfn(tcg_op, tcg_env, tcg_op, tcg_shift); 8321 if (scalar) { 8322 switch (size) { 8323 case 0: 8324 tcg_gen_ext8u_i32(tcg_op, tcg_op); 8325 break; 8326 case 1: 8327 tcg_gen_ext16u_i32(tcg_op, tcg_op); 8328 break; 8329 case 2: 8330 break; 8331 default: 8332 g_assert_not_reached(); 8333 } 8334 write_fp_sreg(s, rd, tcg_op); 8335 } else { 8336 write_vec_element_i32(s, tcg_op, rd, pass, MO_32); 8337 } 8338 } 8339 8340 if (!scalar) { 8341 clear_vec_high(s, is_q, rd); 8342 } 8343 } 8344 } 8345 8346 /* Common vector code for handling integer to FP conversion */ 8347 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, 8348 int elements, int is_signed, 8349 int fracbits, int size) 8350 { 8351 TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 8352 TCGv_i32 tcg_shift = NULL; 8353 8354 MemOp mop = size | (is_signed ? MO_SIGN : 0); 8355 int pass; 8356 8357 if (fracbits || size == MO_64) { 8358 tcg_shift = tcg_constant_i32(fracbits); 8359 } 8360 8361 if (size == MO_64) { 8362 TCGv_i64 tcg_int64 = tcg_temp_new_i64(); 8363 TCGv_i64 tcg_double = tcg_temp_new_i64(); 8364 8365 for (pass = 0; pass < elements; pass++) { 8366 read_vec_element(s, tcg_int64, rn, pass, mop); 8367 8368 if (is_signed) { 8369 gen_helper_vfp_sqtod(tcg_double, tcg_int64, 8370 tcg_shift, tcg_fpst); 8371 } else { 8372 gen_helper_vfp_uqtod(tcg_double, tcg_int64, 8373 tcg_shift, tcg_fpst); 8374 } 8375 if (elements == 1) { 8376 write_fp_dreg(s, rd, tcg_double); 8377 } else { 8378 write_vec_element(s, tcg_double, rd, pass, MO_64); 8379 } 8380 } 8381 } else { 8382 TCGv_i32 tcg_int32 = tcg_temp_new_i32(); 8383 TCGv_i32 tcg_float = tcg_temp_new_i32(); 8384 8385 for (pass = 0; pass < elements; pass++) { 8386 read_vec_element_i32(s, tcg_int32, rn, pass, mop); 8387 8388 switch (size) { 8389 case MO_32: 8390 if (fracbits) { 8391 if (is_signed) { 8392 gen_helper_vfp_sltos(tcg_float, tcg_int32, 8393 tcg_shift, tcg_fpst); 8394 } else { 8395 gen_helper_vfp_ultos(tcg_float, tcg_int32, 8396 tcg_shift, tcg_fpst); 8397 } 8398 } else { 8399 if (is_signed) { 8400 gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst); 8401 } else { 8402 gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst); 8403 } 8404 } 8405 break; 8406 case MO_16: 8407 if (fracbits) { 8408 if (is_signed) { 8409 gen_helper_vfp_sltoh(tcg_float, tcg_int32, 8410 tcg_shift, tcg_fpst); 8411 } else { 8412 gen_helper_vfp_ultoh(tcg_float, tcg_int32, 8413 tcg_shift, tcg_fpst); 8414 } 8415 } else { 8416 if (is_signed) { 8417 gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst); 8418 } else { 8419 gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst); 8420 } 8421 } 8422 break; 8423 default: 8424 g_assert_not_reached(); 8425 } 8426 8427 if (elements == 1) { 8428 write_fp_sreg(s, rd, tcg_float); 8429 } else { 8430 write_vec_element_i32(s, tcg_float, rd, pass, size); 8431 } 8432 } 8433 } 8434 8435 clear_vec_high(s, elements << size == 16, rd); 8436 } 8437 8438 /* UCVTF/SCVTF - Integer to FP conversion */ 8439 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, 8440 bool is_q, bool is_u, 8441 int immh, int immb, int opcode, 8442 int rn, int rd) 8443 { 8444 int size, elements, fracbits; 8445 int immhb = immh << 3 | immb; 8446 8447 if (immh & 8) { 8448 size = MO_64; 8449 if (!is_scalar && !is_q) { 8450 unallocated_encoding(s); 8451 return; 8452 } 8453 } else if (immh & 4) { 8454 size 
= MO_32; 8455 } else if (immh & 2) { 8456 size = MO_16; 8457 if (!dc_isar_feature(aa64_fp16, s)) { 8458 unallocated_encoding(s); 8459 return; 8460 } 8461 } else { 8462 /* immh == 0 would be a failure of the decode logic */ 8463 g_assert(immh == 1); 8464 unallocated_encoding(s); 8465 return; 8466 } 8467 8468 if (is_scalar) { 8469 elements = 1; 8470 } else { 8471 elements = (8 << is_q) >> size; 8472 } 8473 fracbits = (16 << size) - immhb; 8474 8475 if (!fp_access_check(s)) { 8476 return; 8477 } 8478 8479 handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size); 8480 } 8481 8482 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */ 8483 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, 8484 bool is_q, bool is_u, 8485 int immh, int immb, int rn, int rd) 8486 { 8487 int immhb = immh << 3 | immb; 8488 int pass, size, fracbits; 8489 TCGv_ptr tcg_fpstatus; 8490 TCGv_i32 tcg_rmode, tcg_shift; 8491 8492 if (immh & 0x8) { 8493 size = MO_64; 8494 if (!is_scalar && !is_q) { 8495 unallocated_encoding(s); 8496 return; 8497 } 8498 } else if (immh & 0x4) { 8499 size = MO_32; 8500 } else if (immh & 0x2) { 8501 size = MO_16; 8502 if (!dc_isar_feature(aa64_fp16, s)) { 8503 unallocated_encoding(s); 8504 return; 8505 } 8506 } else { 8507 /* Should have split out AdvSIMD modified immediate earlier. */ 8508 assert(immh == 1); 8509 unallocated_encoding(s); 8510 return; 8511 } 8512 8513 if (!fp_access_check(s)) { 8514 return; 8515 } 8516 8517 assert(!(is_scalar && is_q)); 8518 8519 tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 8520 tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus); 8521 fracbits = (16 << size) - immhb; 8522 tcg_shift = tcg_constant_i32(fracbits); 8523 8524 if (size == MO_64) { 8525 int maxpass = is_scalar ? 1 : 2; 8526 8527 for (pass = 0; pass < maxpass; pass++) { 8528 TCGv_i64 tcg_op = tcg_temp_new_i64(); 8529 8530 read_vec_element(s, tcg_op, rn, pass, MO_64); 8531 if (is_u) { 8532 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 8533 } else { 8534 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 8535 } 8536 write_vec_element(s, tcg_op, rd, pass, MO_64); 8537 } 8538 clear_vec_high(s, is_q, rd); 8539 } else { 8540 void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 8541 int maxpass = is_scalar ? 
1 : ((8 << is_q) >> size); 8542 8543 switch (size) { 8544 case MO_16: 8545 if (is_u) { 8546 fn = gen_helper_vfp_touhh; 8547 } else { 8548 fn = gen_helper_vfp_toshh; 8549 } 8550 break; 8551 case MO_32: 8552 if (is_u) { 8553 fn = gen_helper_vfp_touls; 8554 } else { 8555 fn = gen_helper_vfp_tosls; 8556 } 8557 break; 8558 default: 8559 g_assert_not_reached(); 8560 } 8561 8562 for (pass = 0; pass < maxpass; pass++) { 8563 TCGv_i32 tcg_op = tcg_temp_new_i32(); 8564 8565 read_vec_element_i32(s, tcg_op, rn, pass, size); 8566 fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 8567 if (is_scalar) { 8568 write_fp_sreg(s, rd, tcg_op); 8569 } else { 8570 write_vec_element_i32(s, tcg_op, rd, pass, size); 8571 } 8572 } 8573 if (!is_scalar) { 8574 clear_vec_high(s, is_q, rd); 8575 } 8576 } 8577 8578 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 8579 } 8580 8581 /* AdvSIMD scalar shift by immediate 8582 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 8583 * +-----+---+-------------+------+------+--------+---+------+------+ 8584 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | 8585 * +-----+---+-------------+------+------+--------+---+------+------+ 8586 * 8587 * This is the scalar version so it works on a fixed sized registers 8588 */ 8589 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) 8590 { 8591 int rd = extract32(insn, 0, 5); 8592 int rn = extract32(insn, 5, 5); 8593 int opcode = extract32(insn, 11, 5); 8594 int immb = extract32(insn, 16, 3); 8595 int immh = extract32(insn, 19, 4); 8596 bool is_u = extract32(insn, 29, 1); 8597 8598 if (immh == 0) { 8599 unallocated_encoding(s); 8600 return; 8601 } 8602 8603 switch (opcode) { 8604 case 0x08: /* SRI */ 8605 if (!is_u) { 8606 unallocated_encoding(s); 8607 return; 8608 } 8609 /* fall through */ 8610 case 0x00: /* SSHR / USHR */ 8611 case 0x02: /* SSRA / USRA */ 8612 case 0x04: /* SRSHR / URSHR */ 8613 case 0x06: /* SRSRA / URSRA */ 8614 handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd); 8615 break; 8616 case 0x0a: /* SHL / SLI */ 8617 handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd); 8618 break; 8619 case 0x1c: /* SCVTF, UCVTF */ 8620 handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb, 8621 opcode, rn, rd); 8622 break; 8623 case 0x10: /* SQSHRUN, SQSHRUN2 */ 8624 case 0x11: /* SQRSHRUN, SQRSHRUN2 */ 8625 if (!is_u) { 8626 unallocated_encoding(s); 8627 return; 8628 } 8629 handle_vec_simd_sqshrn(s, true, false, false, true, 8630 immh, immb, opcode, rn, rd); 8631 break; 8632 case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */ 8633 case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */ 8634 handle_vec_simd_sqshrn(s, true, false, is_u, is_u, 8635 immh, immb, opcode, rn, rd); 8636 break; 8637 case 0xc: /* SQSHLU */ 8638 if (!is_u) { 8639 unallocated_encoding(s); 8640 return; 8641 } 8642 handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd); 8643 break; 8644 case 0xe: /* SQSHL, UQSHL */ 8645 handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd); 8646 break; 8647 case 0x1f: /* FCVTZS, FCVTZU */ 8648 handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd); 8649 break; 8650 default: 8651 unallocated_encoding(s); 8652 break; 8653 } 8654 } 8655 8656 /* AdvSIMD scalar three different 8657 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 8658 * +-----+---+-----------+------+---+------+--------+-----+------+------+ 8659 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | 8660 * +-----+---+-----------+------+---+------+--------+-----+------+------+ 8661 */ 8662 
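/* Within this group only the signed saturating doubling multiply-long
 * family (SQDMLAL, SQDMLSL, SQDMULL) is allocated: U must be 0 and the
 * element size must be 16 or 32 bits; everything else is UNDEFINED.
 */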
static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) 8663 { 8664 bool is_u = extract32(insn, 29, 1); 8665 int size = extract32(insn, 22, 2); 8666 int opcode = extract32(insn, 12, 4); 8667 int rm = extract32(insn, 16, 5); 8668 int rn = extract32(insn, 5, 5); 8669 int rd = extract32(insn, 0, 5); 8670 8671 if (is_u) { 8672 unallocated_encoding(s); 8673 return; 8674 } 8675 8676 switch (opcode) { 8677 case 0x9: /* SQDMLAL, SQDMLAL2 */ 8678 case 0xb: /* SQDMLSL, SQDMLSL2 */ 8679 case 0xd: /* SQDMULL, SQDMULL2 */ 8680 if (size == 0 || size == 3) { 8681 unallocated_encoding(s); 8682 return; 8683 } 8684 break; 8685 default: 8686 unallocated_encoding(s); 8687 return; 8688 } 8689 8690 if (!fp_access_check(s)) { 8691 return; 8692 } 8693 8694 if (size == 2) { 8695 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 8696 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 8697 TCGv_i64 tcg_res = tcg_temp_new_i64(); 8698 8699 read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN); 8700 read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN); 8701 8702 tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2); 8703 gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env, tcg_res, tcg_res); 8704 8705 switch (opcode) { 8706 case 0xd: /* SQDMULL, SQDMULL2 */ 8707 break; 8708 case 0xb: /* SQDMLSL, SQDMLSL2 */ 8709 tcg_gen_neg_i64(tcg_res, tcg_res); 8710 /* fall through */ 8711 case 0x9: /* SQDMLAL, SQDMLAL2 */ 8712 read_vec_element(s, tcg_op1, rd, 0, MO_64); 8713 gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env, 8714 tcg_res, tcg_op1); 8715 break; 8716 default: 8717 g_assert_not_reached(); 8718 } 8719 8720 write_fp_dreg(s, rd, tcg_res); 8721 } else { 8722 TCGv_i32 tcg_op1 = read_fp_hreg(s, rn); 8723 TCGv_i32 tcg_op2 = read_fp_hreg(s, rm); 8724 TCGv_i64 tcg_res = tcg_temp_new_i64(); 8725 8726 gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2); 8727 gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env, tcg_res, tcg_res); 8728 8729 switch (opcode) { 8730 case 0xd: /* SQDMULL, SQDMULL2 */ 8731 break; 8732 case 0xb: /* SQDMLSL, SQDMLSL2 */ 8733 gen_helper_neon_negl_u32(tcg_res, tcg_res); 8734 /* fall through */ 8735 case 0x9: /* SQDMLAL, SQDMLAL2 */ 8736 { 8737 TCGv_i64 tcg_op3 = tcg_temp_new_i64(); 8738 read_vec_element(s, tcg_op3, rd, 0, MO_32); 8739 gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env, 8740 tcg_res, tcg_op3); 8741 break; 8742 } 8743 default: 8744 g_assert_not_reached(); 8745 } 8746 8747 tcg_gen_ext32u_i64(tcg_res, tcg_res); 8748 write_fp_dreg(s, rd, tcg_res); 8749 } 8750 } 8751 8752 static void handle_3same_64(DisasContext *s, int opcode, bool u, 8753 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm) 8754 { 8755 /* Handle 64x64->64 opcodes which are shared between the scalar 8756 * and vector 3-same groups. We cover every opcode where size == 3 8757 * is valid in either the three-reg-same (integer, not pairwise) 8758 * or scalar-three-reg-same groups. 8759 */ 8760 TCGCond cond; 8761 8762 switch (opcode) { 8763 case 0x1: /* SQADD */ 8764 if (u) { 8765 gen_helper_neon_qadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); 8766 } else { 8767 gen_helper_neon_qadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); 8768 } 8769 break; 8770 case 0x5: /* SQSUB */ 8771 if (u) { 8772 gen_helper_neon_qsub_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); 8773 } else { 8774 gen_helper_neon_qsub_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); 8775 } 8776 break; 8777 case 0x6: /* CMGT, CMHI */ 8778 cond = u ? TCG_COND_GTU : TCG_COND_GT; 8779 do_cmop: 8780 /* 64 bit integer comparison, result = test ? -1 : 0. 
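         * tcg_gen_negsetcond_i64 yields 0 or -1 directly, so no separate
         * negation of a setcond result is needed.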
*/ 8781 tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_rm); 8782 break; 8783 case 0x7: /* CMGE, CMHS */ 8784 cond = u ? TCG_COND_GEU : TCG_COND_GE; 8785 goto do_cmop; 8786 case 0x11: /* CMTST, CMEQ */ 8787 if (u) { 8788 cond = TCG_COND_EQ; 8789 goto do_cmop; 8790 } 8791 gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm); 8792 break; 8793 case 0x8: /* SSHL, USHL */ 8794 if (u) { 8795 gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm); 8796 } else { 8797 gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm); 8798 } 8799 break; 8800 case 0x9: /* SQSHL, UQSHL */ 8801 if (u) { 8802 gen_helper_neon_qshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); 8803 } else { 8804 gen_helper_neon_qshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); 8805 } 8806 break; 8807 case 0xa: /* SRSHL, URSHL */ 8808 if (u) { 8809 gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm); 8810 } else { 8811 gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm); 8812 } 8813 break; 8814 case 0xb: /* SQRSHL, UQRSHL */ 8815 if (u) { 8816 gen_helper_neon_qrshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); 8817 } else { 8818 gen_helper_neon_qrshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); 8819 } 8820 break; 8821 case 0x10: /* ADD, SUB */ 8822 if (u) { 8823 tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm); 8824 } else { 8825 tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm); 8826 } 8827 break; 8828 default: 8829 g_assert_not_reached(); 8830 } 8831 } 8832 8833 /* Handle the 3-same-operands float operations; shared by the scalar 8834 * and vector encodings. The caller must filter out any encodings 8835 * not allocated for the encoding it is dealing with. 8836 */ 8837 static void handle_3same_float(DisasContext *s, int size, int elements, 8838 int fpopcode, int rd, int rn, int rm) 8839 { 8840 int pass; 8841 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 8842 8843 for (pass = 0; pass < elements; pass++) { 8844 if (size) { 8845 /* Double */ 8846 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 8847 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 8848 TCGv_i64 tcg_res = tcg_temp_new_i64(); 8849 8850 read_vec_element(s, tcg_op1, rn, pass, MO_64); 8851 read_vec_element(s, tcg_op2, rm, pass, MO_64); 8852 8853 switch (fpopcode) { 8854 case 0x39: /* FMLS */ 8855 /* As usual for ARM, separate negation for fused multiply-add */ 8856 gen_helper_vfp_negd(tcg_op1, tcg_op1); 8857 /* fall through */ 8858 case 0x19: /* FMLA */ 8859 read_vec_element(s, tcg_res, rd, pass, MO_64); 8860 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, 8861 tcg_res, fpst); 8862 break; 8863 case 0x18: /* FMAXNM */ 8864 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); 8865 break; 8866 case 0x1a: /* FADD */ 8867 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); 8868 break; 8869 case 0x1b: /* FMULX */ 8870 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst); 8871 break; 8872 case 0x1c: /* FCMEQ */ 8873 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8874 break; 8875 case 0x1e: /* FMAX */ 8876 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); 8877 break; 8878 case 0x1f: /* FRECPS */ 8879 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8880 break; 8881 case 0x38: /* FMINNM */ 8882 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); 8883 break; 8884 case 0x3a: /* FSUB */ 8885 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 8886 break; 8887 case 0x3e: /* FMIN */ 8888 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); 8889 break; 8890 case 0x3f: /* FRSQRTS */ 8891 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8892 break; 8893 case 0x5b: /* FMUL */ 8894 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 8895 break; 8896 case 0x5c: /* FCMGE */ 
8897 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8898 break; 8899 case 0x5d: /* FACGE */ 8900 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8901 break; 8902 case 0x5f: /* FDIV */ 8903 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); 8904 break; 8905 case 0x7a: /* FABD */ 8906 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 8907 gen_helper_vfp_absd(tcg_res, tcg_res); 8908 break; 8909 case 0x7c: /* FCMGT */ 8910 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8911 break; 8912 case 0x7d: /* FACGT */ 8913 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8914 break; 8915 default: 8916 g_assert_not_reached(); 8917 } 8918 8919 write_vec_element(s, tcg_res, rd, pass, MO_64); 8920 } else { 8921 /* Single */ 8922 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 8923 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 8924 TCGv_i32 tcg_res = tcg_temp_new_i32(); 8925 8926 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); 8927 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); 8928 8929 switch (fpopcode) { 8930 case 0x39: /* FMLS */ 8931 /* As usual for ARM, separate negation for fused multiply-add */ 8932 gen_helper_vfp_negs(tcg_op1, tcg_op1); 8933 /* fall through */ 8934 case 0x19: /* FMLA */ 8935 read_vec_element_i32(s, tcg_res, rd, pass, MO_32); 8936 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, 8937 tcg_res, fpst); 8938 break; 8939 case 0x1a: /* FADD */ 8940 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); 8941 break; 8942 case 0x1b: /* FMULX */ 8943 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst); 8944 break; 8945 case 0x1c: /* FCMEQ */ 8946 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst); 8947 break; 8948 case 0x1e: /* FMAX */ 8949 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); 8950 break; 8951 case 0x1f: /* FRECPS */ 8952 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); 8953 break; 8954 case 0x18: /* FMAXNM */ 8955 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); 8956 break; 8957 case 0x38: /* FMINNM */ 8958 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); 8959 break; 8960 case 0x3a: /* FSUB */ 8961 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 8962 break; 8963 case 0x3e: /* FMIN */ 8964 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); 8965 break; 8966 case 0x3f: /* FRSQRTS */ 8967 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); 8968 break; 8969 case 0x5b: /* FMUL */ 8970 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 8971 break; 8972 case 0x5c: /* FCMGE */ 8973 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst); 8974 break; 8975 case 0x5d: /* FACGE */ 8976 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst); 8977 break; 8978 case 0x5f: /* FDIV */ 8979 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); 8980 break; 8981 case 0x7a: /* FABD */ 8982 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 8983 gen_helper_vfp_abss(tcg_res, tcg_res); 8984 break; 8985 case 0x7c: /* FCMGT */ 8986 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); 8987 break; 8988 case 0x7d: /* FACGT */ 8989 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); 8990 break; 8991 default: 8992 g_assert_not_reached(); 8993 } 8994 8995 if (elements == 1) { 8996 /* scalar single so clear high part */ 8997 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8998 8999 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res); 9000 write_vec_element(s, tcg_tmp, rd, pass, MO_64); 9001 } else { 9002 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 9003 } 9004 } 9005 } 9006 9007 clear_vec_high(s, elements * (size ? 
8 : 4) > 8, rd); 9008 } 9009 9010 /* AdvSIMD scalar three same 9011 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 9012 * +-----+---+-----------+------+---+------+--------+---+------+------+ 9013 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | 9014 * +-----+---+-----------+------+---+------+--------+---+------+------+ 9015 */ 9016 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) 9017 { 9018 int rd = extract32(insn, 0, 5); 9019 int rn = extract32(insn, 5, 5); 9020 int opcode = extract32(insn, 11, 5); 9021 int rm = extract32(insn, 16, 5); 9022 int size = extract32(insn, 22, 2); 9023 bool u = extract32(insn, 29, 1); 9024 TCGv_i64 tcg_rd; 9025 9026 if (opcode >= 0x18) { 9027 /* Floating point: U, size[1] and opcode indicate operation */ 9028 int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6); 9029 switch (fpopcode) { 9030 case 0x1b: /* FMULX */ 9031 case 0x1f: /* FRECPS */ 9032 case 0x3f: /* FRSQRTS */ 9033 case 0x5d: /* FACGE */ 9034 case 0x7d: /* FACGT */ 9035 case 0x1c: /* FCMEQ */ 9036 case 0x5c: /* FCMGE */ 9037 case 0x7c: /* FCMGT */ 9038 case 0x7a: /* FABD */ 9039 break; 9040 default: 9041 unallocated_encoding(s); 9042 return; 9043 } 9044 9045 if (!fp_access_check(s)) { 9046 return; 9047 } 9048 9049 handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm); 9050 return; 9051 } 9052 9053 switch (opcode) { 9054 case 0x1: /* SQADD, UQADD */ 9055 case 0x5: /* SQSUB, UQSUB */ 9056 case 0x9: /* SQSHL, UQSHL */ 9057 case 0xb: /* SQRSHL, UQRSHL */ 9058 break; 9059 case 0x8: /* SSHL, USHL */ 9060 case 0xa: /* SRSHL, URSHL */ 9061 case 0x6: /* CMGT, CMHI */ 9062 case 0x7: /* CMGE, CMHS */ 9063 case 0x11: /* CMTST, CMEQ */ 9064 case 0x10: /* ADD, SUB (vector) */ 9065 if (size != 3) { 9066 unallocated_encoding(s); 9067 return; 9068 } 9069 break; 9070 case 0x16: /* SQDMULH, SQRDMULH (vector) */ 9071 if (size != 1 && size != 2) { 9072 unallocated_encoding(s); 9073 return; 9074 } 9075 break; 9076 default: 9077 unallocated_encoding(s); 9078 return; 9079 } 9080 9081 if (!fp_access_check(s)) { 9082 return; 9083 } 9084 9085 tcg_rd = tcg_temp_new_i64(); 9086 9087 if (size == 3) { 9088 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 9089 TCGv_i64 tcg_rm = read_fp_dreg(s, rm); 9090 9091 handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm); 9092 } else { 9093 /* Do a single operation on the lowest element in the vector. 9094 * We use the standard Neon helpers and rely on 0 OP 0 == 0 with 9095 * no side effects for all these operations. 9096 * OPTME: special-purpose helpers would avoid doing some 9097 * unnecessary work in the helper for the 8 and 16 bit cases. 
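     * The 32-bit helper result is zero-extended to 64 bits and written back
     * with write_fp_dreg, which also clears the upper half of the Q register.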
9098 */ 9099 NeonGenTwoOpEnvFn *genenvfn; 9100 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 9101 TCGv_i32 tcg_rm = tcg_temp_new_i32(); 9102 TCGv_i32 tcg_rd32 = tcg_temp_new_i32(); 9103 9104 read_vec_element_i32(s, tcg_rn, rn, 0, size); 9105 read_vec_element_i32(s, tcg_rm, rm, 0, size); 9106 9107 switch (opcode) { 9108 case 0x1: /* SQADD, UQADD */ 9109 { 9110 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9111 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, 9112 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, 9113 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, 9114 }; 9115 genenvfn = fns[size][u]; 9116 break; 9117 } 9118 case 0x5: /* SQSUB, UQSUB */ 9119 { 9120 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9121 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, 9122 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, 9123 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 }, 9124 }; 9125 genenvfn = fns[size][u]; 9126 break; 9127 } 9128 case 0x9: /* SQSHL, UQSHL */ 9129 { 9130 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9131 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, 9132 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, 9133 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, 9134 }; 9135 genenvfn = fns[size][u]; 9136 break; 9137 } 9138 case 0xb: /* SQRSHL, UQRSHL */ 9139 { 9140 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9141 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, 9142 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, 9143 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, 9144 }; 9145 genenvfn = fns[size][u]; 9146 break; 9147 } 9148 case 0x16: /* SQDMULH, SQRDMULH */ 9149 { 9150 static NeonGenTwoOpEnvFn * const fns[2][2] = { 9151 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, 9152 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, 9153 }; 9154 assert(size == 1 || size == 2); 9155 genenvfn = fns[size - 1][u]; 9156 break; 9157 } 9158 default: 9159 g_assert_not_reached(); 9160 } 9161 9162 genenvfn(tcg_rd32, tcg_env, tcg_rn, tcg_rm); 9163 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32); 9164 } 9165 9166 write_fp_dreg(s, rd, tcg_rd); 9167 } 9168 9169 /* AdvSIMD scalar three same FP16 9170 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0 9171 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+ 9172 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd | 9173 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+ 9174 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400 9175 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400 9176 */ 9177 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, 9178 uint32_t insn) 9179 { 9180 int rd = extract32(insn, 0, 5); 9181 int rn = extract32(insn, 5, 5); 9182 int opcode = extract32(insn, 11, 3); 9183 int rm = extract32(insn, 16, 5); 9184 bool u = extract32(insn, 29, 1); 9185 bool a = extract32(insn, 23, 1); 9186 int fpopcode = opcode | (a << 3) | (u << 4); 9187 TCGv_ptr fpst; 9188 TCGv_i32 tcg_op1; 9189 TCGv_i32 tcg_op2; 9190 TCGv_i32 tcg_res; 9191 9192 switch (fpopcode) { 9193 case 0x03: /* FMULX */ 9194 case 0x04: /* FCMEQ (reg) */ 9195 case 0x07: /* FRECPS */ 9196 case 0x0f: /* FRSQRTS */ 9197 case 0x14: /* FCMGE (reg) */ 9198 case 0x15: /* FACGE */ 9199 case 0x1a: /* FABD */ 9200 case 0x1c: /* FCMGT (reg) */ 9201 case 0x1d: /* FACGT */ 9202 break; 9203 default: 9204 unallocated_encoding(s); 9205 return; 9206 } 9207 9208 if (!dc_isar_feature(aa64_fp16, s)) { 9209 
unallocated_encoding(s); 9210 } 9211 9212 if (!fp_access_check(s)) { 9213 return; 9214 } 9215 9216 fpst = fpstatus_ptr(FPST_FPCR_F16); 9217 9218 tcg_op1 = read_fp_hreg(s, rn); 9219 tcg_op2 = read_fp_hreg(s, rm); 9220 tcg_res = tcg_temp_new_i32(); 9221 9222 switch (fpopcode) { 9223 case 0x03: /* FMULX */ 9224 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst); 9225 break; 9226 case 0x04: /* FCMEQ (reg) */ 9227 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9228 break; 9229 case 0x07: /* FRECPS */ 9230 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9231 break; 9232 case 0x0f: /* FRSQRTS */ 9233 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9234 break; 9235 case 0x14: /* FCMGE (reg) */ 9236 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9237 break; 9238 case 0x15: /* FACGE */ 9239 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9240 break; 9241 case 0x1a: /* FABD */ 9242 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 9243 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff); 9244 break; 9245 case 0x1c: /* FCMGT (reg) */ 9246 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9247 break; 9248 case 0x1d: /* FACGT */ 9249 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9250 break; 9251 default: 9252 g_assert_not_reached(); 9253 } 9254 9255 write_fp_sreg(s, rd, tcg_res); 9256 } 9257 9258 /* AdvSIMD scalar three same extra 9259 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 9260 * +-----+---+-----------+------+---+------+---+--------+---+----+----+ 9261 * | 0 1 | U | 1 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd | 9262 * +-----+---+-----------+------+---+------+---+--------+---+----+----+ 9263 */ 9264 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s, 9265 uint32_t insn) 9266 { 9267 int rd = extract32(insn, 0, 5); 9268 int rn = extract32(insn, 5, 5); 9269 int opcode = extract32(insn, 11, 4); 9270 int rm = extract32(insn, 16, 5); 9271 int size = extract32(insn, 22, 2); 9272 bool u = extract32(insn, 29, 1); 9273 TCGv_i32 ele1, ele2, ele3; 9274 TCGv_i64 res; 9275 bool feature; 9276 9277 switch (u * 16 + opcode) { 9278 case 0x10: /* SQRDMLAH (vector) */ 9279 case 0x11: /* SQRDMLSH (vector) */ 9280 if (size != 1 && size != 2) { 9281 unallocated_encoding(s); 9282 return; 9283 } 9284 feature = dc_isar_feature(aa64_rdm, s); 9285 break; 9286 default: 9287 unallocated_encoding(s); 9288 return; 9289 } 9290 if (!feature) { 9291 unallocated_encoding(s); 9292 return; 9293 } 9294 if (!fp_access_check(s)) { 9295 return; 9296 } 9297 9298 /* Do a single operation on the lowest element in the vector. 9299 * We use the standard Neon helpers and rely on 0 OP 0 == 0 9300 * with no side effects for all these operations. 9301 * OPTME: special-purpose helpers would avoid doing some 9302 * unnecessary work in the helper for the 16 bit cases. 
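     * Only 16-bit and 32-bit element sizes are architecturally valid for
     * SQRDMLAH/SQRDMLSH, which the size check above has already enforced.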
9303 */ 9304 ele1 = tcg_temp_new_i32(); 9305 ele2 = tcg_temp_new_i32(); 9306 ele3 = tcg_temp_new_i32(); 9307 9308 read_vec_element_i32(s, ele1, rn, 0, size); 9309 read_vec_element_i32(s, ele2, rm, 0, size); 9310 read_vec_element_i32(s, ele3, rd, 0, size); 9311 9312 switch (opcode) { 9313 case 0x0: /* SQRDMLAH */ 9314 if (size == 1) { 9315 gen_helper_neon_qrdmlah_s16(ele3, tcg_env, ele1, ele2, ele3); 9316 } else { 9317 gen_helper_neon_qrdmlah_s32(ele3, tcg_env, ele1, ele2, ele3); 9318 } 9319 break; 9320 case 0x1: /* SQRDMLSH */ 9321 if (size == 1) { 9322 gen_helper_neon_qrdmlsh_s16(ele3, tcg_env, ele1, ele2, ele3); 9323 } else { 9324 gen_helper_neon_qrdmlsh_s32(ele3, tcg_env, ele1, ele2, ele3); 9325 } 9326 break; 9327 default: 9328 g_assert_not_reached(); 9329 } 9330 9331 res = tcg_temp_new_i64(); 9332 tcg_gen_extu_i32_i64(res, ele3); 9333 write_fp_dreg(s, rd, res); 9334 } 9335 9336 static void handle_2misc_64(DisasContext *s, int opcode, bool u, 9337 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, 9338 TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus) 9339 { 9340 /* Handle 64->64 opcodes which are shared between the scalar and 9341 * vector 2-reg-misc groups. We cover every integer opcode where size == 3 9342 * is valid in either group and also the double-precision fp ops. 9343 * The caller only need provide tcg_rmode and tcg_fpstatus if the op 9344 * requires them. 9345 */ 9346 TCGCond cond; 9347 9348 switch (opcode) { 9349 case 0x4: /* CLS, CLZ */ 9350 if (u) { 9351 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 9352 } else { 9353 tcg_gen_clrsb_i64(tcg_rd, tcg_rn); 9354 } 9355 break; 9356 case 0x5: /* NOT */ 9357 /* This opcode is shared with CNT and RBIT but we have earlier 9358 * enforced that size == 3 if and only if this is the NOT insn. 9359 */ 9360 tcg_gen_not_i64(tcg_rd, tcg_rn); 9361 break; 9362 case 0x7: /* SQABS, SQNEG */ 9363 if (u) { 9364 gen_helper_neon_qneg_s64(tcg_rd, tcg_env, tcg_rn); 9365 } else { 9366 gen_helper_neon_qabs_s64(tcg_rd, tcg_env, tcg_rn); 9367 } 9368 break; 9369 case 0xa: /* CMLT */ 9370 cond = TCG_COND_LT; 9371 do_cmop: 9372 /* 64 bit integer comparison against zero, result is test ? -1 : 0. */ 9373 tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0)); 9374 break; 9375 case 0x8: /* CMGT, CMGE */ 9376 cond = u ? TCG_COND_GE : TCG_COND_GT; 9377 goto do_cmop; 9378 case 0x9: /* CMEQ, CMLE */ 9379 cond = u ? 
TCG_COND_LE : TCG_COND_EQ; 9380 goto do_cmop; 9381 case 0xb: /* ABS, NEG */ 9382 if (u) { 9383 tcg_gen_neg_i64(tcg_rd, tcg_rn); 9384 } else { 9385 tcg_gen_abs_i64(tcg_rd, tcg_rn); 9386 } 9387 break; 9388 case 0x2f: /* FABS */ 9389 gen_helper_vfp_absd(tcg_rd, tcg_rn); 9390 break; 9391 case 0x6f: /* FNEG */ 9392 gen_helper_vfp_negd(tcg_rd, tcg_rn); 9393 break; 9394 case 0x7f: /* FSQRT */ 9395 gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env); 9396 break; 9397 case 0x1a: /* FCVTNS */ 9398 case 0x1b: /* FCVTMS */ 9399 case 0x1c: /* FCVTAS */ 9400 case 0x3a: /* FCVTPS */ 9401 case 0x3b: /* FCVTZS */ 9402 gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 9403 break; 9404 case 0x5a: /* FCVTNU */ 9405 case 0x5b: /* FCVTMU */ 9406 case 0x5c: /* FCVTAU */ 9407 case 0x7a: /* FCVTPU */ 9408 case 0x7b: /* FCVTZU */ 9409 gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 9410 break; 9411 case 0x18: /* FRINTN */ 9412 case 0x19: /* FRINTM */ 9413 case 0x38: /* FRINTP */ 9414 case 0x39: /* FRINTZ */ 9415 case 0x58: /* FRINTA */ 9416 case 0x79: /* FRINTI */ 9417 gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus); 9418 break; 9419 case 0x59: /* FRINTX */ 9420 gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus); 9421 break; 9422 case 0x1e: /* FRINT32Z */ 9423 case 0x5e: /* FRINT32X */ 9424 gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus); 9425 break; 9426 case 0x1f: /* FRINT64Z */ 9427 case 0x5f: /* FRINT64X */ 9428 gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus); 9429 break; 9430 default: 9431 g_assert_not_reached(); 9432 } 9433 } 9434 9435 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, 9436 bool is_scalar, bool is_u, bool is_q, 9437 int size, int rn, int rd) 9438 { 9439 bool is_double = (size == MO_64); 9440 TCGv_ptr fpst; 9441 9442 if (!fp_access_check(s)) { 9443 return; 9444 } 9445 9446 fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 9447 9448 if (is_double) { 9449 TCGv_i64 tcg_op = tcg_temp_new_i64(); 9450 TCGv_i64 tcg_zero = tcg_constant_i64(0); 9451 TCGv_i64 tcg_res = tcg_temp_new_i64(); 9452 NeonGenTwoDoubleOpFn *genfn; 9453 bool swap = false; 9454 int pass; 9455 9456 switch (opcode) { 9457 case 0x2e: /* FCMLT (zero) */ 9458 swap = true; 9459 /* fallthrough */ 9460 case 0x2c: /* FCMGT (zero) */ 9461 genfn = gen_helper_neon_cgt_f64; 9462 break; 9463 case 0x2d: /* FCMEQ (zero) */ 9464 genfn = gen_helper_neon_ceq_f64; 9465 break; 9466 case 0x6d: /* FCMLE (zero) */ 9467 swap = true; 9468 /* fall through */ 9469 case 0x6c: /* FCMGE (zero) */ 9470 genfn = gen_helper_neon_cge_f64; 9471 break; 9472 default: 9473 g_assert_not_reached(); 9474 } 9475 9476 for (pass = 0; pass < (is_scalar ? 
1 : 2); pass++) { 9477 read_vec_element(s, tcg_op, rn, pass, MO_64); 9478 if (swap) { 9479 genfn(tcg_res, tcg_zero, tcg_op, fpst); 9480 } else { 9481 genfn(tcg_res, tcg_op, tcg_zero, fpst); 9482 } 9483 write_vec_element(s, tcg_res, rd, pass, MO_64); 9484 } 9485 9486 clear_vec_high(s, !is_scalar, rd); 9487 } else { 9488 TCGv_i32 tcg_op = tcg_temp_new_i32(); 9489 TCGv_i32 tcg_zero = tcg_constant_i32(0); 9490 TCGv_i32 tcg_res = tcg_temp_new_i32(); 9491 NeonGenTwoSingleOpFn *genfn; 9492 bool swap = false; 9493 int pass, maxpasses; 9494 9495 if (size == MO_16) { 9496 switch (opcode) { 9497 case 0x2e: /* FCMLT (zero) */ 9498 swap = true; 9499 /* fall through */ 9500 case 0x2c: /* FCMGT (zero) */ 9501 genfn = gen_helper_advsimd_cgt_f16; 9502 break; 9503 case 0x2d: /* FCMEQ (zero) */ 9504 genfn = gen_helper_advsimd_ceq_f16; 9505 break; 9506 case 0x6d: /* FCMLE (zero) */ 9507 swap = true; 9508 /* fall through */ 9509 case 0x6c: /* FCMGE (zero) */ 9510 genfn = gen_helper_advsimd_cge_f16; 9511 break; 9512 default: 9513 g_assert_not_reached(); 9514 } 9515 } else { 9516 switch (opcode) { 9517 case 0x2e: /* FCMLT (zero) */ 9518 swap = true; 9519 /* fall through */ 9520 case 0x2c: /* FCMGT (zero) */ 9521 genfn = gen_helper_neon_cgt_f32; 9522 break; 9523 case 0x2d: /* FCMEQ (zero) */ 9524 genfn = gen_helper_neon_ceq_f32; 9525 break; 9526 case 0x6d: /* FCMLE (zero) */ 9527 swap = true; 9528 /* fall through */ 9529 case 0x6c: /* FCMGE (zero) */ 9530 genfn = gen_helper_neon_cge_f32; 9531 break; 9532 default: 9533 g_assert_not_reached(); 9534 } 9535 } 9536 9537 if (is_scalar) { 9538 maxpasses = 1; 9539 } else { 9540 int vector_size = 8 << is_q; 9541 maxpasses = vector_size >> size; 9542 } 9543 9544 for (pass = 0; pass < maxpasses; pass++) { 9545 read_vec_element_i32(s, tcg_op, rn, pass, size); 9546 if (swap) { 9547 genfn(tcg_res, tcg_zero, tcg_op, fpst); 9548 } else { 9549 genfn(tcg_res, tcg_op, tcg_zero, fpst); 9550 } 9551 if (is_scalar) { 9552 write_fp_sreg(s, rd, tcg_res); 9553 } else { 9554 write_vec_element_i32(s, tcg_res, rd, pass, size); 9555 } 9556 } 9557 9558 if (!is_scalar) { 9559 clear_vec_high(s, is_q, rd); 9560 } 9561 } 9562 } 9563 9564 static void handle_2misc_reciprocal(DisasContext *s, int opcode, 9565 bool is_scalar, bool is_u, bool is_q, 9566 int size, int rn, int rd) 9567 { 9568 bool is_double = (size == 3); 9569 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9570 9571 if (is_double) { 9572 TCGv_i64 tcg_op = tcg_temp_new_i64(); 9573 TCGv_i64 tcg_res = tcg_temp_new_i64(); 9574 int pass; 9575 9576 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 9577 read_vec_element(s, tcg_op, rn, pass, MO_64); 9578 switch (opcode) { 9579 case 0x3d: /* FRECPE */ 9580 gen_helper_recpe_f64(tcg_res, tcg_op, fpst); 9581 break; 9582 case 0x3f: /* FRECPX */ 9583 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst); 9584 break; 9585 case 0x7d: /* FRSQRTE */ 9586 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst); 9587 break; 9588 default: 9589 g_assert_not_reached(); 9590 } 9591 write_vec_element(s, tcg_res, rd, pass, MO_64); 9592 } 9593 clear_vec_high(s, !is_scalar, rd); 9594 } else { 9595 TCGv_i32 tcg_op = tcg_temp_new_i32(); 9596 TCGv_i32 tcg_res = tcg_temp_new_i32(); 9597 int pass, maxpasses; 9598 9599 if (is_scalar) { 9600 maxpasses = 1; 9601 } else { 9602 maxpasses = is_q ? 
4 : 2; 9603 } 9604 9605 for (pass = 0; pass < maxpasses; pass++) { 9606 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 9607 9608 switch (opcode) { 9609 case 0x3c: /* URECPE */ 9610 gen_helper_recpe_u32(tcg_res, tcg_op); 9611 break; 9612 case 0x3d: /* FRECPE */ 9613 gen_helper_recpe_f32(tcg_res, tcg_op, fpst); 9614 break; 9615 case 0x3f: /* FRECPX */ 9616 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst); 9617 break; 9618 case 0x7d: /* FRSQRTE */ 9619 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst); 9620 break; 9621 default: 9622 g_assert_not_reached(); 9623 } 9624 9625 if (is_scalar) { 9626 write_fp_sreg(s, rd, tcg_res); 9627 } else { 9628 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 9629 } 9630 } 9631 if (!is_scalar) { 9632 clear_vec_high(s, is_q, rd); 9633 } 9634 } 9635 } 9636 9637 static void handle_2misc_narrow(DisasContext *s, bool scalar, 9638 int opcode, bool u, bool is_q, 9639 int size, int rn, int rd) 9640 { 9641 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element 9642 * in the source becomes a size element in the destination). 9643 */ 9644 int pass; 9645 TCGv_i32 tcg_res[2]; 9646 int destelt = is_q ? 2 : 0; 9647 int passes = scalar ? 1 : 2; 9648 9649 if (scalar) { 9650 tcg_res[1] = tcg_constant_i32(0); 9651 } 9652 9653 for (pass = 0; pass < passes; pass++) { 9654 TCGv_i64 tcg_op = tcg_temp_new_i64(); 9655 NeonGenNarrowFn *genfn = NULL; 9656 NeonGenNarrowEnvFn *genenvfn = NULL; 9657 9658 if (scalar) { 9659 read_vec_element(s, tcg_op, rn, pass, size + 1); 9660 } else { 9661 read_vec_element(s, tcg_op, rn, pass, MO_64); 9662 } 9663 tcg_res[pass] = tcg_temp_new_i32(); 9664 9665 switch (opcode) { 9666 case 0x12: /* XTN, SQXTUN */ 9667 { 9668 static NeonGenNarrowFn * const xtnfns[3] = { 9669 gen_helper_neon_narrow_u8, 9670 gen_helper_neon_narrow_u16, 9671 tcg_gen_extrl_i64_i32, 9672 }; 9673 static NeonGenNarrowEnvFn * const sqxtunfns[3] = { 9674 gen_helper_neon_unarrow_sat8, 9675 gen_helper_neon_unarrow_sat16, 9676 gen_helper_neon_unarrow_sat32, 9677 }; 9678 if (u) { 9679 genenvfn = sqxtunfns[size]; 9680 } else { 9681 genfn = xtnfns[size]; 9682 } 9683 break; 9684 } 9685 case 0x14: /* SQXTN, UQXTN */ 9686 { 9687 static NeonGenNarrowEnvFn * const fns[3][2] = { 9688 { gen_helper_neon_narrow_sat_s8, 9689 gen_helper_neon_narrow_sat_u8 }, 9690 { gen_helper_neon_narrow_sat_s16, 9691 gen_helper_neon_narrow_sat_u16 }, 9692 { gen_helper_neon_narrow_sat_s32, 9693 gen_helper_neon_narrow_sat_u32 }, 9694 }; 9695 genenvfn = fns[size][u]; 9696 break; 9697 } 9698 case 0x16: /* FCVTN, FCVTN2 */ 9699 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ 9700 if (size == 2) { 9701 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, tcg_env); 9702 } else { 9703 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 9704 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 9705 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9706 TCGv_i32 ahp = get_ahp_flag(); 9707 9708 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op); 9709 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 9710 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 9711 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16); 9712 } 9713 break; 9714 case 0x36: /* BFCVTN, BFCVTN2 */ 9715 { 9716 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9717 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst); 9718 } 9719 break; 9720 case 0x56: /* FCVTXN, FCVTXN2 */ 9721 /* 64 bit to 32 bit float conversion 9722 * with von Neumann rounding (round to odd) 9723 */ 9724 assert(size == 2); 9725 gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, tcg_env); 9726 break; 9727 
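        /* All other narrowing opcodes are filtered out by the callers. */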
default: 9728 g_assert_not_reached(); 9729 } 9730 9731 if (genfn) { 9732 genfn(tcg_res[pass], tcg_op); 9733 } else if (genenvfn) { 9734 genenvfn(tcg_res[pass], tcg_env, tcg_op); 9735 } 9736 } 9737 9738 for (pass = 0; pass < 2; pass++) { 9739 write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32); 9740 } 9741 clear_vec_high(s, is_q, rd); 9742 } 9743 9744 /* Remaining saturating accumulating ops */ 9745 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, 9746 bool is_q, int size, int rn, int rd) 9747 { 9748 bool is_double = (size == 3); 9749 9750 if (is_double) { 9751 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 9752 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 9753 int pass; 9754 9755 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 9756 read_vec_element(s, tcg_rn, rn, pass, MO_64); 9757 read_vec_element(s, tcg_rd, rd, pass, MO_64); 9758 9759 if (is_u) { /* USQADD */ 9760 gen_helper_neon_uqadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rd); 9761 } else { /* SUQADD */ 9762 gen_helper_neon_sqadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rd); 9763 } 9764 write_vec_element(s, tcg_rd, rd, pass, MO_64); 9765 } 9766 clear_vec_high(s, !is_scalar, rd); 9767 } else { 9768 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 9769 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 9770 int pass, maxpasses; 9771 9772 if (is_scalar) { 9773 maxpasses = 1; 9774 } else { 9775 maxpasses = is_q ? 4 : 2; 9776 } 9777 9778 for (pass = 0; pass < maxpasses; pass++) { 9779 if (is_scalar) { 9780 read_vec_element_i32(s, tcg_rn, rn, pass, size); 9781 read_vec_element_i32(s, tcg_rd, rd, pass, size); 9782 } else { 9783 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32); 9784 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32); 9785 } 9786 9787 if (is_u) { /* USQADD */ 9788 switch (size) { 9789 case 0: 9790 gen_helper_neon_uqadd_s8(tcg_rd, tcg_env, tcg_rn, tcg_rd); 9791 break; 9792 case 1: 9793 gen_helper_neon_uqadd_s16(tcg_rd, tcg_env, tcg_rn, tcg_rd); 9794 break; 9795 case 2: 9796 gen_helper_neon_uqadd_s32(tcg_rd, tcg_env, tcg_rn, tcg_rd); 9797 break; 9798 default: 9799 g_assert_not_reached(); 9800 } 9801 } else { /* SUQADD */ 9802 switch (size) { 9803 case 0: 9804 gen_helper_neon_sqadd_u8(tcg_rd, tcg_env, tcg_rn, tcg_rd); 9805 break; 9806 case 1: 9807 gen_helper_neon_sqadd_u16(tcg_rd, tcg_env, tcg_rn, tcg_rd); 9808 break; 9809 case 2: 9810 gen_helper_neon_sqadd_u32(tcg_rd, tcg_env, tcg_rn, tcg_rd); 9811 break; 9812 default: 9813 g_assert_not_reached(); 9814 } 9815 } 9816 9817 if (is_scalar) { 9818 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64); 9819 } 9820 write_vec_element_i32(s, tcg_rd, rd, pass, MO_32); 9821 } 9822 clear_vec_high(s, is_q, rd); 9823 } 9824 } 9825 9826 /* AdvSIMD scalar two reg misc 9827 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 9828 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 9829 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 9830 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 9831 */ 9832 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) 9833 { 9834 int rd = extract32(insn, 0, 5); 9835 int rn = extract32(insn, 5, 5); 9836 int opcode = extract32(insn, 12, 5); 9837 int size = extract32(insn, 22, 2); 9838 bool u = extract32(insn, 29, 1); 9839 bool is_fcvt = false; 9840 int rmode; 9841 TCGv_i32 tcg_rmode; 9842 TCGv_ptr tcg_fpstatus; 9843 9844 switch (opcode) { 9845 case 0x3: /* USQADD / SUQADD*/ 9846 if (!fp_access_check(s)) { 9847 return; 9848 } 9849 handle_2misc_satacc(s, true, u, false, size, rn, rd); 
9850 return; 9851 case 0x7: /* SQABS / SQNEG */ 9852 break; 9853 case 0xa: /* CMLT */ 9854 if (u) { 9855 unallocated_encoding(s); 9856 return; 9857 } 9858 /* fall through */ 9859 case 0x8: /* CMGT, CMGE */ 9860 case 0x9: /* CMEQ, CMLE */ 9861 case 0xb: /* ABS, NEG */ 9862 if (size != 3) { 9863 unallocated_encoding(s); 9864 return; 9865 } 9866 break; 9867 case 0x12: /* SQXTUN */ 9868 if (!u) { 9869 unallocated_encoding(s); 9870 return; 9871 } 9872 /* fall through */ 9873 case 0x14: /* SQXTN, UQXTN */ 9874 if (size == 3) { 9875 unallocated_encoding(s); 9876 return; 9877 } 9878 if (!fp_access_check(s)) { 9879 return; 9880 } 9881 handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd); 9882 return; 9883 case 0xc ... 0xf: 9884 case 0x16 ... 0x1d: 9885 case 0x1f: 9886 /* Floating point: U, size[1] and opcode indicate operation; 9887 * size[0] indicates single or double precision. 9888 */ 9889 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 9890 size = extract32(size, 0, 1) ? 3 : 2; 9891 switch (opcode) { 9892 case 0x2c: /* FCMGT (zero) */ 9893 case 0x2d: /* FCMEQ (zero) */ 9894 case 0x2e: /* FCMLT (zero) */ 9895 case 0x6c: /* FCMGE (zero) */ 9896 case 0x6d: /* FCMLE (zero) */ 9897 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd); 9898 return; 9899 case 0x1d: /* SCVTF */ 9900 case 0x5d: /* UCVTF */ 9901 { 9902 bool is_signed = (opcode == 0x1d); 9903 if (!fp_access_check(s)) { 9904 return; 9905 } 9906 handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size); 9907 return; 9908 } 9909 case 0x3d: /* FRECPE */ 9910 case 0x3f: /* FRECPX */ 9911 case 0x7d: /* FRSQRTE */ 9912 if (!fp_access_check(s)) { 9913 return; 9914 } 9915 handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd); 9916 return; 9917 case 0x1a: /* FCVTNS */ 9918 case 0x1b: /* FCVTMS */ 9919 case 0x3a: /* FCVTPS */ 9920 case 0x3b: /* FCVTZS */ 9921 case 0x5a: /* FCVTNU */ 9922 case 0x5b: /* FCVTMU */ 9923 case 0x7a: /* FCVTPU */ 9924 case 0x7b: /* FCVTZU */ 9925 is_fcvt = true; 9926 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 9927 break; 9928 case 0x1c: /* FCVTAS */ 9929 case 0x5c: /* FCVTAU */ 9930 /* TIEAWAY doesn't fit in the usual rounding mode encoding */ 9931 is_fcvt = true; 9932 rmode = FPROUNDING_TIEAWAY; 9933 break; 9934 case 0x56: /* FCVTXN, FCVTXN2 */ 9935 if (size == 2) { 9936 unallocated_encoding(s); 9937 return; 9938 } 9939 if (!fp_access_check(s)) { 9940 return; 9941 } 9942 handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd); 9943 return; 9944 default: 9945 unallocated_encoding(s); 9946 return; 9947 } 9948 break; 9949 default: 9950 unallocated_encoding(s); 9951 return; 9952 } 9953 9954 if (!fp_access_check(s)) { 9955 return; 9956 } 9957 9958 if (is_fcvt) { 9959 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 9960 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 9961 } else { 9962 tcg_fpstatus = NULL; 9963 tcg_rmode = NULL; 9964 } 9965 9966 if (size == 3) { 9967 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 9968 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 9969 9970 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus); 9971 write_fp_dreg(s, rd, tcg_rd); 9972 } else { 9973 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 9974 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 9975 9976 read_vec_element_i32(s, tcg_rn, rn, 0, size); 9977 9978 switch (opcode) { 9979 case 0x7: /* SQABS, SQNEG */ 9980 { 9981 NeonGenOneOpEnvFn *genfn; 9982 static NeonGenOneOpEnvFn * const fns[3][2] = { 9983 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 9984 { gen_helper_neon_qabs_s16, 
gen_helper_neon_qneg_s16 }, 9985 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, 9986 }; 9987 genfn = fns[size][u]; 9988 genfn(tcg_rd, tcg_env, tcg_rn); 9989 break; 9990 } 9991 case 0x1a: /* FCVTNS */ 9992 case 0x1b: /* FCVTMS */ 9993 case 0x1c: /* FCVTAS */ 9994 case 0x3a: /* FCVTPS */ 9995 case 0x3b: /* FCVTZS */ 9996 gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0), 9997 tcg_fpstatus); 9998 break; 9999 case 0x5a: /* FCVTNU */ 10000 case 0x5b: /* FCVTMU */ 10001 case 0x5c: /* FCVTAU */ 10002 case 0x7a: /* FCVTPU */ 10003 case 0x7b: /* FCVTZU */ 10004 gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0), 10005 tcg_fpstatus); 10006 break; 10007 default: 10008 g_assert_not_reached(); 10009 } 10010 10011 write_fp_sreg(s, rd, tcg_rd); 10012 } 10013 10014 if (is_fcvt) { 10015 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 10016 } 10017 } 10018 10019 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */ 10020 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, 10021 int immh, int immb, int opcode, int rn, int rd) 10022 { 10023 int size = 32 - clz32(immh) - 1; 10024 int immhb = immh << 3 | immb; 10025 int shift = 2 * (8 << size) - immhb; 10026 GVecGen2iFn *gvec_fn; 10027 10028 if (extract32(immh, 3, 1) && !is_q) { 10029 unallocated_encoding(s); 10030 return; 10031 } 10032 tcg_debug_assert(size <= 3); 10033 10034 if (!fp_access_check(s)) { 10035 return; 10036 } 10037 10038 switch (opcode) { 10039 case 0x02: /* SSRA / USRA (accumulate) */ 10040 gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra; 10041 break; 10042 10043 case 0x08: /* SRI */ 10044 gvec_fn = gen_gvec_sri; 10045 break; 10046 10047 case 0x00: /* SSHR / USHR */ 10048 if (is_u) { 10049 if (shift == 8 << size) { 10050 /* Shift count the same size as element size produces zero. */ 10051 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd), 10052 is_q ? 16 : 8, vec_full_reg_size(s), 0); 10053 return; 10054 } 10055 gvec_fn = tcg_gen_gvec_shri; 10056 } else { 10057 /* Shift count the same size as element size produces all sign. */ 10058 if (shift == 8 << size) { 10059 shift -= 1; 10060 } 10061 gvec_fn = tcg_gen_gvec_sari; 10062 } 10063 break; 10064 10065 case 0x04: /* SRSHR / URSHR (rounding) */ 10066 gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr; 10067 break; 10068 10069 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 10070 gvec_fn = is_u ? 
gen_gvec_ursra : gen_gvec_srsra; 10071 break; 10072 10073 default: 10074 g_assert_not_reached(); 10075 } 10076 10077 gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size); 10078 } 10079 10080 /* SHL/SLI - Vector shift left */ 10081 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, 10082 int immh, int immb, int opcode, int rn, int rd) 10083 { 10084 int size = 32 - clz32(immh) - 1; 10085 int immhb = immh << 3 | immb; 10086 int shift = immhb - (8 << size); 10087 10088 /* Range of size is limited by decode: immh is a non-zero 4 bit field */ 10089 assert(size >= 0 && size <= 3); 10090 10091 if (extract32(immh, 3, 1) && !is_q) { 10092 unallocated_encoding(s); 10093 return; 10094 } 10095 10096 if (!fp_access_check(s)) { 10097 return; 10098 } 10099 10100 if (insert) { 10101 gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size); 10102 } else { 10103 gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size); 10104 } 10105 } 10106 10107 /* USHLL/SHLL - Vector shift left with widening */ 10108 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, 10109 int immh, int immb, int opcode, int rn, int rd) 10110 { 10111 int size = 32 - clz32(immh) - 1; 10112 int immhb = immh << 3 | immb; 10113 int shift = immhb - (8 << size); 10114 int dsize = 64; 10115 int esize = 8 << size; 10116 int elements = dsize/esize; 10117 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 10118 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 10119 int i; 10120 10121 if (size >= 3) { 10122 unallocated_encoding(s); 10123 return; 10124 } 10125 10126 if (!fp_access_check(s)) { 10127 return; 10128 } 10129 10130 /* For the LL variants the store is larger than the load, 10131 * so if rd == rn we would overwrite parts of our input. 10132 * So load everything right now and use shifts in the main loop. 10133 */ 10134 read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64); 10135 10136 for (i = 0; i < elements; i++) { 10137 tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize); 10138 ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0); 10139 tcg_gen_shli_i64(tcg_rd, tcg_rd, shift); 10140 write_vec_element(s, tcg_rd, rd, i, size + 1); 10141 } 10142 } 10143 10144 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */ 10145 static void handle_vec_simd_shrn(DisasContext *s, bool is_q, 10146 int immh, int immb, int opcode, int rn, int rd) 10147 { 10148 int immhb = immh << 3 | immb; 10149 int size = 32 - clz32(immh) - 1; 10150 int dsize = 64; 10151 int esize = 8 << size; 10152 int elements = dsize/esize; 10153 int shift = (2 * esize) - immhb; 10154 bool round = extract32(opcode, 0, 1); 10155 TCGv_i64 tcg_rn, tcg_rd, tcg_final; 10156 TCGv_i64 tcg_round; 10157 int i; 10158 10159 if (extract32(immh, 3, 1)) { 10160 unallocated_encoding(s); 10161 return; 10162 } 10163 10164 if (!fp_access_check(s)) { 10165 return; 10166 } 10167 10168 tcg_rn = tcg_temp_new_i64(); 10169 tcg_rd = tcg_temp_new_i64(); 10170 tcg_final = tcg_temp_new_i64(); 10171 read_vec_element(s, tcg_final, rd, is_q ? 
1 : 0, MO_64); 10172 10173 if (round) { 10174 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 10175 } else { 10176 tcg_round = NULL; 10177 } 10178 10179 for (i = 0; i < elements; i++) { 10180 read_vec_element(s, tcg_rn, rn, i, size+1); 10181 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 10182 false, true, size+1, shift); 10183 10184 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); 10185 } 10186 10187 if (!is_q) { 10188 write_vec_element(s, tcg_final, rd, 0, MO_64); 10189 } else { 10190 write_vec_element(s, tcg_final, rd, 1, MO_64); 10191 } 10192 10193 clear_vec_high(s, is_q, rd); 10194 } 10195 10196 10197 /* AdvSIMD shift by immediate 10198 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 10199 * +---+---+---+-------------+------+------+--------+---+------+------+ 10200 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | 10201 * +---+---+---+-------------+------+------+--------+---+------+------+ 10202 */ 10203 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) 10204 { 10205 int rd = extract32(insn, 0, 5); 10206 int rn = extract32(insn, 5, 5); 10207 int opcode = extract32(insn, 11, 5); 10208 int immb = extract32(insn, 16, 3); 10209 int immh = extract32(insn, 19, 4); 10210 bool is_u = extract32(insn, 29, 1); 10211 bool is_q = extract32(insn, 30, 1); 10212 10213 /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */ 10214 assert(immh != 0); 10215 10216 switch (opcode) { 10217 case 0x08: /* SRI */ 10218 if (!is_u) { 10219 unallocated_encoding(s); 10220 return; 10221 } 10222 /* fall through */ 10223 case 0x00: /* SSHR / USHR */ 10224 case 0x02: /* SSRA / USRA (accumulate) */ 10225 case 0x04: /* SRSHR / URSHR (rounding) */ 10226 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 10227 handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd); 10228 break; 10229 case 0x0a: /* SHL / SLI */ 10230 handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd); 10231 break; 10232 case 0x10: /* SHRN */ 10233 case 0x11: /* RSHRN / SQRSHRUN */ 10234 if (is_u) { 10235 handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb, 10236 opcode, rn, rd); 10237 } else { 10238 handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd); 10239 } 10240 break; 10241 case 0x12: /* SQSHRN / UQSHRN */ 10242 case 0x13: /* SQRSHRN / UQRSHRN */ 10243 handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, 10244 opcode, rn, rd); 10245 break; 10246 case 0x14: /* SSHLL / USHLL */ 10247 handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); 10248 break; 10249 case 0x1c: /* SCVTF / UCVTF */ 10250 handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, 10251 opcode, rn, rd); 10252 break; 10253 case 0xc: /* SQSHLU */ 10254 if (!is_u) { 10255 unallocated_encoding(s); 10256 return; 10257 } 10258 handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd); 10259 break; 10260 case 0xe: /* SQSHL, UQSHL */ 10261 handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd); 10262 break; 10263 case 0x1f: /* FCVTZS/ FCVTZU */ 10264 handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd); 10265 return; 10266 default: 10267 unallocated_encoding(s); 10268 return; 10269 } 10270 } 10271 10272 /* Generate code to do a "long" addition or subtraction, ie one done in 10273 * TCGv_i64 on vector lanes twice the width specified by size. 
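 *
 * For size 0 or 1 the i64 holds packed 16-bit or 32-bit lanes, so the
 * Neon addl/subl helpers below add or subtract lane by lane; for size 2
 * there is a single 64-bit lane and a plain tcg_gen_add_i64/sub_i64 is
 * used.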
10274 */ 10275 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res, 10276 TCGv_i64 tcg_op1, TCGv_i64 tcg_op2) 10277 { 10278 static NeonGenTwo64OpFn * const fns[3][2] = { 10279 { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 }, 10280 { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 }, 10281 { tcg_gen_add_i64, tcg_gen_sub_i64 }, 10282 }; 10283 NeonGenTwo64OpFn *genfn; 10284 assert(size < 3); 10285 10286 genfn = fns[size][is_sub]; 10287 genfn(tcg_res, tcg_op1, tcg_op2); 10288 } 10289 10290 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, 10291 int opcode, int rd, int rn, int rm) 10292 { 10293 /* 3-reg-different widening insns: 64 x 64 -> 128 */ 10294 TCGv_i64 tcg_res[2]; 10295 int pass, accop; 10296 10297 tcg_res[0] = tcg_temp_new_i64(); 10298 tcg_res[1] = tcg_temp_new_i64(); 10299 10300 /* Does this op do an adding accumulate, a subtracting accumulate, 10301 * or no accumulate at all? 10302 */ 10303 switch (opcode) { 10304 case 5: 10305 case 8: 10306 case 9: 10307 accop = 1; 10308 break; 10309 case 10: 10310 case 11: 10311 accop = -1; 10312 break; 10313 default: 10314 accop = 0; 10315 break; 10316 } 10317 10318 if (accop != 0) { 10319 read_vec_element(s, tcg_res[0], rd, 0, MO_64); 10320 read_vec_element(s, tcg_res[1], rd, 1, MO_64); 10321 } 10322 10323 /* size == 2 means two 32x32->64 operations; this is worth special 10324 * casing because we can generally handle it inline. 10325 */ 10326 if (size == 2) { 10327 for (pass = 0; pass < 2; pass++) { 10328 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10329 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 10330 TCGv_i64 tcg_passres; 10331 MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN); 10332 10333 int elt = pass + is_q * 2; 10334 10335 read_vec_element(s, tcg_op1, rn, elt, memop); 10336 read_vec_element(s, tcg_op2, rm, elt, memop); 10337 10338 if (accop == 0) { 10339 tcg_passres = tcg_res[pass]; 10340 } else { 10341 tcg_passres = tcg_temp_new_i64(); 10342 } 10343 10344 switch (opcode) { 10345 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 10346 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2); 10347 break; 10348 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 10349 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2); 10350 break; 10351 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 10352 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 10353 { 10354 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64(); 10355 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64(); 10356 10357 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2); 10358 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1); 10359 tcg_gen_movcond_i64(is_u ? 
TCG_COND_GEU : TCG_COND_GE, 10360 tcg_passres, 10361 tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2); 10362 break; 10363 } 10364 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 10365 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 10366 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ 10367 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); 10368 break; 10369 case 9: /* SQDMLAL, SQDMLAL2 */ 10370 case 11: /* SQDMLSL, SQDMLSL2 */ 10371 case 13: /* SQDMULL, SQDMULL2 */ 10372 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); 10373 gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env, 10374 tcg_passres, tcg_passres); 10375 break; 10376 default: 10377 g_assert_not_reached(); 10378 } 10379 10380 if (opcode == 9 || opcode == 11) { 10381 /* saturating accumulate ops */ 10382 if (accop < 0) { 10383 tcg_gen_neg_i64(tcg_passres, tcg_passres); 10384 } 10385 gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env, 10386 tcg_res[pass], tcg_passres); 10387 } else if (accop > 0) { 10388 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 10389 } else if (accop < 0) { 10390 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 10391 } 10392 } 10393 } else { 10394 /* size 0 or 1, generally helper functions */ 10395 for (pass = 0; pass < 2; pass++) { 10396 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 10397 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 10398 TCGv_i64 tcg_passres; 10399 int elt = pass + is_q * 2; 10400 10401 read_vec_element_i32(s, tcg_op1, rn, elt, MO_32); 10402 read_vec_element_i32(s, tcg_op2, rm, elt, MO_32); 10403 10404 if (accop == 0) { 10405 tcg_passres = tcg_res[pass]; 10406 } else { 10407 tcg_passres = tcg_temp_new_i64(); 10408 } 10409 10410 switch (opcode) { 10411 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 10412 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 10413 { 10414 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64(); 10415 static NeonGenWidenFn * const widenfns[2][2] = { 10416 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, 10417 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, 10418 }; 10419 NeonGenWidenFn *widenfn = widenfns[size][is_u]; 10420 10421 widenfn(tcg_op2_64, tcg_op2); 10422 widenfn(tcg_passres, tcg_op1); 10423 gen_neon_addl(size, (opcode == 2), tcg_passres, 10424 tcg_passres, tcg_op2_64); 10425 break; 10426 } 10427 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 10428 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 10429 if (size == 0) { 10430 if (is_u) { 10431 gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2); 10432 } else { 10433 gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2); 10434 } 10435 } else { 10436 if (is_u) { 10437 gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2); 10438 } else { 10439 gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2); 10440 } 10441 } 10442 break; 10443 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 10444 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 10445 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ 10446 if (size == 0) { 10447 if (is_u) { 10448 gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2); 10449 } else { 10450 gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2); 10451 } 10452 } else { 10453 if (is_u) { 10454 gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2); 10455 } else { 10456 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); 10457 } 10458 } 10459 break; 10460 case 9: /* SQDMLAL, SQDMLAL2 */ 10461 case 11: /* SQDMLSL, SQDMLSL2 */ 10462 case 13: /* SQDMULL, SQDMULL2 */ 10463 assert(size == 1); 10464 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); 10465 gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env, 10466 
tcg_passres, tcg_passres); 10467 break; 10468 default: 10469 g_assert_not_reached(); 10470 } 10471 10472 if (accop != 0) { 10473 if (opcode == 9 || opcode == 11) { 10474 /* saturating accumulate ops */ 10475 if (accop < 0) { 10476 gen_helper_neon_negl_u32(tcg_passres, tcg_passres); 10477 } 10478 gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env, 10479 tcg_res[pass], 10480 tcg_passres); 10481 } else { 10482 gen_neon_addl(size, (accop < 0), tcg_res[pass], 10483 tcg_res[pass], tcg_passres); 10484 } 10485 } 10486 } 10487 } 10488 10489 write_vec_element(s, tcg_res[0], rd, 0, MO_64); 10490 write_vec_element(s, tcg_res[1], rd, 1, MO_64); 10491 } 10492 10493 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, 10494 int opcode, int rd, int rn, int rm) 10495 { 10496 TCGv_i64 tcg_res[2]; 10497 int part = is_q ? 2 : 0; 10498 int pass; 10499 10500 for (pass = 0; pass < 2; pass++) { 10501 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10502 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 10503 TCGv_i64 tcg_op2_wide = tcg_temp_new_i64(); 10504 static NeonGenWidenFn * const widenfns[3][2] = { 10505 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, 10506 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, 10507 { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 }, 10508 }; 10509 NeonGenWidenFn *widenfn = widenfns[size][is_u]; 10510 10511 read_vec_element(s, tcg_op1, rn, pass, MO_64); 10512 read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32); 10513 widenfn(tcg_op2_wide, tcg_op2); 10514 tcg_res[pass] = tcg_temp_new_i64(); 10515 gen_neon_addl(size, (opcode == 3), 10516 tcg_res[pass], tcg_op1, tcg_op2_wide); 10517 } 10518 10519 for (pass = 0; pass < 2; pass++) { 10520 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 10521 } 10522 } 10523 10524 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in) 10525 { 10526 tcg_gen_addi_i64(in, in, 1U << 31); 10527 tcg_gen_extrh_i64_i32(res, in); 10528 } 10529 10530 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size, 10531 int opcode, int rd, int rn, int rm) 10532 { 10533 TCGv_i32 tcg_res[2]; 10534 int part = is_q ? 
2 : 0; 10535 int pass; 10536 10537 for (pass = 0; pass < 2; pass++) { 10538 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10539 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 10540 TCGv_i64 tcg_wideres = tcg_temp_new_i64(); 10541 static NeonGenNarrowFn * const narrowfns[3][2] = { 10542 { gen_helper_neon_narrow_high_u8, 10543 gen_helper_neon_narrow_round_high_u8 }, 10544 { gen_helper_neon_narrow_high_u16, 10545 gen_helper_neon_narrow_round_high_u16 }, 10546 { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 }, 10547 }; 10548 NeonGenNarrowFn *gennarrow = narrowfns[size][is_u]; 10549 10550 read_vec_element(s, tcg_op1, rn, pass, MO_64); 10551 read_vec_element(s, tcg_op2, rm, pass, MO_64); 10552 10553 gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2); 10554 10555 tcg_res[pass] = tcg_temp_new_i32(); 10556 gennarrow(tcg_res[pass], tcg_wideres); 10557 } 10558 10559 for (pass = 0; pass < 2; pass++) { 10560 write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32); 10561 } 10562 clear_vec_high(s, is_q, rd); 10563 } 10564 10565 /* AdvSIMD three different 10566 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 10567 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 10568 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | 10569 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 10570 */ 10571 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) 10572 { 10573 /* Instructions in this group fall into three basic classes 10574 * (in each case with the operation working on each element in 10575 * the input vectors): 10576 * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra 10577 * 128 bit input) 10578 * (2) wide 64 x 128 -> 128 10579 * (3) narrowing 128 x 128 -> 64 10580 * Here we do initial decode, catch unallocated cases and 10581 * dispatch to separate functions for each class. 10582 */ 10583 int is_q = extract32(insn, 30, 1); 10584 int is_u = extract32(insn, 29, 1); 10585 int size = extract32(insn, 22, 2); 10586 int opcode = extract32(insn, 12, 4); 10587 int rm = extract32(insn, 16, 5); 10588 int rn = extract32(insn, 5, 5); 10589 int rd = extract32(insn, 0, 5); 10590 10591 switch (opcode) { 10592 case 1: /* SADDW, SADDW2, UADDW, UADDW2 */ 10593 case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */ 10594 /* 64 x 128 -> 128 */ 10595 if (size == 3) { 10596 unallocated_encoding(s); 10597 return; 10598 } 10599 if (!fp_access_check(s)) { 10600 return; 10601 } 10602 handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm); 10603 break; 10604 case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */ 10605 case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */ 10606 /* 128 x 128 -> 64 */ 10607 if (size == 3) { 10608 unallocated_encoding(s); 10609 return; 10610 } 10611 if (!fp_access_check(s)) { 10612 return; 10613 } 10614 handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm); 10615 break; 10616 case 14: /* PMULL, PMULL2 */ 10617 if (is_u) { 10618 unallocated_encoding(s); 10619 return; 10620 } 10621 switch (size) { 10622 case 0: /* PMULL.P8 */ 10623 if (!fp_access_check(s)) { 10624 return; 10625 } 10626 /* The Q field specifies lo/hi half input for this insn. */ 10627 gen_gvec_op3_ool(s, true, rd, rn, rm, is_q, 10628 gen_helper_neon_pmull_h); 10629 break; 10630 10631 case 3: /* PMULL.P64 */ 10632 if (!dc_isar_feature(aa64_pmull, s)) { 10633 unallocated_encoding(s); 10634 return; 10635 } 10636 if (!fp_access_check(s)) { 10637 return; 10638 } 10639 /* The Q field specifies lo/hi half input for this insn. 
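             * We pass Q through as the 'data' argument of the out-of-line
             * gvec op, so the helper knows which 64-bit half of each
             * source to operate on.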
*/ 10640 gen_gvec_op3_ool(s, true, rd, rn, rm, is_q, 10641 gen_helper_gvec_pmull_q); 10642 break; 10643 10644 default: 10645 unallocated_encoding(s); 10646 break; 10647 } 10648 return; 10649 case 9: /* SQDMLAL, SQDMLAL2 */ 10650 case 11: /* SQDMLSL, SQDMLSL2 */ 10651 case 13: /* SQDMULL, SQDMULL2 */ 10652 if (is_u || size == 0) { 10653 unallocated_encoding(s); 10654 return; 10655 } 10656 /* fall through */ 10657 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 10658 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 10659 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 10660 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 10661 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 10662 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 10663 case 12: /* SMULL, SMULL2, UMULL, UMULL2 */ 10664 /* 64 x 64 -> 128 */ 10665 if (size == 3) { 10666 unallocated_encoding(s); 10667 return; 10668 } 10669 if (!fp_access_check(s)) { 10670 return; 10671 } 10672 10673 handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm); 10674 break; 10675 default: 10676 /* opcode 15 not allocated */ 10677 unallocated_encoding(s); 10678 break; 10679 } 10680 } 10681 10682 /* Logic op (opcode == 3) subgroup of C3.6.16. */ 10683 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) 10684 { 10685 int rd = extract32(insn, 0, 5); 10686 int rn = extract32(insn, 5, 5); 10687 int rm = extract32(insn, 16, 5); 10688 int size = extract32(insn, 22, 2); 10689 bool is_u = extract32(insn, 29, 1); 10690 bool is_q = extract32(insn, 30, 1); 10691 10692 if (!fp_access_check(s)) { 10693 return; 10694 } 10695 10696 switch (size + 4 * is_u) { 10697 case 0: /* AND */ 10698 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0); 10699 return; 10700 case 1: /* BIC */ 10701 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0); 10702 return; 10703 case 2: /* ORR */ 10704 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0); 10705 return; 10706 case 3: /* ORN */ 10707 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0); 10708 return; 10709 case 4: /* EOR */ 10710 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0); 10711 return; 10712 10713 case 5: /* BSL bitwise select */ 10714 gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0); 10715 return; 10716 case 6: /* BIT, bitwise insert if true */ 10717 gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0); 10718 return; 10719 case 7: /* BIF, bitwise insert if false */ 10720 gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0); 10721 return; 10722 10723 default: 10724 g_assert_not_reached(); 10725 } 10726 } 10727 10728 /* Pairwise op subgroup of C3.6.16. 10729 * 10730 * This is called directly or via the handle_3same_float for float pairwise 10731 * operations where the opcode and size are calculated differently. 10732 */ 10733 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, 10734 int size, int rn, int rm, int rd) 10735 { 10736 TCGv_ptr fpst; 10737 int pass; 10738 10739 /* Floating point operations need fpst */ 10740 if (opcode >= 0x58) { 10741 fpst = fpstatus_ptr(FPST_FPCR); 10742 } else { 10743 fpst = NULL; 10744 } 10745 10746 if (!fp_access_check(s)) { 10747 return; 10748 } 10749 10750 /* These operations work on the concatenated rm:rn, with each pair of 10751 * adjacent elements being operated on to produce an element in the result. 
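     * The low half of the output therefore comes from pairs within Rn
     * and the high half from pairs within Rm.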
10752 */ 10753 if (size == 3) { 10754 TCGv_i64 tcg_res[2]; 10755 10756 for (pass = 0; pass < 2; pass++) { 10757 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10758 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 10759 int passreg = (pass == 0) ? rn : rm; 10760 10761 read_vec_element(s, tcg_op1, passreg, 0, MO_64); 10762 read_vec_element(s, tcg_op2, passreg, 1, MO_64); 10763 tcg_res[pass] = tcg_temp_new_i64(); 10764 10765 switch (opcode) { 10766 case 0x17: /* ADDP */ 10767 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); 10768 break; 10769 case 0x58: /* FMAXNMP */ 10770 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10771 break; 10772 case 0x5a: /* FADDP */ 10773 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10774 break; 10775 case 0x5e: /* FMAXP */ 10776 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10777 break; 10778 case 0x78: /* FMINNMP */ 10779 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10780 break; 10781 case 0x7e: /* FMINP */ 10782 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10783 break; 10784 default: 10785 g_assert_not_reached(); 10786 } 10787 } 10788 10789 for (pass = 0; pass < 2; pass++) { 10790 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 10791 } 10792 } else { 10793 int maxpass = is_q ? 4 : 2; 10794 TCGv_i32 tcg_res[4]; 10795 10796 for (pass = 0; pass < maxpass; pass++) { 10797 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 10798 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 10799 NeonGenTwoOpFn *genfn = NULL; 10800 int passreg = pass < (maxpass / 2) ? rn : rm; 10801 int passelt = (is_q && (pass & 1)) ? 2 : 0; 10802 10803 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32); 10804 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32); 10805 tcg_res[pass] = tcg_temp_new_i32(); 10806 10807 switch (opcode) { 10808 case 0x17: /* ADDP */ 10809 { 10810 static NeonGenTwoOpFn * const fns[3] = { 10811 gen_helper_neon_padd_u8, 10812 gen_helper_neon_padd_u16, 10813 tcg_gen_add_i32, 10814 }; 10815 genfn = fns[size]; 10816 break; 10817 } 10818 case 0x14: /* SMAXP, UMAXP */ 10819 { 10820 static NeonGenTwoOpFn * const fns[3][2] = { 10821 { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 }, 10822 { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 }, 10823 { tcg_gen_smax_i32, tcg_gen_umax_i32 }, 10824 }; 10825 genfn = fns[size][u]; 10826 break; 10827 } 10828 case 0x15: /* SMINP, UMINP */ 10829 { 10830 static NeonGenTwoOpFn * const fns[3][2] = { 10831 { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 }, 10832 { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 }, 10833 { tcg_gen_smin_i32, tcg_gen_umin_i32 }, 10834 }; 10835 genfn = fns[size][u]; 10836 break; 10837 } 10838 /* The FP operations are all on single floats (32 bit) */ 10839 case 0x58: /* FMAXNMP */ 10840 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10841 break; 10842 case 0x5a: /* FADDP */ 10843 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10844 break; 10845 case 0x5e: /* FMAXP */ 10846 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10847 break; 10848 case 0x78: /* FMINNMP */ 10849 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10850 break; 10851 case 0x7e: /* FMINP */ 10852 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10853 break; 10854 default: 10855 g_assert_not_reached(); 10856 } 10857 10858 /* FP ops called directly, otherwise call now */ 10859 if (genfn) { 10860 genfn(tcg_res[pass], tcg_op1, tcg_op2); 10861 } 10862 } 10863 10864 for (pass = 0; pass < maxpass; pass++) { 10865 
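            /*
             * Write back only now that every pair has been read:
             * Rd may be the same register as Rn or Rm.
             */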
write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); 10866 } 10867 clear_vec_high(s, is_q, rd); 10868 } 10869 } 10870 10871 /* Floating point op subgroup of C3.6.16. */ 10872 static void disas_simd_3same_float(DisasContext *s, uint32_t insn) 10873 { 10874 /* For floating point ops, the U, size[1] and opcode bits 10875 * together indicate the operation. size[0] indicates single 10876 * or double. 10877 */ 10878 int fpopcode = extract32(insn, 11, 5) 10879 | (extract32(insn, 23, 1) << 5) 10880 | (extract32(insn, 29, 1) << 6); 10881 int is_q = extract32(insn, 30, 1); 10882 int size = extract32(insn, 22, 1); 10883 int rm = extract32(insn, 16, 5); 10884 int rn = extract32(insn, 5, 5); 10885 int rd = extract32(insn, 0, 5); 10886 10887 int datasize = is_q ? 128 : 64; 10888 int esize = 32 << size; 10889 int elements = datasize / esize; 10890 10891 if (size == 1 && !is_q) { 10892 unallocated_encoding(s); 10893 return; 10894 } 10895 10896 switch (fpopcode) { 10897 case 0x58: /* FMAXNMP */ 10898 case 0x5a: /* FADDP */ 10899 case 0x5e: /* FMAXP */ 10900 case 0x78: /* FMINNMP */ 10901 case 0x7e: /* FMINP */ 10902 if (size && !is_q) { 10903 unallocated_encoding(s); 10904 return; 10905 } 10906 handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32, 10907 rn, rm, rd); 10908 return; 10909 case 0x1b: /* FMULX */ 10910 case 0x1f: /* FRECPS */ 10911 case 0x3f: /* FRSQRTS */ 10912 case 0x5d: /* FACGE */ 10913 case 0x7d: /* FACGT */ 10914 case 0x19: /* FMLA */ 10915 case 0x39: /* FMLS */ 10916 case 0x18: /* FMAXNM */ 10917 case 0x1a: /* FADD */ 10918 case 0x1c: /* FCMEQ */ 10919 case 0x1e: /* FMAX */ 10920 case 0x38: /* FMINNM */ 10921 case 0x3a: /* FSUB */ 10922 case 0x3e: /* FMIN */ 10923 case 0x5b: /* FMUL */ 10924 case 0x5c: /* FCMGE */ 10925 case 0x5f: /* FDIV */ 10926 case 0x7a: /* FABD */ 10927 case 0x7c: /* FCMGT */ 10928 if (!fp_access_check(s)) { 10929 return; 10930 } 10931 handle_3same_float(s, size, elements, fpopcode, rd, rn, rm); 10932 return; 10933 10934 case 0x1d: /* FMLAL */ 10935 case 0x3d: /* FMLSL */ 10936 case 0x59: /* FMLAL2 */ 10937 case 0x79: /* FMLSL2 */ 10938 if (size & 1 || !dc_isar_feature(aa64_fhm, s)) { 10939 unallocated_encoding(s); 10940 return; 10941 } 10942 if (fp_access_check(s)) { 10943 int is_s = extract32(insn, 23, 1); 10944 int is_2 = extract32(insn, 29, 1); 10945 int data = (is_2 << 1) | is_s; 10946 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 10947 vec_full_reg_offset(s, rn), 10948 vec_full_reg_offset(s, rm), tcg_env, 10949 is_q ? 16 : 8, vec_full_reg_size(s), 10950 data, gen_helper_gvec_fmlal_a64); 10951 } 10952 return; 10953 10954 default: 10955 unallocated_encoding(s); 10956 return; 10957 } 10958 } 10959 10960 /* Integer op subgroup of C3.6.16. 
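 *
 * The first switch below rejects unallocated size/opcode combinations,
 * the second expands the operations that have a gvec implementation,
 * and anything left is handled element by element: handle_3same_64 for
 * 64-bit elements, or the Neon helper tables (working on 32 bits at a
 * time) for the narrower element sizes.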
*/ 10961 static void disas_simd_3same_int(DisasContext *s, uint32_t insn) 10962 { 10963 int is_q = extract32(insn, 30, 1); 10964 int u = extract32(insn, 29, 1); 10965 int size = extract32(insn, 22, 2); 10966 int opcode = extract32(insn, 11, 5); 10967 int rm = extract32(insn, 16, 5); 10968 int rn = extract32(insn, 5, 5); 10969 int rd = extract32(insn, 0, 5); 10970 int pass; 10971 TCGCond cond; 10972 10973 switch (opcode) { 10974 case 0x13: /* MUL, PMUL */ 10975 if (u && size != 0) { 10976 unallocated_encoding(s); 10977 return; 10978 } 10979 /* fall through */ 10980 case 0x0: /* SHADD, UHADD */ 10981 case 0x2: /* SRHADD, URHADD */ 10982 case 0x4: /* SHSUB, UHSUB */ 10983 case 0xc: /* SMAX, UMAX */ 10984 case 0xd: /* SMIN, UMIN */ 10985 case 0xe: /* SABD, UABD */ 10986 case 0xf: /* SABA, UABA */ 10987 case 0x12: /* MLA, MLS */ 10988 if (size == 3) { 10989 unallocated_encoding(s); 10990 return; 10991 } 10992 break; 10993 case 0x16: /* SQDMULH, SQRDMULH */ 10994 if (size == 0 || size == 3) { 10995 unallocated_encoding(s); 10996 return; 10997 } 10998 break; 10999 default: 11000 if (size == 3 && !is_q) { 11001 unallocated_encoding(s); 11002 return; 11003 } 11004 break; 11005 } 11006 11007 if (!fp_access_check(s)) { 11008 return; 11009 } 11010 11011 switch (opcode) { 11012 case 0x01: /* SQADD, UQADD */ 11013 if (u) { 11014 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size); 11015 } else { 11016 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size); 11017 } 11018 return; 11019 case 0x05: /* SQSUB, UQSUB */ 11020 if (u) { 11021 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size); 11022 } else { 11023 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size); 11024 } 11025 return; 11026 case 0x08: /* SSHL, USHL */ 11027 if (u) { 11028 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size); 11029 } else { 11030 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size); 11031 } 11032 return; 11033 case 0x0c: /* SMAX, UMAX */ 11034 if (u) { 11035 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size); 11036 } else { 11037 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size); 11038 } 11039 return; 11040 case 0x0d: /* SMIN, UMIN */ 11041 if (u) { 11042 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size); 11043 } else { 11044 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size); 11045 } 11046 return; 11047 case 0xe: /* SABD, UABD */ 11048 if (u) { 11049 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size); 11050 } else { 11051 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size); 11052 } 11053 return; 11054 case 0xf: /* SABA, UABA */ 11055 if (u) { 11056 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size); 11057 } else { 11058 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size); 11059 } 11060 return; 11061 case 0x10: /* ADD, SUB */ 11062 if (u) { 11063 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size); 11064 } else { 11065 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size); 11066 } 11067 return; 11068 case 0x13: /* MUL, PMUL */ 11069 if (!u) { /* MUL */ 11070 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size); 11071 } else { /* PMUL */ 11072 gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b); 11073 } 11074 return; 11075 case 0x12: /* MLA, MLS */ 11076 if (u) { 11077 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size); 11078 } else { 11079 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size); 11080 } 11081 return; 11082 case 0x16: /* SQDMULH, SQRDMULH */ 11083 { 11084 static gen_helper_gvec_3_ptr * const fns[2][2] = { 
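            /*
             * Indexed as [size - 1][u]: row 0 is the 16-bit op, row 1 the
             * 32-bit op, and u selects the rounding (SQRDMULH) form.
             */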
11085 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h }, 11086 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s }, 11087 }; 11088 gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]); 11089 } 11090 return; 11091 case 0x11: 11092 if (!u) { /* CMTST */ 11093 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size); 11094 return; 11095 } 11096 /* else CMEQ */ 11097 cond = TCG_COND_EQ; 11098 goto do_gvec_cmp; 11099 case 0x06: /* CMGT, CMHI */ 11100 cond = u ? TCG_COND_GTU : TCG_COND_GT; 11101 goto do_gvec_cmp; 11102 case 0x07: /* CMGE, CMHS */ 11103 cond = u ? TCG_COND_GEU : TCG_COND_GE; 11104 do_gvec_cmp: 11105 tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd), 11106 vec_full_reg_offset(s, rn), 11107 vec_full_reg_offset(s, rm), 11108 is_q ? 16 : 8, vec_full_reg_size(s)); 11109 return; 11110 } 11111 11112 if (size == 3) { 11113 assert(is_q); 11114 for (pass = 0; pass < 2; pass++) { 11115 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 11116 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 11117 TCGv_i64 tcg_res = tcg_temp_new_i64(); 11118 11119 read_vec_element(s, tcg_op1, rn, pass, MO_64); 11120 read_vec_element(s, tcg_op2, rm, pass, MO_64); 11121 11122 handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2); 11123 11124 write_vec_element(s, tcg_res, rd, pass, MO_64); 11125 } 11126 } else { 11127 for (pass = 0; pass < (is_q ? 4 : 2); pass++) { 11128 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11129 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11130 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11131 NeonGenTwoOpFn *genfn = NULL; 11132 NeonGenTwoOpEnvFn *genenvfn = NULL; 11133 11134 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); 11135 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); 11136 11137 switch (opcode) { 11138 case 0x0: /* SHADD, UHADD */ 11139 { 11140 static NeonGenTwoOpFn * const fns[3][2] = { 11141 { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 }, 11142 { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 }, 11143 { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 }, 11144 }; 11145 genfn = fns[size][u]; 11146 break; 11147 } 11148 case 0x2: /* SRHADD, URHADD */ 11149 { 11150 static NeonGenTwoOpFn * const fns[3][2] = { 11151 { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 }, 11152 { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 }, 11153 { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 }, 11154 }; 11155 genfn = fns[size][u]; 11156 break; 11157 } 11158 case 0x4: /* SHSUB, UHSUB */ 11159 { 11160 static NeonGenTwoOpFn * const fns[3][2] = { 11161 { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 }, 11162 { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 }, 11163 { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 }, 11164 }; 11165 genfn = fns[size][u]; 11166 break; 11167 } 11168 case 0x9: /* SQSHL, UQSHL */ 11169 { 11170 static NeonGenTwoOpEnvFn * const fns[3][2] = { 11171 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, 11172 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, 11173 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, 11174 }; 11175 genenvfn = fns[size][u]; 11176 break; 11177 } 11178 case 0xa: /* SRSHL, URSHL */ 11179 { 11180 static NeonGenTwoOpFn * const fns[3][2] = { 11181 { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 }, 11182 { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 }, 11183 { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 }, 11184 }; 11185 genfn = fns[size][u]; 11186 break; 11187 } 11188 case 0xb: /* SQRSHL, UQRSHL */ 11189 { 11190 static NeonGenTwoOpEnvFn * const fns[3][2] = { 11191 { gen_helper_neon_qrshl_s8, 
gen_helper_neon_qrshl_u8 }, 11192 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, 11193 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, 11194 }; 11195 genenvfn = fns[size][u]; 11196 break; 11197 } 11198 default: 11199 g_assert_not_reached(); 11200 } 11201 11202 if (genenvfn) { 11203 genenvfn(tcg_res, tcg_env, tcg_op1, tcg_op2); 11204 } else { 11205 genfn(tcg_res, tcg_op1, tcg_op2); 11206 } 11207 11208 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 11209 } 11210 } 11211 clear_vec_high(s, is_q, rd); 11212 } 11213 11214 /* AdvSIMD three same 11215 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 11216 * +---+---+---+-----------+------+---+------+--------+---+------+------+ 11217 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | 11218 * +---+---+---+-----------+------+---+------+--------+---+------+------+ 11219 */ 11220 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn) 11221 { 11222 int opcode = extract32(insn, 11, 5); 11223 11224 switch (opcode) { 11225 case 0x3: /* logic ops */ 11226 disas_simd_3same_logic(s, insn); 11227 break; 11228 case 0x17: /* ADDP */ 11229 case 0x14: /* SMAXP, UMAXP */ 11230 case 0x15: /* SMINP, UMINP */ 11231 { 11232 /* Pairwise operations */ 11233 int is_q = extract32(insn, 30, 1); 11234 int u = extract32(insn, 29, 1); 11235 int size = extract32(insn, 22, 2); 11236 int rm = extract32(insn, 16, 5); 11237 int rn = extract32(insn, 5, 5); 11238 int rd = extract32(insn, 0, 5); 11239 if (opcode == 0x17) { 11240 if (u || (size == 3 && !is_q)) { 11241 unallocated_encoding(s); 11242 return; 11243 } 11244 } else { 11245 if (size == 3) { 11246 unallocated_encoding(s); 11247 return; 11248 } 11249 } 11250 handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd); 11251 break; 11252 } 11253 case 0x18 ... 0x31: 11254 /* floating point ops, sz[1] and U are part of opcode */ 11255 disas_simd_3same_float(s, insn); 11256 break; 11257 default: 11258 disas_simd_3same_int(s, insn); 11259 break; 11260 } 11261 } 11262 11263 /* 11264 * Advanced SIMD three same (ARMv8.2 FP16 variants) 11265 * 11266 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0 11267 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+ 11268 * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd | 11269 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+ 11270 * 11271 * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE 11272 * (register), FACGE, FABD, FCMGT (register) and FACGT. 11273 * 11274 */ 11275 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) 11276 { 11277 int opcode = extract32(insn, 11, 3); 11278 int u = extract32(insn, 29, 1); 11279 int a = extract32(insn, 23, 1); 11280 int is_q = extract32(insn, 30, 1); 11281 int rm = extract32(insn, 16, 5); 11282 int rn = extract32(insn, 5, 5); 11283 int rd = extract32(insn, 0, 5); 11284 /* 11285 * For these floating point ops, the U, a and opcode bits 11286 * together indicate the operation. 11287 */ 11288 int fpopcode = opcode | (a << 3) | (u << 4); 11289 int datasize = is_q ? 
128 : 64; 11290 int elements = datasize / 16; 11291 bool pairwise; 11292 TCGv_ptr fpst; 11293 int pass; 11294 11295 switch (fpopcode) { 11296 case 0x0: /* FMAXNM */ 11297 case 0x1: /* FMLA */ 11298 case 0x2: /* FADD */ 11299 case 0x3: /* FMULX */ 11300 case 0x4: /* FCMEQ */ 11301 case 0x6: /* FMAX */ 11302 case 0x7: /* FRECPS */ 11303 case 0x8: /* FMINNM */ 11304 case 0x9: /* FMLS */ 11305 case 0xa: /* FSUB */ 11306 case 0xe: /* FMIN */ 11307 case 0xf: /* FRSQRTS */ 11308 case 0x13: /* FMUL */ 11309 case 0x14: /* FCMGE */ 11310 case 0x15: /* FACGE */ 11311 case 0x17: /* FDIV */ 11312 case 0x1a: /* FABD */ 11313 case 0x1c: /* FCMGT */ 11314 case 0x1d: /* FACGT */ 11315 pairwise = false; 11316 break; 11317 case 0x10: /* FMAXNMP */ 11318 case 0x12: /* FADDP */ 11319 case 0x16: /* FMAXP */ 11320 case 0x18: /* FMINNMP */ 11321 case 0x1e: /* FMINP */ 11322 pairwise = true; 11323 break; 11324 default: 11325 unallocated_encoding(s); 11326 return; 11327 } 11328 11329 if (!dc_isar_feature(aa64_fp16, s)) { 11330 unallocated_encoding(s); 11331 return; 11332 } 11333 11334 if (!fp_access_check(s)) { 11335 return; 11336 } 11337 11338 fpst = fpstatus_ptr(FPST_FPCR_F16); 11339 11340 if (pairwise) { 11341 int maxpass = is_q ? 8 : 4; 11342 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11343 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11344 TCGv_i32 tcg_res[8]; 11345 11346 for (pass = 0; pass < maxpass; pass++) { 11347 int passreg = pass < (maxpass / 2) ? rn : rm; 11348 int passelt = (pass << 1) & (maxpass - 1); 11349 11350 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16); 11351 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16); 11352 tcg_res[pass] = tcg_temp_new_i32(); 11353 11354 switch (fpopcode) { 11355 case 0x10: /* FMAXNMP */ 11356 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2, 11357 fpst); 11358 break; 11359 case 0x12: /* FADDP */ 11360 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11361 break; 11362 case 0x16: /* FMAXP */ 11363 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11364 break; 11365 case 0x18: /* FMINNMP */ 11366 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2, 11367 fpst); 11368 break; 11369 case 0x1e: /* FMINP */ 11370 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11371 break; 11372 default: 11373 g_assert_not_reached(); 11374 } 11375 } 11376 11377 for (pass = 0; pass < maxpass; pass++) { 11378 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16); 11379 } 11380 } else { 11381 for (pass = 0; pass < elements; pass++) { 11382 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11383 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11384 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11385 11386 read_vec_element_i32(s, tcg_op1, rn, pass, MO_16); 11387 read_vec_element_i32(s, tcg_op2, rm, pass, MO_16); 11388 11389 switch (fpopcode) { 11390 case 0x0: /* FMAXNM */ 11391 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); 11392 break; 11393 case 0x1: /* FMLA */ 11394 read_vec_element_i32(s, tcg_res, rd, pass, MO_16); 11395 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, 11396 fpst); 11397 break; 11398 case 0x2: /* FADD */ 11399 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); 11400 break; 11401 case 0x3: /* FMULX */ 11402 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst); 11403 break; 11404 case 0x4: /* FCMEQ */ 11405 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11406 break; 11407 case 0x6: /* FMAX */ 11408 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); 11409 break; 11410 
case 0x7: /* FRECPS */ 11411 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11412 break; 11413 case 0x8: /* FMINNM */ 11414 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); 11415 break; 11416 case 0x9: /* FMLS */ 11417 /* As usual for ARM, separate negation for fused multiply-add */ 11418 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); 11419 read_vec_element_i32(s, tcg_res, rd, pass, MO_16); 11420 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, 11421 fpst); 11422 break; 11423 case 0xa: /* FSUB */ 11424 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 11425 break; 11426 case 0xe: /* FMIN */ 11427 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); 11428 break; 11429 case 0xf: /* FRSQRTS */ 11430 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11431 break; 11432 case 0x13: /* FMUL */ 11433 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 11434 break; 11435 case 0x14: /* FCMGE */ 11436 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11437 break; 11438 case 0x15: /* FACGE */ 11439 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11440 break; 11441 case 0x17: /* FDIV */ 11442 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); 11443 break; 11444 case 0x1a: /* FABD */ 11445 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 11446 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff); 11447 break; 11448 case 0x1c: /* FCMGT */ 11449 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11450 break; 11451 case 0x1d: /* FACGT */ 11452 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11453 break; 11454 default: 11455 g_assert_not_reached(); 11456 } 11457 11458 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 11459 } 11460 } 11461 11462 clear_vec_high(s, is_q, rd); 11463 } 11464 11465 /* AdvSIMD three same extra 11466 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 11467 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ 11468 * | 0 | Q | U | 0 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd | 11469 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ 11470 */ 11471 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) 11472 { 11473 int rd = extract32(insn, 0, 5); 11474 int rn = extract32(insn, 5, 5); 11475 int opcode = extract32(insn, 11, 4); 11476 int rm = extract32(insn, 16, 5); 11477 int size = extract32(insn, 22, 2); 11478 bool u = extract32(insn, 29, 1); 11479 bool is_q = extract32(insn, 30, 1); 11480 bool feature; 11481 int rot; 11482 11483 switch (u * 16 + opcode) { 11484 case 0x10: /* SQRDMLAH (vector) */ 11485 case 0x11: /* SQRDMLSH (vector) */ 11486 if (size != 1 && size != 2) { 11487 unallocated_encoding(s); 11488 return; 11489 } 11490 feature = dc_isar_feature(aa64_rdm, s); 11491 break; 11492 case 0x02: /* SDOT (vector) */ 11493 case 0x12: /* UDOT (vector) */ 11494 if (size != MO_32) { 11495 unallocated_encoding(s); 11496 return; 11497 } 11498 feature = dc_isar_feature(aa64_dp, s); 11499 break; 11500 case 0x03: /* USDOT */ 11501 if (size != MO_32) { 11502 unallocated_encoding(s); 11503 return; 11504 } 11505 feature = dc_isar_feature(aa64_i8mm, s); 11506 break; 11507 case 0x04: /* SMMLA */ 11508 case 0x14: /* UMMLA */ 11509 case 0x05: /* USMMLA */ 11510 if (!is_q || size != MO_32) { 11511 unallocated_encoding(s); 11512 return; 11513 } 11514 feature = dc_isar_feature(aa64_i8mm, s); 11515 break; 11516 case 0x18: /* FCMLA, #0 */ 11517 case 0x19: /* FCMLA, #90 */ 11518 case 0x1a: /* FCMLA, #180 */ 11519 case 
0x1b: /* FCMLA, #270 */ 11520 case 0x1c: /* FCADD, #90 */ 11521 case 0x1e: /* FCADD, #270 */ 11522 if (size == 0 11523 || (size == 1 && !dc_isar_feature(aa64_fp16, s)) 11524 || (size == 3 && !is_q)) { 11525 unallocated_encoding(s); 11526 return; 11527 } 11528 feature = dc_isar_feature(aa64_fcma, s); 11529 break; 11530 case 0x1d: /* BFMMLA */ 11531 if (size != MO_16 || !is_q) { 11532 unallocated_encoding(s); 11533 return; 11534 } 11535 feature = dc_isar_feature(aa64_bf16, s); 11536 break; 11537 case 0x1f: 11538 switch (size) { 11539 case 1: /* BFDOT */ 11540 case 3: /* BFMLAL{B,T} */ 11541 feature = dc_isar_feature(aa64_bf16, s); 11542 break; 11543 default: 11544 unallocated_encoding(s); 11545 return; 11546 } 11547 break; 11548 default: 11549 unallocated_encoding(s); 11550 return; 11551 } 11552 if (!feature) { 11553 unallocated_encoding(s); 11554 return; 11555 } 11556 if (!fp_access_check(s)) { 11557 return; 11558 } 11559 11560 switch (opcode) { 11561 case 0x0: /* SQRDMLAH (vector) */ 11562 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size); 11563 return; 11564 11565 case 0x1: /* SQRDMLSH (vector) */ 11566 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size); 11567 return; 11568 11569 case 0x2: /* SDOT / UDOT */ 11570 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, 11571 u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b); 11572 return; 11573 11574 case 0x3: /* USDOT */ 11575 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b); 11576 return; 11577 11578 case 0x04: /* SMMLA, UMMLA */ 11579 gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, 11580 u ? gen_helper_gvec_ummla_b 11581 : gen_helper_gvec_smmla_b); 11582 return; 11583 case 0x05: /* USMMLA */ 11584 gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b); 11585 return; 11586 11587 case 0x8: /* FCMLA, #0 */ 11588 case 0x9: /* FCMLA, #90 */ 11589 case 0xa: /* FCMLA, #180 */ 11590 case 0xb: /* FCMLA, #270 */ 11591 rot = extract32(opcode, 0, 2); 11592 switch (size) { 11593 case 1: 11594 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot, 11595 gen_helper_gvec_fcmlah); 11596 break; 11597 case 2: 11598 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot, 11599 gen_helper_gvec_fcmlas); 11600 break; 11601 case 3: 11602 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot, 11603 gen_helper_gvec_fcmlad); 11604 break; 11605 default: 11606 g_assert_not_reached(); 11607 } 11608 return; 11609 11610 case 0xc: /* FCADD, #90 */ 11611 case 0xe: /* FCADD, #270 */ 11612 rot = extract32(opcode, 1, 1); 11613 switch (size) { 11614 case 1: 11615 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, 11616 gen_helper_gvec_fcaddh); 11617 break; 11618 case 2: 11619 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, 11620 gen_helper_gvec_fcadds); 11621 break; 11622 case 3: 11623 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, 11624 gen_helper_gvec_fcaddd); 11625 break; 11626 default: 11627 g_assert_not_reached(); 11628 } 11629 return; 11630 11631 case 0xd: /* BFMMLA */ 11632 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla); 11633 return; 11634 case 0xf: 11635 switch (size) { 11636 case 1: /* BFDOT */ 11637 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot); 11638 break; 11639 case 3: /* BFMLAL{B,T} */ 11640 gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q, 11641 gen_helper_gvec_bfmlal); 11642 break; 11643 default: 11644 g_assert_not_reached(); 11645 } 11646 return; 11647 11648 default: 11649 g_assert_not_reached(); 11650 } 11651 } 11652 11653 static void 
handle_2misc_widening(DisasContext *s, int opcode, bool is_q, 11654 int size, int rn, int rd) 11655 { 11656 /* Handle 2-reg-misc ops which are widening (so each size element 11657 * in the source becomes a 2*size element in the destination. 11658 * The only instruction like this is FCVTL. 11659 */ 11660 int pass; 11661 11662 if (size == 3) { 11663 /* 32 -> 64 bit fp conversion */ 11664 TCGv_i64 tcg_res[2]; 11665 int srcelt = is_q ? 2 : 0; 11666 11667 for (pass = 0; pass < 2; pass++) { 11668 TCGv_i32 tcg_op = tcg_temp_new_i32(); 11669 tcg_res[pass] = tcg_temp_new_i64(); 11670 11671 read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32); 11672 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env); 11673 } 11674 for (pass = 0; pass < 2; pass++) { 11675 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 11676 } 11677 } else { 11678 /* 16 -> 32 bit fp conversion */ 11679 int srcelt = is_q ? 4 : 0; 11680 TCGv_i32 tcg_res[4]; 11681 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 11682 TCGv_i32 ahp = get_ahp_flag(); 11683 11684 for (pass = 0; pass < 4; pass++) { 11685 tcg_res[pass] = tcg_temp_new_i32(); 11686 11687 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); 11688 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], 11689 fpst, ahp); 11690 } 11691 for (pass = 0; pass < 4; pass++) { 11692 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); 11693 } 11694 } 11695 } 11696 11697 static void handle_rev(DisasContext *s, int opcode, bool u, 11698 bool is_q, int size, int rn, int rd) 11699 { 11700 int op = (opcode << 1) | u; 11701 int opsz = op + size; 11702 int grp_size = 3 - opsz; 11703 int dsize = is_q ? 128 : 64; 11704 int i; 11705 11706 if (opsz >= 3) { 11707 unallocated_encoding(s); 11708 return; 11709 } 11710 11711 if (!fp_access_check(s)) { 11712 return; 11713 } 11714 11715 if (size == 0) { 11716 /* Special case bytes, use bswap op on each group of elements */ 11717 int groups = dsize / (8 << grp_size); 11718 11719 for (i = 0; i < groups; i++) { 11720 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 11721 11722 read_vec_element(s, tcg_tmp, rn, i, grp_size); 11723 switch (grp_size) { 11724 case MO_16: 11725 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 11726 break; 11727 case MO_32: 11728 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 11729 break; 11730 case MO_64: 11731 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp); 11732 break; 11733 default: 11734 g_assert_not_reached(); 11735 } 11736 write_vec_element(s, tcg_tmp, rd, i, grp_size); 11737 } 11738 clear_vec_high(s, is_q, rd); 11739 } else { 11740 int revmask = (1 << grp_size) - 1; 11741 int esize = 8 << size; 11742 int elements = dsize / esize; 11743 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 11744 TCGv_i64 tcg_rd[2]; 11745 11746 for (i = 0; i < 2; i++) { 11747 tcg_rd[i] = tcg_temp_new_i64(); 11748 tcg_gen_movi_i64(tcg_rd[i], 0); 11749 } 11750 11751 for (i = 0; i < elements; i++) { 11752 int e_rev = (i & 0xf) ^ revmask; 11753 int w = (e_rev * esize) / 64; 11754 int o = (e_rev * esize) % 64; 11755 11756 read_vec_element(s, tcg_rn, rn, i, size); 11757 tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize); 11758 } 11759 11760 for (i = 0; i < 2; i++) { 11761 write_vec_element(s, tcg_rd[i], rd, i, MO_64); 11762 } 11763 clear_vec_high(s, true, rd); 11764 } 11765 } 11766 11767 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, 11768 bool is_q, int size, int rn, int rd) 11769 { 11770 /* Implement the pairwise operations from 2-misc: 11771 * SADDLP, UADDLP, SADALP, UADALP. 
     * These all add pairs of elements in the input to produce a
     * double-width result element in the output (possibly accumulating).
     */
    bool accum = (opcode == 0x6);
    int maxpass = is_q ? 2 : 1;
    int pass;
    TCGv_i64 tcg_res[2];

    if (size == 2) {
        /* 32 + 32 -> 64 op */
        MemOp memop = size + (u ? 0 : MO_SIGN);

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();

            tcg_res[pass] = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass * 2, memop);
            read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
            tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
            if (accum) {
                read_vec_element(s, tcg_op1, rd, pass, MO_64);
                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
            }
        }
    } else {
        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();
            NeonGenOne64OpFn *genfn;
            static NeonGenOne64OpFn * const fns[2][2] = {
                { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
                { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
            };

            genfn = fns[size][u];

            tcg_res[pass] = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);
            genfn(tcg_res[pass], tcg_op);

            if (accum) {
                read_vec_element(s, tcg_op, rd, pass, MO_64);
                if (size == 0) {
                    gen_helper_neon_addl_u16(tcg_res[pass],
                                             tcg_res[pass], tcg_op);
                } else {
                    gen_helper_neon_addl_u32(tcg_res[pass],
                                             tcg_res[pass], tcg_op);
                }
            }
        }
    }
    if (!is_q) {
        tcg_res[1] = tcg_constant_i64(0);
    }
    for (pass = 0; pass < 2; pass++) {
        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
    }
}

static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
{
    /* Implement SHLL and SHLL2 */
    int pass;
    int part = is_q ?
2 : 0; 11839 TCGv_i64 tcg_res[2]; 11840 11841 for (pass = 0; pass < 2; pass++) { 11842 static NeonGenWidenFn * const widenfns[3] = { 11843 gen_helper_neon_widen_u8, 11844 gen_helper_neon_widen_u16, 11845 tcg_gen_extu_i32_i64, 11846 }; 11847 NeonGenWidenFn *widenfn = widenfns[size]; 11848 TCGv_i32 tcg_op = tcg_temp_new_i32(); 11849 11850 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32); 11851 tcg_res[pass] = tcg_temp_new_i64(); 11852 widenfn(tcg_res[pass], tcg_op); 11853 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size); 11854 } 11855 11856 for (pass = 0; pass < 2; pass++) { 11857 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 11858 } 11859 } 11860 11861 /* AdvSIMD two reg misc 11862 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 11863 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 11864 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 11865 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 11866 */ 11867 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) 11868 { 11869 int size = extract32(insn, 22, 2); 11870 int opcode = extract32(insn, 12, 5); 11871 bool u = extract32(insn, 29, 1); 11872 bool is_q = extract32(insn, 30, 1); 11873 int rn = extract32(insn, 5, 5); 11874 int rd = extract32(insn, 0, 5); 11875 bool need_fpstatus = false; 11876 int rmode = -1; 11877 TCGv_i32 tcg_rmode; 11878 TCGv_ptr tcg_fpstatus; 11879 11880 switch (opcode) { 11881 case 0x0: /* REV64, REV32 */ 11882 case 0x1: /* REV16 */ 11883 handle_rev(s, opcode, u, is_q, size, rn, rd); 11884 return; 11885 case 0x5: /* CNT, NOT, RBIT */ 11886 if (u && size == 0) { 11887 /* NOT */ 11888 break; 11889 } else if (u && size == 1) { 11890 /* RBIT */ 11891 break; 11892 } else if (!u && size == 0) { 11893 /* CNT */ 11894 break; 11895 } 11896 unallocated_encoding(s); 11897 return; 11898 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */ 11899 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */ 11900 if (size == 3) { 11901 unallocated_encoding(s); 11902 return; 11903 } 11904 if (!fp_access_check(s)) { 11905 return; 11906 } 11907 11908 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd); 11909 return; 11910 case 0x4: /* CLS, CLZ */ 11911 if (size == 3) { 11912 unallocated_encoding(s); 11913 return; 11914 } 11915 break; 11916 case 0x2: /* SADDLP, UADDLP */ 11917 case 0x6: /* SADALP, UADALP */ 11918 if (size == 3) { 11919 unallocated_encoding(s); 11920 return; 11921 } 11922 if (!fp_access_check(s)) { 11923 return; 11924 } 11925 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd); 11926 return; 11927 case 0x13: /* SHLL, SHLL2 */ 11928 if (u == 0 || size == 3) { 11929 unallocated_encoding(s); 11930 return; 11931 } 11932 if (!fp_access_check(s)) { 11933 return; 11934 } 11935 handle_shll(s, is_q, size, rn, rd); 11936 return; 11937 case 0xa: /* CMLT */ 11938 if (u == 1) { 11939 unallocated_encoding(s); 11940 return; 11941 } 11942 /* fall through */ 11943 case 0x8: /* CMGT, CMGE */ 11944 case 0x9: /* CMEQ, CMLE */ 11945 case 0xb: /* ABS, NEG */ 11946 if (size == 3 && !is_q) { 11947 unallocated_encoding(s); 11948 return; 11949 } 11950 break; 11951 case 0x3: /* SUQADD, USQADD */ 11952 if (size == 3 && !is_q) { 11953 unallocated_encoding(s); 11954 return; 11955 } 11956 if (!fp_access_check(s)) { 11957 return; 11958 } 11959 handle_2misc_satacc(s, false, u, is_q, size, rn, rd); 11960 return; 11961 case 0x7: /* SQABS, SQNEG */ 11962 if (size == 3 && !is_q) { 11963 unallocated_encoding(s); 11964 return; 11965 } 11966 break; 11967 case 
0xc ... 0xf: 11968 case 0x16 ... 0x1f: 11969 { 11970 /* Floating point: U, size[1] and opcode indicate operation; 11971 * size[0] indicates single or double precision. 11972 */ 11973 int is_double = extract32(size, 0, 1); 11974 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 11975 size = is_double ? 3 : 2; 11976 switch (opcode) { 11977 case 0x2f: /* FABS */ 11978 case 0x6f: /* FNEG */ 11979 if (size == 3 && !is_q) { 11980 unallocated_encoding(s); 11981 return; 11982 } 11983 break; 11984 case 0x1d: /* SCVTF */ 11985 case 0x5d: /* UCVTF */ 11986 { 11987 bool is_signed = (opcode == 0x1d) ? true : false; 11988 int elements = is_double ? 2 : is_q ? 4 : 2; 11989 if (is_double && !is_q) { 11990 unallocated_encoding(s); 11991 return; 11992 } 11993 if (!fp_access_check(s)) { 11994 return; 11995 } 11996 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size); 11997 return; 11998 } 11999 case 0x2c: /* FCMGT (zero) */ 12000 case 0x2d: /* FCMEQ (zero) */ 12001 case 0x2e: /* FCMLT (zero) */ 12002 case 0x6c: /* FCMGE (zero) */ 12003 case 0x6d: /* FCMLE (zero) */ 12004 if (size == 3 && !is_q) { 12005 unallocated_encoding(s); 12006 return; 12007 } 12008 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd); 12009 return; 12010 case 0x7f: /* FSQRT */ 12011 if (size == 3 && !is_q) { 12012 unallocated_encoding(s); 12013 return; 12014 } 12015 break; 12016 case 0x1a: /* FCVTNS */ 12017 case 0x1b: /* FCVTMS */ 12018 case 0x3a: /* FCVTPS */ 12019 case 0x3b: /* FCVTZS */ 12020 case 0x5a: /* FCVTNU */ 12021 case 0x5b: /* FCVTMU */ 12022 case 0x7a: /* FCVTPU */ 12023 case 0x7b: /* FCVTZU */ 12024 need_fpstatus = true; 12025 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 12026 if (size == 3 && !is_q) { 12027 unallocated_encoding(s); 12028 return; 12029 } 12030 break; 12031 case 0x5c: /* FCVTAU */ 12032 case 0x1c: /* FCVTAS */ 12033 need_fpstatus = true; 12034 rmode = FPROUNDING_TIEAWAY; 12035 if (size == 3 && !is_q) { 12036 unallocated_encoding(s); 12037 return; 12038 } 12039 break; 12040 case 0x3c: /* URECPE */ 12041 if (size == 3) { 12042 unallocated_encoding(s); 12043 return; 12044 } 12045 /* fall through */ 12046 case 0x3d: /* FRECPE */ 12047 case 0x7d: /* FRSQRTE */ 12048 if (size == 3 && !is_q) { 12049 unallocated_encoding(s); 12050 return; 12051 } 12052 if (!fp_access_check(s)) { 12053 return; 12054 } 12055 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd); 12056 return; 12057 case 0x56: /* FCVTXN, FCVTXN2 */ 12058 if (size == 2) { 12059 unallocated_encoding(s); 12060 return; 12061 } 12062 /* fall through */ 12063 case 0x16: /* FCVTN, FCVTN2 */ 12064 /* handle_2misc_narrow does a 2*size -> size operation, but these 12065 * instructions encode the source size rather than dest size. 
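             * That is why we pass size - 1, not size, to handle_2misc_narrow
             * below.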
12066 */ 12067 if (!fp_access_check(s)) { 12068 return; 12069 } 12070 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 12071 return; 12072 case 0x36: /* BFCVTN, BFCVTN2 */ 12073 if (!dc_isar_feature(aa64_bf16, s) || size != 2) { 12074 unallocated_encoding(s); 12075 return; 12076 } 12077 if (!fp_access_check(s)) { 12078 return; 12079 } 12080 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 12081 return; 12082 case 0x17: /* FCVTL, FCVTL2 */ 12083 if (!fp_access_check(s)) { 12084 return; 12085 } 12086 handle_2misc_widening(s, opcode, is_q, size, rn, rd); 12087 return; 12088 case 0x18: /* FRINTN */ 12089 case 0x19: /* FRINTM */ 12090 case 0x38: /* FRINTP */ 12091 case 0x39: /* FRINTZ */ 12092 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 12093 /* fall through */ 12094 case 0x59: /* FRINTX */ 12095 case 0x79: /* FRINTI */ 12096 need_fpstatus = true; 12097 if (size == 3 && !is_q) { 12098 unallocated_encoding(s); 12099 return; 12100 } 12101 break; 12102 case 0x58: /* FRINTA */ 12103 rmode = FPROUNDING_TIEAWAY; 12104 need_fpstatus = true; 12105 if (size == 3 && !is_q) { 12106 unallocated_encoding(s); 12107 return; 12108 } 12109 break; 12110 case 0x7c: /* URSQRTE */ 12111 if (size == 3) { 12112 unallocated_encoding(s); 12113 return; 12114 } 12115 break; 12116 case 0x1e: /* FRINT32Z */ 12117 case 0x1f: /* FRINT64Z */ 12118 rmode = FPROUNDING_ZERO; 12119 /* fall through */ 12120 case 0x5e: /* FRINT32X */ 12121 case 0x5f: /* FRINT64X */ 12122 need_fpstatus = true; 12123 if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) { 12124 unallocated_encoding(s); 12125 return; 12126 } 12127 break; 12128 default: 12129 unallocated_encoding(s); 12130 return; 12131 } 12132 break; 12133 } 12134 default: 12135 unallocated_encoding(s); 12136 return; 12137 } 12138 12139 if (!fp_access_check(s)) { 12140 return; 12141 } 12142 12143 if (need_fpstatus || rmode >= 0) { 12144 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 12145 } else { 12146 tcg_fpstatus = NULL; 12147 } 12148 if (rmode >= 0) { 12149 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 12150 } else { 12151 tcg_rmode = NULL; 12152 } 12153 12154 switch (opcode) { 12155 case 0x5: 12156 if (u && size == 0) { /* NOT */ 12157 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0); 12158 return; 12159 } 12160 break; 12161 case 0x8: /* CMGT, CMGE */ 12162 if (u) { 12163 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size); 12164 } else { 12165 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size); 12166 } 12167 return; 12168 case 0x9: /* CMEQ, CMLE */ 12169 if (u) { 12170 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size); 12171 } else { 12172 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size); 12173 } 12174 return; 12175 case 0xa: /* CMLT */ 12176 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size); 12177 return; 12178 case 0xb: 12179 if (u) { /* ABS, NEG */ 12180 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size); 12181 } else { 12182 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size); 12183 } 12184 return; 12185 } 12186 12187 if (size == 3) { 12188 /* All 64-bit element operations can be shared with scalar 2misc */ 12189 int pass; 12190 12191 /* Coverity claims (size == 3 && !is_q) has been eliminated 12192 * from all paths leading to here. 
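 * The tcg_debug_assert below makes that invariant explicit: every size == 3 path that reaches this point has already rejected !is_q as unallocated.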
12193 */ 12194 tcg_debug_assert(is_q); 12195 for (pass = 0; pass < 2; pass++) { 12196 TCGv_i64 tcg_op = tcg_temp_new_i64(); 12197 TCGv_i64 tcg_res = tcg_temp_new_i64(); 12198 12199 read_vec_element(s, tcg_op, rn, pass, MO_64); 12200 12201 handle_2misc_64(s, opcode, u, tcg_res, tcg_op, 12202 tcg_rmode, tcg_fpstatus); 12203 12204 write_vec_element(s, tcg_res, rd, pass, MO_64); 12205 } 12206 } else { 12207 int pass; 12208 12209 for (pass = 0; pass < (is_q ? 4 : 2); pass++) { 12210 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12211 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12212 12213 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 12214 12215 if (size == 2) { 12216 /* Special cases for 32 bit elements */ 12217 switch (opcode) { 12218 case 0x4: /* CLS */ 12219 if (u) { 12220 tcg_gen_clzi_i32(tcg_res, tcg_op, 32); 12221 } else { 12222 tcg_gen_clrsb_i32(tcg_res, tcg_op); 12223 } 12224 break; 12225 case 0x7: /* SQABS, SQNEG */ 12226 if (u) { 12227 gen_helper_neon_qneg_s32(tcg_res, tcg_env, tcg_op); 12228 } else { 12229 gen_helper_neon_qabs_s32(tcg_res, tcg_env, tcg_op); 12230 } 12231 break; 12232 case 0x2f: /* FABS */ 12233 gen_helper_vfp_abss(tcg_res, tcg_op); 12234 break; 12235 case 0x6f: /* FNEG */ 12236 gen_helper_vfp_negs(tcg_res, tcg_op); 12237 break; 12238 case 0x7f: /* FSQRT */ 12239 gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); 12240 break; 12241 case 0x1a: /* FCVTNS */ 12242 case 0x1b: /* FCVTMS */ 12243 case 0x1c: /* FCVTAS */ 12244 case 0x3a: /* FCVTPS */ 12245 case 0x3b: /* FCVTZS */ 12246 gen_helper_vfp_tosls(tcg_res, tcg_op, 12247 tcg_constant_i32(0), tcg_fpstatus); 12248 break; 12249 case 0x5a: /* FCVTNU */ 12250 case 0x5b: /* FCVTMU */ 12251 case 0x5c: /* FCVTAU */ 12252 case 0x7a: /* FCVTPU */ 12253 case 0x7b: /* FCVTZU */ 12254 gen_helper_vfp_touls(tcg_res, tcg_op, 12255 tcg_constant_i32(0), tcg_fpstatus); 12256 break; 12257 case 0x18: /* FRINTN */ 12258 case 0x19: /* FRINTM */ 12259 case 0x38: /* FRINTP */ 12260 case 0x39: /* FRINTZ */ 12261 case 0x58: /* FRINTA */ 12262 case 0x79: /* FRINTI */ 12263 gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus); 12264 break; 12265 case 0x59: /* FRINTX */ 12266 gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus); 12267 break; 12268 case 0x7c: /* URSQRTE */ 12269 gen_helper_rsqrte_u32(tcg_res, tcg_op); 12270 break; 12271 case 0x1e: /* FRINT32Z */ 12272 case 0x5e: /* FRINT32X */ 12273 gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus); 12274 break; 12275 case 0x1f: /* FRINT64Z */ 12276 case 0x5f: /* FRINT64X */ 12277 gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus); 12278 break; 12279 default: 12280 g_assert_not_reached(); 12281 } 12282 } else { 12283 /* Use helpers for 8 and 16 bit elements */ 12284 switch (opcode) { 12285 case 0x5: /* CNT, RBIT */ 12286 /* For these two insns size is part of the opcode specifier 12287 * (handled earlier); they always operate on byte elements. 
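 * Here u selects between them: u == 1 is RBIT, u == 0 is CNT.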
12288 */ 12289 if (u) { 12290 gen_helper_neon_rbit_u8(tcg_res, tcg_op); 12291 } else { 12292 gen_helper_neon_cnt_u8(tcg_res, tcg_op); 12293 } 12294 break; 12295 case 0x7: /* SQABS, SQNEG */ 12296 { 12297 NeonGenOneOpEnvFn *genfn; 12298 static NeonGenOneOpEnvFn * const fns[2][2] = { 12299 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 12300 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 12301 }; 12302 genfn = fns[size][u]; 12303 genfn(tcg_res, tcg_env, tcg_op); 12304 break; 12305 } 12306 case 0x4: /* CLS, CLZ */ 12307 if (u) { 12308 if (size == 0) { 12309 gen_helper_neon_clz_u8(tcg_res, tcg_op); 12310 } else { 12311 gen_helper_neon_clz_u16(tcg_res, tcg_op); 12312 } 12313 } else { 12314 if (size == 0) { 12315 gen_helper_neon_cls_s8(tcg_res, tcg_op); 12316 } else { 12317 gen_helper_neon_cls_s16(tcg_res, tcg_op); 12318 } 12319 } 12320 break; 12321 default: 12322 g_assert_not_reached(); 12323 } 12324 } 12325 12326 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 12327 } 12328 } 12329 clear_vec_high(s, is_q, rd); 12330 12331 if (tcg_rmode) { 12332 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 12333 } 12334 } 12335 12336 /* AdvSIMD [scalar] two register miscellaneous (FP16) 12337 * 12338 * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0 12339 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 12340 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd | 12341 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 12342 * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00 12343 * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800 12344 * 12345 * This actually covers two groups where scalar access is governed by 12346 * bit 28. A bunch of the instructions (float to integral) only exist 12347 * in the vector form and are un-allocated for the scalar decode. Also 12348 * in the scalar decode Q is always 1. 12349 */ 12350 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) 12351 { 12352 int fpop, opcode, a, u; 12353 int rn, rd; 12354 bool is_q; 12355 bool is_scalar; 12356 bool only_in_vector = false; 12357 12358 int pass; 12359 TCGv_i32 tcg_rmode = NULL; 12360 TCGv_ptr tcg_fpstatus = NULL; 12361 bool need_fpst = true; 12362 int rmode = -1; 12363 12364 if (!dc_isar_feature(aa64_fp16, s)) { 12365 unallocated_encoding(s); 12366 return; 12367 } 12368 12369 rd = extract32(insn, 0, 5); 12370 rn = extract32(insn, 5, 5); 12371 12372 a = extract32(insn, 23, 1); 12373 u = extract32(insn, 29, 1); 12374 is_scalar = extract32(insn, 28, 1); 12375 is_q = extract32(insn, 30, 1); 12376 12377 opcode = extract32(insn, 12, 5); 12378 fpop = deposit32(opcode, 5, 1, a); 12379 fpop = deposit32(fpop, 6, 1, u); 12380 12381 switch (fpop) { 12382 case 0x1d: /* SCVTF */ 12383 case 0x5d: /* UCVTF */ 12384 { 12385 int elements; 12386 12387 if (is_scalar) { 12388 elements = 1; 12389 } else { 12390 elements = (is_q ? 
8 : 4); 12391 } 12392 12393 if (!fp_access_check(s)) { 12394 return; 12395 } 12396 handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16); 12397 return; 12398 } 12399 break; 12400 case 0x2c: /* FCMGT (zero) */ 12401 case 0x2d: /* FCMEQ (zero) */ 12402 case 0x2e: /* FCMLT (zero) */ 12403 case 0x6c: /* FCMGE (zero) */ 12404 case 0x6d: /* FCMLE (zero) */ 12405 handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd); 12406 return; 12407 case 0x3d: /* FRECPE */ 12408 case 0x3f: /* FRECPX */ 12409 break; 12410 case 0x18: /* FRINTN */ 12411 only_in_vector = true; 12412 rmode = FPROUNDING_TIEEVEN; 12413 break; 12414 case 0x19: /* FRINTM */ 12415 only_in_vector = true; 12416 rmode = FPROUNDING_NEGINF; 12417 break; 12418 case 0x38: /* FRINTP */ 12419 only_in_vector = true; 12420 rmode = FPROUNDING_POSINF; 12421 break; 12422 case 0x39: /* FRINTZ */ 12423 only_in_vector = true; 12424 rmode = FPROUNDING_ZERO; 12425 break; 12426 case 0x58: /* FRINTA */ 12427 only_in_vector = true; 12428 rmode = FPROUNDING_TIEAWAY; 12429 break; 12430 case 0x59: /* FRINTX */ 12431 case 0x79: /* FRINTI */ 12432 only_in_vector = true; 12433 /* current rounding mode */ 12434 break; 12435 case 0x1a: /* FCVTNS */ 12436 rmode = FPROUNDING_TIEEVEN; 12437 break; 12438 case 0x1b: /* FCVTMS */ 12439 rmode = FPROUNDING_NEGINF; 12440 break; 12441 case 0x1c: /* FCVTAS */ 12442 rmode = FPROUNDING_TIEAWAY; 12443 break; 12444 case 0x3a: /* FCVTPS */ 12445 rmode = FPROUNDING_POSINF; 12446 break; 12447 case 0x3b: /* FCVTZS */ 12448 rmode = FPROUNDING_ZERO; 12449 break; 12450 case 0x5a: /* FCVTNU */ 12451 rmode = FPROUNDING_TIEEVEN; 12452 break; 12453 case 0x5b: /* FCVTMU */ 12454 rmode = FPROUNDING_NEGINF; 12455 break; 12456 case 0x5c: /* FCVTAU */ 12457 rmode = FPROUNDING_TIEAWAY; 12458 break; 12459 case 0x7a: /* FCVTPU */ 12460 rmode = FPROUNDING_POSINF; 12461 break; 12462 case 0x7b: /* FCVTZU */ 12463 rmode = FPROUNDING_ZERO; 12464 break; 12465 case 0x2f: /* FABS */ 12466 case 0x6f: /* FNEG */ 12467 need_fpst = false; 12468 break; 12469 case 0x7d: /* FRSQRTE */ 12470 case 0x7f: /* FSQRT (vector) */ 12471 break; 12472 default: 12473 unallocated_encoding(s); 12474 return; 12475 } 12476 12477 12478 /* Check additional constraints for the scalar encoding */ 12479 if (is_scalar) { 12480 if (!is_q) { 12481 unallocated_encoding(s); 12482 return; 12483 } 12484 /* FRINTxx is only in the vector form */ 12485 if (only_in_vector) { 12486 unallocated_encoding(s); 12487 return; 12488 } 12489 } 12490 12491 if (!fp_access_check(s)) { 12492 return; 12493 } 12494 12495 if (rmode >= 0 || need_fpst) { 12496 tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16); 12497 } 12498 12499 if (rmode >= 0) { 12500 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 12501 } 12502 12503 if (is_scalar) { 12504 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 12505 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12506 12507 switch (fpop) { 12508 case 0x1a: /* FCVTNS */ 12509 case 0x1b: /* FCVTMS */ 12510 case 0x1c: /* FCVTAS */ 12511 case 0x3a: /* FCVTPS */ 12512 case 0x3b: /* FCVTZS */ 12513 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 12514 break; 12515 case 0x3d: /* FRECPE */ 12516 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 12517 break; 12518 case 0x3f: /* FRECPX */ 12519 gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus); 12520 break; 12521 case 0x5a: /* FCVTNU */ 12522 case 0x5b: /* FCVTMU */ 12523 case 0x5c: /* FCVTAU */ 12524 case 0x7a: /* FCVTPU */ 12525 case 0x7b: /* FCVTZU */ 12526 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 12527 
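/* These conversions use the rounding mode installed above via gen_set_rmode(). */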
break; 12528 case 0x6f: /* FNEG */ 12529 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 12530 break; 12531 case 0x7d: /* FRSQRTE */ 12532 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 12533 break; 12534 default: 12535 g_assert_not_reached(); 12536 } 12537 12538 /* limit any sign extension going on */ 12539 tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff); 12540 write_fp_sreg(s, rd, tcg_res); 12541 } else { 12542 for (pass = 0; pass < (is_q ? 8 : 4); pass++) { 12543 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12544 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12545 12546 read_vec_element_i32(s, tcg_op, rn, pass, MO_16); 12547 12548 switch (fpop) { 12549 case 0x1a: /* FCVTNS */ 12550 case 0x1b: /* FCVTMS */ 12551 case 0x1c: /* FCVTAS */ 12552 case 0x3a: /* FCVTPS */ 12553 case 0x3b: /* FCVTZS */ 12554 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 12555 break; 12556 case 0x3d: /* FRECPE */ 12557 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 12558 break; 12559 case 0x5a: /* FCVTNU */ 12560 case 0x5b: /* FCVTMU */ 12561 case 0x5c: /* FCVTAU */ 12562 case 0x7a: /* FCVTPU */ 12563 case 0x7b: /* FCVTZU */ 12564 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 12565 break; 12566 case 0x18: /* FRINTN */ 12567 case 0x19: /* FRINTM */ 12568 case 0x38: /* FRINTP */ 12569 case 0x39: /* FRINTZ */ 12570 case 0x58: /* FRINTA */ 12571 case 0x79: /* FRINTI */ 12572 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus); 12573 break; 12574 case 0x59: /* FRINTX */ 12575 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus); 12576 break; 12577 case 0x2f: /* FABS */ 12578 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); 12579 break; 12580 case 0x6f: /* FNEG */ 12581 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 12582 break; 12583 case 0x7d: /* FRSQRTE */ 12584 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 12585 break; 12586 case 0x7f: /* FSQRT */ 12587 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus); 12588 break; 12589 default: 12590 g_assert_not_reached(); 12591 } 12592 12593 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 12594 } 12595 12596 clear_vec_high(s, is_q, rd); 12597 } 12598 12599 if (tcg_rmode) { 12600 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 12601 } 12602 } 12603 12604 /* AdvSIMD scalar x indexed element 12605 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 12606 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ 12607 * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | 12608 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ 12609 * AdvSIMD vector x indexed element 12610 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 12611 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ 12612 * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | 12613 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ 12614 */ 12615 static void disas_simd_indexed(DisasContext *s, uint32_t insn) 12616 { 12617 /* This encoding has two kinds of instruction: 12618 * normal, where we perform elt x idxelt => elt for each 12619 * element in the vector 12620 * long, where we perform elt x idxelt and generate a result of 12621 * double the width of the input element 12622 * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs). 
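 * For the vector forms of the long ops the Q bit selects the part: the base insn reads the low half of the source vector, the INSN2 form the high half.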
12623 */ 12624 bool is_scalar = extract32(insn, 28, 1); 12625 bool is_q = extract32(insn, 30, 1); 12626 bool u = extract32(insn, 29, 1); 12627 int size = extract32(insn, 22, 2); 12628 int l = extract32(insn, 21, 1); 12629 int m = extract32(insn, 20, 1); 12630 /* Note that the Rm field here is only 4 bits, not 5 as it usually is */ 12631 int rm = extract32(insn, 16, 4); 12632 int opcode = extract32(insn, 12, 4); 12633 int h = extract32(insn, 11, 1); 12634 int rn = extract32(insn, 5, 5); 12635 int rd = extract32(insn, 0, 5); 12636 bool is_long = false; 12637 int is_fp = 0; 12638 bool is_fp16 = false; 12639 int index; 12640 TCGv_ptr fpst; 12641 12642 switch (16 * u + opcode) { 12643 case 0x08: /* MUL */ 12644 case 0x10: /* MLA */ 12645 case 0x14: /* MLS */ 12646 if (is_scalar) { 12647 unallocated_encoding(s); 12648 return; 12649 } 12650 break; 12651 case 0x02: /* SMLAL, SMLAL2 */ 12652 case 0x12: /* UMLAL, UMLAL2 */ 12653 case 0x06: /* SMLSL, SMLSL2 */ 12654 case 0x16: /* UMLSL, UMLSL2 */ 12655 case 0x0a: /* SMULL, SMULL2 */ 12656 case 0x1a: /* UMULL, UMULL2 */ 12657 if (is_scalar) { 12658 unallocated_encoding(s); 12659 return; 12660 } 12661 is_long = true; 12662 break; 12663 case 0x03: /* SQDMLAL, SQDMLAL2 */ 12664 case 0x07: /* SQDMLSL, SQDMLSL2 */ 12665 case 0x0b: /* SQDMULL, SQDMULL2 */ 12666 is_long = true; 12667 break; 12668 case 0x0c: /* SQDMULH */ 12669 case 0x0d: /* SQRDMULH */ 12670 break; 12671 case 0x01: /* FMLA */ 12672 case 0x05: /* FMLS */ 12673 case 0x09: /* FMUL */ 12674 case 0x19: /* FMULX */ 12675 is_fp = 1; 12676 break; 12677 case 0x1d: /* SQRDMLAH */ 12678 case 0x1f: /* SQRDMLSH */ 12679 if (!dc_isar_feature(aa64_rdm, s)) { 12680 unallocated_encoding(s); 12681 return; 12682 } 12683 break; 12684 case 0x0e: /* SDOT */ 12685 case 0x1e: /* UDOT */ 12686 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) { 12687 unallocated_encoding(s); 12688 return; 12689 } 12690 break; 12691 case 0x0f: 12692 switch (size) { 12693 case 0: /* SUDOT */ 12694 case 2: /* USDOT */ 12695 if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) { 12696 unallocated_encoding(s); 12697 return; 12698 } 12699 size = MO_32; 12700 break; 12701 case 1: /* BFDOT */ 12702 if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { 12703 unallocated_encoding(s); 12704 return; 12705 } 12706 size = MO_32; 12707 break; 12708 case 3: /* BFMLAL{B,T} */ 12709 if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { 12710 unallocated_encoding(s); 12711 return; 12712 } 12713 /* can't set is_fp without other incorrect size checks */ 12714 size = MO_16; 12715 break; 12716 default: 12717 unallocated_encoding(s); 12718 return; 12719 } 12720 break; 12721 case 0x11: /* FCMLA #0 */ 12722 case 0x13: /* FCMLA #90 */ 12723 case 0x15: /* FCMLA #180 */ 12724 case 0x17: /* FCMLA #270 */ 12725 if (is_scalar || !dc_isar_feature(aa64_fcma, s)) { 12726 unallocated_encoding(s); 12727 return; 12728 } 12729 is_fp = 2; 12730 break; 12731 case 0x00: /* FMLAL */ 12732 case 0x04: /* FMLSL */ 12733 case 0x18: /* FMLAL2 */ 12734 case 0x1c: /* FMLSL2 */ 12735 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) { 12736 unallocated_encoding(s); 12737 return; 12738 } 12739 size = MO_16; 12740 /* is_fp, but we pass tcg_env not fp_status. 
*/ 12741 break; 12742 default: 12743 unallocated_encoding(s); 12744 return; 12745 } 12746 12747 switch (is_fp) { 12748 case 1: /* normal fp */ 12749 /* convert insn encoded size to MemOp size */ 12750 switch (size) { 12751 case 0: /* half-precision */ 12752 size = MO_16; 12753 is_fp16 = true; 12754 break; 12755 case MO_32: /* single precision */ 12756 case MO_64: /* double precision */ 12757 break; 12758 default: 12759 unallocated_encoding(s); 12760 return; 12761 } 12762 break; 12763 12764 case 2: /* complex fp */ 12765 /* Each indexable element is a complex pair. */ 12766 size += 1; 12767 switch (size) { 12768 case MO_32: 12769 if (h && !is_q) { 12770 unallocated_encoding(s); 12771 return; 12772 } 12773 is_fp16 = true; 12774 break; 12775 case MO_64: 12776 break; 12777 default: 12778 unallocated_encoding(s); 12779 return; 12780 } 12781 break; 12782 12783 default: /* integer */ 12784 switch (size) { 12785 case MO_8: 12786 case MO_64: 12787 unallocated_encoding(s); 12788 return; 12789 } 12790 break; 12791 } 12792 if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) { 12793 unallocated_encoding(s); 12794 return; 12795 } 12796 12797 /* Given MemOp size, adjust register and indexing. */ 12798 switch (size) { 12799 case MO_16: 12800 index = h << 2 | l << 1 | m; 12801 break; 12802 case MO_32: 12803 index = h << 1 | l; 12804 rm |= m << 4; 12805 break; 12806 case MO_64: 12807 if (l || !is_q) { 12808 unallocated_encoding(s); 12809 return; 12810 } 12811 index = h; 12812 rm |= m << 4; 12813 break; 12814 default: 12815 g_assert_not_reached(); 12816 } 12817 12818 if (!fp_access_check(s)) { 12819 return; 12820 } 12821 12822 if (is_fp) { 12823 fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 12824 } else { 12825 fpst = NULL; 12826 } 12827 12828 switch (16 * u + opcode) { 12829 case 0x0e: /* SDOT */ 12830 case 0x1e: /* UDOT */ 12831 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 12832 u ? gen_helper_gvec_udot_idx_b 12833 : gen_helper_gvec_sdot_idx_b); 12834 return; 12835 case 0x0f: 12836 switch (extract32(insn, 22, 2)) { 12837 case 0: /* SUDOT */ 12838 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 12839 gen_helper_gvec_sudot_idx_b); 12840 return; 12841 case 1: /* BFDOT */ 12842 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 12843 gen_helper_gvec_bfdot_idx); 12844 return; 12845 case 2: /* USDOT */ 12846 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 12847 gen_helper_gvec_usdot_idx_b); 12848 return; 12849 case 3: /* BFMLAL{B,T} */ 12850 gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q, 12851 gen_helper_gvec_bfmlal_idx); 12852 return; 12853 } 12854 g_assert_not_reached(); 12855 case 0x11: /* FCMLA #0 */ 12856 case 0x13: /* FCMLA #90 */ 12857 case 0x15: /* FCMLA #180 */ 12858 case 0x17: /* FCMLA #270 */ 12859 { 12860 int rot = extract32(insn, 13, 2); 12861 int data = (index << 2) | rot; 12862 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 12863 vec_full_reg_offset(s, rn), 12864 vec_full_reg_offset(s, rm), 12865 vec_full_reg_offset(s, rd), fpst, 12866 is_q ? 16 : 8, vec_full_reg_size(s), data, 12867 size == MO_64 12868 ? 
gen_helper_gvec_fcmlas_idx 12869 : gen_helper_gvec_fcmlah_idx); 12870 } 12871 return; 12872 12873 case 0x00: /* FMLAL */ 12874 case 0x04: /* FMLSL */ 12875 case 0x18: /* FMLAL2 */ 12876 case 0x1c: /* FMLSL2 */ 12877 { 12878 int is_s = extract32(opcode, 2, 1); 12879 int is_2 = u; 12880 int data = (index << 2) | (is_2 << 1) | is_s; 12881 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 12882 vec_full_reg_offset(s, rn), 12883 vec_full_reg_offset(s, rm), tcg_env, 12884 is_q ? 16 : 8, vec_full_reg_size(s), 12885 data, gen_helper_gvec_fmlal_idx_a64); 12886 } 12887 return; 12888 12889 case 0x08: /* MUL */ 12890 if (!is_long && !is_scalar) { 12891 static gen_helper_gvec_3 * const fns[3] = { 12892 gen_helper_gvec_mul_idx_h, 12893 gen_helper_gvec_mul_idx_s, 12894 gen_helper_gvec_mul_idx_d, 12895 }; 12896 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 12897 vec_full_reg_offset(s, rn), 12898 vec_full_reg_offset(s, rm), 12899 is_q ? 16 : 8, vec_full_reg_size(s), 12900 index, fns[size - 1]); 12901 return; 12902 } 12903 break; 12904 12905 case 0x10: /* MLA */ 12906 if (!is_long && !is_scalar) { 12907 static gen_helper_gvec_4 * const fns[3] = { 12908 gen_helper_gvec_mla_idx_h, 12909 gen_helper_gvec_mla_idx_s, 12910 gen_helper_gvec_mla_idx_d, 12911 }; 12912 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 12913 vec_full_reg_offset(s, rn), 12914 vec_full_reg_offset(s, rm), 12915 vec_full_reg_offset(s, rd), 12916 is_q ? 16 : 8, vec_full_reg_size(s), 12917 index, fns[size - 1]); 12918 return; 12919 } 12920 break; 12921 12922 case 0x14: /* MLS */ 12923 if (!is_long && !is_scalar) { 12924 static gen_helper_gvec_4 * const fns[3] = { 12925 gen_helper_gvec_mls_idx_h, 12926 gen_helper_gvec_mls_idx_s, 12927 gen_helper_gvec_mls_idx_d, 12928 }; 12929 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 12930 vec_full_reg_offset(s, rn), 12931 vec_full_reg_offset(s, rm), 12932 vec_full_reg_offset(s, rd), 12933 is_q ? 16 : 8, vec_full_reg_size(s), 12934 index, fns[size - 1]); 12935 return; 12936 } 12937 break; 12938 } 12939 12940 if (size == 3) { 12941 TCGv_i64 tcg_idx = tcg_temp_new_i64(); 12942 int pass; 12943 12944 assert(is_fp && is_q && !is_long); 12945 12946 read_vec_element(s, tcg_idx, rm, index, MO_64); 12947 12948 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 12949 TCGv_i64 tcg_op = tcg_temp_new_i64(); 12950 TCGv_i64 tcg_res = tcg_temp_new_i64(); 12951 12952 read_vec_element(s, tcg_op, rn, pass, MO_64); 12953 12954 switch (16 * u + opcode) { 12955 case 0x05: /* FMLS */ 12956 /* As usual for ARM, separate negation for fused multiply-add */ 12957 gen_helper_vfp_negd(tcg_op, tcg_op); 12958 /* fall through */ 12959 case 0x01: /* FMLA */ 12960 read_vec_element(s, tcg_res, rd, pass, MO_64); 12961 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst); 12962 break; 12963 case 0x09: /* FMUL */ 12964 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst); 12965 break; 12966 case 0x19: /* FMULX */ 12967 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst); 12968 break; 12969 default: 12970 g_assert_not_reached(); 12971 } 12972 12973 write_vec_element(s, tcg_res, rd, pass, MO_64); 12974 } 12975 12976 clear_vec_high(s, !is_scalar, rd); 12977 } else if (!is_long) { 12978 /* 32 bit floating point, or 16 or 32 bit integer. 12979 * For the 16 bit scalar case we use the usual Neon helpers and 12980 * rely on the fact that 0 op 0 == 0 with no side effects. 12981 */ 12982 TCGv_i32 tcg_idx = tcg_temp_new_i32(); 12983 int pass, maxpasses; 12984 12985 if (is_scalar) { 12986 maxpasses = 1; 12987 } else { 12988 maxpasses = is_q ? 
4 : 2; 12989 } 12990 12991 read_vec_element_i32(s, tcg_idx, rm, index, size); 12992 12993 if (size == 1 && !is_scalar) { 12994 /* The simplest way to handle the 16x16 indexed ops is to duplicate 12995 * the index into both halves of the 32 bit tcg_idx and then use 12996 * the usual Neon helpers. 12997 */ 12998 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); 12999 } 13000 13001 for (pass = 0; pass < maxpasses; pass++) { 13002 TCGv_i32 tcg_op = tcg_temp_new_i32(); 13003 TCGv_i32 tcg_res = tcg_temp_new_i32(); 13004 13005 read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32); 13006 13007 switch (16 * u + opcode) { 13008 case 0x08: /* MUL */ 13009 case 0x10: /* MLA */ 13010 case 0x14: /* MLS */ 13011 { 13012 static NeonGenTwoOpFn * const fns[2][2] = { 13013 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, 13014 { tcg_gen_add_i32, tcg_gen_sub_i32 }, 13015 }; 13016 NeonGenTwoOpFn *genfn; 13017 bool is_sub = opcode == 0x4; 13018 13019 if (size == 1) { 13020 gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx); 13021 } else { 13022 tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx); 13023 } 13024 if (opcode == 0x8) { 13025 break; 13026 } 13027 read_vec_element_i32(s, tcg_op, rd, pass, MO_32); 13028 genfn = fns[size - 1][is_sub]; 13029 genfn(tcg_res, tcg_op, tcg_res); 13030 break; 13031 } 13032 case 0x05: /* FMLS */ 13033 case 0x01: /* FMLA */ 13034 read_vec_element_i32(s, tcg_res, rd, pass, 13035 is_scalar ? size : MO_32); 13036 switch (size) { 13037 case 1: 13038 if (opcode == 0x5) { 13039 /* As usual for ARM, separate negation for fused 13040 * multiply-add */ 13041 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000); 13042 } 13043 if (is_scalar) { 13044 gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx, 13045 tcg_res, fpst); 13046 } else { 13047 gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx, 13048 tcg_res, fpst); 13049 } 13050 break; 13051 case 2: 13052 if (opcode == 0x5) { 13053 /* As usual for ARM, separate negation for 13054 * fused multiply-add */ 13055 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000); 13056 } 13057 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, 13058 tcg_res, fpst); 13059 break; 13060 default: 13061 g_assert_not_reached(); 13062 } 13063 break; 13064 case 0x09: /* FMUL */ 13065 switch (size) { 13066 case 1: 13067 if (is_scalar) { 13068 gen_helper_advsimd_mulh(tcg_res, tcg_op, 13069 tcg_idx, fpst); 13070 } else { 13071 gen_helper_advsimd_mul2h(tcg_res, tcg_op, 13072 tcg_idx, fpst); 13073 } 13074 break; 13075 case 2: 13076 gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst); 13077 break; 13078 default: 13079 g_assert_not_reached(); 13080 } 13081 break; 13082 case 0x19: /* FMULX */ 13083 switch (size) { 13084 case 1: 13085 if (is_scalar) { 13086 gen_helper_advsimd_mulxh(tcg_res, tcg_op, 13087 tcg_idx, fpst); 13088 } else { 13089 gen_helper_advsimd_mulx2h(tcg_res, tcg_op, 13090 tcg_idx, fpst); 13091 } 13092 break; 13093 case 2: 13094 gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst); 13095 break; 13096 default: 13097 g_assert_not_reached(); 13098 } 13099 break; 13100 case 0x0c: /* SQDMULH */ 13101 if (size == 1) { 13102 gen_helper_neon_qdmulh_s16(tcg_res, tcg_env, 13103 tcg_op, tcg_idx); 13104 } else { 13105 gen_helper_neon_qdmulh_s32(tcg_res, tcg_env, 13106 tcg_op, tcg_idx); 13107 } 13108 break; 13109 case 0x0d: /* SQRDMULH */ 13110 if (size == 1) { 13111 gen_helper_neon_qrdmulh_s16(tcg_res, tcg_env, 13112 tcg_op, tcg_idx); 13113 } else { 13114 gen_helper_neon_qrdmulh_s32(tcg_res, tcg_env, 13115 tcg_op, tcg_idx); 13116 } 13117 break; 13118 case 0x1d: /* 
SQRDMLAH */ 13119 read_vec_element_i32(s, tcg_res, rd, pass, 13120 is_scalar ? size : MO_32); 13121 if (size == 1) { 13122 gen_helper_neon_qrdmlah_s16(tcg_res, tcg_env, 13123 tcg_op, tcg_idx, tcg_res); 13124 } else { 13125 gen_helper_neon_qrdmlah_s32(tcg_res, tcg_env, 13126 tcg_op, tcg_idx, tcg_res); 13127 } 13128 break; 13129 case 0x1f: /* SQRDMLSH */ 13130 read_vec_element_i32(s, tcg_res, rd, pass, 13131 is_scalar ? size : MO_32); 13132 if (size == 1) { 13133 gen_helper_neon_qrdmlsh_s16(tcg_res, tcg_env, 13134 tcg_op, tcg_idx, tcg_res); 13135 } else { 13136 gen_helper_neon_qrdmlsh_s32(tcg_res, tcg_env, 13137 tcg_op, tcg_idx, tcg_res); 13138 } 13139 break; 13140 default: 13141 g_assert_not_reached(); 13142 } 13143 13144 if (is_scalar) { 13145 write_fp_sreg(s, rd, tcg_res); 13146 } else { 13147 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 13148 } 13149 } 13150 13151 clear_vec_high(s, is_q, rd); 13152 } else { 13153 /* long ops: 16x16->32 or 32x32->64 */ 13154 TCGv_i64 tcg_res[2]; 13155 int pass; 13156 bool satop = extract32(opcode, 0, 1); 13157 MemOp memop = MO_32; 13158 13159 if (satop || !u) { 13160 memop |= MO_SIGN; 13161 } 13162 13163 if (size == 2) { 13164 TCGv_i64 tcg_idx = tcg_temp_new_i64(); 13165 13166 read_vec_element(s, tcg_idx, rm, index, memop); 13167 13168 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 13169 TCGv_i64 tcg_op = tcg_temp_new_i64(); 13170 TCGv_i64 tcg_passres; 13171 int passelt; 13172 13173 if (is_scalar) { 13174 passelt = 0; 13175 } else { 13176 passelt = pass + (is_q * 2); 13177 } 13178 13179 read_vec_element(s, tcg_op, rn, passelt, memop); 13180 13181 tcg_res[pass] = tcg_temp_new_i64(); 13182 13183 if (opcode == 0xa || opcode == 0xb) { 13184 /* Non-accumulating ops */ 13185 tcg_passres = tcg_res[pass]; 13186 } else { 13187 tcg_passres = tcg_temp_new_i64(); 13188 } 13189 13190 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx); 13191 13192 if (satop) { 13193 /* saturating, doubling */ 13194 gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env, 13195 tcg_passres, tcg_passres); 13196 } 13197 13198 if (opcode == 0xa || opcode == 0xb) { 13199 continue; 13200 } 13201 13202 /* Accumulating op: handle accumulate step */ 13203 read_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13204 13205 switch (opcode) { 13206 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 13207 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 13208 break; 13209 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 13210 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 13211 break; 13212 case 0x7: /* SQDMLSL, SQDMLSL2 */ 13213 tcg_gen_neg_i64(tcg_passres, tcg_passres); 13214 /* fall through */ 13215 case 0x3: /* SQDMLAL, SQDMLAL2 */ 13216 gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env, 13217 tcg_res[pass], 13218 tcg_passres); 13219 break; 13220 default: 13221 g_assert_not_reached(); 13222 } 13223 } 13224 13225 clear_vec_high(s, !is_scalar, rd); 13226 } else { 13227 TCGv_i32 tcg_idx = tcg_temp_new_i32(); 13228 13229 assert(size == 1); 13230 read_vec_element_i32(s, tcg_idx, rm, index, size); 13231 13232 if (!is_scalar) { 13233 /* The simplest way to handle the 16x16 indexed ops is to 13234 * duplicate the index into both halves of the 32 bit tcg_idx 13235 * and then use the usual Neon helpers. 13236 */ 13237 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); 13238 } 13239 13240 for (pass = 0; pass < (is_scalar ? 
1 : 2); pass++) { 13241 TCGv_i32 tcg_op = tcg_temp_new_i32(); 13242 TCGv_i64 tcg_passres; 13243 13244 if (is_scalar) { 13245 read_vec_element_i32(s, tcg_op, rn, pass, size); 13246 } else { 13247 read_vec_element_i32(s, tcg_op, rn, 13248 pass + (is_q * 2), MO_32); 13249 } 13250 13251 tcg_res[pass] = tcg_temp_new_i64(); 13252 13253 if (opcode == 0xa || opcode == 0xb) { 13254 /* Non-accumulating ops */ 13255 tcg_passres = tcg_res[pass]; 13256 } else { 13257 tcg_passres = tcg_temp_new_i64(); 13258 } 13259 13260 if (memop & MO_SIGN) { 13261 gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx); 13262 } else { 13263 gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx); 13264 } 13265 if (satop) { 13266 gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env, 13267 tcg_passres, tcg_passres); 13268 } 13269 13270 if (opcode == 0xa || opcode == 0xb) { 13271 continue; 13272 } 13273 13274 /* Accumulating op: handle accumulate step */ 13275 read_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13276 13277 switch (opcode) { 13278 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 13279 gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass], 13280 tcg_passres); 13281 break; 13282 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 13283 gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass], 13284 tcg_passres); 13285 break; 13286 case 0x7: /* SQDMLSL, SQDMLSL2 */ 13287 gen_helper_neon_negl_u32(tcg_passres, tcg_passres); 13288 /* fall through */ 13289 case 0x3: /* SQDMLAL, SQDMLAL2 */ 13290 gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env, 13291 tcg_res[pass], 13292 tcg_passres); 13293 break; 13294 default: 13295 g_assert_not_reached(); 13296 } 13297 } 13298 13299 if (is_scalar) { 13300 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]); 13301 } 13302 } 13303 13304 if (is_scalar) { 13305 tcg_res[1] = tcg_constant_i64(0); 13306 } 13307 13308 for (pass = 0; pass < 2; pass++) { 13309 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13310 } 13311 } 13312 } 13313 13314 /* Crypto AES 13315 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 13316 * +-----------------+------+-----------+--------+-----+------+------+ 13317 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | 13318 * +-----------------+------+-----------+--------+-----+------+------+ 13319 */ 13320 static void disas_crypto_aes(DisasContext *s, uint32_t insn) 13321 { 13322 int size = extract32(insn, 22, 2); 13323 int opcode = extract32(insn, 12, 5); 13324 int rn = extract32(insn, 5, 5); 13325 int rd = extract32(insn, 0, 5); 13326 gen_helper_gvec_2 *genfn2 = NULL; 13327 gen_helper_gvec_3 *genfn3 = NULL; 13328 13329 if (!dc_isar_feature(aa64_aes, s) || size != 0) { 13330 unallocated_encoding(s); 13331 return; 13332 } 13333 13334 switch (opcode) { 13335 case 0x4: /* AESE */ 13336 genfn3 = gen_helper_crypto_aese; 13337 break; 13338 case 0x6: /* AESMC */ 13339 genfn2 = gen_helper_crypto_aesmc; 13340 break; 13341 case 0x5: /* AESD */ 13342 genfn3 = gen_helper_crypto_aesd; 13343 break; 13344 case 0x7: /* AESIMC */ 13345 genfn2 = gen_helper_crypto_aesimc; 13346 break; 13347 default: 13348 unallocated_encoding(s); 13349 return; 13350 } 13351 13352 if (!fp_access_check(s)) { 13353 return; 13354 } 13355 if (genfn2) { 13356 gen_gvec_op2_ool(s, true, rd, rn, 0, genfn2); 13357 } else { 13358 gen_gvec_op3_ool(s, true, rd, rd, rn, 0, genfn3); 13359 } 13360 } 13361 13362 /* Crypto three-reg SHA 13363 * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 13364 * +-----------------+------+---+------+---+--------+-----+------+------+ 13365 * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | 
opcode | 0 0 | Rn | Rd | 13366 * +-----------------+------+---+------+---+--------+-----+------+------+ 13367 */ 13368 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) 13369 { 13370 int size = extract32(insn, 22, 2); 13371 int opcode = extract32(insn, 12, 3); 13372 int rm = extract32(insn, 16, 5); 13373 int rn = extract32(insn, 5, 5); 13374 int rd = extract32(insn, 0, 5); 13375 gen_helper_gvec_3 *genfn; 13376 bool feature; 13377 13378 if (size != 0) { 13379 unallocated_encoding(s); 13380 return; 13381 } 13382 13383 switch (opcode) { 13384 case 0: /* SHA1C */ 13385 genfn = gen_helper_crypto_sha1c; 13386 feature = dc_isar_feature(aa64_sha1, s); 13387 break; 13388 case 1: /* SHA1P */ 13389 genfn = gen_helper_crypto_sha1p; 13390 feature = dc_isar_feature(aa64_sha1, s); 13391 break; 13392 case 2: /* SHA1M */ 13393 genfn = gen_helper_crypto_sha1m; 13394 feature = dc_isar_feature(aa64_sha1, s); 13395 break; 13396 case 3: /* SHA1SU0 */ 13397 genfn = gen_helper_crypto_sha1su0; 13398 feature = dc_isar_feature(aa64_sha1, s); 13399 break; 13400 case 4: /* SHA256H */ 13401 genfn = gen_helper_crypto_sha256h; 13402 feature = dc_isar_feature(aa64_sha256, s); 13403 break; 13404 case 5: /* SHA256H2 */ 13405 genfn = gen_helper_crypto_sha256h2; 13406 feature = dc_isar_feature(aa64_sha256, s); 13407 break; 13408 case 6: /* SHA256SU1 */ 13409 genfn = gen_helper_crypto_sha256su1; 13410 feature = dc_isar_feature(aa64_sha256, s); 13411 break; 13412 default: 13413 unallocated_encoding(s); 13414 return; 13415 } 13416 13417 if (!feature) { 13418 unallocated_encoding(s); 13419 return; 13420 } 13421 13422 if (!fp_access_check(s)) { 13423 return; 13424 } 13425 gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn); 13426 } 13427 13428 /* Crypto two-reg SHA 13429 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 13430 * +-----------------+------+-----------+--------+-----+------+------+ 13431 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | 13432 * +-----------------+------+-----------+--------+-----+------+------+ 13433 */ 13434 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) 13435 { 13436 int size = extract32(insn, 22, 2); 13437 int opcode = extract32(insn, 12, 5); 13438 int rn = extract32(insn, 5, 5); 13439 int rd = extract32(insn, 0, 5); 13440 gen_helper_gvec_2 *genfn; 13441 bool feature; 13442 13443 if (size != 0) { 13444 unallocated_encoding(s); 13445 return; 13446 } 13447 13448 switch (opcode) { 13449 case 0: /* SHA1H */ 13450 feature = dc_isar_feature(aa64_sha1, s); 13451 genfn = gen_helper_crypto_sha1h; 13452 break; 13453 case 1: /* SHA1SU1 */ 13454 feature = dc_isar_feature(aa64_sha1, s); 13455 genfn = gen_helper_crypto_sha1su1; 13456 break; 13457 case 2: /* SHA256SU0 */ 13458 feature = dc_isar_feature(aa64_sha256, s); 13459 genfn = gen_helper_crypto_sha256su0; 13460 break; 13461 default: 13462 unallocated_encoding(s); 13463 return; 13464 } 13465 13466 if (!feature) { 13467 unallocated_encoding(s); 13468 return; 13469 } 13470 13471 if (!fp_access_check(s)) { 13472 return; 13473 } 13474 gen_gvec_op2_ool(s, true, rd, rn, 0, genfn); 13475 } 13476 13477 static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 13478 { 13479 tcg_gen_rotli_i64(d, m, 1); 13480 tcg_gen_xor_i64(d, d, n); 13481 } 13482 13483 static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m) 13484 { 13485 tcg_gen_rotli_vec(vece, d, m, 1); 13486 tcg_gen_xor_vec(vece, d, d, n); 13487 } 13488 13489 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 13490 uint32_t rm_ofs, 
uint32_t opr_sz, uint32_t max_sz) 13491 { 13492 static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 }; 13493 static const GVecGen3 op = { 13494 .fni8 = gen_rax1_i64, 13495 .fniv = gen_rax1_vec, 13496 .opt_opc = vecop_list, 13497 .fno = gen_helper_crypto_rax1, 13498 .vece = MO_64, 13499 }; 13500 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op); 13501 } 13502 13503 /* Crypto three-reg SHA512 13504 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0 13505 * +-----------------------+------+---+---+-----+--------+------+------+ 13506 * | 1 1 0 0 1 1 1 0 0 1 1 | Rm | 1 | O | 0 0 | opcode | Rn | Rd | 13507 * +-----------------------+------+---+---+-----+--------+------+------+ 13508 */ 13509 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) 13510 { 13511 int opcode = extract32(insn, 10, 2); 13512 int o = extract32(insn, 14, 1); 13513 int rm = extract32(insn, 16, 5); 13514 int rn = extract32(insn, 5, 5); 13515 int rd = extract32(insn, 0, 5); 13516 bool feature; 13517 gen_helper_gvec_3 *oolfn = NULL; 13518 GVecGen3Fn *gvecfn = NULL; 13519 13520 if (o == 0) { 13521 switch (opcode) { 13522 case 0: /* SHA512H */ 13523 feature = dc_isar_feature(aa64_sha512, s); 13524 oolfn = gen_helper_crypto_sha512h; 13525 break; 13526 case 1: /* SHA512H2 */ 13527 feature = dc_isar_feature(aa64_sha512, s); 13528 oolfn = gen_helper_crypto_sha512h2; 13529 break; 13530 case 2: /* SHA512SU1 */ 13531 feature = dc_isar_feature(aa64_sha512, s); 13532 oolfn = gen_helper_crypto_sha512su1; 13533 break; 13534 case 3: /* RAX1 */ 13535 feature = dc_isar_feature(aa64_sha3, s); 13536 gvecfn = gen_gvec_rax1; 13537 break; 13538 default: 13539 g_assert_not_reached(); 13540 } 13541 } else { 13542 switch (opcode) { 13543 case 0: /* SM3PARTW1 */ 13544 feature = dc_isar_feature(aa64_sm3, s); 13545 oolfn = gen_helper_crypto_sm3partw1; 13546 break; 13547 case 1: /* SM3PARTW2 */ 13548 feature = dc_isar_feature(aa64_sm3, s); 13549 oolfn = gen_helper_crypto_sm3partw2; 13550 break; 13551 case 2: /* SM4EKEY */ 13552 feature = dc_isar_feature(aa64_sm4, s); 13553 oolfn = gen_helper_crypto_sm4ekey; 13554 break; 13555 default: 13556 unallocated_encoding(s); 13557 return; 13558 } 13559 } 13560 13561 if (!feature) { 13562 unallocated_encoding(s); 13563 return; 13564 } 13565 13566 if (!fp_access_check(s)) { 13567 return; 13568 } 13569 13570 if (oolfn) { 13571 gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn); 13572 } else { 13573 gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64); 13574 } 13575 } 13576 13577 /* Crypto two-reg SHA512 13578 * 31 12 11 10 9 5 4 0 13579 * +-----------------------------------------+--------+------+------+ 13580 * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode | Rn | Rd | 13581 * +-----------------------------------------+--------+------+------+ 13582 */ 13583 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) 13584 { 13585 int opcode = extract32(insn, 10, 2); 13586 int rn = extract32(insn, 5, 5); 13587 int rd = extract32(insn, 0, 5); 13588 bool feature; 13589 13590 switch (opcode) { 13591 case 0: /* SHA512SU0 */ 13592 feature = dc_isar_feature(aa64_sha512, s); 13593 break; 13594 case 1: /* SM4E */ 13595 feature = dc_isar_feature(aa64_sm4, s); 13596 break; 13597 default: 13598 unallocated_encoding(s); 13599 return; 13600 } 13601 13602 if (!feature) { 13603 unallocated_encoding(s); 13604 return; 13605 } 13606 13607 if (!fp_access_check(s)) { 13608 return; 13609 } 13610 13611 switch (opcode) { 13612 case 0: /* SHA512SU0 */ 13613 gen_gvec_op2_ool(s, true, rd, rn, 0, 
gen_helper_crypto_sha512su0); 13614 break; 13615 case 1: /* SM4E */ 13616 gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e); 13617 break; 13618 default: 13619 g_assert_not_reached(); 13620 } 13621 } 13622 13623 /* Crypto four-register 13624 * 31 23 22 21 20 16 15 14 10 9 5 4 0 13625 * +-------------------+-----+------+---+------+------+------+ 13626 * | 1 1 0 0 1 1 1 0 0 | Op0 | Rm | 0 | Ra | Rn | Rd | 13627 * +-------------------+-----+------+---+------+------+------+ 13628 */ 13629 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn) 13630 { 13631 int op0 = extract32(insn, 21, 2); 13632 int rm = extract32(insn, 16, 5); 13633 int ra = extract32(insn, 10, 5); 13634 int rn = extract32(insn, 5, 5); 13635 int rd = extract32(insn, 0, 5); 13636 bool feature; 13637 13638 switch (op0) { 13639 case 0: /* EOR3 */ 13640 case 1: /* BCAX */ 13641 feature = dc_isar_feature(aa64_sha3, s); 13642 break; 13643 case 2: /* SM3SS1 */ 13644 feature = dc_isar_feature(aa64_sm3, s); 13645 break; 13646 default: 13647 unallocated_encoding(s); 13648 return; 13649 } 13650 13651 if (!feature) { 13652 unallocated_encoding(s); 13653 return; 13654 } 13655 13656 if (!fp_access_check(s)) { 13657 return; 13658 } 13659 13660 if (op0 < 2) { 13661 TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2]; 13662 int pass; 13663 13664 tcg_op1 = tcg_temp_new_i64(); 13665 tcg_op2 = tcg_temp_new_i64(); 13666 tcg_op3 = tcg_temp_new_i64(); 13667 tcg_res[0] = tcg_temp_new_i64(); 13668 tcg_res[1] = tcg_temp_new_i64(); 13669 13670 for (pass = 0; pass < 2; pass++) { 13671 read_vec_element(s, tcg_op1, rn, pass, MO_64); 13672 read_vec_element(s, tcg_op2, rm, pass, MO_64); 13673 read_vec_element(s, tcg_op3, ra, pass, MO_64); 13674 13675 if (op0 == 0) { 13676 /* EOR3 */ 13677 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3); 13678 } else { 13679 /* BCAX */ 13680 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3); 13681 } 13682 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); 13683 } 13684 write_vec_element(s, tcg_res[0], rd, 0, MO_64); 13685 write_vec_element(s, tcg_res[1], rd, 1, MO_64); 13686 } else { 13687 TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero; 13688 13689 tcg_op1 = tcg_temp_new_i32(); 13690 tcg_op2 = tcg_temp_new_i32(); 13691 tcg_op3 = tcg_temp_new_i32(); 13692 tcg_res = tcg_temp_new_i32(); 13693 tcg_zero = tcg_constant_i32(0); 13694 13695 read_vec_element_i32(s, tcg_op1, rn, 3, MO_32); 13696 read_vec_element_i32(s, tcg_op2, rm, 3, MO_32); 13697 read_vec_element_i32(s, tcg_op3, ra, 3, MO_32); 13698 13699 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); 13700 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); 13701 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); 13702 tcg_gen_rotri_i32(tcg_res, tcg_res, 25); 13703 13704 write_vec_element_i32(s, tcg_zero, rd, 0, MO_32); 13705 write_vec_element_i32(s, tcg_zero, rd, 1, MO_32); 13706 write_vec_element_i32(s, tcg_zero, rd, 2, MO_32); 13707 write_vec_element_i32(s, tcg_res, rd, 3, MO_32); 13708 } 13709 } 13710 13711 /* Crypto XAR 13712 * 31 21 20 16 15 10 9 5 4 0 13713 * +-----------------------+------+--------+------+------+ 13714 * | 1 1 0 0 1 1 1 0 1 0 0 | Rm | imm6 | Rn | Rd | 13715 * +-----------------------+------+--------+------+------+ 13716 */ 13717 static void disas_crypto_xar(DisasContext *s, uint32_t insn) 13718 { 13719 int rm = extract32(insn, 16, 5); 13720 int imm6 = extract32(insn, 10, 6); 13721 int rn = extract32(insn, 5, 5); 13722 int rd = extract32(insn, 0, 5); 13723 13724 if (!dc_isar_feature(aa64_sha3, s)) { 13725 unallocated_encoding(s); 13726 return; 
13727 } 13728 13729 if (!fp_access_check(s)) { 13730 return; 13731 } 13732 13733 gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd), 13734 vec_full_reg_offset(s, rn), 13735 vec_full_reg_offset(s, rm), imm6, 16, 13736 vec_full_reg_size(s)); 13737 } 13738 13739 /* Crypto three-reg imm2 13740 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0 13741 * +-----------------------+------+-----+------+--------+------+------+ 13742 * | 1 1 0 0 1 1 1 0 0 1 0 | Rm | 1 0 | imm2 | opcode | Rn | Rd | 13743 * +-----------------------+------+-----+------+--------+------+------+ 13744 */ 13745 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) 13746 { 13747 static gen_helper_gvec_3 * const fns[4] = { 13748 gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b, 13749 gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b, 13750 }; 13751 int opcode = extract32(insn, 10, 2); 13752 int imm2 = extract32(insn, 12, 2); 13753 int rm = extract32(insn, 16, 5); 13754 int rn = extract32(insn, 5, 5); 13755 int rd = extract32(insn, 0, 5); 13756 13757 if (!dc_isar_feature(aa64_sm3, s)) { 13758 unallocated_encoding(s); 13759 return; 13760 } 13761 13762 if (!fp_access_check(s)) { 13763 return; 13764 } 13765 13766 gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]); 13767 } 13768 13769 /* C3.6 Data processing - SIMD, inc Crypto 13770 * 13771 * As the decode gets a little complex we are using a table based 13772 * approach for this part of the decode. 13773 */ 13774 static const AArch64DecodeTable data_proc_simd[] = { 13775 /* pattern , mask , fn */ 13776 { 0x0e200400, 0x9f200400, disas_simd_three_reg_same }, 13777 { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra }, 13778 { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff }, 13779 { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, 13780 { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes }, 13781 { 0x0e000400, 0x9fe08400, disas_simd_copy }, 13782 { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */ 13783 /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */ 13784 { 0x0f000400, 0x9ff80400, disas_simd_mod_imm }, 13785 { 0x0f000400, 0x9f800400, disas_simd_shift_imm }, 13786 { 0x0e000000, 0xbf208c00, disas_simd_tb }, 13787 { 0x0e000800, 0xbf208c00, disas_simd_zip_trn }, 13788 { 0x2e000000, 0xbf208400, disas_simd_ext }, 13789 { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same }, 13790 { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra }, 13791 { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff }, 13792 { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, 13793 { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise }, 13794 { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy }, 13795 { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */ 13796 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm }, 13797 { 0x4e280800, 0xff3e0c00, disas_crypto_aes }, 13798 { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha }, 13799 { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha }, 13800 { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 }, 13801 { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 }, 13802 { 0xce000000, 0xff808000, disas_crypto_four_reg }, 13803 { 0xce800000, 0xffe00000, disas_crypto_xar }, 13804 { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 }, 13805 { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 }, 13806 { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 }, 13807 { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 }, 13808 { 0x00000000, 
0x00000000, NULL } 13809 }; 13810 13811 static void disas_data_proc_simd(DisasContext *s, uint32_t insn) 13812 { 13813 /* Note that this is called with all non-FP cases from 13814 * table C3-6 so it must UNDEF for entries not specifically 13815 * allocated to instructions in that table. 13816 */ 13817 AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn); 13818 if (fn) { 13819 fn(s, insn); 13820 } else { 13821 unallocated_encoding(s); 13822 } 13823 } 13824 13825 /* C3.6 Data processing - SIMD and floating point */ 13826 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn) 13827 { 13828 if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) { 13829 disas_data_proc_fp(s, insn); 13830 } else { 13831 /* SIMD, including crypto */ 13832 disas_data_proc_simd(s, insn); 13833 } 13834 } 13835 13836 static bool trans_OK(DisasContext *s, arg_OK *a) 13837 { 13838 return true; 13839 } 13840 13841 static bool trans_FAIL(DisasContext *s, arg_OK *a) 13842 { 13843 s->is_nonstreaming = true; 13844 return true; 13845 } 13846 13847 /** 13848 * is_guarded_page: 13849 * @env: The cpu environment 13850 * @s: The DisasContext 13851 * 13852 * Return true if the page is guarded. 13853 */ 13854 static bool is_guarded_page(CPUARMState *env, DisasContext *s) 13855 { 13856 uint64_t addr = s->base.pc_first; 13857 #ifdef CONFIG_USER_ONLY 13858 return page_get_flags(addr) & PAGE_BTI; 13859 #else 13860 CPUTLBEntryFull *full; 13861 void *host; 13862 int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx); 13863 int flags; 13864 13865 /* 13866 * We test this immediately after reading an insn, which means 13867 * that the TLB entry must be present and valid, and thus this 13868 * access will never raise an exception. 13869 */ 13870 flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx, 13871 false, &host, &full, 0); 13872 assert(!(flags & TLB_INVALID_MASK)); 13873 13874 return full->extra.arm.guarded; 13875 #endif 13876 } 13877 13878 /** 13879 * btype_destination_ok: 13880 * @insn: The instruction at the branch destination 13881 * @bt: SCTLR_ELx.BT 13882 * @btype: PSTATE.BTYPE, and is non-zero 13883 * 13884 * On a guarded page, there are a limited number of insns 13885 * that may be present at the branch target: 13886 * - branch target identifiers, 13887 * - paciasp, pacibsp, 13888 * - BRK insn 13889 * - HLT insn 13890 * Anything else causes a Branch Target Exception. 13891 * 13892 * Return true if the branch is compatible, false to raise BTITRAP. 13893 */ 13894 static bool btype_destination_ok(uint32_t insn, bool bt, int btype) 13895 { 13896 if ((insn & 0xfffff01fu) == 0xd503201fu) { 13897 /* HINT space */ 13898 switch (extract32(insn, 5, 7)) { 13899 case 0b011001: /* PACIASP */ 13900 case 0b011011: /* PACIBSP */ 13901 /* 13902 * If SCTLR_ELx.BT, then PACI*SP are not compatible 13903 * with btype == 3. Otherwise all btype are ok. 13904 */ 13905 return !bt || btype != 3; 13906 case 0b100000: /* BTI */ 13907 /* Not compatible with any btype. */ 13908 return false; 13909 case 0b100010: /* BTI c */ 13910 /* Not compatible with btype == 3 */ 13911 return btype != 3; 13912 case 0b100100: /* BTI j */ 13913 /* Not compatible with btype == 2 */ 13914 return btype != 2; 13915 case 0b100110: /* BTI jc */ 13916 /* Compatible with any btype. */ 13917 return true; 13918 } 13919 } else { 13920 switch (insn & 0xffe0001fu) { 13921 case 0xd4200000u: /* BRK */ 13922 case 0xd4400000u: /* HLT */ 13923 /* Give priority to the breakpoint exception. 
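 * Returning true means no BTI trap is raised here; the BRK or HLT exception is taken when the insn itself executes.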
*/ 13924 return true; 13925 } 13926 } 13927 return false; 13928 } 13929 13930 /* C3.1 A64 instruction index by encoding */ 13931 static void disas_a64_legacy(DisasContext *s, uint32_t insn) 13932 { 13933 switch (extract32(insn, 25, 4)) { 13934 case 0x5: 13935 case 0xd: /* Data processing - register */ 13936 disas_data_proc_reg(s, insn); 13937 break; 13938 case 0x7: 13939 case 0xf: /* Data processing - SIMD and floating point */ 13940 disas_data_proc_simd_fp(s, insn); 13941 break; 13942 default: 13943 unallocated_encoding(s); 13944 break; 13945 } 13946 } 13947 13948 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, 13949 CPUState *cpu) 13950 { 13951 DisasContext *dc = container_of(dcbase, DisasContext, base); 13952 CPUARMState *env = cpu_env(cpu); 13953 ARMCPU *arm_cpu = env_archcpu(env); 13954 CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb); 13955 int bound, core_mmu_idx; 13956 13957 dc->isar = &arm_cpu->isar; 13958 dc->condjmp = 0; 13959 dc->pc_save = dc->base.pc_first; 13960 dc->aarch64 = true; 13961 dc->thumb = false; 13962 dc->sctlr_b = 0; 13963 dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE; 13964 dc->condexec_mask = 0; 13965 dc->condexec_cond = 0; 13966 core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX); 13967 dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx); 13968 dc->tbii = EX_TBFLAG_A64(tb_flags, TBII); 13969 dc->tbid = EX_TBFLAG_A64(tb_flags, TBID); 13970 dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA); 13971 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); 13972 #if !defined(CONFIG_USER_ONLY) 13973 dc->user = (dc->current_el == 0); 13974 #endif 13975 dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); 13976 dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); 13977 dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL); 13978 dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE); 13979 dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC); 13980 dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET); 13981 dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL); 13982 dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL); 13983 dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16; 13984 dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16; 13985 dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE); 13986 dc->bt = EX_TBFLAG_A64(tb_flags, BT); 13987 dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE); 13988 dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV); 13989 dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA); 13990 dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0); 13991 dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE); 13992 dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE); 13993 dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM); 13994 dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA); 13995 dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING); 13996 dc->naa = EX_TBFLAG_A64(tb_flags, NAA); 13997 dc->vec_len = 0; 13998 dc->vec_stride = 0; 13999 dc->cp_regs = arm_cpu->cp_regs; 14000 dc->features = env->features; 14001 dc->dcz_blocksize = arm_cpu->dcz_blocksize; 14002 dc->gm_blocksize = arm_cpu->gm_blocksize; 14003 14004 #ifdef CONFIG_USER_ONLY 14005 /* In sve_probe_page, we assume TBI is enabled. */ 14006 tcg_debug_assert(dc->tbid & 1); 14007 #endif 14008 14009 dc->lse2 = dc_isar_feature(aa64_lse2, dc); 14010 14011 /* Single step state. 

static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    ARMCPU *arm_cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;
    dc->pc_save = dc->base.pc_first;
    dc->aarch64 = true;
    dc->thumb = false;
    dc->sctlr_b = 0;
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
    dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
    dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
    dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
    dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET);
    dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
    dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
    dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
    dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
    dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
    dc->bt = EX_TBFLAG_A64(tb_flags, BT);
    dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
    dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
    dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA);
    dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0);
    dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
    dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
    dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
    dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
    dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
    dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;
    dc->dcz_blocksize = arm_cpu->dcz_blocksize;
    dc->gm_blocksize = arm_cpu->gm_blocksize;

#ifdef CONFIG_USER_ONLY
    /* In sve_probe_page, we assume TBI is enabled. */
    tcg_debug_assert(dc->tbid & 1);
#endif

    dc->lse2 = dc_isar_feature(aa64_lse2, dc);

    /* Single step state.  The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;

    /* Bound the number of insns to execute to those left on the page.  */
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;

    /* If architectural single step active, limit to 1.  */
    if (dc->ss_active) {
        bound = 1;
    }
    dc->base.max_insns = MIN(dc->base.max_insns, bound);
}
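
/*
 * Worked example for the page-bound computation above (standalone, kept out
 * of the build with "#if 0").  The 4KiB page size and the address are
 * illustration values only.
 */
#if 0
#include <assert.h>
#include <stdint.h>

int main(void)
{
    const int64_t page_mask = ~(int64_t)0xfff;  /* 4KiB pages */
    int64_t pc_first = 0x400ff8;                /* 8 bytes before the page end */
    int64_t bound = -(pc_first | page_mask) / 4;

    /* Two 4-byte insns (at offsets 0xff8 and 0xffc) remain on the page. */
    assert(bound == 2);
    return 0;
}
#endif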

static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}

static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    target_ulong pc_arg = dc->base.pc_next;

    if (tb_cflags(dcbase->tb) & CF_PCREL) {
        pc_arg &= ~TARGET_PAGE_MASK;
    }
    tcg_gen_insn_start(pc_arg, 0, 0);
    dc->insn_start = tcg_last_op();
}

static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *s = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu_env(cpu);
    uint64_t pc = s->base.pc_next;
    uint32_t insn;

    /* Singlestep exceptions have the highest priority. */
    if (s->ss_active && !s->pstate_ss) {
        /* Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(s->base.num_insns == 1);
        gen_swstep_exception(s, 0, 0);
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = pc + 4;
        return;
    }

    if (pc & 3) {
        /*
         * PC alignment fault.  This has priority over the instruction abort
         * that we would receive from a translation fault via arm_ldl_code.
         * This should only be possible after an indirect branch, at the
         * start of the TB.
         */
        assert(s->base.num_insns == 1);
        gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
        return;
    }

    s->pc_curr = pc;
    insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next = pc + 4;

    s->fp_access_checked = false;
    s->sve_access_checked = false;

    if (s->pstate_il) {
        /*
         * Illegal execution state.  This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /*
             * At the first insn of the TB, compute s->guarded_page.
             * We delayed computing this until successfully reading
             * the first insn of the TB, above.  This (mostly) ensures
             * that the softmmu tlb entry has been populated, and the
             * page table GP bit is available.
             *
             * Note that we need to compute this even if btype == 0,
             * because this value is used for BR instructions later,
             * where ENV is not available.
             */
            s->guarded_page = is_guarded_page(env, s);

            /* First insn can have btype set to non-zero.  */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above most
             * everything else.  This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             */
            if (s->btype != 0
                && s->guarded_page
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
                return;
            }
        } else {
            /* Not the first insn: btype must be 0.  */
            tcg_debug_assert(s->btype == 0);
        }
    }

    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }

    if (!disas_a64(s, insn) &&
        !disas_sme(s, insn) &&
        !disas_sve(s, insn)) {
        disas_a64_legacy(s, insn);
    }

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}
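
/*
 * Structural sketch of the "first decoder that accepts the insn wins"
 * cascade used in aarch64_tr_translate_insn() above (standalone, kept out of
 * the build with "#if 0").  The decoder names here are hypothetical; the
 * real decoders are generated from the .decode files, with
 * disas_a64_legacy() as the fallback.
 */
#if 0
#include <stdbool.h>
#include <stdint.h>

typedef bool ExampleDecodeFn(void *ctx, uint32_t insn);

static void example_decode(void *ctx, uint32_t insn,
                           ExampleDecodeFn * const *decoders, int n,
                           void (*fallback)(void *ctx, uint32_t insn))
{
    for (int i = 0; i < n; i++) {
        if (decoders[i](ctx, insn)) {
            return;         /* this decoder recognised and translated it */
        }
    }
    fallback(ctx, insn);    /* nothing matched: try the legacy table decoder */
}
#endif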

static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->ss_active)) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            gen_step_complete_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, 4);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_update_pc(dc, 4);
            gen_helper_wfe(tcg_env);
            break;
        case DISAS_YIELD:
            gen_a64_update_pc(dc, 4);
            gen_helper_yield(tcg_env);
            break;
        case DISAS_WFI:
            /*
             * This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            gen_a64_update_pc(dc, 4);
            gen_helper_wfi(tcg_env, tcg_constant_i32(4));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
    }
}

static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
                                 CPUState *cpu, FILE *logfile)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
    target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
    .disas_log          = aarch64_tr_disas_log,
};
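
/*
 * For orientation only: a much-simplified sketch of the order in which the
 * generic translator loop drives the hooks registered in
 * aarch64_translator_ops (kept out of the build with "#if 0").  The real
 * loop lives in accel/tcg and also deals with breakpoints, plugins and
 * buffer limits.
 */
#if 0
static void example_translator_loop(DisasContextBase *db, CPUState *cpu)
{
    aarch64_tr_init_disas_context(db, cpu);
    aarch64_tr_tb_start(db, cpu);
    for (;;) {
        db->num_insns++;
        aarch64_tr_insn_start(db, cpu);
        aarch64_tr_translate_insn(db, cpu);
        if (db->is_jmp != DISAS_NEXT) {
            break;
        }
        if (db->num_insns >= db->max_insns) {
            db->is_jmp = DISAS_TOO_MANY;
            break;
        }
    }
    aarch64_tr_tb_stop(db, cpu);
}
#endif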