1 /* 2 * AArch64 translation 3 * 4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de> 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 #include "qemu/osdep.h" 20 21 #include "exec/exec-all.h" 22 #include "translate.h" 23 #include "translate-a64.h" 24 #include "qemu/log.h" 25 #include "disas/disas.h" 26 #include "arm_ldst.h" 27 #include "semihosting/semihost.h" 28 #include "cpregs.h" 29 30 static TCGv_i64 cpu_X[32]; 31 static TCGv_i64 cpu_pc; 32 33 /* Load/store exclusive handling */ 34 static TCGv_i64 cpu_exclusive_high; 35 36 static const char *regnames[] = { 37 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 38 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 39 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 40 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp" 41 }; 42 43 enum a64_shift_type { 44 A64_SHIFT_TYPE_LSL = 0, 45 A64_SHIFT_TYPE_LSR = 1, 46 A64_SHIFT_TYPE_ASR = 2, 47 A64_SHIFT_TYPE_ROR = 3 48 }; 49 50 /* 51 * Helpers for extracting complex instruction fields 52 */ 53 54 /* 55 * For load/store with an unsigned 12 bit immediate scaled by the element 56 * size. The input has the immediate field in bits [14:3] and the element 57 * size in [2:0]. 58 */ 59 static int uimm_scaled(DisasContext *s, int x) 60 { 61 unsigned imm = x >> 3; 62 unsigned scale = extract32(x, 0, 3); 63 return imm << scale; 64 } 65 66 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */ 67 static int scale_by_log2_tag_granule(DisasContext *s, int x) 68 { 69 return x << LOG2_TAG_GRANULE; 70 } 71 72 /* 73 * Include the generated decoders. 74 */ 75 76 #include "decode-sme-fa64.c.inc" 77 #include "decode-a64.c.inc" 78 79 /* Table based decoder typedefs - used when the relevant bits for decode 80 * are too awkwardly scattered across the instruction (eg SIMD). 81 */ 82 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn); 83 84 typedef struct AArch64DecodeTable { 85 uint32_t pattern; 86 uint32_t mask; 87 AArch64DecodeFn *disas_fn; 88 } AArch64DecodeTable; 89 90 /* initialize TCG globals. */ 91 void a64_translate_init(void) 92 { 93 int i; 94 95 cpu_pc = tcg_global_mem_new_i64(tcg_env, 96 offsetof(CPUARMState, pc), 97 "pc"); 98 for (i = 0; i < 32; i++) { 99 cpu_X[i] = tcg_global_mem_new_i64(tcg_env, 100 offsetof(CPUARMState, xregs[i]), 101 regnames[i]); 102 } 103 104 cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env, 105 offsetof(CPUARMState, exclusive_high), "exclusive_high"); 106 } 107 108 /* 109 * Return the core mmu_idx to use for A64 load/store insns which 110 * have a "unprivileged load/store" variant. Those insns access 111 * EL0 if executed from an EL which has control over EL0 (usually 112 * EL1) but behave like normal loads and stores if executed from 113 * elsewhere (eg EL3). 114 * 115 * @unpriv : true for the unprivileged encoding; false for the 116 * normal encoding (in which case we will return the same 117 * thing as get_mem_index(). 118 */ 119 static int get_a64_user_mem_index(DisasContext *s, bool unpriv) 120 { 121 /* 122 * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL, 123 * which is the usual mmu_idx for this cpu state. 124 */ 125 ARMMMUIdx useridx = s->mmu_idx; 126 127 if (unpriv && s->unpriv) { 128 /* 129 * We have pre-computed the condition for AccType_UNPRIV. 130 * Therefore we should never get here with a mmu_idx for 131 * which we do not know the corresponding user mmu_idx. 132 */ 133 switch (useridx) { 134 case ARMMMUIdx_E10_1: 135 case ARMMMUIdx_E10_1_PAN: 136 useridx = ARMMMUIdx_E10_0; 137 break; 138 case ARMMMUIdx_E20_2: 139 case ARMMMUIdx_E20_2_PAN: 140 useridx = ARMMMUIdx_E20_0; 141 break; 142 default: 143 g_assert_not_reached(); 144 } 145 } 146 return arm_to_core_mmu_idx(useridx); 147 } 148 149 static void set_btype_raw(int val) 150 { 151 tcg_gen_st_i32(tcg_constant_i32(val), tcg_env, 152 offsetof(CPUARMState, btype)); 153 } 154 155 static void set_btype(DisasContext *s, int val) 156 { 157 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */ 158 tcg_debug_assert(val >= 1 && val <= 3); 159 set_btype_raw(val); 160 s->btype = -1; 161 } 162 163 static void reset_btype(DisasContext *s) 164 { 165 if (s->btype != 0) { 166 set_btype_raw(0); 167 s->btype = 0; 168 } 169 } 170 171 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff) 172 { 173 assert(s->pc_save != -1); 174 if (tb_cflags(s->base.tb) & CF_PCREL) { 175 tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff); 176 } else { 177 tcg_gen_movi_i64(dest, s->pc_curr + diff); 178 } 179 } 180 181 void gen_a64_update_pc(DisasContext *s, target_long diff) 182 { 183 gen_pc_plus_diff(s, cpu_pc, diff); 184 s->pc_save = s->pc_curr + diff; 185 } 186 187 /* 188 * Handle Top Byte Ignore (TBI) bits. 189 * 190 * If address tagging is enabled via the TCR TBI bits: 191 * + for EL2 and EL3 there is only one TBI bit, and if it is set 192 * then the address is zero-extended, clearing bits [63:56] 193 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0 194 * and TBI1 controls addresses with bit 55 == 1. 195 * If the appropriate TBI bit is set for the address then 196 * the address is sign-extended from bit 55 into bits [63:56] 197 * 198 * Here We have concatenated TBI{1,0} into tbi. 199 */ 200 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst, 201 TCGv_i64 src, int tbi) 202 { 203 if (tbi == 0) { 204 /* Load unmodified address */ 205 tcg_gen_mov_i64(dst, src); 206 } else if (!regime_has_2_ranges(s->mmu_idx)) { 207 /* Force tag byte to all zero */ 208 tcg_gen_extract_i64(dst, src, 0, 56); 209 } else { 210 /* Sign-extend from bit 55. */ 211 tcg_gen_sextract_i64(dst, src, 0, 56); 212 213 switch (tbi) { 214 case 1: 215 /* tbi0 but !tbi1: only use the extension if positive */ 216 tcg_gen_and_i64(dst, dst, src); 217 break; 218 case 2: 219 /* !tbi0 but tbi1: only use the extension if negative */ 220 tcg_gen_or_i64(dst, dst, src); 221 break; 222 case 3: 223 /* tbi0 and tbi1: always use the extension */ 224 break; 225 default: 226 g_assert_not_reached(); 227 } 228 } 229 } 230 231 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) 232 { 233 /* 234 * If address tagging is enabled for instructions via the TCR TBI bits, 235 * then loading an address into the PC will clear out any tag. 236 */ 237 gen_top_byte_ignore(s, cpu_pc, src, s->tbii); 238 s->pc_save = -1; 239 } 240 241 /* 242 * Handle MTE and/or TBI. 243 * 244 * For TBI, ideally, we would do nothing. Proper behaviour on fault is 245 * for the tag to be present in the FAR_ELx register. But for user-only 246 * mode we do not have a TLB with which to implement this, so we must 247 * remove the top byte now. 248 * 249 * Always return a fresh temporary that we can increment independently 250 * of the write-back address. 251 */ 252 253 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) 254 { 255 TCGv_i64 clean = tcg_temp_new_i64(); 256 #ifdef CONFIG_USER_ONLY 257 gen_top_byte_ignore(s, clean, addr, s->tbid); 258 #else 259 tcg_gen_mov_i64(clean, addr); 260 #endif 261 return clean; 262 } 263 264 /* Insert a zero tag into src, with the result at dst. */ 265 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src) 266 { 267 tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4)); 268 } 269 270 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, 271 MMUAccessType acc, int log2_size) 272 { 273 gen_helper_probe_access(tcg_env, ptr, 274 tcg_constant_i32(acc), 275 tcg_constant_i32(get_mem_index(s)), 276 tcg_constant_i32(1 << log2_size)); 277 } 278 279 /* 280 * For MTE, check a single logical or atomic access. This probes a single 281 * address, the exact one specified. The size and alignment of the access 282 * is not relevant to MTE, per se, but watchpoints do require the size, 283 * and we want to recognize those before making any other changes to state. 284 */ 285 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, 286 bool is_write, bool tag_checked, 287 MemOp memop, bool is_unpriv, 288 int core_idx) 289 { 290 if (tag_checked && s->mte_active[is_unpriv]) { 291 TCGv_i64 ret; 292 int desc = 0; 293 294 desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx); 295 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 296 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 297 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 298 desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop)); 299 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1); 300 301 ret = tcg_temp_new_i64(); 302 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); 303 304 return ret; 305 } 306 return clean_data_tbi(s, addr); 307 } 308 309 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, 310 bool tag_checked, MemOp memop) 311 { 312 return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop, 313 false, get_mem_index(s)); 314 } 315 316 /* 317 * For MTE, check multiple logical sequential accesses. 318 */ 319 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, 320 bool tag_checked, int total_size, MemOp single_mop) 321 { 322 if (tag_checked && s->mte_active[0]) { 323 TCGv_i64 ret; 324 int desc = 0; 325 326 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 327 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 328 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 329 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 330 desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop)); 331 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1); 332 333 ret = tcg_temp_new_i64(); 334 gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr); 335 336 return ret; 337 } 338 return clean_data_tbi(s, addr); 339 } 340 341 /* 342 * Generate the special alignment check that applies to AccType_ATOMIC 343 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be 344 * naturally aligned, but it must not cross a 16-byte boundary. 345 * See AArch64.CheckAlignment(). 346 */ 347 static void check_lse2_align(DisasContext *s, int rn, int imm, 348 bool is_write, MemOp mop) 349 { 350 TCGv_i32 tmp; 351 TCGv_i64 addr; 352 TCGLabel *over_label; 353 MMUAccessType type; 354 int mmu_idx; 355 356 tmp = tcg_temp_new_i32(); 357 tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn)); 358 tcg_gen_addi_i32(tmp, tmp, imm & 15); 359 tcg_gen_andi_i32(tmp, tmp, 15); 360 tcg_gen_addi_i32(tmp, tmp, memop_size(mop)); 361 362 over_label = gen_new_label(); 363 tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label); 364 365 addr = tcg_temp_new_i64(); 366 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm); 367 368 type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD, 369 mmu_idx = get_mem_index(s); 370 gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type), 371 tcg_constant_i32(mmu_idx)); 372 373 gen_set_label(over_label); 374 375 } 376 377 /* Handle the alignment check for AccType_ATOMIC instructions. */ 378 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop) 379 { 380 MemOp size = mop & MO_SIZE; 381 382 if (size == MO_8) { 383 return mop; 384 } 385 386 /* 387 * If size == MO_128, this is a LDXP, and the operation is single-copy 388 * atomic for each doubleword, not the entire quadword; it still must 389 * be quadword aligned. 390 */ 391 if (size == MO_128) { 392 return finalize_memop_atom(s, MO_128 | MO_ALIGN, 393 MO_ATOM_IFALIGN_PAIR); 394 } 395 if (dc_isar_feature(aa64_lse2, s)) { 396 check_lse2_align(s, rn, 0, true, mop); 397 } else { 398 mop |= MO_ALIGN; 399 } 400 return finalize_memop(s, mop); 401 } 402 403 /* Handle the alignment check for AccType_ORDERED instructions. */ 404 static MemOp check_ordered_align(DisasContext *s, int rn, int imm, 405 bool is_write, MemOp mop) 406 { 407 MemOp size = mop & MO_SIZE; 408 409 if (size == MO_8) { 410 return mop; 411 } 412 if (size == MO_128) { 413 return finalize_memop_atom(s, MO_128 | MO_ALIGN, 414 MO_ATOM_IFALIGN_PAIR); 415 } 416 if (!dc_isar_feature(aa64_lse2, s)) { 417 mop |= MO_ALIGN; 418 } else if (!s->naa) { 419 check_lse2_align(s, rn, imm, is_write, mop); 420 } 421 return finalize_memop(s, mop); 422 } 423 424 typedef struct DisasCompare64 { 425 TCGCond cond; 426 TCGv_i64 value; 427 } DisasCompare64; 428 429 static void a64_test_cc(DisasCompare64 *c64, int cc) 430 { 431 DisasCompare c32; 432 433 arm_test_cc(&c32, cc); 434 435 /* 436 * Sign-extend the 32-bit value so that the GE/LT comparisons work 437 * properly. The NE/EQ comparisons are also fine with this choice. 438 */ 439 c64->cond = c32.cond; 440 c64->value = tcg_temp_new_i64(); 441 tcg_gen_ext_i32_i64(c64->value, c32.value); 442 } 443 444 static void gen_rebuild_hflags(DisasContext *s) 445 { 446 gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el)); 447 } 448 449 static void gen_exception_internal(int excp) 450 { 451 assert(excp_is_internal(excp)); 452 gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp)); 453 } 454 455 static void gen_exception_internal_insn(DisasContext *s, int excp) 456 { 457 gen_a64_update_pc(s, 0); 458 gen_exception_internal(excp); 459 s->base.is_jmp = DISAS_NORETURN; 460 } 461 462 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome) 463 { 464 gen_a64_update_pc(s, 0); 465 gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome)); 466 s->base.is_jmp = DISAS_NORETURN; 467 } 468 469 static void gen_step_complete_exception(DisasContext *s) 470 { 471 /* We just completed step of an insn. Move from Active-not-pending 472 * to Active-pending, and then also take the swstep exception. 473 * This corresponds to making the (IMPDEF) choice to prioritize 474 * swstep exceptions over asynchronous exceptions taken to an exception 475 * level where debug is disabled. This choice has the advantage that 476 * we do not need to maintain internal state corresponding to the 477 * ISV/EX syndrome bits between completion of the step and generation 478 * of the exception, and our syndrome information is always correct. 479 */ 480 gen_ss_advance(s); 481 gen_swstep_exception(s, 1, s->is_ldex); 482 s->base.is_jmp = DISAS_NORETURN; 483 } 484 485 static inline bool use_goto_tb(DisasContext *s, uint64_t dest) 486 { 487 if (s->ss_active) { 488 return false; 489 } 490 return translator_use_goto_tb(&s->base, dest); 491 } 492 493 static void gen_goto_tb(DisasContext *s, int n, int64_t diff) 494 { 495 if (use_goto_tb(s, s->pc_curr + diff)) { 496 /* 497 * For pcrel, the pc must always be up-to-date on entry to 498 * the linked TB, so that it can use simple additions for all 499 * further adjustments. For !pcrel, the linked TB is compiled 500 * to know its full virtual address, so we can delay the 501 * update to pc to the unlinked path. A long chain of links 502 * can thus avoid many updates to the PC. 503 */ 504 if (tb_cflags(s->base.tb) & CF_PCREL) { 505 gen_a64_update_pc(s, diff); 506 tcg_gen_goto_tb(n); 507 } else { 508 tcg_gen_goto_tb(n); 509 gen_a64_update_pc(s, diff); 510 } 511 tcg_gen_exit_tb(s->base.tb, n); 512 s->base.is_jmp = DISAS_NORETURN; 513 } else { 514 gen_a64_update_pc(s, diff); 515 if (s->ss_active) { 516 gen_step_complete_exception(s); 517 } else { 518 tcg_gen_lookup_and_goto_ptr(); 519 s->base.is_jmp = DISAS_NORETURN; 520 } 521 } 522 } 523 524 /* 525 * Register access functions 526 * 527 * These functions are used for directly accessing a register in where 528 * changes to the final register value are likely to be made. If you 529 * need to use a register for temporary calculation (e.g. index type 530 * operations) use the read_* form. 531 * 532 * B1.2.1 Register mappings 533 * 534 * In instruction register encoding 31 can refer to ZR (zero register) or 535 * the SP (stack pointer) depending on context. In QEMU's case we map SP 536 * to cpu_X[31] and ZR accesses to a temporary which can be discarded. 537 * This is the point of the _sp forms. 538 */ 539 TCGv_i64 cpu_reg(DisasContext *s, int reg) 540 { 541 if (reg == 31) { 542 TCGv_i64 t = tcg_temp_new_i64(); 543 tcg_gen_movi_i64(t, 0); 544 return t; 545 } else { 546 return cpu_X[reg]; 547 } 548 } 549 550 /* register access for when 31 == SP */ 551 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg) 552 { 553 return cpu_X[reg]; 554 } 555 556 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64 557 * representing the register contents. This TCGv is an auto-freed 558 * temporary so it need not be explicitly freed, and may be modified. 559 */ 560 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf) 561 { 562 TCGv_i64 v = tcg_temp_new_i64(); 563 if (reg != 31) { 564 if (sf) { 565 tcg_gen_mov_i64(v, cpu_X[reg]); 566 } else { 567 tcg_gen_ext32u_i64(v, cpu_X[reg]); 568 } 569 } else { 570 tcg_gen_movi_i64(v, 0); 571 } 572 return v; 573 } 574 575 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf) 576 { 577 TCGv_i64 v = tcg_temp_new_i64(); 578 if (sf) { 579 tcg_gen_mov_i64(v, cpu_X[reg]); 580 } else { 581 tcg_gen_ext32u_i64(v, cpu_X[reg]); 582 } 583 return v; 584 } 585 586 /* Return the offset into CPUARMState of a slice (from 587 * the least significant end) of FP register Qn (ie 588 * Dn, Sn, Hn or Bn). 589 * (Note that this is not the same mapping as for A32; see cpu.h) 590 */ 591 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size) 592 { 593 return vec_reg_offset(s, regno, 0, size); 594 } 595 596 /* Offset of the high half of the 128 bit vector Qn */ 597 static inline int fp_reg_hi_offset(DisasContext *s, int regno) 598 { 599 return vec_reg_offset(s, regno, 1, MO_64); 600 } 601 602 /* Convenience accessors for reading and writing single and double 603 * FP registers. Writing clears the upper parts of the associated 604 * 128 bit vector register, as required by the architecture. 605 * Note that unlike the GP register accessors, the values returned 606 * by the read functions must be manually freed. 607 */ 608 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg) 609 { 610 TCGv_i64 v = tcg_temp_new_i64(); 611 612 tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64)); 613 return v; 614 } 615 616 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg) 617 { 618 TCGv_i32 v = tcg_temp_new_i32(); 619 620 tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32)); 621 return v; 622 } 623 624 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) 625 { 626 TCGv_i32 v = tcg_temp_new_i32(); 627 628 tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16)); 629 return v; 630 } 631 632 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64). 633 * If SVE is not enabled, then there are only 128 bits in the vector. 634 */ 635 static void clear_vec_high(DisasContext *s, bool is_q, int rd) 636 { 637 unsigned ofs = fp_reg_offset(s, rd, MO_64); 638 unsigned vsz = vec_full_reg_size(s); 639 640 /* Nop move, with side effect of clearing the tail. */ 641 tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz); 642 } 643 644 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v) 645 { 646 unsigned ofs = fp_reg_offset(s, reg, MO_64); 647 648 tcg_gen_st_i64(v, tcg_env, ofs); 649 clear_vec_high(s, false, reg); 650 } 651 652 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) 653 { 654 TCGv_i64 tmp = tcg_temp_new_i64(); 655 656 tcg_gen_extu_i32_i64(tmp, v); 657 write_fp_dreg(s, reg, tmp); 658 } 659 660 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ 661 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, 662 GVecGen2Fn *gvec_fn, int vece) 663 { 664 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 665 is_q ? 16 : 8, vec_full_reg_size(s)); 666 } 667 668 /* Expand a 2-operand + immediate AdvSIMD vector operation using 669 * an expander function. 670 */ 671 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn, 672 int64_t imm, GVecGen2iFn *gvec_fn, int vece) 673 { 674 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 675 imm, is_q ? 16 : 8, vec_full_reg_size(s)); 676 } 677 678 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */ 679 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm, 680 GVecGen3Fn *gvec_fn, int vece) 681 { 682 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 683 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s)); 684 } 685 686 /* Expand a 4-operand AdvSIMD vector operation using an expander function. */ 687 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm, 688 int rx, GVecGen4Fn *gvec_fn, int vece) 689 { 690 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 691 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx), 692 is_q ? 16 : 8, vec_full_reg_size(s)); 693 } 694 695 /* Expand a 2-operand operation using an out-of-line helper. */ 696 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd, 697 int rn, int data, gen_helper_gvec_2 *fn) 698 { 699 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 700 vec_full_reg_offset(s, rn), 701 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 702 } 703 704 /* Expand a 3-operand operation using an out-of-line helper. */ 705 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, 706 int rn, int rm, int data, gen_helper_gvec_3 *fn) 707 { 708 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 709 vec_full_reg_offset(s, rn), 710 vec_full_reg_offset(s, rm), 711 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 712 } 713 714 /* Expand a 3-operand + fpstatus pointer + simd data value operation using 715 * an out-of-line helper. 716 */ 717 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, 718 int rm, bool is_fp16, int data, 719 gen_helper_gvec_3_ptr *fn) 720 { 721 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 722 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 723 vec_full_reg_offset(s, rn), 724 vec_full_reg_offset(s, rm), fpst, 725 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 726 } 727 728 /* Expand a 3-operand + qc + operation using an out-of-line helper. */ 729 static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn, 730 int rm, gen_helper_gvec_3_ptr *fn) 731 { 732 TCGv_ptr qc_ptr = tcg_temp_new_ptr(); 733 734 tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); 735 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 736 vec_full_reg_offset(s, rn), 737 vec_full_reg_offset(s, rm), qc_ptr, 738 is_q ? 16 : 8, vec_full_reg_size(s), 0, fn); 739 } 740 741 /* Expand a 4-operand operation using an out-of-line helper. */ 742 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn, 743 int rm, int ra, int data, gen_helper_gvec_4 *fn) 744 { 745 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 746 vec_full_reg_offset(s, rn), 747 vec_full_reg_offset(s, rm), 748 vec_full_reg_offset(s, ra), 749 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 750 } 751 752 /* 753 * Expand a 4-operand + fpstatus pointer + simd data value operation using 754 * an out-of-line helper. 755 */ 756 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, 757 int rm, int ra, bool is_fp16, int data, 758 gen_helper_gvec_4_ptr *fn) 759 { 760 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 761 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 762 vec_full_reg_offset(s, rn), 763 vec_full_reg_offset(s, rm), 764 vec_full_reg_offset(s, ra), fpst, 765 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 766 } 767 768 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier 769 * than the 32 bit equivalent. 770 */ 771 static inline void gen_set_NZ64(TCGv_i64 result) 772 { 773 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result); 774 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF); 775 } 776 777 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */ 778 static inline void gen_logic_CC(int sf, TCGv_i64 result) 779 { 780 if (sf) { 781 gen_set_NZ64(result); 782 } else { 783 tcg_gen_extrl_i64_i32(cpu_ZF, result); 784 tcg_gen_mov_i32(cpu_NF, cpu_ZF); 785 } 786 tcg_gen_movi_i32(cpu_CF, 0); 787 tcg_gen_movi_i32(cpu_VF, 0); 788 } 789 790 /* dest = T0 + T1; compute C, N, V and Z flags */ 791 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 792 { 793 TCGv_i64 result, flag, tmp; 794 result = tcg_temp_new_i64(); 795 flag = tcg_temp_new_i64(); 796 tmp = tcg_temp_new_i64(); 797 798 tcg_gen_movi_i64(tmp, 0); 799 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp); 800 801 tcg_gen_extrl_i64_i32(cpu_CF, flag); 802 803 gen_set_NZ64(result); 804 805 tcg_gen_xor_i64(flag, result, t0); 806 tcg_gen_xor_i64(tmp, t0, t1); 807 tcg_gen_andc_i64(flag, flag, tmp); 808 tcg_gen_extrh_i64_i32(cpu_VF, flag); 809 810 tcg_gen_mov_i64(dest, result); 811 } 812 813 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 814 { 815 TCGv_i32 t0_32 = tcg_temp_new_i32(); 816 TCGv_i32 t1_32 = tcg_temp_new_i32(); 817 TCGv_i32 tmp = tcg_temp_new_i32(); 818 819 tcg_gen_movi_i32(tmp, 0); 820 tcg_gen_extrl_i64_i32(t0_32, t0); 821 tcg_gen_extrl_i64_i32(t1_32, t1); 822 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp); 823 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 824 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 825 tcg_gen_xor_i32(tmp, t0_32, t1_32); 826 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 827 tcg_gen_extu_i32_i64(dest, cpu_NF); 828 } 829 830 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 831 { 832 if (sf) { 833 gen_add64_CC(dest, t0, t1); 834 } else { 835 gen_add32_CC(dest, t0, t1); 836 } 837 } 838 839 /* dest = T0 - T1; compute C, N, V and Z flags */ 840 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 841 { 842 /* 64 bit arithmetic */ 843 TCGv_i64 result, flag, tmp; 844 845 result = tcg_temp_new_i64(); 846 flag = tcg_temp_new_i64(); 847 tcg_gen_sub_i64(result, t0, t1); 848 849 gen_set_NZ64(result); 850 851 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1); 852 tcg_gen_extrl_i64_i32(cpu_CF, flag); 853 854 tcg_gen_xor_i64(flag, result, t0); 855 tmp = tcg_temp_new_i64(); 856 tcg_gen_xor_i64(tmp, t0, t1); 857 tcg_gen_and_i64(flag, flag, tmp); 858 tcg_gen_extrh_i64_i32(cpu_VF, flag); 859 tcg_gen_mov_i64(dest, result); 860 } 861 862 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 863 { 864 /* 32 bit arithmetic */ 865 TCGv_i32 t0_32 = tcg_temp_new_i32(); 866 TCGv_i32 t1_32 = tcg_temp_new_i32(); 867 TCGv_i32 tmp; 868 869 tcg_gen_extrl_i64_i32(t0_32, t0); 870 tcg_gen_extrl_i64_i32(t1_32, t1); 871 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32); 872 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 873 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32); 874 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 875 tmp = tcg_temp_new_i32(); 876 tcg_gen_xor_i32(tmp, t0_32, t1_32); 877 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp); 878 tcg_gen_extu_i32_i64(dest, cpu_NF); 879 } 880 881 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 882 { 883 if (sf) { 884 gen_sub64_CC(dest, t0, t1); 885 } else { 886 gen_sub32_CC(dest, t0, t1); 887 } 888 } 889 890 /* dest = T0 + T1 + CF; do not compute flags. */ 891 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 892 { 893 TCGv_i64 flag = tcg_temp_new_i64(); 894 tcg_gen_extu_i32_i64(flag, cpu_CF); 895 tcg_gen_add_i64(dest, t0, t1); 896 tcg_gen_add_i64(dest, dest, flag); 897 898 if (!sf) { 899 tcg_gen_ext32u_i64(dest, dest); 900 } 901 } 902 903 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */ 904 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 905 { 906 if (sf) { 907 TCGv_i64 result = tcg_temp_new_i64(); 908 TCGv_i64 cf_64 = tcg_temp_new_i64(); 909 TCGv_i64 vf_64 = tcg_temp_new_i64(); 910 TCGv_i64 tmp = tcg_temp_new_i64(); 911 TCGv_i64 zero = tcg_constant_i64(0); 912 913 tcg_gen_extu_i32_i64(cf_64, cpu_CF); 914 tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero); 915 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero); 916 tcg_gen_extrl_i64_i32(cpu_CF, cf_64); 917 gen_set_NZ64(result); 918 919 tcg_gen_xor_i64(vf_64, result, t0); 920 tcg_gen_xor_i64(tmp, t0, t1); 921 tcg_gen_andc_i64(vf_64, vf_64, tmp); 922 tcg_gen_extrh_i64_i32(cpu_VF, vf_64); 923 924 tcg_gen_mov_i64(dest, result); 925 } else { 926 TCGv_i32 t0_32 = tcg_temp_new_i32(); 927 TCGv_i32 t1_32 = tcg_temp_new_i32(); 928 TCGv_i32 tmp = tcg_temp_new_i32(); 929 TCGv_i32 zero = tcg_constant_i32(0); 930 931 tcg_gen_extrl_i64_i32(t0_32, t0); 932 tcg_gen_extrl_i64_i32(t1_32, t1); 933 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero); 934 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero); 935 936 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 937 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 938 tcg_gen_xor_i32(tmp, t0_32, t1_32); 939 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 940 tcg_gen_extu_i32_i64(dest, cpu_NF); 941 } 942 } 943 944 /* 945 * Load/Store generators 946 */ 947 948 /* 949 * Store from GPR register to memory. 950 */ 951 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, 952 TCGv_i64 tcg_addr, MemOp memop, int memidx, 953 bool iss_valid, 954 unsigned int iss_srt, 955 bool iss_sf, bool iss_ar) 956 { 957 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop); 958 959 if (iss_valid) { 960 uint32_t syn; 961 962 syn = syn_data_abort_with_iss(0, 963 (memop & MO_SIZE), 964 false, 965 iss_srt, 966 iss_sf, 967 iss_ar, 968 0, 0, 0, 0, 0, false); 969 disas_set_insn_syndrome(s, syn); 970 } 971 } 972 973 static void do_gpr_st(DisasContext *s, TCGv_i64 source, 974 TCGv_i64 tcg_addr, MemOp memop, 975 bool iss_valid, 976 unsigned int iss_srt, 977 bool iss_sf, bool iss_ar) 978 { 979 do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s), 980 iss_valid, iss_srt, iss_sf, iss_ar); 981 } 982 983 /* 984 * Load from memory to GPR register 985 */ 986 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 987 MemOp memop, bool extend, int memidx, 988 bool iss_valid, unsigned int iss_srt, 989 bool iss_sf, bool iss_ar) 990 { 991 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop); 992 993 if (extend && (memop & MO_SIGN)) { 994 g_assert((memop & MO_SIZE) <= MO_32); 995 tcg_gen_ext32u_i64(dest, dest); 996 } 997 998 if (iss_valid) { 999 uint32_t syn; 1000 1001 syn = syn_data_abort_with_iss(0, 1002 (memop & MO_SIZE), 1003 (memop & MO_SIGN) != 0, 1004 iss_srt, 1005 iss_sf, 1006 iss_ar, 1007 0, 0, 0, 0, 0, false); 1008 disas_set_insn_syndrome(s, syn); 1009 } 1010 } 1011 1012 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 1013 MemOp memop, bool extend, 1014 bool iss_valid, unsigned int iss_srt, 1015 bool iss_sf, bool iss_ar) 1016 { 1017 do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s), 1018 iss_valid, iss_srt, iss_sf, iss_ar); 1019 } 1020 1021 /* 1022 * Store from FP register to memory 1023 */ 1024 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop) 1025 { 1026 /* This writes the bottom N bits of a 128 bit wide vector to memory */ 1027 TCGv_i64 tmplo = tcg_temp_new_i64(); 1028 1029 tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64)); 1030 1031 if ((mop & MO_SIZE) < MO_128) { 1032 tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1033 } else { 1034 TCGv_i64 tmphi = tcg_temp_new_i64(); 1035 TCGv_i128 t16 = tcg_temp_new_i128(); 1036 1037 tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx)); 1038 tcg_gen_concat_i64_i128(t16, tmplo, tmphi); 1039 1040 tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop); 1041 } 1042 } 1043 1044 /* 1045 * Load from memory to FP register 1046 */ 1047 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop) 1048 { 1049 /* This always zero-extends and writes to a full 128 bit wide vector */ 1050 TCGv_i64 tmplo = tcg_temp_new_i64(); 1051 TCGv_i64 tmphi = NULL; 1052 1053 if ((mop & MO_SIZE) < MO_128) { 1054 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1055 } else { 1056 TCGv_i128 t16 = tcg_temp_new_i128(); 1057 1058 tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop); 1059 1060 tmphi = tcg_temp_new_i64(); 1061 tcg_gen_extr_i128_i64(tmplo, tmphi, t16); 1062 } 1063 1064 tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64)); 1065 1066 if (tmphi) { 1067 tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx)); 1068 } 1069 clear_vec_high(s, tmphi != NULL, destidx); 1070 } 1071 1072 /* 1073 * Vector load/store helpers. 1074 * 1075 * The principal difference between this and a FP load is that we don't 1076 * zero extend as we are filling a partial chunk of the vector register. 1077 * These functions don't support 128 bit loads/stores, which would be 1078 * normal load/store operations. 1079 * 1080 * The _i32 versions are useful when operating on 32 bit quantities 1081 * (eg for floating point single or using Neon helper functions). 1082 */ 1083 1084 /* Get value of an element within a vector register */ 1085 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx, 1086 int element, MemOp memop) 1087 { 1088 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1089 switch ((unsigned)memop) { 1090 case MO_8: 1091 tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off); 1092 break; 1093 case MO_16: 1094 tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off); 1095 break; 1096 case MO_32: 1097 tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off); 1098 break; 1099 case MO_8|MO_SIGN: 1100 tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off); 1101 break; 1102 case MO_16|MO_SIGN: 1103 tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off); 1104 break; 1105 case MO_32|MO_SIGN: 1106 tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off); 1107 break; 1108 case MO_64: 1109 case MO_64|MO_SIGN: 1110 tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off); 1111 break; 1112 default: 1113 g_assert_not_reached(); 1114 } 1115 } 1116 1117 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx, 1118 int element, MemOp memop) 1119 { 1120 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1121 switch (memop) { 1122 case MO_8: 1123 tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off); 1124 break; 1125 case MO_16: 1126 tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off); 1127 break; 1128 case MO_8|MO_SIGN: 1129 tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off); 1130 break; 1131 case MO_16|MO_SIGN: 1132 tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off); 1133 break; 1134 case MO_32: 1135 case MO_32|MO_SIGN: 1136 tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off); 1137 break; 1138 default: 1139 g_assert_not_reached(); 1140 } 1141 } 1142 1143 /* Set value of an element within a vector register */ 1144 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx, 1145 int element, MemOp memop) 1146 { 1147 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1148 switch (memop) { 1149 case MO_8: 1150 tcg_gen_st8_i64(tcg_src, tcg_env, vect_off); 1151 break; 1152 case MO_16: 1153 tcg_gen_st16_i64(tcg_src, tcg_env, vect_off); 1154 break; 1155 case MO_32: 1156 tcg_gen_st32_i64(tcg_src, tcg_env, vect_off); 1157 break; 1158 case MO_64: 1159 tcg_gen_st_i64(tcg_src, tcg_env, vect_off); 1160 break; 1161 default: 1162 g_assert_not_reached(); 1163 } 1164 } 1165 1166 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, 1167 int destidx, int element, MemOp memop) 1168 { 1169 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1170 switch (memop) { 1171 case MO_8: 1172 tcg_gen_st8_i32(tcg_src, tcg_env, vect_off); 1173 break; 1174 case MO_16: 1175 tcg_gen_st16_i32(tcg_src, tcg_env, vect_off); 1176 break; 1177 case MO_32: 1178 tcg_gen_st_i32(tcg_src, tcg_env, vect_off); 1179 break; 1180 default: 1181 g_assert_not_reached(); 1182 } 1183 } 1184 1185 /* Store from vector register to memory */ 1186 static void do_vec_st(DisasContext *s, int srcidx, int element, 1187 TCGv_i64 tcg_addr, MemOp mop) 1188 { 1189 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1190 1191 read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE); 1192 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1193 } 1194 1195 /* Load from memory to vector register */ 1196 static void do_vec_ld(DisasContext *s, int destidx, int element, 1197 TCGv_i64 tcg_addr, MemOp mop) 1198 { 1199 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1200 1201 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1202 write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE); 1203 } 1204 1205 /* Check that FP/Neon access is enabled. If it is, return 1206 * true. If not, emit code to generate an appropriate exception, 1207 * and return false; the caller should not emit any code for 1208 * the instruction. Note that this check must happen after all 1209 * unallocated-encoding checks (otherwise the syndrome information 1210 * for the resulting exception will be incorrect). 1211 */ 1212 static bool fp_access_check_only(DisasContext *s) 1213 { 1214 if (s->fp_excp_el) { 1215 assert(!s->fp_access_checked); 1216 s->fp_access_checked = true; 1217 1218 gen_exception_insn_el(s, 0, EXCP_UDEF, 1219 syn_fp_access_trap(1, 0xe, false, 0), 1220 s->fp_excp_el); 1221 return false; 1222 } 1223 s->fp_access_checked = true; 1224 return true; 1225 } 1226 1227 static bool fp_access_check(DisasContext *s) 1228 { 1229 if (!fp_access_check_only(s)) { 1230 return false; 1231 } 1232 if (s->sme_trap_nonstreaming && s->is_nonstreaming) { 1233 gen_exception_insn(s, 0, EXCP_UDEF, 1234 syn_smetrap(SME_ET_Streaming, false)); 1235 return false; 1236 } 1237 return true; 1238 } 1239 1240 /* 1241 * Check that SVE access is enabled. If it is, return true. 1242 * If not, emit code to generate an appropriate exception and return false. 1243 * This function corresponds to CheckSVEEnabled(). 1244 */ 1245 bool sve_access_check(DisasContext *s) 1246 { 1247 if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) { 1248 assert(dc_isar_feature(aa64_sme, s)); 1249 if (!sme_sm_enabled_check(s)) { 1250 goto fail_exit; 1251 } 1252 } else if (s->sve_excp_el) { 1253 gen_exception_insn_el(s, 0, EXCP_UDEF, 1254 syn_sve_access_trap(), s->sve_excp_el); 1255 goto fail_exit; 1256 } 1257 s->sve_access_checked = true; 1258 return fp_access_check(s); 1259 1260 fail_exit: 1261 /* Assert that we only raise one exception per instruction. */ 1262 assert(!s->sve_access_checked); 1263 s->sve_access_checked = true; 1264 return false; 1265 } 1266 1267 /* 1268 * Check that SME access is enabled, raise an exception if not. 1269 * Note that this function corresponds to CheckSMEAccess and is 1270 * only used directly for cpregs. 1271 */ 1272 static bool sme_access_check(DisasContext *s) 1273 { 1274 if (s->sme_excp_el) { 1275 gen_exception_insn_el(s, 0, EXCP_UDEF, 1276 syn_smetrap(SME_ET_AccessTrap, false), 1277 s->sme_excp_el); 1278 return false; 1279 } 1280 return true; 1281 } 1282 1283 /* This function corresponds to CheckSMEEnabled. */ 1284 bool sme_enabled_check(DisasContext *s) 1285 { 1286 /* 1287 * Note that unlike sve_excp_el, we have not constrained sme_excp_el 1288 * to be zero when fp_excp_el has priority. This is because we need 1289 * sme_excp_el by itself for cpregs access checks. 1290 */ 1291 if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) { 1292 s->fp_access_checked = true; 1293 return sme_access_check(s); 1294 } 1295 return fp_access_check_only(s); 1296 } 1297 1298 /* Common subroutine for CheckSMEAnd*Enabled. */ 1299 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req) 1300 { 1301 if (!sme_enabled_check(s)) { 1302 return false; 1303 } 1304 if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) { 1305 gen_exception_insn(s, 0, EXCP_UDEF, 1306 syn_smetrap(SME_ET_NotStreaming, false)); 1307 return false; 1308 } 1309 if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) { 1310 gen_exception_insn(s, 0, EXCP_UDEF, 1311 syn_smetrap(SME_ET_InactiveZA, false)); 1312 return false; 1313 } 1314 return true; 1315 } 1316 1317 /* 1318 * This utility function is for doing register extension with an 1319 * optional shift. You will likely want to pass a temporary for the 1320 * destination register. See DecodeRegExtend() in the ARM ARM. 1321 */ 1322 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in, 1323 int option, unsigned int shift) 1324 { 1325 int extsize = extract32(option, 0, 2); 1326 bool is_signed = extract32(option, 2, 1); 1327 1328 tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0)); 1329 tcg_gen_shli_i64(tcg_out, tcg_out, shift); 1330 } 1331 1332 static inline void gen_check_sp_alignment(DisasContext *s) 1333 { 1334 /* The AArch64 architecture mandates that (if enabled via PSTATE 1335 * or SCTLR bits) there is a check that SP is 16-aligned on every 1336 * SP-relative load or store (with an exception generated if it is not). 1337 * In line with general QEMU practice regarding misaligned accesses, 1338 * we omit these checks for the sake of guest program performance. 1339 * This function is provided as a hook so we can more easily add these 1340 * checks in future (possibly as a "favour catching guest program bugs 1341 * over speed" user selectable option). 1342 */ 1343 } 1344 1345 /* 1346 * This provides a simple table based table lookup decoder. It is 1347 * intended to be used when the relevant bits for decode are too 1348 * awkwardly placed and switch/if based logic would be confusing and 1349 * deeply nested. Since it's a linear search through the table, tables 1350 * should be kept small. 1351 * 1352 * It returns the first handler where insn & mask == pattern, or 1353 * NULL if there is no match. 1354 * The table is terminated by an empty mask (i.e. 0) 1355 */ 1356 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table, 1357 uint32_t insn) 1358 { 1359 const AArch64DecodeTable *tptr = table; 1360 1361 while (tptr->mask) { 1362 if ((insn & tptr->mask) == tptr->pattern) { 1363 return tptr->disas_fn; 1364 } 1365 tptr++; 1366 } 1367 return NULL; 1368 } 1369 1370 /* 1371 * The instruction disassembly implemented here matches 1372 * the instruction encoding classifications in chapter C4 1373 * of the ARM Architecture Reference Manual (DDI0487B_a); 1374 * classification names and decode diagrams here should generally 1375 * match up with those in the manual. 1376 */ 1377 1378 static bool trans_B(DisasContext *s, arg_i *a) 1379 { 1380 reset_btype(s); 1381 gen_goto_tb(s, 0, a->imm); 1382 return true; 1383 } 1384 1385 static bool trans_BL(DisasContext *s, arg_i *a) 1386 { 1387 gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s)); 1388 reset_btype(s); 1389 gen_goto_tb(s, 0, a->imm); 1390 return true; 1391 } 1392 1393 1394 static bool trans_CBZ(DisasContext *s, arg_cbz *a) 1395 { 1396 DisasLabel match; 1397 TCGv_i64 tcg_cmp; 1398 1399 tcg_cmp = read_cpu_reg(s, a->rt, a->sf); 1400 reset_btype(s); 1401 1402 match = gen_disas_label(s); 1403 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1404 tcg_cmp, 0, match.label); 1405 gen_goto_tb(s, 0, 4); 1406 set_disas_label(s, match); 1407 gen_goto_tb(s, 1, a->imm); 1408 return true; 1409 } 1410 1411 static bool trans_TBZ(DisasContext *s, arg_tbz *a) 1412 { 1413 DisasLabel match; 1414 TCGv_i64 tcg_cmp; 1415 1416 tcg_cmp = tcg_temp_new_i64(); 1417 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos); 1418 1419 reset_btype(s); 1420 1421 match = gen_disas_label(s); 1422 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1423 tcg_cmp, 0, match.label); 1424 gen_goto_tb(s, 0, 4); 1425 set_disas_label(s, match); 1426 gen_goto_tb(s, 1, a->imm); 1427 return true; 1428 } 1429 1430 static bool trans_B_cond(DisasContext *s, arg_B_cond *a) 1431 { 1432 /* BC.cond is only present with FEAT_HBC */ 1433 if (a->c && !dc_isar_feature(aa64_hbc, s)) { 1434 return false; 1435 } 1436 reset_btype(s); 1437 if (a->cond < 0x0e) { 1438 /* genuinely conditional branches */ 1439 DisasLabel match = gen_disas_label(s); 1440 arm_gen_test_cc(a->cond, match.label); 1441 gen_goto_tb(s, 0, 4); 1442 set_disas_label(s, match); 1443 gen_goto_tb(s, 1, a->imm); 1444 } else { 1445 /* 0xe and 0xf are both "always" conditions */ 1446 gen_goto_tb(s, 0, a->imm); 1447 } 1448 return true; 1449 } 1450 1451 static void set_btype_for_br(DisasContext *s, int rn) 1452 { 1453 if (dc_isar_feature(aa64_bti, s)) { 1454 /* BR to {x16,x17} or !guard -> 1, else 3. */ 1455 set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3); 1456 } 1457 } 1458 1459 static void set_btype_for_blr(DisasContext *s) 1460 { 1461 if (dc_isar_feature(aa64_bti, s)) { 1462 /* BLR sets BTYPE to 2, regardless of source guarded page. */ 1463 set_btype(s, 2); 1464 } 1465 } 1466 1467 static bool trans_BR(DisasContext *s, arg_r *a) 1468 { 1469 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1470 set_btype_for_br(s, a->rn); 1471 s->base.is_jmp = DISAS_JUMP; 1472 return true; 1473 } 1474 1475 static bool trans_BLR(DisasContext *s, arg_r *a) 1476 { 1477 TCGv_i64 dst = cpu_reg(s, a->rn); 1478 TCGv_i64 lr = cpu_reg(s, 30); 1479 if (dst == lr) { 1480 TCGv_i64 tmp = tcg_temp_new_i64(); 1481 tcg_gen_mov_i64(tmp, dst); 1482 dst = tmp; 1483 } 1484 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1485 gen_a64_set_pc(s, dst); 1486 set_btype_for_blr(s); 1487 s->base.is_jmp = DISAS_JUMP; 1488 return true; 1489 } 1490 1491 static bool trans_RET(DisasContext *s, arg_r *a) 1492 { 1493 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1494 s->base.is_jmp = DISAS_JUMP; 1495 return true; 1496 } 1497 1498 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst, 1499 TCGv_i64 modifier, bool use_key_a) 1500 { 1501 TCGv_i64 truedst; 1502 /* 1503 * Return the branch target for a BRAA/RETA/etc, which is either 1504 * just the destination dst, or that value with the pauth check 1505 * done and the code removed from the high bits. 1506 */ 1507 if (!s->pauth_active) { 1508 return dst; 1509 } 1510 1511 truedst = tcg_temp_new_i64(); 1512 if (use_key_a) { 1513 gen_helper_autia_combined(truedst, tcg_env, dst, modifier); 1514 } else { 1515 gen_helper_autib_combined(truedst, tcg_env, dst, modifier); 1516 } 1517 return truedst; 1518 } 1519 1520 static bool trans_BRAZ(DisasContext *s, arg_braz *a) 1521 { 1522 TCGv_i64 dst; 1523 1524 if (!dc_isar_feature(aa64_pauth, s)) { 1525 return false; 1526 } 1527 1528 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1529 gen_a64_set_pc(s, dst); 1530 set_btype_for_br(s, a->rn); 1531 s->base.is_jmp = DISAS_JUMP; 1532 return true; 1533 } 1534 1535 static bool trans_BLRAZ(DisasContext *s, arg_braz *a) 1536 { 1537 TCGv_i64 dst, lr; 1538 1539 if (!dc_isar_feature(aa64_pauth, s)) { 1540 return false; 1541 } 1542 1543 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1544 lr = cpu_reg(s, 30); 1545 if (dst == lr) { 1546 TCGv_i64 tmp = tcg_temp_new_i64(); 1547 tcg_gen_mov_i64(tmp, dst); 1548 dst = tmp; 1549 } 1550 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1551 gen_a64_set_pc(s, dst); 1552 set_btype_for_blr(s); 1553 s->base.is_jmp = DISAS_JUMP; 1554 return true; 1555 } 1556 1557 static bool trans_RETA(DisasContext *s, arg_reta *a) 1558 { 1559 TCGv_i64 dst; 1560 1561 dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m); 1562 gen_a64_set_pc(s, dst); 1563 s->base.is_jmp = DISAS_JUMP; 1564 return true; 1565 } 1566 1567 static bool trans_BRA(DisasContext *s, arg_bra *a) 1568 { 1569 TCGv_i64 dst; 1570 1571 if (!dc_isar_feature(aa64_pauth, s)) { 1572 return false; 1573 } 1574 dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m); 1575 gen_a64_set_pc(s, dst); 1576 set_btype_for_br(s, a->rn); 1577 s->base.is_jmp = DISAS_JUMP; 1578 return true; 1579 } 1580 1581 static bool trans_BLRA(DisasContext *s, arg_bra *a) 1582 { 1583 TCGv_i64 dst, lr; 1584 1585 if (!dc_isar_feature(aa64_pauth, s)) { 1586 return false; 1587 } 1588 dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m); 1589 lr = cpu_reg(s, 30); 1590 if (dst == lr) { 1591 TCGv_i64 tmp = tcg_temp_new_i64(); 1592 tcg_gen_mov_i64(tmp, dst); 1593 dst = tmp; 1594 } 1595 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1596 gen_a64_set_pc(s, dst); 1597 set_btype_for_blr(s); 1598 s->base.is_jmp = DISAS_JUMP; 1599 return true; 1600 } 1601 1602 static bool trans_ERET(DisasContext *s, arg_ERET *a) 1603 { 1604 TCGv_i64 dst; 1605 1606 if (s->current_el == 0) { 1607 return false; 1608 } 1609 if (s->trap_eret) { 1610 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2); 1611 return true; 1612 } 1613 dst = tcg_temp_new_i64(); 1614 tcg_gen_ld_i64(dst, tcg_env, 1615 offsetof(CPUARMState, elr_el[s->current_el])); 1616 1617 translator_io_start(&s->base); 1618 1619 gen_helper_exception_return(tcg_env, dst); 1620 /* Must exit loop to check un-masked IRQs */ 1621 s->base.is_jmp = DISAS_EXIT; 1622 return true; 1623 } 1624 1625 static bool trans_ERETA(DisasContext *s, arg_reta *a) 1626 { 1627 TCGv_i64 dst; 1628 1629 if (!dc_isar_feature(aa64_pauth, s)) { 1630 return false; 1631 } 1632 if (s->current_el == 0) { 1633 return false; 1634 } 1635 /* The FGT trap takes precedence over an auth trap. */ 1636 if (s->trap_eret) { 1637 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2); 1638 return true; 1639 } 1640 dst = tcg_temp_new_i64(); 1641 tcg_gen_ld_i64(dst, tcg_env, 1642 offsetof(CPUARMState, elr_el[s->current_el])); 1643 1644 dst = auth_branch_target(s, dst, cpu_X[31], !a->m); 1645 1646 translator_io_start(&s->base); 1647 1648 gen_helper_exception_return(tcg_env, dst); 1649 /* Must exit loop to check un-masked IRQs */ 1650 s->base.is_jmp = DISAS_EXIT; 1651 return true; 1652 } 1653 1654 static bool trans_NOP(DisasContext *s, arg_NOP *a) 1655 { 1656 return true; 1657 } 1658 1659 static bool trans_YIELD(DisasContext *s, arg_YIELD *a) 1660 { 1661 /* 1662 * When running in MTTCG we don't generate jumps to the yield and 1663 * WFE helpers as it won't affect the scheduling of other vCPUs. 1664 * If we wanted to more completely model WFE/SEV so we don't busy 1665 * spin unnecessarily we would need to do something more involved. 1666 */ 1667 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1668 s->base.is_jmp = DISAS_YIELD; 1669 } 1670 return true; 1671 } 1672 1673 static bool trans_WFI(DisasContext *s, arg_WFI *a) 1674 { 1675 s->base.is_jmp = DISAS_WFI; 1676 return true; 1677 } 1678 1679 static bool trans_WFE(DisasContext *s, arg_WFI *a) 1680 { 1681 /* 1682 * When running in MTTCG we don't generate jumps to the yield and 1683 * WFE helpers as it won't affect the scheduling of other vCPUs. 1684 * If we wanted to more completely model WFE/SEV so we don't busy 1685 * spin unnecessarily we would need to do something more involved. 1686 */ 1687 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1688 s->base.is_jmp = DISAS_WFE; 1689 } 1690 return true; 1691 } 1692 1693 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) 1694 { 1695 if (s->pauth_active) { 1696 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]); 1697 } 1698 return true; 1699 } 1700 1701 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) 1702 { 1703 if (s->pauth_active) { 1704 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1705 } 1706 return true; 1707 } 1708 1709 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) 1710 { 1711 if (s->pauth_active) { 1712 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1713 } 1714 return true; 1715 } 1716 1717 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) 1718 { 1719 if (s->pauth_active) { 1720 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1721 } 1722 return true; 1723 } 1724 1725 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) 1726 { 1727 if (s->pauth_active) { 1728 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1729 } 1730 return true; 1731 } 1732 1733 static bool trans_ESB(DisasContext *s, arg_ESB *a) 1734 { 1735 /* Without RAS, we must implement this as NOP. */ 1736 if (dc_isar_feature(aa64_ras, s)) { 1737 /* 1738 * QEMU does not have a source of physical SErrors, 1739 * so we are only concerned with virtual SErrors. 1740 * The pseudocode in the ARM for this case is 1741 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then 1742 * AArch64.vESBOperation(); 1743 * Most of the condition can be evaluated at translation time. 1744 * Test for EL2 present, and defer test for SEL2 to runtime. 1745 */ 1746 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { 1747 gen_helper_vesb(tcg_env); 1748 } 1749 } 1750 return true; 1751 } 1752 1753 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) 1754 { 1755 if (s->pauth_active) { 1756 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1757 } 1758 return true; 1759 } 1760 1761 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) 1762 { 1763 if (s->pauth_active) { 1764 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1765 } 1766 return true; 1767 } 1768 1769 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) 1770 { 1771 if (s->pauth_active) { 1772 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1773 } 1774 return true; 1775 } 1776 1777 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) 1778 { 1779 if (s->pauth_active) { 1780 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1781 } 1782 return true; 1783 } 1784 1785 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) 1786 { 1787 if (s->pauth_active) { 1788 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1789 } 1790 return true; 1791 } 1792 1793 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) 1794 { 1795 if (s->pauth_active) { 1796 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1797 } 1798 return true; 1799 } 1800 1801 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) 1802 { 1803 if (s->pauth_active) { 1804 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1805 } 1806 return true; 1807 } 1808 1809 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) 1810 { 1811 if (s->pauth_active) { 1812 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1813 } 1814 return true; 1815 } 1816 1817 static bool trans_CLREX(DisasContext *s, arg_CLREX *a) 1818 { 1819 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 1820 return true; 1821 } 1822 1823 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) 1824 { 1825 /* We handle DSB and DMB the same way */ 1826 TCGBar bar; 1827 1828 switch (a->types) { 1829 case 1: /* MBReqTypes_Reads */ 1830 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; 1831 break; 1832 case 2: /* MBReqTypes_Writes */ 1833 bar = TCG_BAR_SC | TCG_MO_ST_ST; 1834 break; 1835 default: /* MBReqTypes_All */ 1836 bar = TCG_BAR_SC | TCG_MO_ALL; 1837 break; 1838 } 1839 tcg_gen_mb(bar); 1840 return true; 1841 } 1842 1843 static bool trans_ISB(DisasContext *s, arg_ISB *a) 1844 { 1845 /* 1846 * We need to break the TB after this insn to execute 1847 * self-modifying code correctly and also to take 1848 * any pending interrupts immediately. 1849 */ 1850 reset_btype(s); 1851 gen_goto_tb(s, 0, 4); 1852 return true; 1853 } 1854 1855 static bool trans_SB(DisasContext *s, arg_SB *a) 1856 { 1857 if (!dc_isar_feature(aa64_sb, s)) { 1858 return false; 1859 } 1860 /* 1861 * TODO: There is no speculation barrier opcode for TCG; 1862 * MB and end the TB instead. 1863 */ 1864 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); 1865 gen_goto_tb(s, 0, 4); 1866 return true; 1867 } 1868 1869 static bool trans_CFINV(DisasContext *s, arg_CFINV *a) 1870 { 1871 if (!dc_isar_feature(aa64_condm_4, s)) { 1872 return false; 1873 } 1874 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); 1875 return true; 1876 } 1877 1878 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) 1879 { 1880 TCGv_i32 z; 1881 1882 if (!dc_isar_feature(aa64_condm_5, s)) { 1883 return false; 1884 } 1885 1886 z = tcg_temp_new_i32(); 1887 1888 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); 1889 1890 /* 1891 * (!C & !Z) << 31 1892 * (!(C | Z)) << 31 1893 * ~((C | Z) << 31) 1894 * ~-(C | Z) 1895 * (C | Z) - 1 1896 */ 1897 tcg_gen_or_i32(cpu_NF, cpu_CF, z); 1898 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1); 1899 1900 /* !(Z & C) */ 1901 tcg_gen_and_i32(cpu_ZF, z, cpu_CF); 1902 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1); 1903 1904 /* (!C & Z) << 31 -> -(Z & ~C) */ 1905 tcg_gen_andc_i32(cpu_VF, z, cpu_CF); 1906 tcg_gen_neg_i32(cpu_VF, cpu_VF); 1907 1908 /* C | Z */ 1909 tcg_gen_or_i32(cpu_CF, cpu_CF, z); 1910 1911 return true; 1912 } 1913 1914 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) 1915 { 1916 if (!dc_isar_feature(aa64_condm_5, s)) { 1917 return false; 1918 } 1919 1920 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */ 1921 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ 1922 1923 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ 1924 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF); 1925 1926 tcg_gen_movi_i32(cpu_NF, 0); 1927 tcg_gen_movi_i32(cpu_VF, 0); 1928 1929 return true; 1930 } 1931 1932 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) 1933 { 1934 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { 1935 return false; 1936 } 1937 if (a->imm & 1) { 1938 set_pstate_bits(PSTATE_UAO); 1939 } else { 1940 clear_pstate_bits(PSTATE_UAO); 1941 } 1942 gen_rebuild_hflags(s); 1943 s->base.is_jmp = DISAS_TOO_MANY; 1944 return true; 1945 } 1946 1947 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) 1948 { 1949 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { 1950 return false; 1951 } 1952 if (a->imm & 1) { 1953 set_pstate_bits(PSTATE_PAN); 1954 } else { 1955 clear_pstate_bits(PSTATE_PAN); 1956 } 1957 gen_rebuild_hflags(s); 1958 s->base.is_jmp = DISAS_TOO_MANY; 1959 return true; 1960 } 1961 1962 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) 1963 { 1964 if (s->current_el == 0) { 1965 return false; 1966 } 1967 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP)); 1968 s->base.is_jmp = DISAS_TOO_MANY; 1969 return true; 1970 } 1971 1972 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) 1973 { 1974 if (!dc_isar_feature(aa64_ssbs, s)) { 1975 return false; 1976 } 1977 if (a->imm & 1) { 1978 set_pstate_bits(PSTATE_SSBS); 1979 } else { 1980 clear_pstate_bits(PSTATE_SSBS); 1981 } 1982 /* Don't need to rebuild hflags since SSBS is a nop */ 1983 s->base.is_jmp = DISAS_TOO_MANY; 1984 return true; 1985 } 1986 1987 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) 1988 { 1989 if (!dc_isar_feature(aa64_dit, s)) { 1990 return false; 1991 } 1992 if (a->imm & 1) { 1993 set_pstate_bits(PSTATE_DIT); 1994 } else { 1995 clear_pstate_bits(PSTATE_DIT); 1996 } 1997 /* There's no need to rebuild hflags because DIT is a nop */ 1998 s->base.is_jmp = DISAS_TOO_MANY; 1999 return true; 2000 } 2001 2002 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a) 2003 { 2004 if (dc_isar_feature(aa64_mte, s)) { 2005 /* Full MTE is enabled -- set the TCO bit as directed. */ 2006 if (a->imm & 1) { 2007 set_pstate_bits(PSTATE_TCO); 2008 } else { 2009 clear_pstate_bits(PSTATE_TCO); 2010 } 2011 gen_rebuild_hflags(s); 2012 /* Many factors, including TCO, go into MTE_ACTIVE. */ 2013 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2014 return true; 2015 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { 2016 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ 2017 return true; 2018 } else { 2019 /* Insn not present */ 2020 return false; 2021 } 2022 } 2023 2024 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) 2025 { 2026 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm)); 2027 s->base.is_jmp = DISAS_TOO_MANY; 2028 return true; 2029 } 2030 2031 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) 2032 { 2033 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm)); 2034 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2035 s->base.is_jmp = DISAS_UPDATE_EXIT; 2036 return true; 2037 } 2038 2039 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) 2040 { 2041 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { 2042 return false; 2043 } 2044 if (sme_access_check(s)) { 2045 int old = s->pstate_sm | (s->pstate_za << 1); 2046 int new = a->imm * 3; 2047 2048 if ((old ^ new) & a->mask) { 2049 /* At least one bit changes. */ 2050 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new), 2051 tcg_constant_i32(a->mask)); 2052 s->base.is_jmp = DISAS_TOO_MANY; 2053 } 2054 } 2055 return true; 2056 } 2057 2058 static void gen_get_nzcv(TCGv_i64 tcg_rt) 2059 { 2060 TCGv_i32 tmp = tcg_temp_new_i32(); 2061 TCGv_i32 nzcv = tcg_temp_new_i32(); 2062 2063 /* build bit 31, N */ 2064 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31)); 2065 /* build bit 30, Z */ 2066 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0); 2067 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1); 2068 /* build bit 29, C */ 2069 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1); 2070 /* build bit 28, V */ 2071 tcg_gen_shri_i32(tmp, cpu_VF, 31); 2072 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); 2073 /* generate result */ 2074 tcg_gen_extu_i32_i64(tcg_rt, nzcv); 2075 } 2076 2077 static void gen_set_nzcv(TCGv_i64 tcg_rt) 2078 { 2079 TCGv_i32 nzcv = tcg_temp_new_i32(); 2080 2081 /* take NZCV from R[t] */ 2082 tcg_gen_extrl_i64_i32(nzcv, tcg_rt); 2083 2084 /* bit 31, N */ 2085 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); 2086 /* bit 30, Z */ 2087 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30)); 2088 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0); 2089 /* bit 29, C */ 2090 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29)); 2091 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29); 2092 /* bit 28, V */ 2093 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); 2094 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); 2095 } 2096 2097 static void gen_sysreg_undef(DisasContext *s, bool isread, 2098 uint8_t op0, uint8_t op1, uint8_t op2, 2099 uint8_t crn, uint8_t crm, uint8_t rt) 2100 { 2101 /* 2102 * Generate code to emit an UNDEF with correct syndrome 2103 * information for a failed system register access. 2104 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases, 2105 * but if FEAT_IDST is implemented then read accesses to registers 2106 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP 2107 * syndrome. 2108 */ 2109 uint32_t syndrome; 2110 2111 if (isread && dc_isar_feature(aa64_ids, s) && 2112 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { 2113 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2114 } else { 2115 syndrome = syn_uncategorized(); 2116 } 2117 gen_exception_insn(s, 0, EXCP_UDEF, syndrome); 2118 } 2119 2120 /* MRS - move from system register 2121 * MSR (register) - move to system register 2122 * SYS 2123 * SYSL 2124 * These are all essentially the same insn in 'read' and 'write' 2125 * versions, with varying op0 fields. 2126 */ 2127 static void handle_sys(DisasContext *s, bool isread, 2128 unsigned int op0, unsigned int op1, unsigned int op2, 2129 unsigned int crn, unsigned int crm, unsigned int rt) 2130 { 2131 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2132 crn, crm, op0, op1, op2); 2133 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 2134 bool need_exit_tb = false; 2135 bool nv_trap_to_el2 = false; 2136 bool nv_redirect_reg = false; 2137 bool skip_fp_access_checks = false; 2138 TCGv_ptr tcg_ri = NULL; 2139 TCGv_i64 tcg_rt; 2140 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2141 2142 if (crn == 11 || crn == 15) { 2143 /* 2144 * Check for TIDCP trap, which must take precedence over 2145 * the UNDEF for "no such register" etc. 2146 */ 2147 switch (s->current_el) { 2148 case 0: 2149 if (dc_isar_feature(aa64_tidcp1, s)) { 2150 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome)); 2151 } 2152 break; 2153 case 1: 2154 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome)); 2155 break; 2156 } 2157 } 2158 2159 if (!ri) { 2160 /* Unknown register; this might be a guest error or a QEMU 2161 * unimplemented feature. 2162 */ 2163 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " 2164 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", 2165 isread ? "read" : "write", op0, op1, crn, crm, op2); 2166 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2167 return; 2168 } 2169 2170 /* Check access permissions */ 2171 if (!cp_access_ok(s->current_el, ri, isread)) { 2172 /* 2173 * FEAT_NV/NV2 handling does not do the usual FP access checks 2174 * for registers only accessible at EL2 (though it *does* do them 2175 * for registers accessible at EL1). 2176 */ 2177 skip_fp_access_checks = true; 2178 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) { 2179 /* 2180 * This is one of the few EL2 registers which should redirect 2181 * to the equivalent EL1 register. We do that after running 2182 * the EL2 register's accessfn. 2183 */ 2184 nv_redirect_reg = true; 2185 } else if (s->nv && arm_cpreg_traps_in_nv(ri)) { 2186 /* 2187 * This register / instruction exists and is an EL2 register, so 2188 * we must trap to EL2 if accessed in nested virtualization EL1 2189 * instead of UNDEFing. We'll do that after the usual access checks. 2190 * (This makes a difference only for a couple of registers like 2191 * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority 2192 * over the trap-to-EL2. Most trapped-by-FEAT_NV registers have 2193 * an accessfn which does nothing when called from EL1, because 2194 * the trap-to-EL3 controls which would apply to that register 2195 * at EL2 don't take priority over the FEAT_NV trap-to-EL2.) 2196 */ 2197 nv_trap_to_el2 = true; 2198 } else { 2199 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2200 return; 2201 } 2202 } 2203 2204 if (ri->accessfn || (ri->fgt && s->fgt_active)) { 2205 /* Emit code to perform further access permissions checks at 2206 * runtime; this may result in an exception. 2207 */ 2208 gen_a64_update_pc(s, 0); 2209 tcg_ri = tcg_temp_new_ptr(); 2210 gen_helper_access_check_cp_reg(tcg_ri, tcg_env, 2211 tcg_constant_i32(key), 2212 tcg_constant_i32(syndrome), 2213 tcg_constant_i32(isread)); 2214 } else if (ri->type & ARM_CP_RAISES_EXC) { 2215 /* 2216 * The readfn or writefn might raise an exception; 2217 * synchronize the CPU state in case it does. 2218 */ 2219 gen_a64_update_pc(s, 0); 2220 } 2221 2222 if (!skip_fp_access_checks) { 2223 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 2224 return; 2225 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 2226 return; 2227 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 2228 return; 2229 } 2230 } 2231 2232 if (nv_trap_to_el2) { 2233 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2234 return; 2235 } 2236 2237 if (nv_redirect_reg) { 2238 /* 2239 * FEAT_NV2 redirection of an EL2 register to an EL1 register. 2240 * Conveniently in all cases the encoding of the EL1 register is 2241 * identical to the EL2 register except that opc1 is 0. 2242 * Get the reginfo for the EL1 register to use for the actual access. 2243 * We don't use the EL1 register's access function, and 2244 * fine-grained-traps on EL1 also do not apply here. 2245 */ 2246 key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2247 crn, crm, op0, 0, op2); 2248 ri = get_arm_cp_reginfo(s->cp_regs, key); 2249 assert(ri); 2250 assert(cp_access_ok(s->current_el, ri, isread)); 2251 /* 2252 * We might not have done an update_pc earlier, so check we don't 2253 * need it. We could support this in future if necessary. 2254 */ 2255 assert(!(ri->type & ARM_CP_RAISES_EXC)); 2256 } 2257 2258 /* Handle special cases first */ 2259 switch (ri->type & ARM_CP_SPECIAL_MASK) { 2260 case 0: 2261 break; 2262 case ARM_CP_NOP: 2263 return; 2264 case ARM_CP_NZCV: 2265 tcg_rt = cpu_reg(s, rt); 2266 if (isread) { 2267 gen_get_nzcv(tcg_rt); 2268 } else { 2269 gen_set_nzcv(tcg_rt); 2270 } 2271 return; 2272 case ARM_CP_CURRENTEL: 2273 { 2274 /* 2275 * Reads as current EL value from pstate, which is 2276 * guaranteed to be constant by the tb flags. 2277 * For nested virt we should report EL2. 2278 */ 2279 int el = s->nv ? 2 : s->current_el; 2280 tcg_rt = cpu_reg(s, rt); 2281 tcg_gen_movi_i64(tcg_rt, el << 2); 2282 return; 2283 } 2284 case ARM_CP_DC_ZVA: 2285 /* Writes clear the aligned block of memory which rt points into. */ 2286 if (s->mte_active[0]) { 2287 int desc = 0; 2288 2289 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 2290 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 2291 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 2292 2293 tcg_rt = tcg_temp_new_i64(); 2294 gen_helper_mte_check_zva(tcg_rt, tcg_env, 2295 tcg_constant_i32(desc), cpu_reg(s, rt)); 2296 } else { 2297 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 2298 } 2299 gen_helper_dc_zva(tcg_env, tcg_rt); 2300 return; 2301 case ARM_CP_DC_GVA: 2302 { 2303 TCGv_i64 clean_addr, tag; 2304 2305 /* 2306 * DC_GVA, like DC_ZVA, requires that we supply the original 2307 * pointer for an invalid page. Probe that address first. 2308 */ 2309 tcg_rt = cpu_reg(s, rt); 2310 clean_addr = clean_data_tbi(s, tcg_rt); 2311 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 2312 2313 if (s->ata[0]) { 2314 /* Extract the tag from the register to match STZGM. */ 2315 tag = tcg_temp_new_i64(); 2316 tcg_gen_shri_i64(tag, tcg_rt, 56); 2317 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2318 } 2319 } 2320 return; 2321 case ARM_CP_DC_GZVA: 2322 { 2323 TCGv_i64 clean_addr, tag; 2324 2325 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 2326 tcg_rt = cpu_reg(s, rt); 2327 clean_addr = clean_data_tbi(s, tcg_rt); 2328 gen_helper_dc_zva(tcg_env, clean_addr); 2329 2330 if (s->ata[0]) { 2331 /* Extract the tag from the register to match STZGM. */ 2332 tag = tcg_temp_new_i64(); 2333 tcg_gen_shri_i64(tag, tcg_rt, 56); 2334 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2335 } 2336 } 2337 return; 2338 default: 2339 g_assert_not_reached(); 2340 } 2341 2342 if (ri->type & ARM_CP_IO) { 2343 /* I/O operations must end the TB here (whether read or write) */ 2344 need_exit_tb = translator_io_start(&s->base); 2345 } 2346 2347 tcg_rt = cpu_reg(s, rt); 2348 2349 if (isread) { 2350 if (ri->type & ARM_CP_CONST) { 2351 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2352 } else if (ri->readfn) { 2353 if (!tcg_ri) { 2354 tcg_ri = gen_lookup_cp_reg(key); 2355 } 2356 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri); 2357 } else { 2358 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset); 2359 } 2360 } else { 2361 if (ri->type & ARM_CP_CONST) { 2362 /* If not forbidden by access permissions, treat as WI */ 2363 return; 2364 } else if (ri->writefn) { 2365 if (!tcg_ri) { 2366 tcg_ri = gen_lookup_cp_reg(key); 2367 } 2368 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt); 2369 } else { 2370 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset); 2371 } 2372 } 2373 2374 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2375 /* 2376 * A write to any coprocessor register that ends a TB 2377 * must rebuild the hflags for the next TB. 2378 */ 2379 gen_rebuild_hflags(s); 2380 /* 2381 * We default to ending the TB on a coprocessor register write, 2382 * but allow this to be suppressed by the register definition 2383 * (usually only necessary to work around guest bugs). 2384 */ 2385 need_exit_tb = true; 2386 } 2387 if (need_exit_tb) { 2388 s->base.is_jmp = DISAS_UPDATE_EXIT; 2389 } 2390 } 2391 2392 static bool trans_SYS(DisasContext *s, arg_SYS *a) 2393 { 2394 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); 2395 return true; 2396 } 2397 2398 static bool trans_SVC(DisasContext *s, arg_i *a) 2399 { 2400 /* 2401 * For SVC, HVC and SMC we advance the single-step state 2402 * machine before taking the exception. This is architecturally 2403 * mandated, to ensure that single-stepping a system call 2404 * instruction works properly. 2405 */ 2406 uint32_t syndrome = syn_aa64_svc(a->imm); 2407 if (s->fgt_svc) { 2408 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2409 return true; 2410 } 2411 gen_ss_advance(s); 2412 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2413 return true; 2414 } 2415 2416 static bool trans_HVC(DisasContext *s, arg_i *a) 2417 { 2418 int target_el = s->current_el == 3 ? 3 : 2; 2419 2420 if (s->current_el == 0) { 2421 unallocated_encoding(s); 2422 return true; 2423 } 2424 /* 2425 * The pre HVC helper handles cases when HVC gets trapped 2426 * as an undefined insn by runtime configuration. 2427 */ 2428 gen_a64_update_pc(s, 0); 2429 gen_helper_pre_hvc(tcg_env); 2430 /* Architecture requires ss advance before we do the actual work */ 2431 gen_ss_advance(s); 2432 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el); 2433 return true; 2434 } 2435 2436 static bool trans_SMC(DisasContext *s, arg_i *a) 2437 { 2438 if (s->current_el == 0) { 2439 unallocated_encoding(s); 2440 return true; 2441 } 2442 gen_a64_update_pc(s, 0); 2443 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm))); 2444 /* Architecture requires ss advance before we do the actual work */ 2445 gen_ss_advance(s); 2446 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); 2447 return true; 2448 } 2449 2450 static bool trans_BRK(DisasContext *s, arg_i *a) 2451 { 2452 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); 2453 return true; 2454 } 2455 2456 static bool trans_HLT(DisasContext *s, arg_i *a) 2457 { 2458 /* 2459 * HLT. This has two purposes. 2460 * Architecturally, it is an external halting debug instruction. 2461 * Since QEMU doesn't implement external debug, we treat this as 2462 * it is required for halting debug disabled: it will UNDEF. 2463 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2464 */ 2465 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { 2466 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2467 } else { 2468 unallocated_encoding(s); 2469 } 2470 return true; 2471 } 2472 2473 /* 2474 * Load/Store exclusive instructions are implemented by remembering 2475 * the value/address loaded, and seeing if these are the same 2476 * when the store is performed. This is not actually the architecturally 2477 * mandated semantics, but it works for typical guest code sequences 2478 * and avoids having to monitor regular stores. 2479 * 2480 * The store exclusive uses the atomic cmpxchg primitives to avoid 2481 * races in multi-threaded linux-user and when MTTCG softmmu is 2482 * enabled. 2483 */ 2484 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, 2485 int size, bool is_pair) 2486 { 2487 int idx = get_mem_index(s); 2488 TCGv_i64 dirty_addr, clean_addr; 2489 MemOp memop = check_atomic_align(s, rn, size + is_pair); 2490 2491 s->is_ldex = true; 2492 dirty_addr = cpu_reg_sp(s, rn); 2493 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); 2494 2495 g_assert(size <= 3); 2496 if (is_pair) { 2497 g_assert(size >= 2); 2498 if (size == 2) { 2499 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2500 if (s->be_data == MO_LE) { 2501 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2502 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2503 } else { 2504 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2505 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2506 } 2507 } else { 2508 TCGv_i128 t16 = tcg_temp_new_i128(); 2509 2510 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); 2511 2512 if (s->be_data == MO_LE) { 2513 tcg_gen_extr_i128_i64(cpu_exclusive_val, 2514 cpu_exclusive_high, t16); 2515 } else { 2516 tcg_gen_extr_i128_i64(cpu_exclusive_high, 2517 cpu_exclusive_val, t16); 2518 } 2519 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2520 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2521 } 2522 } else { 2523 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2524 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2525 } 2526 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); 2527 } 2528 2529 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2530 int rn, int size, int is_pair) 2531 { 2532 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2533 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2534 * [addr] = {Rt}; 2535 * if (is_pair) { 2536 * [addr + datasize] = {Rt2}; 2537 * } 2538 * {Rd} = 0; 2539 * } else { 2540 * {Rd} = 1; 2541 * } 2542 * env->exclusive_addr = -1; 2543 */ 2544 TCGLabel *fail_label = gen_new_label(); 2545 TCGLabel *done_label = gen_new_label(); 2546 TCGv_i64 tmp, clean_addr; 2547 MemOp memop; 2548 2549 /* 2550 * FIXME: We are out of spec here. We have recorded only the address 2551 * from load_exclusive, not the entire range, and we assume that the 2552 * size of the access on both sides match. The architecture allows the 2553 * store to be smaller than the load, so long as the stored bytes are 2554 * within the range recorded by the load. 2555 */ 2556 2557 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */ 2558 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); 2559 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); 2560 2561 /* 2562 * The write, and any associated faults, only happen if the virtual 2563 * and physical addresses pass the exclusive monitor check. These 2564 * faults are exceedingly unlikely, because normally the guest uses 2565 * the exact same address register for the load_exclusive, and we 2566 * would have recognized these faults there. 2567 * 2568 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an 2569 * unaligned 4-byte write within the range of an aligned 8-byte load. 2570 * With LSE2, the store would need to cross a 16-byte boundary when the 2571 * load did not, which would mean the store is outside the range 2572 * recorded for the monitor, which would have failed a corrected monitor 2573 * check above. For now, we assume no size change and retain the 2574 * MO_ALIGN to let tcg know what we checked in the load_exclusive. 2575 * 2576 * It is possible to trigger an MTE fault, by performing the load with 2577 * a virtual address with a valid tag and performing the store with the 2578 * same virtual address and a different invalid tag. 2579 */ 2580 memop = size + is_pair; 2581 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { 2582 memop |= MO_ALIGN; 2583 } 2584 memop = finalize_memop(s, memop); 2585 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2586 2587 tmp = tcg_temp_new_i64(); 2588 if (is_pair) { 2589 if (size == 2) { 2590 if (s->be_data == MO_LE) { 2591 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2592 } else { 2593 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2594 } 2595 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2596 cpu_exclusive_val, tmp, 2597 get_mem_index(s), memop); 2598 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2599 } else { 2600 TCGv_i128 t16 = tcg_temp_new_i128(); 2601 TCGv_i128 c16 = tcg_temp_new_i128(); 2602 TCGv_i64 a, b; 2603 2604 if (s->be_data == MO_LE) { 2605 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2606 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2607 cpu_exclusive_high); 2608 } else { 2609 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 2610 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 2611 cpu_exclusive_val); 2612 } 2613 2614 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 2615 get_mem_index(s), memop); 2616 2617 a = tcg_temp_new_i64(); 2618 b = tcg_temp_new_i64(); 2619 if (s->be_data == MO_LE) { 2620 tcg_gen_extr_i128_i64(a, b, t16); 2621 } else { 2622 tcg_gen_extr_i128_i64(b, a, t16); 2623 } 2624 2625 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 2626 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 2627 tcg_gen_or_i64(tmp, a, b); 2628 2629 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 2630 } 2631 } else { 2632 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 2633 cpu_reg(s, rt), get_mem_index(s), memop); 2634 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2635 } 2636 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 2637 tcg_gen_br(done_label); 2638 2639 gen_set_label(fail_label); 2640 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 2641 gen_set_label(done_label); 2642 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2643 } 2644 2645 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 2646 int rn, int size) 2647 { 2648 TCGv_i64 tcg_rs = cpu_reg(s, rs); 2649 TCGv_i64 tcg_rt = cpu_reg(s, rt); 2650 int memidx = get_mem_index(s); 2651 TCGv_i64 clean_addr; 2652 MemOp memop; 2653 2654 if (rn == 31) { 2655 gen_check_sp_alignment(s); 2656 } 2657 memop = check_atomic_align(s, rn, size); 2658 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2659 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, 2660 memidx, memop); 2661 } 2662 2663 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 2664 int rn, int size) 2665 { 2666 TCGv_i64 s1 = cpu_reg(s, rs); 2667 TCGv_i64 s2 = cpu_reg(s, rs + 1); 2668 TCGv_i64 t1 = cpu_reg(s, rt); 2669 TCGv_i64 t2 = cpu_reg(s, rt + 1); 2670 TCGv_i64 clean_addr; 2671 int memidx = get_mem_index(s); 2672 MemOp memop; 2673 2674 if (rn == 31) { 2675 gen_check_sp_alignment(s); 2676 } 2677 2678 /* This is a single atomic access, despite the "pair". */ 2679 memop = check_atomic_align(s, rn, size + 1); 2680 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2681 2682 if (size == 2) { 2683 TCGv_i64 cmp = tcg_temp_new_i64(); 2684 TCGv_i64 val = tcg_temp_new_i64(); 2685 2686 if (s->be_data == MO_LE) { 2687 tcg_gen_concat32_i64(val, t1, t2); 2688 tcg_gen_concat32_i64(cmp, s1, s2); 2689 } else { 2690 tcg_gen_concat32_i64(val, t2, t1); 2691 tcg_gen_concat32_i64(cmp, s2, s1); 2692 } 2693 2694 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); 2695 2696 if (s->be_data == MO_LE) { 2697 tcg_gen_extr32_i64(s1, s2, cmp); 2698 } else { 2699 tcg_gen_extr32_i64(s2, s1, cmp); 2700 } 2701 } else { 2702 TCGv_i128 cmp = tcg_temp_new_i128(); 2703 TCGv_i128 val = tcg_temp_new_i128(); 2704 2705 if (s->be_data == MO_LE) { 2706 tcg_gen_concat_i64_i128(val, t1, t2); 2707 tcg_gen_concat_i64_i128(cmp, s1, s2); 2708 } else { 2709 tcg_gen_concat_i64_i128(val, t2, t1); 2710 tcg_gen_concat_i64_i128(cmp, s2, s1); 2711 } 2712 2713 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); 2714 2715 if (s->be_data == MO_LE) { 2716 tcg_gen_extr_i128_i64(s1, s2, cmp); 2717 } else { 2718 tcg_gen_extr_i128_i64(s2, s1, cmp); 2719 } 2720 } 2721 } 2722 2723 /* 2724 * Compute the ISS.SF bit for syndrome information if an exception 2725 * is taken on a load or store. This indicates whether the instruction 2726 * is accessing a 32-bit or 64-bit register. This logic is derived 2727 * from the ARMv8 specs for LDR (Shared decode for all encodings). 2728 */ 2729 static bool ldst_iss_sf(int size, bool sign, bool ext) 2730 { 2731 2732 if (sign) { 2733 /* 2734 * Signed loads are 64 bit results if we are not going to 2735 * do a zero-extend from 32 to 64 after the load. 2736 * (For a store, sign and ext are always false.) 2737 */ 2738 return !ext; 2739 } else { 2740 /* Unsigned loads/stores work at the specified size */ 2741 return size == MO_64; 2742 } 2743 } 2744 2745 static bool trans_STXR(DisasContext *s, arg_stxr *a) 2746 { 2747 if (a->rn == 31) { 2748 gen_check_sp_alignment(s); 2749 } 2750 if (a->lasr) { 2751 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2752 } 2753 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); 2754 return true; 2755 } 2756 2757 static bool trans_LDXR(DisasContext *s, arg_stxr *a) 2758 { 2759 if (a->rn == 31) { 2760 gen_check_sp_alignment(s); 2761 } 2762 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); 2763 if (a->lasr) { 2764 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2765 } 2766 return true; 2767 } 2768 2769 static bool trans_STLR(DisasContext *s, arg_stlr *a) 2770 { 2771 TCGv_i64 clean_addr; 2772 MemOp memop; 2773 bool iss_sf = ldst_iss_sf(a->sz, false, false); 2774 2775 /* 2776 * StoreLORelease is the same as Store-Release for QEMU, but 2777 * needs the feature-test. 2778 */ 2779 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 2780 return false; 2781 } 2782 /* Generate ISS for non-exclusive accesses including LASR. */ 2783 if (a->rn == 31) { 2784 gen_check_sp_alignment(s); 2785 } 2786 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2787 memop = check_ordered_align(s, a->rn, 0, true, a->sz); 2788 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 2789 true, a->rn != 31, memop); 2790 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, 2791 iss_sf, a->lasr); 2792 return true; 2793 } 2794 2795 static bool trans_LDAR(DisasContext *s, arg_stlr *a) 2796 { 2797 TCGv_i64 clean_addr; 2798 MemOp memop; 2799 bool iss_sf = ldst_iss_sf(a->sz, false, false); 2800 2801 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 2802 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 2803 return false; 2804 } 2805 /* Generate ISS for non-exclusive accesses including LASR. */ 2806 if (a->rn == 31) { 2807 gen_check_sp_alignment(s); 2808 } 2809 memop = check_ordered_align(s, a->rn, 0, false, a->sz); 2810 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 2811 false, a->rn != 31, memop); 2812 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, 2813 a->rt, iss_sf, a->lasr); 2814 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2815 return true; 2816 } 2817 2818 static bool trans_STXP(DisasContext *s, arg_stxr *a) 2819 { 2820 if (a->rn == 31) { 2821 gen_check_sp_alignment(s); 2822 } 2823 if (a->lasr) { 2824 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2825 } 2826 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); 2827 return true; 2828 } 2829 2830 static bool trans_LDXP(DisasContext *s, arg_stxr *a) 2831 { 2832 if (a->rn == 31) { 2833 gen_check_sp_alignment(s); 2834 } 2835 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); 2836 if (a->lasr) { 2837 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2838 } 2839 return true; 2840 } 2841 2842 static bool trans_CASP(DisasContext *s, arg_CASP *a) 2843 { 2844 if (!dc_isar_feature(aa64_atomics, s)) { 2845 return false; 2846 } 2847 if (((a->rt | a->rs) & 1) != 0) { 2848 return false; 2849 } 2850 2851 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); 2852 return true; 2853 } 2854 2855 static bool trans_CAS(DisasContext *s, arg_CAS *a) 2856 { 2857 if (!dc_isar_feature(aa64_atomics, s)) { 2858 return false; 2859 } 2860 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); 2861 return true; 2862 } 2863 2864 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) 2865 { 2866 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); 2867 TCGv_i64 tcg_rt = cpu_reg(s, a->rt); 2868 TCGv_i64 clean_addr = tcg_temp_new_i64(); 2869 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 2870 2871 gen_pc_plus_diff(s, clean_addr, a->imm); 2872 do_gpr_ld(s, tcg_rt, clean_addr, memop, 2873 false, true, a->rt, iss_sf, false); 2874 return true; 2875 } 2876 2877 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) 2878 { 2879 /* Load register (literal), vector version */ 2880 TCGv_i64 clean_addr; 2881 MemOp memop; 2882 2883 if (!fp_access_check(s)) { 2884 return true; 2885 } 2886 memop = finalize_memop_asimd(s, a->sz); 2887 clean_addr = tcg_temp_new_i64(); 2888 gen_pc_plus_diff(s, clean_addr, a->imm); 2889 do_fp_ld(s, a->rt, clean_addr, memop); 2890 return true; 2891 } 2892 2893 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, 2894 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 2895 uint64_t offset, bool is_store, MemOp mop) 2896 { 2897 if (a->rn == 31) { 2898 gen_check_sp_alignment(s); 2899 } 2900 2901 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 2902 if (!a->p) { 2903 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 2904 } 2905 2906 *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store, 2907 (a->w || a->rn != 31), 2 << a->sz, mop); 2908 } 2909 2910 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, 2911 TCGv_i64 dirty_addr, uint64_t offset) 2912 { 2913 if (a->w) { 2914 if (a->p) { 2915 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 2916 } 2917 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 2918 } 2919 } 2920 2921 static bool trans_STP(DisasContext *s, arg_ldstpair *a) 2922 { 2923 uint64_t offset = a->imm << a->sz; 2924 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 2925 MemOp mop = finalize_memop(s, a->sz); 2926 2927 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 2928 tcg_rt = cpu_reg(s, a->rt); 2929 tcg_rt2 = cpu_reg(s, a->rt2); 2930 /* 2931 * We built mop above for the single logical access -- rebuild it 2932 * now for the paired operation. 2933 * 2934 * With LSE2, non-sign-extending pairs are treated atomically if 2935 * aligned, and if unaligned one of the pair will be completely 2936 * within a 16-byte block and that element will be atomic. 2937 * Otherwise each element is separately atomic. 2938 * In all cases, issue one operation with the correct atomicity. 2939 */ 2940 mop = a->sz + 1; 2941 if (s->align_mem) { 2942 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 2943 } 2944 mop = finalize_memop_pair(s, mop); 2945 if (a->sz == 2) { 2946 TCGv_i64 tmp = tcg_temp_new_i64(); 2947 2948 if (s->be_data == MO_LE) { 2949 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); 2950 } else { 2951 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); 2952 } 2953 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); 2954 } else { 2955 TCGv_i128 tmp = tcg_temp_new_i128(); 2956 2957 if (s->be_data == MO_LE) { 2958 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 2959 } else { 2960 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 2961 } 2962 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 2963 } 2964 op_addr_ldstpair_post(s, a, dirty_addr, offset); 2965 return true; 2966 } 2967 2968 static bool trans_LDP(DisasContext *s, arg_ldstpair *a) 2969 { 2970 uint64_t offset = a->imm << a->sz; 2971 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 2972 MemOp mop = finalize_memop(s, a->sz); 2973 2974 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 2975 tcg_rt = cpu_reg(s, a->rt); 2976 tcg_rt2 = cpu_reg(s, a->rt2); 2977 2978 /* 2979 * We built mop above for the single logical access -- rebuild it 2980 * now for the paired operation. 2981 * 2982 * With LSE2, non-sign-extending pairs are treated atomically if 2983 * aligned, and if unaligned one of the pair will be completely 2984 * within a 16-byte block and that element will be atomic. 2985 * Otherwise each element is separately atomic. 2986 * In all cases, issue one operation with the correct atomicity. 2987 * 2988 * This treats sign-extending loads like zero-extending loads, 2989 * since that reuses the most code below. 2990 */ 2991 mop = a->sz + 1; 2992 if (s->align_mem) { 2993 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 2994 } 2995 mop = finalize_memop_pair(s, mop); 2996 if (a->sz == 2) { 2997 int o2 = s->be_data == MO_LE ? 32 : 0; 2998 int o1 = o2 ^ 32; 2999 3000 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); 3001 if (a->sign) { 3002 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); 3003 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); 3004 } else { 3005 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); 3006 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); 3007 } 3008 } else { 3009 TCGv_i128 tmp = tcg_temp_new_i128(); 3010 3011 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); 3012 if (s->be_data == MO_LE) { 3013 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); 3014 } else { 3015 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); 3016 } 3017 } 3018 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3019 return true; 3020 } 3021 3022 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) 3023 { 3024 uint64_t offset = a->imm << a->sz; 3025 TCGv_i64 clean_addr, dirty_addr; 3026 MemOp mop; 3027 3028 if (!fp_access_check(s)) { 3029 return true; 3030 } 3031 3032 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3033 mop = finalize_memop_asimd(s, a->sz); 3034 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3035 do_fp_st(s, a->rt, clean_addr, mop); 3036 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3037 do_fp_st(s, a->rt2, clean_addr, mop); 3038 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3039 return true; 3040 } 3041 3042 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) 3043 { 3044 uint64_t offset = a->imm << a->sz; 3045 TCGv_i64 clean_addr, dirty_addr; 3046 MemOp mop; 3047 3048 if (!fp_access_check(s)) { 3049 return true; 3050 } 3051 3052 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3053 mop = finalize_memop_asimd(s, a->sz); 3054 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3055 do_fp_ld(s, a->rt, clean_addr, mop); 3056 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3057 do_fp_ld(s, a->rt2, clean_addr, mop); 3058 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3059 return true; 3060 } 3061 3062 static bool trans_STGP(DisasContext *s, arg_ldstpair *a) 3063 { 3064 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3065 uint64_t offset = a->imm << LOG2_TAG_GRANULE; 3066 MemOp mop; 3067 TCGv_i128 tmp; 3068 3069 /* STGP only comes in one size. */ 3070 tcg_debug_assert(a->sz == MO_64); 3071 3072 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3073 return false; 3074 } 3075 3076 if (a->rn == 31) { 3077 gen_check_sp_alignment(s); 3078 } 3079 3080 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3081 if (!a->p) { 3082 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3083 } 3084 3085 clean_addr = clean_data_tbi(s, dirty_addr); 3086 tcg_rt = cpu_reg(s, a->rt); 3087 tcg_rt2 = cpu_reg(s, a->rt2); 3088 3089 /* 3090 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, 3091 * and one tag operation. We implement it as one single aligned 16-byte 3092 * memory operation for convenience. Note that the alignment ensures 3093 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. 3094 */ 3095 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); 3096 3097 tmp = tcg_temp_new_i128(); 3098 if (s->be_data == MO_LE) { 3099 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3100 } else { 3101 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3102 } 3103 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3104 3105 /* Perform the tag store, if tag access enabled. */ 3106 if (s->ata[0]) { 3107 if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3108 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr); 3109 } else { 3110 gen_helper_stg(tcg_env, dirty_addr, dirty_addr); 3111 } 3112 } 3113 3114 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3115 return true; 3116 } 3117 3118 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, 3119 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3120 uint64_t offset, bool is_store, MemOp mop) 3121 { 3122 int memidx; 3123 3124 if (a->rn == 31) { 3125 gen_check_sp_alignment(s); 3126 } 3127 3128 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3129 if (!a->p) { 3130 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3131 } 3132 memidx = get_a64_user_mem_index(s, a->unpriv); 3133 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, 3134 a->w || a->rn != 31, 3135 mop, a->unpriv, memidx); 3136 } 3137 3138 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, 3139 TCGv_i64 dirty_addr, uint64_t offset) 3140 { 3141 if (a->w) { 3142 if (a->p) { 3143 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3144 } 3145 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3146 } 3147 } 3148 3149 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) 3150 { 3151 bool iss_sf, iss_valid = !a->w; 3152 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3153 int memidx = get_a64_user_mem_index(s, a->unpriv); 3154 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3155 3156 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3157 3158 tcg_rt = cpu_reg(s, a->rt); 3159 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3160 3161 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, 3162 iss_valid, a->rt, iss_sf, false); 3163 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3164 return true; 3165 } 3166 3167 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) 3168 { 3169 bool iss_sf, iss_valid = !a->w; 3170 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3171 int memidx = get_a64_user_mem_index(s, a->unpriv); 3172 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3173 3174 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3175 3176 tcg_rt = cpu_reg(s, a->rt); 3177 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3178 3179 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, 3180 a->ext, memidx, iss_valid, a->rt, iss_sf, false); 3181 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3182 return true; 3183 } 3184 3185 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) 3186 { 3187 TCGv_i64 clean_addr, dirty_addr; 3188 MemOp mop; 3189 3190 if (!fp_access_check(s)) { 3191 return true; 3192 } 3193 mop = finalize_memop_asimd(s, a->sz); 3194 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3195 do_fp_st(s, a->rt, clean_addr, mop); 3196 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3197 return true; 3198 } 3199 3200 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) 3201 { 3202 TCGv_i64 clean_addr, dirty_addr; 3203 MemOp mop; 3204 3205 if (!fp_access_check(s)) { 3206 return true; 3207 } 3208 mop = finalize_memop_asimd(s, a->sz); 3209 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3210 do_fp_ld(s, a->rt, clean_addr, mop); 3211 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3212 return true; 3213 } 3214 3215 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, 3216 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3217 bool is_store, MemOp memop) 3218 { 3219 TCGv_i64 tcg_rm; 3220 3221 if (a->rn == 31) { 3222 gen_check_sp_alignment(s); 3223 } 3224 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3225 3226 tcg_rm = read_cpu_reg(s, a->rm, 1); 3227 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); 3228 3229 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); 3230 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); 3231 } 3232 3233 static bool trans_LDR(DisasContext *s, arg_ldst *a) 3234 { 3235 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3236 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3237 MemOp memop; 3238 3239 if (extract32(a->opt, 1, 1) == 0) { 3240 return false; 3241 } 3242 3243 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3244 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3245 tcg_rt = cpu_reg(s, a->rt); 3246 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3247 a->ext, true, a->rt, iss_sf, false); 3248 return true; 3249 } 3250 3251 static bool trans_STR(DisasContext *s, arg_ldst *a) 3252 { 3253 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3254 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3255 MemOp memop; 3256 3257 if (extract32(a->opt, 1, 1) == 0) { 3258 return false; 3259 } 3260 3261 memop = finalize_memop(s, a->sz); 3262 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3263 tcg_rt = cpu_reg(s, a->rt); 3264 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); 3265 return true; 3266 } 3267 3268 static bool trans_LDR_v(DisasContext *s, arg_ldst *a) 3269 { 3270 TCGv_i64 clean_addr, dirty_addr; 3271 MemOp memop; 3272 3273 if (extract32(a->opt, 1, 1) == 0) { 3274 return false; 3275 } 3276 3277 if (!fp_access_check(s)) { 3278 return true; 3279 } 3280 3281 memop = finalize_memop_asimd(s, a->sz); 3282 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3283 do_fp_ld(s, a->rt, clean_addr, memop); 3284 return true; 3285 } 3286 3287 static bool trans_STR_v(DisasContext *s, arg_ldst *a) 3288 { 3289 TCGv_i64 clean_addr, dirty_addr; 3290 MemOp memop; 3291 3292 if (extract32(a->opt, 1, 1) == 0) { 3293 return false; 3294 } 3295 3296 if (!fp_access_check(s)) { 3297 return true; 3298 } 3299 3300 memop = finalize_memop_asimd(s, a->sz); 3301 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3302 do_fp_st(s, a->rt, clean_addr, memop); 3303 return true; 3304 } 3305 3306 3307 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, 3308 int sign, bool invert) 3309 { 3310 MemOp mop = a->sz | sign; 3311 TCGv_i64 clean_addr, tcg_rs, tcg_rt; 3312 3313 if (a->rn == 31) { 3314 gen_check_sp_alignment(s); 3315 } 3316 mop = check_atomic_align(s, a->rn, mop); 3317 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3318 a->rn != 31, mop); 3319 tcg_rs = read_cpu_reg(s, a->rs, true); 3320 tcg_rt = cpu_reg(s, a->rt); 3321 if (invert) { 3322 tcg_gen_not_i64(tcg_rs, tcg_rs); 3323 } 3324 /* 3325 * The tcg atomic primitives are all full barriers. Therefore we 3326 * can ignore the Acquire and Release bits of this instruction. 3327 */ 3328 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3329 3330 if (mop & MO_SIGN) { 3331 switch (a->sz) { 3332 case MO_8: 3333 tcg_gen_ext8u_i64(tcg_rt, tcg_rt); 3334 break; 3335 case MO_16: 3336 tcg_gen_ext16u_i64(tcg_rt, tcg_rt); 3337 break; 3338 case MO_32: 3339 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3340 break; 3341 case MO_64: 3342 break; 3343 default: 3344 g_assert_not_reached(); 3345 } 3346 } 3347 return true; 3348 } 3349 3350 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) 3351 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) 3352 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) 3353 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) 3354 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) 3355 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) 3356 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 3357 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) 3358 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) 3359 3360 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) 3361 { 3362 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3363 TCGv_i64 clean_addr; 3364 MemOp mop; 3365 3366 if (!dc_isar_feature(aa64_atomics, s) || 3367 !dc_isar_feature(aa64_rcpc_8_3, s)) { 3368 return false; 3369 } 3370 if (a->rn == 31) { 3371 gen_check_sp_alignment(s); 3372 } 3373 mop = check_atomic_align(s, a->rn, a->sz); 3374 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3375 a->rn != 31, mop); 3376 /* 3377 * LDAPR* are a special case because they are a simple load, not a 3378 * fetch-and-do-something op. 3379 * The architectural consistency requirements here are weaker than 3380 * full load-acquire (we only need "load-acquire processor consistent"), 3381 * but we choose to implement them as full LDAQ. 3382 */ 3383 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, 3384 true, a->rt, iss_sf, true); 3385 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3386 return true; 3387 } 3388 3389 static bool trans_LDRA(DisasContext *s, arg_LDRA *a) 3390 { 3391 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3392 MemOp memop; 3393 3394 /* Load with pointer authentication */ 3395 if (!dc_isar_feature(aa64_pauth, s)) { 3396 return false; 3397 } 3398 3399 if (a->rn == 31) { 3400 gen_check_sp_alignment(s); 3401 } 3402 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3403 3404 if (s->pauth_active) { 3405 if (!a->m) { 3406 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr, 3407 tcg_constant_i64(0)); 3408 } else { 3409 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr, 3410 tcg_constant_i64(0)); 3411 } 3412 } 3413 3414 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3415 3416 memop = finalize_memop(s, MO_64); 3417 3418 /* Note that "clean" and "dirty" here refer to TBI not PAC. */ 3419 clean_addr = gen_mte_check1(s, dirty_addr, false, 3420 a->w || a->rn != 31, memop); 3421 3422 tcg_rt = cpu_reg(s, a->rt); 3423 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3424 /* extend */ false, /* iss_valid */ !a->w, 3425 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); 3426 3427 if (a->w) { 3428 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3429 } 3430 return true; 3431 } 3432 3433 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3434 { 3435 TCGv_i64 clean_addr, dirty_addr; 3436 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); 3437 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3438 3439 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3440 return false; 3441 } 3442 3443 if (a->rn == 31) { 3444 gen_check_sp_alignment(s); 3445 } 3446 3447 mop = check_ordered_align(s, a->rn, a->imm, false, mop); 3448 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3449 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3450 clean_addr = clean_data_tbi(s, dirty_addr); 3451 3452 /* 3453 * Load-AcquirePC semantics; we implement as the slightly more 3454 * restrictive Load-Acquire. 3455 */ 3456 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, 3457 a->rt, iss_sf, true); 3458 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3459 return true; 3460 } 3461 3462 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3463 { 3464 TCGv_i64 clean_addr, dirty_addr; 3465 MemOp mop = a->sz; 3466 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3467 3468 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3469 return false; 3470 } 3471 3472 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3473 3474 if (a->rn == 31) { 3475 gen_check_sp_alignment(s); 3476 } 3477 3478 mop = check_ordered_align(s, a->rn, a->imm, true, mop); 3479 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3480 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3481 clean_addr = clean_data_tbi(s, dirty_addr); 3482 3483 /* Store-Release semantics */ 3484 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3485 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); 3486 return true; 3487 } 3488 3489 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) 3490 { 3491 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3492 MemOp endian, align, mop; 3493 3494 int total; /* total bytes */ 3495 int elements; /* elements per vector */ 3496 int r; 3497 int size = a->sz; 3498 3499 if (!a->p && a->rm != 0) { 3500 /* For non-postindexed accesses the Rm field must be 0 */ 3501 return false; 3502 } 3503 if (size == 3 && !a->q && a->selem != 1) { 3504 return false; 3505 } 3506 if (!fp_access_check(s)) { 3507 return true; 3508 } 3509 3510 if (a->rn == 31) { 3511 gen_check_sp_alignment(s); 3512 } 3513 3514 /* For our purposes, bytes are always little-endian. */ 3515 endian = s->be_data; 3516 if (size == 0) { 3517 endian = MO_LE; 3518 } 3519 3520 total = a->rpt * a->selem * (a->q ? 16 : 8); 3521 tcg_rn = cpu_reg_sp(s, a->rn); 3522 3523 /* 3524 * Issue the MTE check vs the logical repeat count, before we 3525 * promote consecutive little-endian elements below. 3526 */ 3527 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, 3528 finalize_memop_asimd(s, size)); 3529 3530 /* 3531 * Consecutive little-endian elements from a single register 3532 * can be promoted to a larger little-endian operation. 3533 */ 3534 align = MO_ALIGN; 3535 if (a->selem == 1 && endian == MO_LE) { 3536 align = pow2_align(size); 3537 size = 3; 3538 } 3539 if (!s->align_mem) { 3540 align = 0; 3541 } 3542 mop = endian | size | align; 3543 3544 elements = (a->q ? 16 : 8) >> size; 3545 tcg_ebytes = tcg_constant_i64(1 << size); 3546 for (r = 0; r < a->rpt; r++) { 3547 int e; 3548 for (e = 0; e < elements; e++) { 3549 int xs; 3550 for (xs = 0; xs < a->selem; xs++) { 3551 int tt = (a->rt + r + xs) % 32; 3552 do_vec_ld(s, tt, e, clean_addr, mop); 3553 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3554 } 3555 } 3556 } 3557 3558 /* 3559 * For non-quad operations, setting a slice of the low 64 bits of 3560 * the register clears the high 64 bits (in the ARM ARM pseudocode 3561 * this is implicit in the fact that 'rval' is a 64 bit wide 3562 * variable). For quad operations, we might still need to zero 3563 * the high bits of SVE. 3564 */ 3565 for (r = 0; r < a->rpt * a->selem; r++) { 3566 int tt = (a->rt + r) % 32; 3567 clear_vec_high(s, a->q, tt); 3568 } 3569 3570 if (a->p) { 3571 if (a->rm == 31) { 3572 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3573 } else { 3574 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3575 } 3576 } 3577 return true; 3578 } 3579 3580 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) 3581 { 3582 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3583 MemOp endian, align, mop; 3584 3585 int total; /* total bytes */ 3586 int elements; /* elements per vector */ 3587 int r; 3588 int size = a->sz; 3589 3590 if (!a->p && a->rm != 0) { 3591 /* For non-postindexed accesses the Rm field must be 0 */ 3592 return false; 3593 } 3594 if (size == 3 && !a->q && a->selem != 1) { 3595 return false; 3596 } 3597 if (!fp_access_check(s)) { 3598 return true; 3599 } 3600 3601 if (a->rn == 31) { 3602 gen_check_sp_alignment(s); 3603 } 3604 3605 /* For our purposes, bytes are always little-endian. */ 3606 endian = s->be_data; 3607 if (size == 0) { 3608 endian = MO_LE; 3609 } 3610 3611 total = a->rpt * a->selem * (a->q ? 16 : 8); 3612 tcg_rn = cpu_reg_sp(s, a->rn); 3613 3614 /* 3615 * Issue the MTE check vs the logical repeat count, before we 3616 * promote consecutive little-endian elements below. 3617 */ 3618 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, 3619 finalize_memop_asimd(s, size)); 3620 3621 /* 3622 * Consecutive little-endian elements from a single register 3623 * can be promoted to a larger little-endian operation. 3624 */ 3625 align = MO_ALIGN; 3626 if (a->selem == 1 && endian == MO_LE) { 3627 align = pow2_align(size); 3628 size = 3; 3629 } 3630 if (!s->align_mem) { 3631 align = 0; 3632 } 3633 mop = endian | size | align; 3634 3635 elements = (a->q ? 16 : 8) >> size; 3636 tcg_ebytes = tcg_constant_i64(1 << size); 3637 for (r = 0; r < a->rpt; r++) { 3638 int e; 3639 for (e = 0; e < elements; e++) { 3640 int xs; 3641 for (xs = 0; xs < a->selem; xs++) { 3642 int tt = (a->rt + r + xs) % 32; 3643 do_vec_st(s, tt, e, clean_addr, mop); 3644 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3645 } 3646 } 3647 } 3648 3649 if (a->p) { 3650 if (a->rm == 31) { 3651 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3652 } else { 3653 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3654 } 3655 } 3656 return true; 3657 } 3658 3659 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) 3660 { 3661 int xs, total, rt; 3662 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3663 MemOp mop; 3664 3665 if (!a->p && a->rm != 0) { 3666 return false; 3667 } 3668 if (!fp_access_check(s)) { 3669 return true; 3670 } 3671 3672 if (a->rn == 31) { 3673 gen_check_sp_alignment(s); 3674 } 3675 3676 total = a->selem << a->scale; 3677 tcg_rn = cpu_reg_sp(s, a->rn); 3678 3679 mop = finalize_memop_asimd(s, a->scale); 3680 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, 3681 total, mop); 3682 3683 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3684 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3685 do_vec_st(s, rt, a->index, clean_addr, mop); 3686 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3687 } 3688 3689 if (a->p) { 3690 if (a->rm == 31) { 3691 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3692 } else { 3693 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3694 } 3695 } 3696 return true; 3697 } 3698 3699 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) 3700 { 3701 int xs, total, rt; 3702 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3703 MemOp mop; 3704 3705 if (!a->p && a->rm != 0) { 3706 return false; 3707 } 3708 if (!fp_access_check(s)) { 3709 return true; 3710 } 3711 3712 if (a->rn == 31) { 3713 gen_check_sp_alignment(s); 3714 } 3715 3716 total = a->selem << a->scale; 3717 tcg_rn = cpu_reg_sp(s, a->rn); 3718 3719 mop = finalize_memop_asimd(s, a->scale); 3720 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3721 total, mop); 3722 3723 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3724 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3725 do_vec_ld(s, rt, a->index, clean_addr, mop); 3726 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3727 } 3728 3729 if (a->p) { 3730 if (a->rm == 31) { 3731 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3732 } else { 3733 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3734 } 3735 } 3736 return true; 3737 } 3738 3739 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) 3740 { 3741 int xs, total, rt; 3742 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3743 MemOp mop; 3744 3745 if (!a->p && a->rm != 0) { 3746 return false; 3747 } 3748 if (!fp_access_check(s)) { 3749 return true; 3750 } 3751 3752 if (a->rn == 31) { 3753 gen_check_sp_alignment(s); 3754 } 3755 3756 total = a->selem << a->scale; 3757 tcg_rn = cpu_reg_sp(s, a->rn); 3758 3759 mop = finalize_memop_asimd(s, a->scale); 3760 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3761 total, mop); 3762 3763 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3764 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3765 /* Load and replicate to all elements */ 3766 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 3767 3768 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 3769 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), 3770 (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp); 3771 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3772 } 3773 3774 if (a->p) { 3775 if (a->rm == 31) { 3776 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3777 } else { 3778 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3779 } 3780 } 3781 return true; 3782 } 3783 3784 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) 3785 { 3786 TCGv_i64 addr, clean_addr, tcg_rt; 3787 int size = 4 << s->dcz_blocksize; 3788 3789 if (!dc_isar_feature(aa64_mte, s)) { 3790 return false; 3791 } 3792 if (s->current_el == 0) { 3793 return false; 3794 } 3795 3796 if (a->rn == 31) { 3797 gen_check_sp_alignment(s); 3798 } 3799 3800 addr = read_cpu_reg_sp(s, a->rn, true); 3801 tcg_gen_addi_i64(addr, addr, a->imm); 3802 tcg_rt = cpu_reg(s, a->rt); 3803 3804 if (s->ata[0]) { 3805 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt); 3806 } 3807 /* 3808 * The non-tags portion of STZGM is mostly like DC_ZVA, 3809 * except the alignment happens before the access. 3810 */ 3811 clean_addr = clean_data_tbi(s, addr); 3812 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 3813 gen_helper_dc_zva(tcg_env, clean_addr); 3814 return true; 3815 } 3816 3817 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) 3818 { 3819 TCGv_i64 addr, clean_addr, tcg_rt; 3820 3821 if (!dc_isar_feature(aa64_mte, s)) { 3822 return false; 3823 } 3824 if (s->current_el == 0) { 3825 return false; 3826 } 3827 3828 if (a->rn == 31) { 3829 gen_check_sp_alignment(s); 3830 } 3831 3832 addr = read_cpu_reg_sp(s, a->rn, true); 3833 tcg_gen_addi_i64(addr, addr, a->imm); 3834 tcg_rt = cpu_reg(s, a->rt); 3835 3836 if (s->ata[0]) { 3837 gen_helper_stgm(tcg_env, addr, tcg_rt); 3838 } else { 3839 MMUAccessType acc = MMU_DATA_STORE; 3840 int size = 4 << s->gm_blocksize; 3841 3842 clean_addr = clean_data_tbi(s, addr); 3843 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 3844 gen_probe_access(s, clean_addr, acc, size); 3845 } 3846 return true; 3847 } 3848 3849 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) 3850 { 3851 TCGv_i64 addr, clean_addr, tcg_rt; 3852 3853 if (!dc_isar_feature(aa64_mte, s)) { 3854 return false; 3855 } 3856 if (s->current_el == 0) { 3857 return false; 3858 } 3859 3860 if (a->rn == 31) { 3861 gen_check_sp_alignment(s); 3862 } 3863 3864 addr = read_cpu_reg_sp(s, a->rn, true); 3865 tcg_gen_addi_i64(addr, addr, a->imm); 3866 tcg_rt = cpu_reg(s, a->rt); 3867 3868 if (s->ata[0]) { 3869 gen_helper_ldgm(tcg_rt, tcg_env, addr); 3870 } else { 3871 MMUAccessType acc = MMU_DATA_LOAD; 3872 int size = 4 << s->gm_blocksize; 3873 3874 clean_addr = clean_data_tbi(s, addr); 3875 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 3876 gen_probe_access(s, clean_addr, acc, size); 3877 /* The result tags are zeros. */ 3878 tcg_gen_movi_i64(tcg_rt, 0); 3879 } 3880 return true; 3881 } 3882 3883 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) 3884 { 3885 TCGv_i64 addr, clean_addr, tcg_rt; 3886 3887 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3888 return false; 3889 } 3890 3891 if (a->rn == 31) { 3892 gen_check_sp_alignment(s); 3893 } 3894 3895 addr = read_cpu_reg_sp(s, a->rn, true); 3896 if (!a->p) { 3897 /* pre-index or signed offset */ 3898 tcg_gen_addi_i64(addr, addr, a->imm); 3899 } 3900 3901 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); 3902 tcg_rt = cpu_reg(s, a->rt); 3903 if (s->ata[0]) { 3904 gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt); 3905 } else { 3906 /* 3907 * Tag access disabled: we must check for aborts on the load 3908 * load from [rn+offset], and then insert a 0 tag into rt. 3909 */ 3910 clean_addr = clean_data_tbi(s, addr); 3911 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 3912 gen_address_with_allocation_tag0(tcg_rt, tcg_rt); 3913 } 3914 3915 if (a->w) { 3916 /* pre-index or post-index */ 3917 if (a->p) { 3918 /* post-index */ 3919 tcg_gen_addi_i64(addr, addr, a->imm); 3920 } 3921 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 3922 } 3923 return true; 3924 } 3925 3926 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) 3927 { 3928 TCGv_i64 addr, tcg_rt; 3929 3930 if (a->rn == 31) { 3931 gen_check_sp_alignment(s); 3932 } 3933 3934 addr = read_cpu_reg_sp(s, a->rn, true); 3935 if (!a->p) { 3936 /* pre-index or signed offset */ 3937 tcg_gen_addi_i64(addr, addr, a->imm); 3938 } 3939 tcg_rt = cpu_reg_sp(s, a->rt); 3940 if (!s->ata[0]) { 3941 /* 3942 * For STG and ST2G, we need to check alignment and probe memory. 3943 * TODO: For STZG and STZ2G, we could rely on the stores below, 3944 * at least for system mode; user-only won't enforce alignment. 3945 */ 3946 if (is_pair) { 3947 gen_helper_st2g_stub(tcg_env, addr); 3948 } else { 3949 gen_helper_stg_stub(tcg_env, addr); 3950 } 3951 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3952 if (is_pair) { 3953 gen_helper_st2g_parallel(tcg_env, addr, tcg_rt); 3954 } else { 3955 gen_helper_stg_parallel(tcg_env, addr, tcg_rt); 3956 } 3957 } else { 3958 if (is_pair) { 3959 gen_helper_st2g(tcg_env, addr, tcg_rt); 3960 } else { 3961 gen_helper_stg(tcg_env, addr, tcg_rt); 3962 } 3963 } 3964 3965 if (is_zero) { 3966 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 3967 TCGv_i64 zero64 = tcg_constant_i64(0); 3968 TCGv_i128 zero128 = tcg_temp_new_i128(); 3969 int mem_index = get_mem_index(s); 3970 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN); 3971 3972 tcg_gen_concat_i64_i128(zero128, zero64, zero64); 3973 3974 /* This is 1 or 2 atomic 16-byte operations. */ 3975 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 3976 if (is_pair) { 3977 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 3978 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 3979 } 3980 } 3981 3982 if (a->w) { 3983 /* pre-index or post-index */ 3984 if (a->p) { 3985 /* post-index */ 3986 tcg_gen_addi_i64(addr, addr, a->imm); 3987 } 3988 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 3989 } 3990 return true; 3991 } 3992 3993 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) 3994 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) 3995 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) 3996 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) 3997 3998 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32); 3999 4000 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, 4001 bool is_setg, SetFn fn) 4002 { 4003 int memidx; 4004 uint32_t syndrome, desc = 0; 4005 4006 if (is_setg && !dc_isar_feature(aa64_mte, s)) { 4007 return false; 4008 } 4009 4010 /* 4011 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4012 * us to pull this check before the CheckMOPSEnabled() test 4013 * (which we do in the helper function) 4014 */ 4015 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4016 a->rd == 31 || a->rn == 31) { 4017 return false; 4018 } 4019 4020 memidx = get_a64_user_mem_index(s, a->unpriv); 4021 4022 /* 4023 * We pass option_a == true, matching our implementation; 4024 * we pass wrong_option == false: helper function may set that bit. 4025 */ 4026 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv, 4027 is_epilogue, false, true, a->rd, a->rs, a->rn); 4028 4029 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) { 4030 /* We may need to do MTE tag checking, so assemble the descriptor */ 4031 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4032 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4033 desc = FIELD_DP32(desc, MTEDESC, WRITE, true); 4034 /* SIZEM1 and ALIGN we leave 0 (byte write) */ 4035 } 4036 /* The helper function always needs the memidx even with MTE disabled */ 4037 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx); 4038 4039 /* 4040 * The helper needs the register numbers, but since they're in 4041 * the syndrome anyway, we let it extract them from there rather 4042 * than passing in an extra three integer arguments. 4043 */ 4044 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc)); 4045 return true; 4046 } 4047 4048 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp) 4049 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm) 4050 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete) 4051 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp) 4052 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm) 4053 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge) 4054 4055 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32); 4056 4057 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) 4058 { 4059 int rmemidx, wmemidx; 4060 uint32_t syndrome, rdesc = 0, wdesc = 0; 4061 bool wunpriv = extract32(a->options, 0, 1); 4062 bool runpriv = extract32(a->options, 1, 1); 4063 4064 /* 4065 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4066 * us to pull this check before the CheckMOPSEnabled() test 4067 * (which we do in the helper function) 4068 */ 4069 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4070 a->rd == 31 || a->rs == 31 || a->rn == 31) { 4071 return false; 4072 } 4073 4074 rmemidx = get_a64_user_mem_index(s, runpriv); 4075 wmemidx = get_a64_user_mem_index(s, wunpriv); 4076 4077 /* 4078 * We pass option_a == true, matching our implementation; 4079 * we pass wrong_option == false: helper function may set that bit. 4080 */ 4081 syndrome = syn_mop(false, false, a->options, is_epilogue, 4082 false, true, a->rd, a->rs, a->rn); 4083 4084 /* If we need to do MTE tag checking, assemble the descriptors */ 4085 if (s->mte_active[runpriv]) { 4086 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid); 4087 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma); 4088 } 4089 if (s->mte_active[wunpriv]) { 4090 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid); 4091 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma); 4092 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true); 4093 } 4094 /* The helper function needs these parts of the descriptor regardless */ 4095 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx); 4096 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx); 4097 4098 /* 4099 * The helper needs the register numbers, but since they're in 4100 * the syndrome anyway, we let it extract them from there rather 4101 * than passing in an extra three integer arguments. 4102 */ 4103 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc), 4104 tcg_constant_i32(rdesc)); 4105 return true; 4106 } 4107 4108 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp) 4109 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym) 4110 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye) 4111 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp) 4112 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm) 4113 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe) 4114 4115 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); 4116 4117 static bool gen_rri(DisasContext *s, arg_rri_sf *a, 4118 bool rd_sp, bool rn_sp, ArithTwoOp *fn) 4119 { 4120 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn); 4121 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd); 4122 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm); 4123 4124 fn(tcg_rd, tcg_rn, tcg_imm); 4125 if (!a->sf) { 4126 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4127 } 4128 return true; 4129 } 4130 4131 /* 4132 * PC-rel. addressing 4133 */ 4134 4135 static bool trans_ADR(DisasContext *s, arg_ri *a) 4136 { 4137 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm); 4138 return true; 4139 } 4140 4141 static bool trans_ADRP(DisasContext *s, arg_ri *a) 4142 { 4143 int64_t offset = (int64_t)a->imm << 12; 4144 4145 /* The page offset is ok for CF_PCREL. */ 4146 offset -= s->pc_curr & 0xfff; 4147 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset); 4148 return true; 4149 } 4150 4151 /* 4152 * Add/subtract (immediate) 4153 */ 4154 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64) 4155 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64) 4156 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) 4157 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) 4158 4159 /* 4160 * Add/subtract (immediate, with tags) 4161 */ 4162 4163 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, 4164 bool sub_op) 4165 { 4166 TCGv_i64 tcg_rn, tcg_rd; 4167 int imm; 4168 4169 imm = a->uimm6 << LOG2_TAG_GRANULE; 4170 if (sub_op) { 4171 imm = -imm; 4172 } 4173 4174 tcg_rn = cpu_reg_sp(s, a->rn); 4175 tcg_rd = cpu_reg_sp(s, a->rd); 4176 4177 if (s->ata[0]) { 4178 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn, 4179 tcg_constant_i32(imm), 4180 tcg_constant_i32(a->uimm4)); 4181 } else { 4182 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 4183 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 4184 } 4185 return true; 4186 } 4187 4188 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false) 4189 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true) 4190 4191 /* The input should be a value in the bottom e bits (with higher 4192 * bits zero); returns that value replicated into every element 4193 * of size e in a 64 bit integer. 4194 */ 4195 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 4196 { 4197 assert(e != 0); 4198 while (e < 64) { 4199 mask |= mask << e; 4200 e *= 2; 4201 } 4202 return mask; 4203 } 4204 4205 /* 4206 * Logical (immediate) 4207 */ 4208 4209 /* 4210 * Simplified variant of pseudocode DecodeBitMasks() for the case where we 4211 * only require the wmask. Returns false if the imms/immr/immn are a reserved 4212 * value (ie should cause a guest UNDEF exception), and true if they are 4213 * valid, in which case the decoded bit pattern is written to result. 4214 */ 4215 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, 4216 unsigned int imms, unsigned int immr) 4217 { 4218 uint64_t mask; 4219 unsigned e, levels, s, r; 4220 int len; 4221 4222 assert(immn < 2 && imms < 64 && immr < 64); 4223 4224 /* The bit patterns we create here are 64 bit patterns which 4225 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or 4226 * 64 bits each. Each element contains the same value: a run 4227 * of between 1 and e-1 non-zero bits, rotated within the 4228 * element by between 0 and e-1 bits. 4229 * 4230 * The element size and run length are encoded into immn (1 bit) 4231 * and imms (6 bits) as follows: 4232 * 64 bit elements: immn = 1, imms = <length of run - 1> 4233 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1> 4234 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1> 4235 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1> 4236 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1> 4237 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1> 4238 * Notice that immn = 0, imms = 11111x is the only combination 4239 * not covered by one of the above options; this is reserved. 4240 * Further, <length of run - 1> all-ones is a reserved pattern. 4241 * 4242 * In all cases the rotation is by immr % e (and immr is 6 bits). 4243 */ 4244 4245 /* First determine the element size */ 4246 len = 31 - clz32((immn << 6) | (~imms & 0x3f)); 4247 if (len < 1) { 4248 /* This is the immn == 0, imms == 0x11111x case */ 4249 return false; 4250 } 4251 e = 1 << len; 4252 4253 levels = e - 1; 4254 s = imms & levels; 4255 r = immr & levels; 4256 4257 if (s == levels) { 4258 /* <length of run - 1> mustn't be all-ones. */ 4259 return false; 4260 } 4261 4262 /* Create the value of one element: s+1 set bits rotated 4263 * by r within the element (which is e bits wide)... 4264 */ 4265 mask = MAKE_64BIT_MASK(0, s + 1); 4266 if (r) { 4267 mask = (mask >> r) | (mask << (e - r)); 4268 mask &= MAKE_64BIT_MASK(0, e); 4269 } 4270 /* ...then replicate the element over the whole 64 bit value */ 4271 mask = bitfield_replicate(mask, e); 4272 *result = mask; 4273 return true; 4274 } 4275 4276 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc, 4277 void (*fn)(TCGv_i64, TCGv_i64, int64_t)) 4278 { 4279 TCGv_i64 tcg_rd, tcg_rn; 4280 uint64_t imm; 4281 4282 /* Some immediate field values are reserved. */ 4283 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 4284 extract32(a->dbm, 0, 6), 4285 extract32(a->dbm, 6, 6))) { 4286 return false; 4287 } 4288 if (!a->sf) { 4289 imm &= 0xffffffffull; 4290 } 4291 4292 tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd); 4293 tcg_rn = cpu_reg(s, a->rn); 4294 4295 fn(tcg_rd, tcg_rn, imm); 4296 if (set_cc) { 4297 gen_logic_CC(a->sf, tcg_rd); 4298 } 4299 if (!a->sf) { 4300 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4301 } 4302 return true; 4303 } 4304 4305 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64) 4306 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64) 4307 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64) 4308 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64) 4309 4310 /* 4311 * Move wide (immediate) 4312 */ 4313 4314 static bool trans_MOVZ(DisasContext *s, arg_movw *a) 4315 { 4316 int pos = a->hw << 4; 4317 tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos); 4318 return true; 4319 } 4320 4321 static bool trans_MOVN(DisasContext *s, arg_movw *a) 4322 { 4323 int pos = a->hw << 4; 4324 uint64_t imm = a->imm; 4325 4326 imm = ~(imm << pos); 4327 if (!a->sf) { 4328 imm = (uint32_t)imm; 4329 } 4330 tcg_gen_movi_i64(cpu_reg(s, a->rd), imm); 4331 return true; 4332 } 4333 4334 static bool trans_MOVK(DisasContext *s, arg_movw *a) 4335 { 4336 int pos = a->hw << 4; 4337 TCGv_i64 tcg_rd, tcg_im; 4338 4339 tcg_rd = cpu_reg(s, a->rd); 4340 tcg_im = tcg_constant_i64(a->imm); 4341 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16); 4342 if (!a->sf) { 4343 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4344 } 4345 return true; 4346 } 4347 4348 /* 4349 * Bitfield 4350 */ 4351 4352 static bool trans_SBFM(DisasContext *s, arg_SBFM *a) 4353 { 4354 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4355 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4356 unsigned int bitsize = a->sf ? 64 : 32; 4357 unsigned int ri = a->immr; 4358 unsigned int si = a->imms; 4359 unsigned int pos, len; 4360 4361 if (si >= ri) { 4362 /* Wd<s-r:0> = Wn<s:r> */ 4363 len = (si - ri) + 1; 4364 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len); 4365 if (!a->sf) { 4366 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4367 } 4368 } else { 4369 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4370 len = si + 1; 4371 pos = (bitsize - ri) & (bitsize - 1); 4372 4373 if (len < ri) { 4374 /* 4375 * Sign extend the destination field from len to fill the 4376 * balance of the word. Let the deposit below insert all 4377 * of those sign bits. 4378 */ 4379 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len); 4380 len = ri; 4381 } 4382 4383 /* 4384 * We start with zero, and we haven't modified any bits outside 4385 * bitsize, therefore no final zero-extension is unneeded for !sf. 4386 */ 4387 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4388 } 4389 return true; 4390 } 4391 4392 static bool trans_UBFM(DisasContext *s, arg_UBFM *a) 4393 { 4394 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4395 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4396 unsigned int bitsize = a->sf ? 64 : 32; 4397 unsigned int ri = a->immr; 4398 unsigned int si = a->imms; 4399 unsigned int pos, len; 4400 4401 tcg_rd = cpu_reg(s, a->rd); 4402 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4403 4404 if (si >= ri) { 4405 /* Wd<s-r:0> = Wn<s:r> */ 4406 len = (si - ri) + 1; 4407 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); 4408 } else { 4409 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4410 len = si + 1; 4411 pos = (bitsize - ri) & (bitsize - 1); 4412 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4413 } 4414 return true; 4415 } 4416 4417 static bool trans_BFM(DisasContext *s, arg_BFM *a) 4418 { 4419 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4420 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4421 unsigned int bitsize = a->sf ? 64 : 32; 4422 unsigned int ri = a->immr; 4423 unsigned int si = a->imms; 4424 unsigned int pos, len; 4425 4426 tcg_rd = cpu_reg(s, a->rd); 4427 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4428 4429 if (si >= ri) { 4430 /* Wd<s-r:0> = Wn<s:r> */ 4431 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); 4432 len = (si - ri) + 1; 4433 pos = 0; 4434 } else { 4435 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4436 len = si + 1; 4437 pos = (bitsize - ri) & (bitsize - 1); 4438 } 4439 4440 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 4441 if (!a->sf) { 4442 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4443 } 4444 return true; 4445 } 4446 4447 static bool trans_EXTR(DisasContext *s, arg_extract *a) 4448 { 4449 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 4450 4451 tcg_rd = cpu_reg(s, a->rd); 4452 4453 if (unlikely(a->imm == 0)) { 4454 /* 4455 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 4456 * so an extract from bit 0 is a special case. 4457 */ 4458 if (a->sf) { 4459 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm)); 4460 } else { 4461 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm)); 4462 } 4463 } else { 4464 tcg_rm = cpu_reg(s, a->rm); 4465 tcg_rn = cpu_reg(s, a->rn); 4466 4467 if (a->sf) { 4468 /* Specialization to ROR happens in EXTRACT2. */ 4469 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm); 4470 } else { 4471 TCGv_i32 t0 = tcg_temp_new_i32(); 4472 4473 tcg_gen_extrl_i64_i32(t0, tcg_rm); 4474 if (a->rm == a->rn) { 4475 tcg_gen_rotri_i32(t0, t0, a->imm); 4476 } else { 4477 TCGv_i32 t1 = tcg_temp_new_i32(); 4478 tcg_gen_extrl_i64_i32(t1, tcg_rn); 4479 tcg_gen_extract2_i32(t0, t0, t1, a->imm); 4480 } 4481 tcg_gen_extu_i32_i64(tcg_rd, t0); 4482 } 4483 } 4484 return true; 4485 } 4486 4487 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 4488 * Note that it is the caller's responsibility to ensure that the 4489 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 4490 * mandated semantics for out of range shifts. 4491 */ 4492 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 4493 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 4494 { 4495 switch (shift_type) { 4496 case A64_SHIFT_TYPE_LSL: 4497 tcg_gen_shl_i64(dst, src, shift_amount); 4498 break; 4499 case A64_SHIFT_TYPE_LSR: 4500 tcg_gen_shr_i64(dst, src, shift_amount); 4501 break; 4502 case A64_SHIFT_TYPE_ASR: 4503 if (!sf) { 4504 tcg_gen_ext32s_i64(dst, src); 4505 } 4506 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); 4507 break; 4508 case A64_SHIFT_TYPE_ROR: 4509 if (sf) { 4510 tcg_gen_rotr_i64(dst, src, shift_amount); 4511 } else { 4512 TCGv_i32 t0, t1; 4513 t0 = tcg_temp_new_i32(); 4514 t1 = tcg_temp_new_i32(); 4515 tcg_gen_extrl_i64_i32(t0, src); 4516 tcg_gen_extrl_i64_i32(t1, shift_amount); 4517 tcg_gen_rotr_i32(t0, t0, t1); 4518 tcg_gen_extu_i32_i64(dst, t0); 4519 } 4520 break; 4521 default: 4522 assert(FALSE); /* all shift types should be handled */ 4523 break; 4524 } 4525 4526 if (!sf) { /* zero extend final result */ 4527 tcg_gen_ext32u_i64(dst, dst); 4528 } 4529 } 4530 4531 /* Shift a TCGv src by immediate, put result in dst. 4532 * The shift amount must be in range (this should always be true as the 4533 * relevant instructions will UNDEF on bad shift immediates). 4534 */ 4535 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, 4536 enum a64_shift_type shift_type, unsigned int shift_i) 4537 { 4538 assert(shift_i < (sf ? 64 : 32)); 4539 4540 if (shift_i == 0) { 4541 tcg_gen_mov_i64(dst, src); 4542 } else { 4543 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); 4544 } 4545 } 4546 4547 /* Logical (shifted register) 4548 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 4549 * +----+-----+-----------+-------+---+------+--------+------+------+ 4550 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd | 4551 * +----+-----+-----------+-------+---+------+--------+------+------+ 4552 */ 4553 static void disas_logic_reg(DisasContext *s, uint32_t insn) 4554 { 4555 TCGv_i64 tcg_rd, tcg_rn, tcg_rm; 4556 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd; 4557 4558 sf = extract32(insn, 31, 1); 4559 opc = extract32(insn, 29, 2); 4560 shift_type = extract32(insn, 22, 2); 4561 invert = extract32(insn, 21, 1); 4562 rm = extract32(insn, 16, 5); 4563 shift_amount = extract32(insn, 10, 6); 4564 rn = extract32(insn, 5, 5); 4565 rd = extract32(insn, 0, 5); 4566 4567 if (!sf && (shift_amount & (1 << 5))) { 4568 unallocated_encoding(s); 4569 return; 4570 } 4571 4572 tcg_rd = cpu_reg(s, rd); 4573 4574 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) { 4575 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for 4576 * register-register MOV and MVN, so it is worth special casing. 4577 */ 4578 tcg_rm = cpu_reg(s, rm); 4579 if (invert) { 4580 tcg_gen_not_i64(tcg_rd, tcg_rm); 4581 if (!sf) { 4582 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4583 } 4584 } else { 4585 if (sf) { 4586 tcg_gen_mov_i64(tcg_rd, tcg_rm); 4587 } else { 4588 tcg_gen_ext32u_i64(tcg_rd, tcg_rm); 4589 } 4590 } 4591 return; 4592 } 4593 4594 tcg_rm = read_cpu_reg(s, rm, sf); 4595 4596 if (shift_amount) { 4597 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount); 4598 } 4599 4600 tcg_rn = cpu_reg(s, rn); 4601 4602 switch (opc | (invert << 2)) { 4603 case 0: /* AND */ 4604 case 3: /* ANDS */ 4605 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm); 4606 break; 4607 case 1: /* ORR */ 4608 tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm); 4609 break; 4610 case 2: /* EOR */ 4611 tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm); 4612 break; 4613 case 4: /* BIC */ 4614 case 7: /* BICS */ 4615 tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm); 4616 break; 4617 case 5: /* ORN */ 4618 tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm); 4619 break; 4620 case 6: /* EON */ 4621 tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm); 4622 break; 4623 default: 4624 assert(FALSE); 4625 break; 4626 } 4627 4628 if (!sf) { 4629 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4630 } 4631 4632 if (opc == 3) { 4633 gen_logic_CC(sf, tcg_rd); 4634 } 4635 } 4636 4637 /* 4638 * Add/subtract (extended register) 4639 * 4640 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0| 4641 * +--+--+--+-----------+-----+--+-------+------+------+----+----+ 4642 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd | 4643 * +--+--+--+-----------+-----+--+-------+------+------+----+----+ 4644 * 4645 * sf: 0 -> 32bit, 1 -> 64bit 4646 * op: 0 -> add , 1 -> sub 4647 * S: 1 -> set flags 4648 * opt: 00 4649 * option: extension type (see DecodeRegExtend) 4650 * imm3: optional shift to Rm 4651 * 4652 * Rd = Rn + LSL(extend(Rm), amount) 4653 */ 4654 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn) 4655 { 4656 int rd = extract32(insn, 0, 5); 4657 int rn = extract32(insn, 5, 5); 4658 int imm3 = extract32(insn, 10, 3); 4659 int option = extract32(insn, 13, 3); 4660 int rm = extract32(insn, 16, 5); 4661 int opt = extract32(insn, 22, 2); 4662 bool setflags = extract32(insn, 29, 1); 4663 bool sub_op = extract32(insn, 30, 1); 4664 bool sf = extract32(insn, 31, 1); 4665 4666 TCGv_i64 tcg_rm, tcg_rn; /* temps */ 4667 TCGv_i64 tcg_rd; 4668 TCGv_i64 tcg_result; 4669 4670 if (imm3 > 4 || opt != 0) { 4671 unallocated_encoding(s); 4672 return; 4673 } 4674 4675 /* non-flag setting ops may use SP */ 4676 if (!setflags) { 4677 tcg_rd = cpu_reg_sp(s, rd); 4678 } else { 4679 tcg_rd = cpu_reg(s, rd); 4680 } 4681 tcg_rn = read_cpu_reg_sp(s, rn, sf); 4682 4683 tcg_rm = read_cpu_reg(s, rm, sf); 4684 ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3); 4685 4686 tcg_result = tcg_temp_new_i64(); 4687 4688 if (!setflags) { 4689 if (sub_op) { 4690 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 4691 } else { 4692 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 4693 } 4694 } else { 4695 if (sub_op) { 4696 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 4697 } else { 4698 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 4699 } 4700 } 4701 4702 if (sf) { 4703 tcg_gen_mov_i64(tcg_rd, tcg_result); 4704 } else { 4705 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 4706 } 4707 } 4708 4709 /* 4710 * Add/subtract (shifted register) 4711 * 4712 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 4713 * +--+--+--+-----------+-----+--+-------+---------+------+------+ 4714 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd | 4715 * +--+--+--+-----------+-----+--+-------+---------+------+------+ 4716 * 4717 * sf: 0 -> 32bit, 1 -> 64bit 4718 * op: 0 -> add , 1 -> sub 4719 * S: 1 -> set flags 4720 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED 4721 * imm6: Shift amount to apply to Rm before the add/sub 4722 */ 4723 static void disas_add_sub_reg(DisasContext *s, uint32_t insn) 4724 { 4725 int rd = extract32(insn, 0, 5); 4726 int rn = extract32(insn, 5, 5); 4727 int imm6 = extract32(insn, 10, 6); 4728 int rm = extract32(insn, 16, 5); 4729 int shift_type = extract32(insn, 22, 2); 4730 bool setflags = extract32(insn, 29, 1); 4731 bool sub_op = extract32(insn, 30, 1); 4732 bool sf = extract32(insn, 31, 1); 4733 4734 TCGv_i64 tcg_rd = cpu_reg(s, rd); 4735 TCGv_i64 tcg_rn, tcg_rm; 4736 TCGv_i64 tcg_result; 4737 4738 if ((shift_type == 3) || (!sf && (imm6 > 31))) { 4739 unallocated_encoding(s); 4740 return; 4741 } 4742 4743 tcg_rn = read_cpu_reg(s, rn, sf); 4744 tcg_rm = read_cpu_reg(s, rm, sf); 4745 4746 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6); 4747 4748 tcg_result = tcg_temp_new_i64(); 4749 4750 if (!setflags) { 4751 if (sub_op) { 4752 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 4753 } else { 4754 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 4755 } 4756 } else { 4757 if (sub_op) { 4758 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 4759 } else { 4760 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 4761 } 4762 } 4763 4764 if (sf) { 4765 tcg_gen_mov_i64(tcg_rd, tcg_result); 4766 } else { 4767 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 4768 } 4769 } 4770 4771 /* Data-processing (3 source) 4772 * 4773 * 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0 4774 * +--+------+-----------+------+------+----+------+------+------+ 4775 * |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd | 4776 * +--+------+-----------+------+------+----+------+------+------+ 4777 */ 4778 static void disas_data_proc_3src(DisasContext *s, uint32_t insn) 4779 { 4780 int rd = extract32(insn, 0, 5); 4781 int rn = extract32(insn, 5, 5); 4782 int ra = extract32(insn, 10, 5); 4783 int rm = extract32(insn, 16, 5); 4784 int op_id = (extract32(insn, 29, 3) << 4) | 4785 (extract32(insn, 21, 3) << 1) | 4786 extract32(insn, 15, 1); 4787 bool sf = extract32(insn, 31, 1); 4788 bool is_sub = extract32(op_id, 0, 1); 4789 bool is_high = extract32(op_id, 2, 1); 4790 bool is_signed = false; 4791 TCGv_i64 tcg_op1; 4792 TCGv_i64 tcg_op2; 4793 TCGv_i64 tcg_tmp; 4794 4795 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */ 4796 switch (op_id) { 4797 case 0x42: /* SMADDL */ 4798 case 0x43: /* SMSUBL */ 4799 case 0x44: /* SMULH */ 4800 is_signed = true; 4801 break; 4802 case 0x0: /* MADD (32bit) */ 4803 case 0x1: /* MSUB (32bit) */ 4804 case 0x40: /* MADD (64bit) */ 4805 case 0x41: /* MSUB (64bit) */ 4806 case 0x4a: /* UMADDL */ 4807 case 0x4b: /* UMSUBL */ 4808 case 0x4c: /* UMULH */ 4809 break; 4810 default: 4811 unallocated_encoding(s); 4812 return; 4813 } 4814 4815 if (is_high) { 4816 TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */ 4817 TCGv_i64 tcg_rd = cpu_reg(s, rd); 4818 TCGv_i64 tcg_rn = cpu_reg(s, rn); 4819 TCGv_i64 tcg_rm = cpu_reg(s, rm); 4820 4821 if (is_signed) { 4822 tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 4823 } else { 4824 tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 4825 } 4826 return; 4827 } 4828 4829 tcg_op1 = tcg_temp_new_i64(); 4830 tcg_op2 = tcg_temp_new_i64(); 4831 tcg_tmp = tcg_temp_new_i64(); 4832 4833 if (op_id < 0x42) { 4834 tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn)); 4835 tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm)); 4836 } else { 4837 if (is_signed) { 4838 tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn)); 4839 tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm)); 4840 } else { 4841 tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn)); 4842 tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm)); 4843 } 4844 } 4845 4846 if (ra == 31 && !is_sub) { 4847 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 4848 tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2); 4849 } else { 4850 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 4851 if (is_sub) { 4852 tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 4853 } else { 4854 tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 4855 } 4856 } 4857 4858 if (!sf) { 4859 tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd)); 4860 } 4861 } 4862 4863 /* Add/subtract (with carry) 4864 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0 4865 * +--+--+--+------------------------+------+-------------+------+-----+ 4866 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd | 4867 * +--+--+--+------------------------+------+-------------+------+-----+ 4868 */ 4869 4870 static void disas_adc_sbc(DisasContext *s, uint32_t insn) 4871 { 4872 unsigned int sf, op, setflags, rm, rn, rd; 4873 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 4874 4875 sf = extract32(insn, 31, 1); 4876 op = extract32(insn, 30, 1); 4877 setflags = extract32(insn, 29, 1); 4878 rm = extract32(insn, 16, 5); 4879 rn = extract32(insn, 5, 5); 4880 rd = extract32(insn, 0, 5); 4881 4882 tcg_rd = cpu_reg(s, rd); 4883 tcg_rn = cpu_reg(s, rn); 4884 4885 if (op) { 4886 tcg_y = tcg_temp_new_i64(); 4887 tcg_gen_not_i64(tcg_y, cpu_reg(s, rm)); 4888 } else { 4889 tcg_y = cpu_reg(s, rm); 4890 } 4891 4892 if (setflags) { 4893 gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y); 4894 } else { 4895 gen_adc(sf, tcg_rd, tcg_rn, tcg_y); 4896 } 4897 } 4898 4899 /* 4900 * Rotate right into flags 4901 * 31 30 29 21 15 10 5 4 0 4902 * +--+--+--+-----------------+--------+-----------+------+--+------+ 4903 * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask | 4904 * +--+--+--+-----------------+--------+-----------+------+--+------+ 4905 */ 4906 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn) 4907 { 4908 int mask = extract32(insn, 0, 4); 4909 int o2 = extract32(insn, 4, 1); 4910 int rn = extract32(insn, 5, 5); 4911 int imm6 = extract32(insn, 15, 6); 4912 int sf_op_s = extract32(insn, 29, 3); 4913 TCGv_i64 tcg_rn; 4914 TCGv_i32 nzcv; 4915 4916 if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) { 4917 unallocated_encoding(s); 4918 return; 4919 } 4920 4921 tcg_rn = read_cpu_reg(s, rn, 1); 4922 tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6); 4923 4924 nzcv = tcg_temp_new_i32(); 4925 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 4926 4927 if (mask & 8) { /* N */ 4928 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 4929 } 4930 if (mask & 4) { /* Z */ 4931 tcg_gen_not_i32(cpu_ZF, nzcv); 4932 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 4933 } 4934 if (mask & 2) { /* C */ 4935 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 4936 } 4937 if (mask & 1) { /* V */ 4938 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 4939 } 4940 } 4941 4942 /* 4943 * Evaluate into flags 4944 * 31 30 29 21 15 14 10 5 4 0 4945 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 4946 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 | Rn |o3| mask | 4947 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 4948 */ 4949 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn) 4950 { 4951 int o3_mask = extract32(insn, 0, 5); 4952 int rn = extract32(insn, 5, 5); 4953 int o2 = extract32(insn, 15, 6); 4954 int sz = extract32(insn, 14, 1); 4955 int sf_op_s = extract32(insn, 29, 3); 4956 TCGv_i32 tmp; 4957 int shift; 4958 4959 if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd || 4960 !dc_isar_feature(aa64_condm_4, s)) { 4961 unallocated_encoding(s); 4962 return; 4963 } 4964 shift = sz ? 16 : 24; /* SETF16 or SETF8 */ 4965 4966 tmp = tcg_temp_new_i32(); 4967 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 4968 tcg_gen_shli_i32(cpu_NF, tmp, shift); 4969 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 4970 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 4971 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 4972 } 4973 4974 /* Conditional compare (immediate / register) 4975 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 4976 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 4977 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv | 4978 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 4979 * [1] y [0] [0] 4980 */ 4981 static void disas_cc(DisasContext *s, uint32_t insn) 4982 { 4983 unsigned int sf, op, y, cond, rn, nzcv, is_imm; 4984 TCGv_i32 tcg_t0, tcg_t1, tcg_t2; 4985 TCGv_i64 tcg_tmp, tcg_y, tcg_rn; 4986 DisasCompare c; 4987 4988 if (!extract32(insn, 29, 1)) { 4989 unallocated_encoding(s); 4990 return; 4991 } 4992 if (insn & (1 << 10 | 1 << 4)) { 4993 unallocated_encoding(s); 4994 return; 4995 } 4996 sf = extract32(insn, 31, 1); 4997 op = extract32(insn, 30, 1); 4998 is_imm = extract32(insn, 11, 1); 4999 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */ 5000 cond = extract32(insn, 12, 4); 5001 rn = extract32(insn, 5, 5); 5002 nzcv = extract32(insn, 0, 4); 5003 5004 /* Set T0 = !COND. */ 5005 tcg_t0 = tcg_temp_new_i32(); 5006 arm_test_cc(&c, cond); 5007 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 5008 5009 /* Load the arguments for the new comparison. */ 5010 if (is_imm) { 5011 tcg_y = tcg_temp_new_i64(); 5012 tcg_gen_movi_i64(tcg_y, y); 5013 } else { 5014 tcg_y = cpu_reg(s, y); 5015 } 5016 tcg_rn = cpu_reg(s, rn); 5017 5018 /* Set the flags for the new comparison. */ 5019 tcg_tmp = tcg_temp_new_i64(); 5020 if (op) { 5021 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y); 5022 } else { 5023 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y); 5024 } 5025 5026 /* If COND was false, force the flags to #nzcv. Compute two masks 5027 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 5028 * For tcg hosts that support ANDC, we can make do with just T1. 5029 * In either case, allow the tcg optimizer to delete any unused mask. 5030 */ 5031 tcg_t1 = tcg_temp_new_i32(); 5032 tcg_t2 = tcg_temp_new_i32(); 5033 tcg_gen_neg_i32(tcg_t1, tcg_t0); 5034 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 5035 5036 if (nzcv & 8) { /* N */ 5037 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 5038 } else { 5039 if (TCG_TARGET_HAS_andc_i32) { 5040 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 5041 } else { 5042 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 5043 } 5044 } 5045 if (nzcv & 4) { /* Z */ 5046 if (TCG_TARGET_HAS_andc_i32) { 5047 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 5048 } else { 5049 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 5050 } 5051 } else { 5052 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 5053 } 5054 if (nzcv & 2) { /* C */ 5055 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 5056 } else { 5057 if (TCG_TARGET_HAS_andc_i32) { 5058 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 5059 } else { 5060 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 5061 } 5062 } 5063 if (nzcv & 1) { /* V */ 5064 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 5065 } else { 5066 if (TCG_TARGET_HAS_andc_i32) { 5067 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 5068 } else { 5069 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 5070 } 5071 } 5072 } 5073 5074 /* Conditional select 5075 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0 5076 * +----+----+---+-----------------+------+------+-----+------+------+ 5077 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd | 5078 * +----+----+---+-----------------+------+------+-----+------+------+ 5079 */ 5080 static void disas_cond_select(DisasContext *s, uint32_t insn) 5081 { 5082 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd; 5083 TCGv_i64 tcg_rd, zero; 5084 DisasCompare64 c; 5085 5086 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) { 5087 /* S == 1 or op2<1> == 1 */ 5088 unallocated_encoding(s); 5089 return; 5090 } 5091 sf = extract32(insn, 31, 1); 5092 else_inv = extract32(insn, 30, 1); 5093 rm = extract32(insn, 16, 5); 5094 cond = extract32(insn, 12, 4); 5095 else_inc = extract32(insn, 10, 1); 5096 rn = extract32(insn, 5, 5); 5097 rd = extract32(insn, 0, 5); 5098 5099 tcg_rd = cpu_reg(s, rd); 5100 5101 a64_test_cc(&c, cond); 5102 zero = tcg_constant_i64(0); 5103 5104 if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) { 5105 /* CSET & CSETM. */ 5106 if (else_inv) { 5107 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), 5108 tcg_rd, c.value, zero); 5109 } else { 5110 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), 5111 tcg_rd, c.value, zero); 5112 } 5113 } else { 5114 TCGv_i64 t_true = cpu_reg(s, rn); 5115 TCGv_i64 t_false = read_cpu_reg(s, rm, 1); 5116 if (else_inv && else_inc) { 5117 tcg_gen_neg_i64(t_false, t_false); 5118 } else if (else_inv) { 5119 tcg_gen_not_i64(t_false, t_false); 5120 } else if (else_inc) { 5121 tcg_gen_addi_i64(t_false, t_false, 1); 5122 } 5123 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 5124 } 5125 5126 if (!sf) { 5127 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5128 } 5129 } 5130 5131 static void handle_clz(DisasContext *s, unsigned int sf, 5132 unsigned int rn, unsigned int rd) 5133 { 5134 TCGv_i64 tcg_rd, tcg_rn; 5135 tcg_rd = cpu_reg(s, rd); 5136 tcg_rn = cpu_reg(s, rn); 5137 5138 if (sf) { 5139 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 5140 } else { 5141 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 5142 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 5143 tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32); 5144 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5145 } 5146 } 5147 5148 static void handle_cls(DisasContext *s, unsigned int sf, 5149 unsigned int rn, unsigned int rd) 5150 { 5151 TCGv_i64 tcg_rd, tcg_rn; 5152 tcg_rd = cpu_reg(s, rd); 5153 tcg_rn = cpu_reg(s, rn); 5154 5155 if (sf) { 5156 tcg_gen_clrsb_i64(tcg_rd, tcg_rn); 5157 } else { 5158 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 5159 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 5160 tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32); 5161 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5162 } 5163 } 5164 5165 static void handle_rbit(DisasContext *s, unsigned int sf, 5166 unsigned int rn, unsigned int rd) 5167 { 5168 TCGv_i64 tcg_rd, tcg_rn; 5169 tcg_rd = cpu_reg(s, rd); 5170 tcg_rn = cpu_reg(s, rn); 5171 5172 if (sf) { 5173 gen_helper_rbit64(tcg_rd, tcg_rn); 5174 } else { 5175 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 5176 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 5177 gen_helper_rbit(tcg_tmp32, tcg_tmp32); 5178 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5179 } 5180 } 5181 5182 /* REV with sf==1, opcode==3 ("REV64") */ 5183 static void handle_rev64(DisasContext *s, unsigned int sf, 5184 unsigned int rn, unsigned int rd) 5185 { 5186 if (!sf) { 5187 unallocated_encoding(s); 5188 return; 5189 } 5190 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn)); 5191 } 5192 5193 /* REV with sf==0, opcode==2 5194 * REV32 (sf==1, opcode==2) 5195 */ 5196 static void handle_rev32(DisasContext *s, unsigned int sf, 5197 unsigned int rn, unsigned int rd) 5198 { 5199 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5200 TCGv_i64 tcg_rn = cpu_reg(s, rn); 5201 5202 if (sf) { 5203 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 5204 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 5205 } else { 5206 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 5207 } 5208 } 5209 5210 /* REV16 (opcode==1) */ 5211 static void handle_rev16(DisasContext *s, unsigned int sf, 5212 unsigned int rn, unsigned int rd) 5213 { 5214 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5215 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 5216 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 5217 TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff); 5218 5219 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 5220 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 5221 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 5222 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 5223 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 5224 } 5225 5226 /* Data-processing (1 source) 5227 * 31 30 29 28 21 20 16 15 10 9 5 4 0 5228 * +----+---+---+-----------------+---------+--------+------+------+ 5229 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd | 5230 * +----+---+---+-----------------+---------+--------+------+------+ 5231 */ 5232 static void disas_data_proc_1src(DisasContext *s, uint32_t insn) 5233 { 5234 unsigned int sf, opcode, opcode2, rn, rd; 5235 TCGv_i64 tcg_rd; 5236 5237 if (extract32(insn, 29, 1)) { 5238 unallocated_encoding(s); 5239 return; 5240 } 5241 5242 sf = extract32(insn, 31, 1); 5243 opcode = extract32(insn, 10, 6); 5244 opcode2 = extract32(insn, 16, 5); 5245 rn = extract32(insn, 5, 5); 5246 rd = extract32(insn, 0, 5); 5247 5248 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7)) 5249 5250 switch (MAP(sf, opcode2, opcode)) { 5251 case MAP(0, 0x00, 0x00): /* RBIT */ 5252 case MAP(1, 0x00, 0x00): 5253 handle_rbit(s, sf, rn, rd); 5254 break; 5255 case MAP(0, 0x00, 0x01): /* REV16 */ 5256 case MAP(1, 0x00, 0x01): 5257 handle_rev16(s, sf, rn, rd); 5258 break; 5259 case MAP(0, 0x00, 0x02): /* REV/REV32 */ 5260 case MAP(1, 0x00, 0x02): 5261 handle_rev32(s, sf, rn, rd); 5262 break; 5263 case MAP(1, 0x00, 0x03): /* REV64 */ 5264 handle_rev64(s, sf, rn, rd); 5265 break; 5266 case MAP(0, 0x00, 0x04): /* CLZ */ 5267 case MAP(1, 0x00, 0x04): 5268 handle_clz(s, sf, rn, rd); 5269 break; 5270 case MAP(0, 0x00, 0x05): /* CLS */ 5271 case MAP(1, 0x00, 0x05): 5272 handle_cls(s, sf, rn, rd); 5273 break; 5274 case MAP(1, 0x01, 0x00): /* PACIA */ 5275 if (s->pauth_active) { 5276 tcg_rd = cpu_reg(s, rd); 5277 gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 5278 } else if (!dc_isar_feature(aa64_pauth, s)) { 5279 goto do_unallocated; 5280 } 5281 break; 5282 case MAP(1, 0x01, 0x01): /* PACIB */ 5283 if (s->pauth_active) { 5284 tcg_rd = cpu_reg(s, rd); 5285 gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 5286 } else if (!dc_isar_feature(aa64_pauth, s)) { 5287 goto do_unallocated; 5288 } 5289 break; 5290 case MAP(1, 0x01, 0x02): /* PACDA */ 5291 if (s->pauth_active) { 5292 tcg_rd = cpu_reg(s, rd); 5293 gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 5294 } else if (!dc_isar_feature(aa64_pauth, s)) { 5295 goto do_unallocated; 5296 } 5297 break; 5298 case MAP(1, 0x01, 0x03): /* PACDB */ 5299 if (s->pauth_active) { 5300 tcg_rd = cpu_reg(s, rd); 5301 gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 5302 } else if (!dc_isar_feature(aa64_pauth, s)) { 5303 goto do_unallocated; 5304 } 5305 break; 5306 case MAP(1, 0x01, 0x04): /* AUTIA */ 5307 if (s->pauth_active) { 5308 tcg_rd = cpu_reg(s, rd); 5309 gen_helper_autia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 5310 } else if (!dc_isar_feature(aa64_pauth, s)) { 5311 goto do_unallocated; 5312 } 5313 break; 5314 case MAP(1, 0x01, 0x05): /* AUTIB */ 5315 if (s->pauth_active) { 5316 tcg_rd = cpu_reg(s, rd); 5317 gen_helper_autib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 5318 } else if (!dc_isar_feature(aa64_pauth, s)) { 5319 goto do_unallocated; 5320 } 5321 break; 5322 case MAP(1, 0x01, 0x06): /* AUTDA */ 5323 if (s->pauth_active) { 5324 tcg_rd = cpu_reg(s, rd); 5325 gen_helper_autda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 5326 } else if (!dc_isar_feature(aa64_pauth, s)) { 5327 goto do_unallocated; 5328 } 5329 break; 5330 case MAP(1, 0x01, 0x07): /* AUTDB */ 5331 if (s->pauth_active) { 5332 tcg_rd = cpu_reg(s, rd); 5333 gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 5334 } else if (!dc_isar_feature(aa64_pauth, s)) { 5335 goto do_unallocated; 5336 } 5337 break; 5338 case MAP(1, 0x01, 0x08): /* PACIZA */ 5339 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5340 goto do_unallocated; 5341 } else if (s->pauth_active) { 5342 tcg_rd = cpu_reg(s, rd); 5343 gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 5344 } 5345 break; 5346 case MAP(1, 0x01, 0x09): /* PACIZB */ 5347 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5348 goto do_unallocated; 5349 } else if (s->pauth_active) { 5350 tcg_rd = cpu_reg(s, rd); 5351 gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 5352 } 5353 break; 5354 case MAP(1, 0x01, 0x0a): /* PACDZA */ 5355 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5356 goto do_unallocated; 5357 } else if (s->pauth_active) { 5358 tcg_rd = cpu_reg(s, rd); 5359 gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 5360 } 5361 break; 5362 case MAP(1, 0x01, 0x0b): /* PACDZB */ 5363 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5364 goto do_unallocated; 5365 } else if (s->pauth_active) { 5366 tcg_rd = cpu_reg(s, rd); 5367 gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 5368 } 5369 break; 5370 case MAP(1, 0x01, 0x0c): /* AUTIZA */ 5371 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5372 goto do_unallocated; 5373 } else if (s->pauth_active) { 5374 tcg_rd = cpu_reg(s, rd); 5375 gen_helper_autia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 5376 } 5377 break; 5378 case MAP(1, 0x01, 0x0d): /* AUTIZB */ 5379 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5380 goto do_unallocated; 5381 } else if (s->pauth_active) { 5382 tcg_rd = cpu_reg(s, rd); 5383 gen_helper_autib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 5384 } 5385 break; 5386 case MAP(1, 0x01, 0x0e): /* AUTDZA */ 5387 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5388 goto do_unallocated; 5389 } else if (s->pauth_active) { 5390 tcg_rd = cpu_reg(s, rd); 5391 gen_helper_autda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 5392 } 5393 break; 5394 case MAP(1, 0x01, 0x0f): /* AUTDZB */ 5395 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5396 goto do_unallocated; 5397 } else if (s->pauth_active) { 5398 tcg_rd = cpu_reg(s, rd); 5399 gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 5400 } 5401 break; 5402 case MAP(1, 0x01, 0x10): /* XPACI */ 5403 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5404 goto do_unallocated; 5405 } else if (s->pauth_active) { 5406 tcg_rd = cpu_reg(s, rd); 5407 gen_helper_xpaci(tcg_rd, tcg_env, tcg_rd); 5408 } 5409 break; 5410 case MAP(1, 0x01, 0x11): /* XPACD */ 5411 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5412 goto do_unallocated; 5413 } else if (s->pauth_active) { 5414 tcg_rd = cpu_reg(s, rd); 5415 gen_helper_xpacd(tcg_rd, tcg_env, tcg_rd); 5416 } 5417 break; 5418 default: 5419 do_unallocated: 5420 unallocated_encoding(s); 5421 break; 5422 } 5423 5424 #undef MAP 5425 } 5426 5427 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf, 5428 unsigned int rm, unsigned int rn, unsigned int rd) 5429 { 5430 TCGv_i64 tcg_n, tcg_m, tcg_rd; 5431 tcg_rd = cpu_reg(s, rd); 5432 5433 if (!sf && is_signed) { 5434 tcg_n = tcg_temp_new_i64(); 5435 tcg_m = tcg_temp_new_i64(); 5436 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn)); 5437 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm)); 5438 } else { 5439 tcg_n = read_cpu_reg(s, rn, sf); 5440 tcg_m = read_cpu_reg(s, rm, sf); 5441 } 5442 5443 if (is_signed) { 5444 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 5445 } else { 5446 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 5447 } 5448 5449 if (!sf) { /* zero extend final result */ 5450 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5451 } 5452 } 5453 5454 /* LSLV, LSRV, ASRV, RORV */ 5455 static void handle_shift_reg(DisasContext *s, 5456 enum a64_shift_type shift_type, unsigned int sf, 5457 unsigned int rm, unsigned int rn, unsigned int rd) 5458 { 5459 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 5460 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5461 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 5462 5463 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31); 5464 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift); 5465 } 5466 5467 /* CRC32[BHWX], CRC32C[BHWX] */ 5468 static void handle_crc32(DisasContext *s, 5469 unsigned int sf, unsigned int sz, bool crc32c, 5470 unsigned int rm, unsigned int rn, unsigned int rd) 5471 { 5472 TCGv_i64 tcg_acc, tcg_val; 5473 TCGv_i32 tcg_bytes; 5474 5475 if (!dc_isar_feature(aa64_crc32, s) 5476 || (sf == 1 && sz != 3) 5477 || (sf == 0 && sz == 3)) { 5478 unallocated_encoding(s); 5479 return; 5480 } 5481 5482 if (sz == 3) { 5483 tcg_val = cpu_reg(s, rm); 5484 } else { 5485 uint64_t mask; 5486 switch (sz) { 5487 case 0: 5488 mask = 0xFF; 5489 break; 5490 case 1: 5491 mask = 0xFFFF; 5492 break; 5493 case 2: 5494 mask = 0xFFFFFFFF; 5495 break; 5496 default: 5497 g_assert_not_reached(); 5498 } 5499 tcg_val = tcg_temp_new_i64(); 5500 tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask); 5501 } 5502 5503 tcg_acc = cpu_reg(s, rn); 5504 tcg_bytes = tcg_constant_i32(1 << sz); 5505 5506 if (crc32c) { 5507 gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 5508 } else { 5509 gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 5510 } 5511 } 5512 5513 /* Data-processing (2 source) 5514 * 31 30 29 28 21 20 16 15 10 9 5 4 0 5515 * +----+---+---+-----------------+------+--------+------+------+ 5516 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd | 5517 * +----+---+---+-----------------+------+--------+------+------+ 5518 */ 5519 static void disas_data_proc_2src(DisasContext *s, uint32_t insn) 5520 { 5521 unsigned int sf, rm, opcode, rn, rd, setflag; 5522 sf = extract32(insn, 31, 1); 5523 setflag = extract32(insn, 29, 1); 5524 rm = extract32(insn, 16, 5); 5525 opcode = extract32(insn, 10, 6); 5526 rn = extract32(insn, 5, 5); 5527 rd = extract32(insn, 0, 5); 5528 5529 if (setflag && opcode != 0) { 5530 unallocated_encoding(s); 5531 return; 5532 } 5533 5534 switch (opcode) { 5535 case 0: /* SUBP(S) */ 5536 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5537 goto do_unallocated; 5538 } else { 5539 TCGv_i64 tcg_n, tcg_m, tcg_d; 5540 5541 tcg_n = read_cpu_reg_sp(s, rn, true); 5542 tcg_m = read_cpu_reg_sp(s, rm, true); 5543 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); 5544 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 5545 tcg_d = cpu_reg(s, rd); 5546 5547 if (setflag) { 5548 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 5549 } else { 5550 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 5551 } 5552 } 5553 break; 5554 case 2: /* UDIV */ 5555 handle_div(s, false, sf, rm, rn, rd); 5556 break; 5557 case 3: /* SDIV */ 5558 handle_div(s, true, sf, rm, rn, rd); 5559 break; 5560 case 4: /* IRG */ 5561 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5562 goto do_unallocated; 5563 } 5564 if (s->ata[0]) { 5565 gen_helper_irg(cpu_reg_sp(s, rd), tcg_env, 5566 cpu_reg_sp(s, rn), cpu_reg(s, rm)); 5567 } else { 5568 gen_address_with_allocation_tag0(cpu_reg_sp(s, rd), 5569 cpu_reg_sp(s, rn)); 5570 } 5571 break; 5572 case 5: /* GMI */ 5573 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5574 goto do_unallocated; 5575 } else { 5576 TCGv_i64 t = tcg_temp_new_i64(); 5577 5578 tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4); 5579 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 5580 tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t); 5581 } 5582 break; 5583 case 8: /* LSLV */ 5584 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); 5585 break; 5586 case 9: /* LSRV */ 5587 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd); 5588 break; 5589 case 10: /* ASRV */ 5590 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd); 5591 break; 5592 case 11: /* RORV */ 5593 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd); 5594 break; 5595 case 12: /* PACGA */ 5596 if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) { 5597 goto do_unallocated; 5598 } 5599 gen_helper_pacga(cpu_reg(s, rd), tcg_env, 5600 cpu_reg(s, rn), cpu_reg_sp(s, rm)); 5601 break; 5602 case 16: 5603 case 17: 5604 case 18: 5605 case 19: 5606 case 20: 5607 case 21: 5608 case 22: 5609 case 23: /* CRC32 */ 5610 { 5611 int sz = extract32(opcode, 0, 2); 5612 bool crc32c = extract32(opcode, 2, 1); 5613 handle_crc32(s, sf, sz, crc32c, rm, rn, rd); 5614 break; 5615 } 5616 default: 5617 do_unallocated: 5618 unallocated_encoding(s); 5619 break; 5620 } 5621 } 5622 5623 /* 5624 * Data processing - register 5625 * 31 30 29 28 25 21 20 16 10 0 5626 * +--+---+--+---+-------+-----+-------+-------+---------+ 5627 * | |op0| |op1| 1 0 1 | op2 | | op3 | | 5628 * +--+---+--+---+-------+-----+-------+-------+---------+ 5629 */ 5630 static void disas_data_proc_reg(DisasContext *s, uint32_t insn) 5631 { 5632 int op0 = extract32(insn, 30, 1); 5633 int op1 = extract32(insn, 28, 1); 5634 int op2 = extract32(insn, 21, 4); 5635 int op3 = extract32(insn, 10, 6); 5636 5637 if (!op1) { 5638 if (op2 & 8) { 5639 if (op2 & 1) { 5640 /* Add/sub (extended register) */ 5641 disas_add_sub_ext_reg(s, insn); 5642 } else { 5643 /* Add/sub (shifted register) */ 5644 disas_add_sub_reg(s, insn); 5645 } 5646 } else { 5647 /* Logical (shifted register) */ 5648 disas_logic_reg(s, insn); 5649 } 5650 return; 5651 } 5652 5653 switch (op2) { 5654 case 0x0: 5655 switch (op3) { 5656 case 0x00: /* Add/subtract (with carry) */ 5657 disas_adc_sbc(s, insn); 5658 break; 5659 5660 case 0x01: /* Rotate right into flags */ 5661 case 0x21: 5662 disas_rotate_right_into_flags(s, insn); 5663 break; 5664 5665 case 0x02: /* Evaluate into flags */ 5666 case 0x12: 5667 case 0x22: 5668 case 0x32: 5669 disas_evaluate_into_flags(s, insn); 5670 break; 5671 5672 default: 5673 goto do_unallocated; 5674 } 5675 break; 5676 5677 case 0x2: /* Conditional compare */ 5678 disas_cc(s, insn); /* both imm and reg forms */ 5679 break; 5680 5681 case 0x4: /* Conditional select */ 5682 disas_cond_select(s, insn); 5683 break; 5684 5685 case 0x6: /* Data-processing */ 5686 if (op0) { /* (1 source) */ 5687 disas_data_proc_1src(s, insn); 5688 } else { /* (2 source) */ 5689 disas_data_proc_2src(s, insn); 5690 } 5691 break; 5692 case 0x8 ... 0xf: /* (3 source) */ 5693 disas_data_proc_3src(s, insn); 5694 break; 5695 5696 default: 5697 do_unallocated: 5698 unallocated_encoding(s); 5699 break; 5700 } 5701 } 5702 5703 static void handle_fp_compare(DisasContext *s, int size, 5704 unsigned int rn, unsigned int rm, 5705 bool cmp_with_zero, bool signal_all_nans) 5706 { 5707 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 5708 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 5709 5710 if (size == MO_64) { 5711 TCGv_i64 tcg_vn, tcg_vm; 5712 5713 tcg_vn = read_fp_dreg(s, rn); 5714 if (cmp_with_zero) { 5715 tcg_vm = tcg_constant_i64(0); 5716 } else { 5717 tcg_vm = read_fp_dreg(s, rm); 5718 } 5719 if (signal_all_nans) { 5720 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5721 } else { 5722 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5723 } 5724 } else { 5725 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 5726 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 5727 5728 read_vec_element_i32(s, tcg_vn, rn, 0, size); 5729 if (cmp_with_zero) { 5730 tcg_gen_movi_i32(tcg_vm, 0); 5731 } else { 5732 read_vec_element_i32(s, tcg_vm, rm, 0, size); 5733 } 5734 5735 switch (size) { 5736 case MO_32: 5737 if (signal_all_nans) { 5738 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5739 } else { 5740 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5741 } 5742 break; 5743 case MO_16: 5744 if (signal_all_nans) { 5745 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5746 } else { 5747 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5748 } 5749 break; 5750 default: 5751 g_assert_not_reached(); 5752 } 5753 } 5754 5755 gen_set_nzcv(tcg_flags); 5756 } 5757 5758 /* Floating point compare 5759 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0 5760 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 5761 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 | 5762 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 5763 */ 5764 static void disas_fp_compare(DisasContext *s, uint32_t insn) 5765 { 5766 unsigned int mos, type, rm, op, rn, opc, op2r; 5767 int size; 5768 5769 mos = extract32(insn, 29, 3); 5770 type = extract32(insn, 22, 2); 5771 rm = extract32(insn, 16, 5); 5772 op = extract32(insn, 14, 2); 5773 rn = extract32(insn, 5, 5); 5774 opc = extract32(insn, 3, 2); 5775 op2r = extract32(insn, 0, 3); 5776 5777 if (mos || op || op2r) { 5778 unallocated_encoding(s); 5779 return; 5780 } 5781 5782 switch (type) { 5783 case 0: 5784 size = MO_32; 5785 break; 5786 case 1: 5787 size = MO_64; 5788 break; 5789 case 3: 5790 size = MO_16; 5791 if (dc_isar_feature(aa64_fp16, s)) { 5792 break; 5793 } 5794 /* fallthru */ 5795 default: 5796 unallocated_encoding(s); 5797 return; 5798 } 5799 5800 if (!fp_access_check(s)) { 5801 return; 5802 } 5803 5804 handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2); 5805 } 5806 5807 /* Floating point conditional compare 5808 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 5809 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 5810 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv | 5811 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 5812 */ 5813 static void disas_fp_ccomp(DisasContext *s, uint32_t insn) 5814 { 5815 unsigned int mos, type, rm, cond, rn, op, nzcv; 5816 TCGLabel *label_continue = NULL; 5817 int size; 5818 5819 mos = extract32(insn, 29, 3); 5820 type = extract32(insn, 22, 2); 5821 rm = extract32(insn, 16, 5); 5822 cond = extract32(insn, 12, 4); 5823 rn = extract32(insn, 5, 5); 5824 op = extract32(insn, 4, 1); 5825 nzcv = extract32(insn, 0, 4); 5826 5827 if (mos) { 5828 unallocated_encoding(s); 5829 return; 5830 } 5831 5832 switch (type) { 5833 case 0: 5834 size = MO_32; 5835 break; 5836 case 1: 5837 size = MO_64; 5838 break; 5839 case 3: 5840 size = MO_16; 5841 if (dc_isar_feature(aa64_fp16, s)) { 5842 break; 5843 } 5844 /* fallthru */ 5845 default: 5846 unallocated_encoding(s); 5847 return; 5848 } 5849 5850 if (!fp_access_check(s)) { 5851 return; 5852 } 5853 5854 if (cond < 0x0e) { /* not always */ 5855 TCGLabel *label_match = gen_new_label(); 5856 label_continue = gen_new_label(); 5857 arm_gen_test_cc(cond, label_match); 5858 /* nomatch: */ 5859 gen_set_nzcv(tcg_constant_i64(nzcv << 28)); 5860 tcg_gen_br(label_continue); 5861 gen_set_label(label_match); 5862 } 5863 5864 handle_fp_compare(s, size, rn, rm, false, op); 5865 5866 if (cond < 0x0e) { 5867 gen_set_label(label_continue); 5868 } 5869 } 5870 5871 /* Floating point conditional select 5872 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 5873 * +---+---+---+-----------+------+---+------+------+-----+------+------+ 5874 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd | 5875 * +---+---+---+-----------+------+---+------+------+-----+------+------+ 5876 */ 5877 static void disas_fp_csel(DisasContext *s, uint32_t insn) 5878 { 5879 unsigned int mos, type, rm, cond, rn, rd; 5880 TCGv_i64 t_true, t_false; 5881 DisasCompare64 c; 5882 MemOp sz; 5883 5884 mos = extract32(insn, 29, 3); 5885 type = extract32(insn, 22, 2); 5886 rm = extract32(insn, 16, 5); 5887 cond = extract32(insn, 12, 4); 5888 rn = extract32(insn, 5, 5); 5889 rd = extract32(insn, 0, 5); 5890 5891 if (mos) { 5892 unallocated_encoding(s); 5893 return; 5894 } 5895 5896 switch (type) { 5897 case 0: 5898 sz = MO_32; 5899 break; 5900 case 1: 5901 sz = MO_64; 5902 break; 5903 case 3: 5904 sz = MO_16; 5905 if (dc_isar_feature(aa64_fp16, s)) { 5906 break; 5907 } 5908 /* fallthru */ 5909 default: 5910 unallocated_encoding(s); 5911 return; 5912 } 5913 5914 if (!fp_access_check(s)) { 5915 return; 5916 } 5917 5918 /* Zero extend sreg & hreg inputs to 64 bits now. */ 5919 t_true = tcg_temp_new_i64(); 5920 t_false = tcg_temp_new_i64(); 5921 read_vec_element(s, t_true, rn, 0, sz); 5922 read_vec_element(s, t_false, rm, 0, sz); 5923 5924 a64_test_cc(&c, cond); 5925 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 5926 t_true, t_false); 5927 5928 /* Note that sregs & hregs write back zeros to the high bits, 5929 and we've already done the zero-extension. */ 5930 write_fp_dreg(s, rd, t_true); 5931 } 5932 5933 /* Floating-point data-processing (1 source) - half precision */ 5934 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) 5935 { 5936 TCGv_ptr fpst = NULL; 5937 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 5938 TCGv_i32 tcg_res = tcg_temp_new_i32(); 5939 5940 switch (opcode) { 5941 case 0x0: /* FMOV */ 5942 tcg_gen_mov_i32(tcg_res, tcg_op); 5943 break; 5944 case 0x1: /* FABS */ 5945 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); 5946 break; 5947 case 0x2: /* FNEG */ 5948 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 5949 break; 5950 case 0x3: /* FSQRT */ 5951 fpst = fpstatus_ptr(FPST_FPCR_F16); 5952 gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); 5953 break; 5954 case 0x8: /* FRINTN */ 5955 case 0x9: /* FRINTP */ 5956 case 0xa: /* FRINTM */ 5957 case 0xb: /* FRINTZ */ 5958 case 0xc: /* FRINTA */ 5959 { 5960 TCGv_i32 tcg_rmode; 5961 5962 fpst = fpstatus_ptr(FPST_FPCR_F16); 5963 tcg_rmode = gen_set_rmode(opcode & 7, fpst); 5964 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 5965 gen_restore_rmode(tcg_rmode, fpst); 5966 break; 5967 } 5968 case 0xe: /* FRINTX */ 5969 fpst = fpstatus_ptr(FPST_FPCR_F16); 5970 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst); 5971 break; 5972 case 0xf: /* FRINTI */ 5973 fpst = fpstatus_ptr(FPST_FPCR_F16); 5974 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 5975 break; 5976 default: 5977 g_assert_not_reached(); 5978 } 5979 5980 write_fp_sreg(s, rd, tcg_res); 5981 } 5982 5983 /* Floating-point data-processing (1 source) - single precision */ 5984 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) 5985 { 5986 void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr); 5987 TCGv_i32 tcg_op, tcg_res; 5988 TCGv_ptr fpst; 5989 int rmode = -1; 5990 5991 tcg_op = read_fp_sreg(s, rn); 5992 tcg_res = tcg_temp_new_i32(); 5993 5994 switch (opcode) { 5995 case 0x0: /* FMOV */ 5996 tcg_gen_mov_i32(tcg_res, tcg_op); 5997 goto done; 5998 case 0x1: /* FABS */ 5999 gen_helper_vfp_abss(tcg_res, tcg_op); 6000 goto done; 6001 case 0x2: /* FNEG */ 6002 gen_helper_vfp_negs(tcg_res, tcg_op); 6003 goto done; 6004 case 0x3: /* FSQRT */ 6005 gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); 6006 goto done; 6007 case 0x6: /* BFCVT */ 6008 gen_fpst = gen_helper_bfcvt; 6009 break; 6010 case 0x8: /* FRINTN */ 6011 case 0x9: /* FRINTP */ 6012 case 0xa: /* FRINTM */ 6013 case 0xb: /* FRINTZ */ 6014 case 0xc: /* FRINTA */ 6015 rmode = opcode & 7; 6016 gen_fpst = gen_helper_rints; 6017 break; 6018 case 0xe: /* FRINTX */ 6019 gen_fpst = gen_helper_rints_exact; 6020 break; 6021 case 0xf: /* FRINTI */ 6022 gen_fpst = gen_helper_rints; 6023 break; 6024 case 0x10: /* FRINT32Z */ 6025 rmode = FPROUNDING_ZERO; 6026 gen_fpst = gen_helper_frint32_s; 6027 break; 6028 case 0x11: /* FRINT32X */ 6029 gen_fpst = gen_helper_frint32_s; 6030 break; 6031 case 0x12: /* FRINT64Z */ 6032 rmode = FPROUNDING_ZERO; 6033 gen_fpst = gen_helper_frint64_s; 6034 break; 6035 case 0x13: /* FRINT64X */ 6036 gen_fpst = gen_helper_frint64_s; 6037 break; 6038 default: 6039 g_assert_not_reached(); 6040 } 6041 6042 fpst = fpstatus_ptr(FPST_FPCR); 6043 if (rmode >= 0) { 6044 TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); 6045 gen_fpst(tcg_res, tcg_op, fpst); 6046 gen_restore_rmode(tcg_rmode, fpst); 6047 } else { 6048 gen_fpst(tcg_res, tcg_op, fpst); 6049 } 6050 6051 done: 6052 write_fp_sreg(s, rd, tcg_res); 6053 } 6054 6055 /* Floating-point data-processing (1 source) - double precision */ 6056 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) 6057 { 6058 void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr); 6059 TCGv_i64 tcg_op, tcg_res; 6060 TCGv_ptr fpst; 6061 int rmode = -1; 6062 6063 switch (opcode) { 6064 case 0x0: /* FMOV */ 6065 gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0); 6066 return; 6067 } 6068 6069 tcg_op = read_fp_dreg(s, rn); 6070 tcg_res = tcg_temp_new_i64(); 6071 6072 switch (opcode) { 6073 case 0x1: /* FABS */ 6074 gen_helper_vfp_absd(tcg_res, tcg_op); 6075 goto done; 6076 case 0x2: /* FNEG */ 6077 gen_helper_vfp_negd(tcg_res, tcg_op); 6078 goto done; 6079 case 0x3: /* FSQRT */ 6080 gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env); 6081 goto done; 6082 case 0x8: /* FRINTN */ 6083 case 0x9: /* FRINTP */ 6084 case 0xa: /* FRINTM */ 6085 case 0xb: /* FRINTZ */ 6086 case 0xc: /* FRINTA */ 6087 rmode = opcode & 7; 6088 gen_fpst = gen_helper_rintd; 6089 break; 6090 case 0xe: /* FRINTX */ 6091 gen_fpst = gen_helper_rintd_exact; 6092 break; 6093 case 0xf: /* FRINTI */ 6094 gen_fpst = gen_helper_rintd; 6095 break; 6096 case 0x10: /* FRINT32Z */ 6097 rmode = FPROUNDING_ZERO; 6098 gen_fpst = gen_helper_frint32_d; 6099 break; 6100 case 0x11: /* FRINT32X */ 6101 gen_fpst = gen_helper_frint32_d; 6102 break; 6103 case 0x12: /* FRINT64Z */ 6104 rmode = FPROUNDING_ZERO; 6105 gen_fpst = gen_helper_frint64_d; 6106 break; 6107 case 0x13: /* FRINT64X */ 6108 gen_fpst = gen_helper_frint64_d; 6109 break; 6110 default: 6111 g_assert_not_reached(); 6112 } 6113 6114 fpst = fpstatus_ptr(FPST_FPCR); 6115 if (rmode >= 0) { 6116 TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); 6117 gen_fpst(tcg_res, tcg_op, fpst); 6118 gen_restore_rmode(tcg_rmode, fpst); 6119 } else { 6120 gen_fpst(tcg_res, tcg_op, fpst); 6121 } 6122 6123 done: 6124 write_fp_dreg(s, rd, tcg_res); 6125 } 6126 6127 static void handle_fp_fcvt(DisasContext *s, int opcode, 6128 int rd, int rn, int dtype, int ntype) 6129 { 6130 switch (ntype) { 6131 case 0x0: 6132 { 6133 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 6134 if (dtype == 1) { 6135 /* Single to double */ 6136 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 6137 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env); 6138 write_fp_dreg(s, rd, tcg_rd); 6139 } else { 6140 /* Single to half */ 6141 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 6142 TCGv_i32 ahp = get_ahp_flag(); 6143 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6144 6145 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp); 6146 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 6147 write_fp_sreg(s, rd, tcg_rd); 6148 } 6149 break; 6150 } 6151 case 0x1: 6152 { 6153 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 6154 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 6155 if (dtype == 0) { 6156 /* Double to single */ 6157 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env); 6158 } else { 6159 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6160 TCGv_i32 ahp = get_ahp_flag(); 6161 /* Double to half */ 6162 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 6163 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 6164 } 6165 write_fp_sreg(s, rd, tcg_rd); 6166 break; 6167 } 6168 case 0x3: 6169 { 6170 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 6171 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR); 6172 TCGv_i32 tcg_ahp = get_ahp_flag(); 6173 tcg_gen_ext16u_i32(tcg_rn, tcg_rn); 6174 if (dtype == 0) { 6175 /* Half to single */ 6176 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 6177 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 6178 write_fp_sreg(s, rd, tcg_rd); 6179 } else { 6180 /* Half to double */ 6181 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 6182 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 6183 write_fp_dreg(s, rd, tcg_rd); 6184 } 6185 break; 6186 } 6187 default: 6188 g_assert_not_reached(); 6189 } 6190 } 6191 6192 /* Floating point data-processing (1 source) 6193 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0 6194 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 6195 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd | 6196 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 6197 */ 6198 static void disas_fp_1src(DisasContext *s, uint32_t insn) 6199 { 6200 int mos = extract32(insn, 29, 3); 6201 int type = extract32(insn, 22, 2); 6202 int opcode = extract32(insn, 15, 6); 6203 int rn = extract32(insn, 5, 5); 6204 int rd = extract32(insn, 0, 5); 6205 6206 if (mos) { 6207 goto do_unallocated; 6208 } 6209 6210 switch (opcode) { 6211 case 0x4: case 0x5: case 0x7: 6212 { 6213 /* FCVT between half, single and double precision */ 6214 int dtype = extract32(opcode, 0, 2); 6215 if (type == 2 || dtype == type) { 6216 goto do_unallocated; 6217 } 6218 if (!fp_access_check(s)) { 6219 return; 6220 } 6221 6222 handle_fp_fcvt(s, opcode, rd, rn, dtype, type); 6223 break; 6224 } 6225 6226 case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */ 6227 if (type > 1 || !dc_isar_feature(aa64_frint, s)) { 6228 goto do_unallocated; 6229 } 6230 /* fall through */ 6231 case 0x0 ... 0x3: 6232 case 0x8 ... 0xc: 6233 case 0xe ... 0xf: 6234 /* 32-to-32 and 64-to-64 ops */ 6235 switch (type) { 6236 case 0: 6237 if (!fp_access_check(s)) { 6238 return; 6239 } 6240 handle_fp_1src_single(s, opcode, rd, rn); 6241 break; 6242 case 1: 6243 if (!fp_access_check(s)) { 6244 return; 6245 } 6246 handle_fp_1src_double(s, opcode, rd, rn); 6247 break; 6248 case 3: 6249 if (!dc_isar_feature(aa64_fp16, s)) { 6250 goto do_unallocated; 6251 } 6252 6253 if (!fp_access_check(s)) { 6254 return; 6255 } 6256 handle_fp_1src_half(s, opcode, rd, rn); 6257 break; 6258 default: 6259 goto do_unallocated; 6260 } 6261 break; 6262 6263 case 0x6: 6264 switch (type) { 6265 case 1: /* BFCVT */ 6266 if (!dc_isar_feature(aa64_bf16, s)) { 6267 goto do_unallocated; 6268 } 6269 if (!fp_access_check(s)) { 6270 return; 6271 } 6272 handle_fp_1src_single(s, opcode, rd, rn); 6273 break; 6274 default: 6275 goto do_unallocated; 6276 } 6277 break; 6278 6279 default: 6280 do_unallocated: 6281 unallocated_encoding(s); 6282 break; 6283 } 6284 } 6285 6286 /* Floating-point data-processing (2 source) - single precision */ 6287 static void handle_fp_2src_single(DisasContext *s, int opcode, 6288 int rd, int rn, int rm) 6289 { 6290 TCGv_i32 tcg_op1; 6291 TCGv_i32 tcg_op2; 6292 TCGv_i32 tcg_res; 6293 TCGv_ptr fpst; 6294 6295 tcg_res = tcg_temp_new_i32(); 6296 fpst = fpstatus_ptr(FPST_FPCR); 6297 tcg_op1 = read_fp_sreg(s, rn); 6298 tcg_op2 = read_fp_sreg(s, rm); 6299 6300 switch (opcode) { 6301 case 0x0: /* FMUL */ 6302 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 6303 break; 6304 case 0x1: /* FDIV */ 6305 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); 6306 break; 6307 case 0x2: /* FADD */ 6308 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); 6309 break; 6310 case 0x3: /* FSUB */ 6311 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 6312 break; 6313 case 0x4: /* FMAX */ 6314 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); 6315 break; 6316 case 0x5: /* FMIN */ 6317 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); 6318 break; 6319 case 0x6: /* FMAXNM */ 6320 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); 6321 break; 6322 case 0x7: /* FMINNM */ 6323 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); 6324 break; 6325 case 0x8: /* FNMUL */ 6326 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 6327 gen_helper_vfp_negs(tcg_res, tcg_res); 6328 break; 6329 } 6330 6331 write_fp_sreg(s, rd, tcg_res); 6332 } 6333 6334 /* Floating-point data-processing (2 source) - double precision */ 6335 static void handle_fp_2src_double(DisasContext *s, int opcode, 6336 int rd, int rn, int rm) 6337 { 6338 TCGv_i64 tcg_op1; 6339 TCGv_i64 tcg_op2; 6340 TCGv_i64 tcg_res; 6341 TCGv_ptr fpst; 6342 6343 tcg_res = tcg_temp_new_i64(); 6344 fpst = fpstatus_ptr(FPST_FPCR); 6345 tcg_op1 = read_fp_dreg(s, rn); 6346 tcg_op2 = read_fp_dreg(s, rm); 6347 6348 switch (opcode) { 6349 case 0x0: /* FMUL */ 6350 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 6351 break; 6352 case 0x1: /* FDIV */ 6353 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); 6354 break; 6355 case 0x2: /* FADD */ 6356 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); 6357 break; 6358 case 0x3: /* FSUB */ 6359 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 6360 break; 6361 case 0x4: /* FMAX */ 6362 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); 6363 break; 6364 case 0x5: /* FMIN */ 6365 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); 6366 break; 6367 case 0x6: /* FMAXNM */ 6368 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); 6369 break; 6370 case 0x7: /* FMINNM */ 6371 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); 6372 break; 6373 case 0x8: /* FNMUL */ 6374 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 6375 gen_helper_vfp_negd(tcg_res, tcg_res); 6376 break; 6377 } 6378 6379 write_fp_dreg(s, rd, tcg_res); 6380 } 6381 6382 /* Floating-point data-processing (2 source) - half precision */ 6383 static void handle_fp_2src_half(DisasContext *s, int opcode, 6384 int rd, int rn, int rm) 6385 { 6386 TCGv_i32 tcg_op1; 6387 TCGv_i32 tcg_op2; 6388 TCGv_i32 tcg_res; 6389 TCGv_ptr fpst; 6390 6391 tcg_res = tcg_temp_new_i32(); 6392 fpst = fpstatus_ptr(FPST_FPCR_F16); 6393 tcg_op1 = read_fp_hreg(s, rn); 6394 tcg_op2 = read_fp_hreg(s, rm); 6395 6396 switch (opcode) { 6397 case 0x0: /* FMUL */ 6398 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 6399 break; 6400 case 0x1: /* FDIV */ 6401 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); 6402 break; 6403 case 0x2: /* FADD */ 6404 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); 6405 break; 6406 case 0x3: /* FSUB */ 6407 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 6408 break; 6409 case 0x4: /* FMAX */ 6410 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); 6411 break; 6412 case 0x5: /* FMIN */ 6413 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); 6414 break; 6415 case 0x6: /* FMAXNM */ 6416 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); 6417 break; 6418 case 0x7: /* FMINNM */ 6419 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); 6420 break; 6421 case 0x8: /* FNMUL */ 6422 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 6423 tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000); 6424 break; 6425 default: 6426 g_assert_not_reached(); 6427 } 6428 6429 write_fp_sreg(s, rd, tcg_res); 6430 } 6431 6432 /* Floating point data-processing (2 source) 6433 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 6434 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 6435 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd | 6436 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 6437 */ 6438 static void disas_fp_2src(DisasContext *s, uint32_t insn) 6439 { 6440 int mos = extract32(insn, 29, 3); 6441 int type = extract32(insn, 22, 2); 6442 int rd = extract32(insn, 0, 5); 6443 int rn = extract32(insn, 5, 5); 6444 int rm = extract32(insn, 16, 5); 6445 int opcode = extract32(insn, 12, 4); 6446 6447 if (opcode > 8 || mos) { 6448 unallocated_encoding(s); 6449 return; 6450 } 6451 6452 switch (type) { 6453 case 0: 6454 if (!fp_access_check(s)) { 6455 return; 6456 } 6457 handle_fp_2src_single(s, opcode, rd, rn, rm); 6458 break; 6459 case 1: 6460 if (!fp_access_check(s)) { 6461 return; 6462 } 6463 handle_fp_2src_double(s, opcode, rd, rn, rm); 6464 break; 6465 case 3: 6466 if (!dc_isar_feature(aa64_fp16, s)) { 6467 unallocated_encoding(s); 6468 return; 6469 } 6470 if (!fp_access_check(s)) { 6471 return; 6472 } 6473 handle_fp_2src_half(s, opcode, rd, rn, rm); 6474 break; 6475 default: 6476 unallocated_encoding(s); 6477 } 6478 } 6479 6480 /* Floating-point data-processing (3 source) - single precision */ 6481 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, 6482 int rd, int rn, int rm, int ra) 6483 { 6484 TCGv_i32 tcg_op1, tcg_op2, tcg_op3; 6485 TCGv_i32 tcg_res = tcg_temp_new_i32(); 6486 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6487 6488 tcg_op1 = read_fp_sreg(s, rn); 6489 tcg_op2 = read_fp_sreg(s, rm); 6490 tcg_op3 = read_fp_sreg(s, ra); 6491 6492 /* These are fused multiply-add, and must be done as one 6493 * floating point operation with no rounding between the 6494 * multiplication and addition steps. 6495 * NB that doing the negations here as separate steps is 6496 * correct : an input NaN should come out with its sign bit 6497 * flipped if it is a negated-input. 6498 */ 6499 if (o1 == true) { 6500 gen_helper_vfp_negs(tcg_op3, tcg_op3); 6501 } 6502 6503 if (o0 != o1) { 6504 gen_helper_vfp_negs(tcg_op1, tcg_op1); 6505 } 6506 6507 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6508 6509 write_fp_sreg(s, rd, tcg_res); 6510 } 6511 6512 /* Floating-point data-processing (3 source) - double precision */ 6513 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, 6514 int rd, int rn, int rm, int ra) 6515 { 6516 TCGv_i64 tcg_op1, tcg_op2, tcg_op3; 6517 TCGv_i64 tcg_res = tcg_temp_new_i64(); 6518 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6519 6520 tcg_op1 = read_fp_dreg(s, rn); 6521 tcg_op2 = read_fp_dreg(s, rm); 6522 tcg_op3 = read_fp_dreg(s, ra); 6523 6524 /* These are fused multiply-add, and must be done as one 6525 * floating point operation with no rounding between the 6526 * multiplication and addition steps. 6527 * NB that doing the negations here as separate steps is 6528 * correct : an input NaN should come out with its sign bit 6529 * flipped if it is a negated-input. 6530 */ 6531 if (o1 == true) { 6532 gen_helper_vfp_negd(tcg_op3, tcg_op3); 6533 } 6534 6535 if (o0 != o1) { 6536 gen_helper_vfp_negd(tcg_op1, tcg_op1); 6537 } 6538 6539 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6540 6541 write_fp_dreg(s, rd, tcg_res); 6542 } 6543 6544 /* Floating-point data-processing (3 source) - half precision */ 6545 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, 6546 int rd, int rn, int rm, int ra) 6547 { 6548 TCGv_i32 tcg_op1, tcg_op2, tcg_op3; 6549 TCGv_i32 tcg_res = tcg_temp_new_i32(); 6550 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16); 6551 6552 tcg_op1 = read_fp_hreg(s, rn); 6553 tcg_op2 = read_fp_hreg(s, rm); 6554 tcg_op3 = read_fp_hreg(s, ra); 6555 6556 /* These are fused multiply-add, and must be done as one 6557 * floating point operation with no rounding between the 6558 * multiplication and addition steps. 6559 * NB that doing the negations here as separate steps is 6560 * correct : an input NaN should come out with its sign bit 6561 * flipped if it is a negated-input. 6562 */ 6563 if (o1 == true) { 6564 tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000); 6565 } 6566 6567 if (o0 != o1) { 6568 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); 6569 } 6570 6571 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6572 6573 write_fp_sreg(s, rd, tcg_res); 6574 } 6575 6576 /* Floating point data-processing (3 source) 6577 * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0 6578 * +---+---+---+-----------+------+----+------+----+------+------+------+ 6579 * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd | 6580 * +---+---+---+-----------+------+----+------+----+------+------+------+ 6581 */ 6582 static void disas_fp_3src(DisasContext *s, uint32_t insn) 6583 { 6584 int mos = extract32(insn, 29, 3); 6585 int type = extract32(insn, 22, 2); 6586 int rd = extract32(insn, 0, 5); 6587 int rn = extract32(insn, 5, 5); 6588 int ra = extract32(insn, 10, 5); 6589 int rm = extract32(insn, 16, 5); 6590 bool o0 = extract32(insn, 15, 1); 6591 bool o1 = extract32(insn, 21, 1); 6592 6593 if (mos) { 6594 unallocated_encoding(s); 6595 return; 6596 } 6597 6598 switch (type) { 6599 case 0: 6600 if (!fp_access_check(s)) { 6601 return; 6602 } 6603 handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra); 6604 break; 6605 case 1: 6606 if (!fp_access_check(s)) { 6607 return; 6608 } 6609 handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra); 6610 break; 6611 case 3: 6612 if (!dc_isar_feature(aa64_fp16, s)) { 6613 unallocated_encoding(s); 6614 return; 6615 } 6616 if (!fp_access_check(s)) { 6617 return; 6618 } 6619 handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra); 6620 break; 6621 default: 6622 unallocated_encoding(s); 6623 } 6624 } 6625 6626 /* Floating point immediate 6627 * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0 6628 * +---+---+---+-----------+------+---+------------+-------+------+------+ 6629 * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd | 6630 * +---+---+---+-----------+------+---+------------+-------+------+------+ 6631 */ 6632 static void disas_fp_imm(DisasContext *s, uint32_t insn) 6633 { 6634 int rd = extract32(insn, 0, 5); 6635 int imm5 = extract32(insn, 5, 5); 6636 int imm8 = extract32(insn, 13, 8); 6637 int type = extract32(insn, 22, 2); 6638 int mos = extract32(insn, 29, 3); 6639 uint64_t imm; 6640 MemOp sz; 6641 6642 if (mos || imm5) { 6643 unallocated_encoding(s); 6644 return; 6645 } 6646 6647 switch (type) { 6648 case 0: 6649 sz = MO_32; 6650 break; 6651 case 1: 6652 sz = MO_64; 6653 break; 6654 case 3: 6655 sz = MO_16; 6656 if (dc_isar_feature(aa64_fp16, s)) { 6657 break; 6658 } 6659 /* fallthru */ 6660 default: 6661 unallocated_encoding(s); 6662 return; 6663 } 6664 6665 if (!fp_access_check(s)) { 6666 return; 6667 } 6668 6669 imm = vfp_expand_imm(sz, imm8); 6670 write_fp_dreg(s, rd, tcg_constant_i64(imm)); 6671 } 6672 6673 /* Handle floating point <=> fixed point conversions. Note that we can 6674 * also deal with fp <=> integer conversions as a special case (scale == 64) 6675 * OPTME: consider handling that special case specially or at least skipping 6676 * the call to scalbn in the helpers for zero shifts. 6677 */ 6678 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, 6679 bool itof, int rmode, int scale, int sf, int type) 6680 { 6681 bool is_signed = !(opcode & 1); 6682 TCGv_ptr tcg_fpstatus; 6683 TCGv_i32 tcg_shift, tcg_single; 6684 TCGv_i64 tcg_double; 6685 6686 tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR); 6687 6688 tcg_shift = tcg_constant_i32(64 - scale); 6689 6690 if (itof) { 6691 TCGv_i64 tcg_int = cpu_reg(s, rn); 6692 if (!sf) { 6693 TCGv_i64 tcg_extend = tcg_temp_new_i64(); 6694 6695 if (is_signed) { 6696 tcg_gen_ext32s_i64(tcg_extend, tcg_int); 6697 } else { 6698 tcg_gen_ext32u_i64(tcg_extend, tcg_int); 6699 } 6700 6701 tcg_int = tcg_extend; 6702 } 6703 6704 switch (type) { 6705 case 1: /* float64 */ 6706 tcg_double = tcg_temp_new_i64(); 6707 if (is_signed) { 6708 gen_helper_vfp_sqtod(tcg_double, tcg_int, 6709 tcg_shift, tcg_fpstatus); 6710 } else { 6711 gen_helper_vfp_uqtod(tcg_double, tcg_int, 6712 tcg_shift, tcg_fpstatus); 6713 } 6714 write_fp_dreg(s, rd, tcg_double); 6715 break; 6716 6717 case 0: /* float32 */ 6718 tcg_single = tcg_temp_new_i32(); 6719 if (is_signed) { 6720 gen_helper_vfp_sqtos(tcg_single, tcg_int, 6721 tcg_shift, tcg_fpstatus); 6722 } else { 6723 gen_helper_vfp_uqtos(tcg_single, tcg_int, 6724 tcg_shift, tcg_fpstatus); 6725 } 6726 write_fp_sreg(s, rd, tcg_single); 6727 break; 6728 6729 case 3: /* float16 */ 6730 tcg_single = tcg_temp_new_i32(); 6731 if (is_signed) { 6732 gen_helper_vfp_sqtoh(tcg_single, tcg_int, 6733 tcg_shift, tcg_fpstatus); 6734 } else { 6735 gen_helper_vfp_uqtoh(tcg_single, tcg_int, 6736 tcg_shift, tcg_fpstatus); 6737 } 6738 write_fp_sreg(s, rd, tcg_single); 6739 break; 6740 6741 default: 6742 g_assert_not_reached(); 6743 } 6744 } else { 6745 TCGv_i64 tcg_int = cpu_reg(s, rd); 6746 TCGv_i32 tcg_rmode; 6747 6748 if (extract32(opcode, 2, 1)) { 6749 /* There are too many rounding modes to all fit into rmode, 6750 * so FCVTA[US] is a special case. 6751 */ 6752 rmode = FPROUNDING_TIEAWAY; 6753 } 6754 6755 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 6756 6757 switch (type) { 6758 case 1: /* float64 */ 6759 tcg_double = read_fp_dreg(s, rn); 6760 if (is_signed) { 6761 if (!sf) { 6762 gen_helper_vfp_tosld(tcg_int, tcg_double, 6763 tcg_shift, tcg_fpstatus); 6764 } else { 6765 gen_helper_vfp_tosqd(tcg_int, tcg_double, 6766 tcg_shift, tcg_fpstatus); 6767 } 6768 } else { 6769 if (!sf) { 6770 gen_helper_vfp_tould(tcg_int, tcg_double, 6771 tcg_shift, tcg_fpstatus); 6772 } else { 6773 gen_helper_vfp_touqd(tcg_int, tcg_double, 6774 tcg_shift, tcg_fpstatus); 6775 } 6776 } 6777 if (!sf) { 6778 tcg_gen_ext32u_i64(tcg_int, tcg_int); 6779 } 6780 break; 6781 6782 case 0: /* float32 */ 6783 tcg_single = read_fp_sreg(s, rn); 6784 if (sf) { 6785 if (is_signed) { 6786 gen_helper_vfp_tosqs(tcg_int, tcg_single, 6787 tcg_shift, tcg_fpstatus); 6788 } else { 6789 gen_helper_vfp_touqs(tcg_int, tcg_single, 6790 tcg_shift, tcg_fpstatus); 6791 } 6792 } else { 6793 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 6794 if (is_signed) { 6795 gen_helper_vfp_tosls(tcg_dest, tcg_single, 6796 tcg_shift, tcg_fpstatus); 6797 } else { 6798 gen_helper_vfp_touls(tcg_dest, tcg_single, 6799 tcg_shift, tcg_fpstatus); 6800 } 6801 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 6802 } 6803 break; 6804 6805 case 3: /* float16 */ 6806 tcg_single = read_fp_sreg(s, rn); 6807 if (sf) { 6808 if (is_signed) { 6809 gen_helper_vfp_tosqh(tcg_int, tcg_single, 6810 tcg_shift, tcg_fpstatus); 6811 } else { 6812 gen_helper_vfp_touqh(tcg_int, tcg_single, 6813 tcg_shift, tcg_fpstatus); 6814 } 6815 } else { 6816 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 6817 if (is_signed) { 6818 gen_helper_vfp_toslh(tcg_dest, tcg_single, 6819 tcg_shift, tcg_fpstatus); 6820 } else { 6821 gen_helper_vfp_toulh(tcg_dest, tcg_single, 6822 tcg_shift, tcg_fpstatus); 6823 } 6824 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 6825 } 6826 break; 6827 6828 default: 6829 g_assert_not_reached(); 6830 } 6831 6832 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 6833 } 6834 } 6835 6836 /* Floating point <-> fixed point conversions 6837 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 6838 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 6839 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd | 6840 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 6841 */ 6842 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn) 6843 { 6844 int rd = extract32(insn, 0, 5); 6845 int rn = extract32(insn, 5, 5); 6846 int scale = extract32(insn, 10, 6); 6847 int opcode = extract32(insn, 16, 3); 6848 int rmode = extract32(insn, 19, 2); 6849 int type = extract32(insn, 22, 2); 6850 bool sbit = extract32(insn, 29, 1); 6851 bool sf = extract32(insn, 31, 1); 6852 bool itof; 6853 6854 if (sbit || (!sf && scale < 32)) { 6855 unallocated_encoding(s); 6856 return; 6857 } 6858 6859 switch (type) { 6860 case 0: /* float32 */ 6861 case 1: /* float64 */ 6862 break; 6863 case 3: /* float16 */ 6864 if (dc_isar_feature(aa64_fp16, s)) { 6865 break; 6866 } 6867 /* fallthru */ 6868 default: 6869 unallocated_encoding(s); 6870 return; 6871 } 6872 6873 switch ((rmode << 3) | opcode) { 6874 case 0x2: /* SCVTF */ 6875 case 0x3: /* UCVTF */ 6876 itof = true; 6877 break; 6878 case 0x18: /* FCVTZS */ 6879 case 0x19: /* FCVTZU */ 6880 itof = false; 6881 break; 6882 default: 6883 unallocated_encoding(s); 6884 return; 6885 } 6886 6887 if (!fp_access_check(s)) { 6888 return; 6889 } 6890 6891 handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type); 6892 } 6893 6894 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) 6895 { 6896 /* FMOV: gpr to or from float, double, or top half of quad fp reg, 6897 * without conversion. 6898 */ 6899 6900 if (itof) { 6901 TCGv_i64 tcg_rn = cpu_reg(s, rn); 6902 TCGv_i64 tmp; 6903 6904 switch (type) { 6905 case 0: 6906 /* 32 bit */ 6907 tmp = tcg_temp_new_i64(); 6908 tcg_gen_ext32u_i64(tmp, tcg_rn); 6909 write_fp_dreg(s, rd, tmp); 6910 break; 6911 case 1: 6912 /* 64 bit */ 6913 write_fp_dreg(s, rd, tcg_rn); 6914 break; 6915 case 2: 6916 /* 64 bit to top half. */ 6917 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, rd)); 6918 clear_vec_high(s, true, rd); 6919 break; 6920 case 3: 6921 /* 16 bit */ 6922 tmp = tcg_temp_new_i64(); 6923 tcg_gen_ext16u_i64(tmp, tcg_rn); 6924 write_fp_dreg(s, rd, tmp); 6925 break; 6926 default: 6927 g_assert_not_reached(); 6928 } 6929 } else { 6930 TCGv_i64 tcg_rd = cpu_reg(s, rd); 6931 6932 switch (type) { 6933 case 0: 6934 /* 32 bit */ 6935 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_32)); 6936 break; 6937 case 1: 6938 /* 64 bit */ 6939 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_64)); 6940 break; 6941 case 2: 6942 /* 64 bits from top half */ 6943 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, rn)); 6944 break; 6945 case 3: 6946 /* 16 bit */ 6947 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_16)); 6948 break; 6949 default: 6950 g_assert_not_reached(); 6951 } 6952 } 6953 } 6954 6955 static void handle_fjcvtzs(DisasContext *s, int rd, int rn) 6956 { 6957 TCGv_i64 t = read_fp_dreg(s, rn); 6958 TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR); 6959 6960 gen_helper_fjcvtzs(t, t, fpstatus); 6961 6962 tcg_gen_ext32u_i64(cpu_reg(s, rd), t); 6963 tcg_gen_extrh_i64_i32(cpu_ZF, t); 6964 tcg_gen_movi_i32(cpu_CF, 0); 6965 tcg_gen_movi_i32(cpu_NF, 0); 6966 tcg_gen_movi_i32(cpu_VF, 0); 6967 } 6968 6969 /* Floating point <-> integer conversions 6970 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 6971 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 6972 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd | 6973 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 6974 */ 6975 static void disas_fp_int_conv(DisasContext *s, uint32_t insn) 6976 { 6977 int rd = extract32(insn, 0, 5); 6978 int rn = extract32(insn, 5, 5); 6979 int opcode = extract32(insn, 16, 3); 6980 int rmode = extract32(insn, 19, 2); 6981 int type = extract32(insn, 22, 2); 6982 bool sbit = extract32(insn, 29, 1); 6983 bool sf = extract32(insn, 31, 1); 6984 bool itof = false; 6985 6986 if (sbit) { 6987 goto do_unallocated; 6988 } 6989 6990 switch (opcode) { 6991 case 2: /* SCVTF */ 6992 case 3: /* UCVTF */ 6993 itof = true; 6994 /* fallthru */ 6995 case 4: /* FCVTAS */ 6996 case 5: /* FCVTAU */ 6997 if (rmode != 0) { 6998 goto do_unallocated; 6999 } 7000 /* fallthru */ 7001 case 0: /* FCVT[NPMZ]S */ 7002 case 1: /* FCVT[NPMZ]U */ 7003 switch (type) { 7004 case 0: /* float32 */ 7005 case 1: /* float64 */ 7006 break; 7007 case 3: /* float16 */ 7008 if (!dc_isar_feature(aa64_fp16, s)) { 7009 goto do_unallocated; 7010 } 7011 break; 7012 default: 7013 goto do_unallocated; 7014 } 7015 if (!fp_access_check(s)) { 7016 return; 7017 } 7018 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type); 7019 break; 7020 7021 default: 7022 switch (sf << 7 | type << 5 | rmode << 3 | opcode) { 7023 case 0b01100110: /* FMOV half <-> 32-bit int */ 7024 case 0b01100111: 7025 case 0b11100110: /* FMOV half <-> 64-bit int */ 7026 case 0b11100111: 7027 if (!dc_isar_feature(aa64_fp16, s)) { 7028 goto do_unallocated; 7029 } 7030 /* fallthru */ 7031 case 0b00000110: /* FMOV 32-bit */ 7032 case 0b00000111: 7033 case 0b10100110: /* FMOV 64-bit */ 7034 case 0b10100111: 7035 case 0b11001110: /* FMOV top half of 128-bit */ 7036 case 0b11001111: 7037 if (!fp_access_check(s)) { 7038 return; 7039 } 7040 itof = opcode & 1; 7041 handle_fmov(s, rd, rn, type, itof); 7042 break; 7043 7044 case 0b00111110: /* FJCVTZS */ 7045 if (!dc_isar_feature(aa64_jscvt, s)) { 7046 goto do_unallocated; 7047 } else if (fp_access_check(s)) { 7048 handle_fjcvtzs(s, rd, rn); 7049 } 7050 break; 7051 7052 default: 7053 do_unallocated: 7054 unallocated_encoding(s); 7055 return; 7056 } 7057 break; 7058 } 7059 } 7060 7061 /* FP-specific subcases of table C3-6 (SIMD and FP data processing) 7062 * 31 30 29 28 25 24 0 7063 * +---+---+---+---------+-----------------------------+ 7064 * | | 0 | | 1 1 1 1 | | 7065 * +---+---+---+---------+-----------------------------+ 7066 */ 7067 static void disas_data_proc_fp(DisasContext *s, uint32_t insn) 7068 { 7069 if (extract32(insn, 24, 1)) { 7070 /* Floating point data-processing (3 source) */ 7071 disas_fp_3src(s, insn); 7072 } else if (extract32(insn, 21, 1) == 0) { 7073 /* Floating point to fixed point conversions */ 7074 disas_fp_fixed_conv(s, insn); 7075 } else { 7076 switch (extract32(insn, 10, 2)) { 7077 case 1: 7078 /* Floating point conditional compare */ 7079 disas_fp_ccomp(s, insn); 7080 break; 7081 case 2: 7082 /* Floating point data-processing (2 source) */ 7083 disas_fp_2src(s, insn); 7084 break; 7085 case 3: 7086 /* Floating point conditional select */ 7087 disas_fp_csel(s, insn); 7088 break; 7089 case 0: 7090 switch (ctz32(extract32(insn, 12, 4))) { 7091 case 0: /* [15:12] == xxx1 */ 7092 /* Floating point immediate */ 7093 disas_fp_imm(s, insn); 7094 break; 7095 case 1: /* [15:12] == xx10 */ 7096 /* Floating point compare */ 7097 disas_fp_compare(s, insn); 7098 break; 7099 case 2: /* [15:12] == x100 */ 7100 /* Floating point data-processing (1 source) */ 7101 disas_fp_1src(s, insn); 7102 break; 7103 case 3: /* [15:12] == 1000 */ 7104 unallocated_encoding(s); 7105 break; 7106 default: /* [15:12] == 0000 */ 7107 /* Floating point <-> integer conversions */ 7108 disas_fp_int_conv(s, insn); 7109 break; 7110 } 7111 break; 7112 } 7113 } 7114 } 7115 7116 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right, 7117 int pos) 7118 { 7119 /* Extract 64 bits from the middle of two concatenated 64 bit 7120 * vector register slices left:right. The extracted bits start 7121 * at 'pos' bits into the right (least significant) side. 7122 * We return the result in tcg_right, and guarantee not to 7123 * trash tcg_left. 7124 */ 7125 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 7126 assert(pos > 0 && pos < 64); 7127 7128 tcg_gen_shri_i64(tcg_right, tcg_right, pos); 7129 tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos); 7130 tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp); 7131 } 7132 7133 /* EXT 7134 * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0 7135 * +---+---+-------------+-----+---+------+---+------+---+------+------+ 7136 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd | 7137 * +---+---+-------------+-----+---+------+---+------+---+------+------+ 7138 */ 7139 static void disas_simd_ext(DisasContext *s, uint32_t insn) 7140 { 7141 int is_q = extract32(insn, 30, 1); 7142 int op2 = extract32(insn, 22, 2); 7143 int imm4 = extract32(insn, 11, 4); 7144 int rm = extract32(insn, 16, 5); 7145 int rn = extract32(insn, 5, 5); 7146 int rd = extract32(insn, 0, 5); 7147 int pos = imm4 << 3; 7148 TCGv_i64 tcg_resl, tcg_resh; 7149 7150 if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) { 7151 unallocated_encoding(s); 7152 return; 7153 } 7154 7155 if (!fp_access_check(s)) { 7156 return; 7157 } 7158 7159 tcg_resh = tcg_temp_new_i64(); 7160 tcg_resl = tcg_temp_new_i64(); 7161 7162 /* Vd gets bits starting at pos bits into Vm:Vn. This is 7163 * either extracting 128 bits from a 128:128 concatenation, or 7164 * extracting 64 bits from a 64:64 concatenation. 7165 */ 7166 if (!is_q) { 7167 read_vec_element(s, tcg_resl, rn, 0, MO_64); 7168 if (pos != 0) { 7169 read_vec_element(s, tcg_resh, rm, 0, MO_64); 7170 do_ext64(s, tcg_resh, tcg_resl, pos); 7171 } 7172 } else { 7173 TCGv_i64 tcg_hh; 7174 typedef struct { 7175 int reg; 7176 int elt; 7177 } EltPosns; 7178 EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} }; 7179 EltPosns *elt = eltposns; 7180 7181 if (pos >= 64) { 7182 elt++; 7183 pos -= 64; 7184 } 7185 7186 read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64); 7187 elt++; 7188 read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64); 7189 elt++; 7190 if (pos != 0) { 7191 do_ext64(s, tcg_resh, tcg_resl, pos); 7192 tcg_hh = tcg_temp_new_i64(); 7193 read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64); 7194 do_ext64(s, tcg_hh, tcg_resh, pos); 7195 } 7196 } 7197 7198 write_vec_element(s, tcg_resl, rd, 0, MO_64); 7199 if (is_q) { 7200 write_vec_element(s, tcg_resh, rd, 1, MO_64); 7201 } 7202 clear_vec_high(s, is_q, rd); 7203 } 7204 7205 /* TBL/TBX 7206 * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0 7207 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ 7208 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd | 7209 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ 7210 */ 7211 static void disas_simd_tb(DisasContext *s, uint32_t insn) 7212 { 7213 int op2 = extract32(insn, 22, 2); 7214 int is_q = extract32(insn, 30, 1); 7215 int rm = extract32(insn, 16, 5); 7216 int rn = extract32(insn, 5, 5); 7217 int rd = extract32(insn, 0, 5); 7218 int is_tbx = extract32(insn, 12, 1); 7219 int len = (extract32(insn, 13, 2) + 1) * 16; 7220 7221 if (op2 != 0) { 7222 unallocated_encoding(s); 7223 return; 7224 } 7225 7226 if (!fp_access_check(s)) { 7227 return; 7228 } 7229 7230 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 7231 vec_full_reg_offset(s, rm), tcg_env, 7232 is_q ? 16 : 8, vec_full_reg_size(s), 7233 (len << 6) | (is_tbx << 5) | rn, 7234 gen_helper_simd_tblx); 7235 } 7236 7237 /* ZIP/UZP/TRN 7238 * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 7239 * +---+---+-------------+------+---+------+---+------------------+------+ 7240 * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd | 7241 * +---+---+-------------+------+---+------+---+------------------+------+ 7242 */ 7243 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) 7244 { 7245 int rd = extract32(insn, 0, 5); 7246 int rn = extract32(insn, 5, 5); 7247 int rm = extract32(insn, 16, 5); 7248 int size = extract32(insn, 22, 2); 7249 /* opc field bits [1:0] indicate ZIP/UZP/TRN; 7250 * bit 2 indicates 1 vs 2 variant of the insn. 7251 */ 7252 int opcode = extract32(insn, 12, 2); 7253 bool part = extract32(insn, 14, 1); 7254 bool is_q = extract32(insn, 30, 1); 7255 int esize = 8 << size; 7256 int i; 7257 int datasize = is_q ? 128 : 64; 7258 int elements = datasize / esize; 7259 TCGv_i64 tcg_res[2], tcg_ele; 7260 7261 if (opcode == 0 || (size == 3 && !is_q)) { 7262 unallocated_encoding(s); 7263 return; 7264 } 7265 7266 if (!fp_access_check(s)) { 7267 return; 7268 } 7269 7270 tcg_res[0] = tcg_temp_new_i64(); 7271 tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL; 7272 tcg_ele = tcg_temp_new_i64(); 7273 7274 for (i = 0; i < elements; i++) { 7275 int o, w; 7276 7277 switch (opcode) { 7278 case 1: /* UZP1/2 */ 7279 { 7280 int midpoint = elements / 2; 7281 if (i < midpoint) { 7282 read_vec_element(s, tcg_ele, rn, 2 * i + part, size); 7283 } else { 7284 read_vec_element(s, tcg_ele, rm, 7285 2 * (i - midpoint) + part, size); 7286 } 7287 break; 7288 } 7289 case 2: /* TRN1/2 */ 7290 if (i & 1) { 7291 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size); 7292 } else { 7293 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size); 7294 } 7295 break; 7296 case 3: /* ZIP1/2 */ 7297 { 7298 int base = part * elements / 2; 7299 if (i & 1) { 7300 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size); 7301 } else { 7302 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size); 7303 } 7304 break; 7305 } 7306 default: 7307 g_assert_not_reached(); 7308 } 7309 7310 w = (i * esize) / 64; 7311 o = (i * esize) % 64; 7312 if (o == 0) { 7313 tcg_gen_mov_i64(tcg_res[w], tcg_ele); 7314 } else { 7315 tcg_gen_shli_i64(tcg_ele, tcg_ele, o); 7316 tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele); 7317 } 7318 } 7319 7320 for (i = 0; i <= is_q; ++i) { 7321 write_vec_element(s, tcg_res[i], rd, i, MO_64); 7322 } 7323 clear_vec_high(s, is_q, rd); 7324 } 7325 7326 /* 7327 * do_reduction_op helper 7328 * 7329 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 7330 * important for correct NaN propagation that we do these 7331 * operations in exactly the order specified by the pseudocode. 7332 * 7333 * This is a recursive function, TCG temps should be freed by the 7334 * calling function once it is done with the values. 7335 */ 7336 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn, 7337 int esize, int size, int vmap, TCGv_ptr fpst) 7338 { 7339 if (esize == size) { 7340 int element; 7341 MemOp msize = esize == 16 ? MO_16 : MO_32; 7342 TCGv_i32 tcg_elem; 7343 7344 /* We should have one register left here */ 7345 assert(ctpop8(vmap) == 1); 7346 element = ctz32(vmap); 7347 assert(element < 8); 7348 7349 tcg_elem = tcg_temp_new_i32(); 7350 read_vec_element_i32(s, tcg_elem, rn, element, msize); 7351 return tcg_elem; 7352 } else { 7353 int bits = size / 2; 7354 int shift = ctpop8(vmap) / 2; 7355 int vmap_lo = (vmap >> shift) & vmap; 7356 int vmap_hi = (vmap & ~vmap_lo); 7357 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 7358 7359 tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst); 7360 tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst); 7361 tcg_res = tcg_temp_new_i32(); 7362 7363 switch (fpopcode) { 7364 case 0x0c: /* fmaxnmv half-precision */ 7365 gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst); 7366 break; 7367 case 0x0f: /* fmaxv half-precision */ 7368 gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst); 7369 break; 7370 case 0x1c: /* fminnmv half-precision */ 7371 gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst); 7372 break; 7373 case 0x1f: /* fminv half-precision */ 7374 gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst); 7375 break; 7376 case 0x2c: /* fmaxnmv */ 7377 gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst); 7378 break; 7379 case 0x2f: /* fmaxv */ 7380 gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst); 7381 break; 7382 case 0x3c: /* fminnmv */ 7383 gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst); 7384 break; 7385 case 0x3f: /* fminv */ 7386 gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst); 7387 break; 7388 default: 7389 g_assert_not_reached(); 7390 } 7391 return tcg_res; 7392 } 7393 } 7394 7395 /* AdvSIMD across lanes 7396 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 7397 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 7398 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | 7399 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 7400 */ 7401 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) 7402 { 7403 int rd = extract32(insn, 0, 5); 7404 int rn = extract32(insn, 5, 5); 7405 int size = extract32(insn, 22, 2); 7406 int opcode = extract32(insn, 12, 5); 7407 bool is_q = extract32(insn, 30, 1); 7408 bool is_u = extract32(insn, 29, 1); 7409 bool is_fp = false; 7410 bool is_min = false; 7411 int esize; 7412 int elements; 7413 int i; 7414 TCGv_i64 tcg_res, tcg_elt; 7415 7416 switch (opcode) { 7417 case 0x1b: /* ADDV */ 7418 if (is_u) { 7419 unallocated_encoding(s); 7420 return; 7421 } 7422 /* fall through */ 7423 case 0x3: /* SADDLV, UADDLV */ 7424 case 0xa: /* SMAXV, UMAXV */ 7425 case 0x1a: /* SMINV, UMINV */ 7426 if (size == 3 || (size == 2 && !is_q)) { 7427 unallocated_encoding(s); 7428 return; 7429 } 7430 break; 7431 case 0xc: /* FMAXNMV, FMINNMV */ 7432 case 0xf: /* FMAXV, FMINV */ 7433 /* Bit 1 of size field encodes min vs max and the actual size 7434 * depends on the encoding of the U bit. If not set (and FP16 7435 * enabled) then we do half-precision float instead of single 7436 * precision. 7437 */ 7438 is_min = extract32(size, 1, 1); 7439 is_fp = true; 7440 if (!is_u && dc_isar_feature(aa64_fp16, s)) { 7441 size = 1; 7442 } else if (!is_u || !is_q || extract32(size, 0, 1)) { 7443 unallocated_encoding(s); 7444 return; 7445 } else { 7446 size = 2; 7447 } 7448 break; 7449 default: 7450 unallocated_encoding(s); 7451 return; 7452 } 7453 7454 if (!fp_access_check(s)) { 7455 return; 7456 } 7457 7458 esize = 8 << size; 7459 elements = (is_q ? 128 : 64) / esize; 7460 7461 tcg_res = tcg_temp_new_i64(); 7462 tcg_elt = tcg_temp_new_i64(); 7463 7464 /* These instructions operate across all lanes of a vector 7465 * to produce a single result. We can guarantee that a 64 7466 * bit intermediate is sufficient: 7467 * + for [US]ADDLV the maximum element size is 32 bits, and 7468 * the result type is 64 bits 7469 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the 7470 * same as the element size, which is 32 bits at most 7471 * For the integer operations we can choose to work at 64 7472 * or 32 bits and truncate at the end; for simplicity 7473 * we use 64 bits always. The floating point 7474 * ops do require 32 bit intermediates, though. 7475 */ 7476 if (!is_fp) { 7477 read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN)); 7478 7479 for (i = 1; i < elements; i++) { 7480 read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN)); 7481 7482 switch (opcode) { 7483 case 0x03: /* SADDLV / UADDLV */ 7484 case 0x1b: /* ADDV */ 7485 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt); 7486 break; 7487 case 0x0a: /* SMAXV / UMAXV */ 7488 if (is_u) { 7489 tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt); 7490 } else { 7491 tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt); 7492 } 7493 break; 7494 case 0x1a: /* SMINV / UMINV */ 7495 if (is_u) { 7496 tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt); 7497 } else { 7498 tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt); 7499 } 7500 break; 7501 default: 7502 g_assert_not_reached(); 7503 } 7504 7505 } 7506 } else { 7507 /* Floating point vector reduction ops which work across 32 7508 * bit (single) or 16 bit (half-precision) intermediates. 7509 * Note that correct NaN propagation requires that we do these 7510 * operations in exactly the order specified by the pseudocode. 7511 */ 7512 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 7513 int fpopcode = opcode | is_min << 4 | is_u << 5; 7514 int vmap = (1 << elements) - 1; 7515 TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize, 7516 (is_q ? 128 : 64), vmap, fpst); 7517 tcg_gen_extu_i32_i64(tcg_res, tcg_res32); 7518 } 7519 7520 /* Now truncate the result to the width required for the final output */ 7521 if (opcode == 0x03) { 7522 /* SADDLV, UADDLV: result is 2*esize */ 7523 size++; 7524 } 7525 7526 switch (size) { 7527 case 0: 7528 tcg_gen_ext8u_i64(tcg_res, tcg_res); 7529 break; 7530 case 1: 7531 tcg_gen_ext16u_i64(tcg_res, tcg_res); 7532 break; 7533 case 2: 7534 tcg_gen_ext32u_i64(tcg_res, tcg_res); 7535 break; 7536 case 3: 7537 break; 7538 default: 7539 g_assert_not_reached(); 7540 } 7541 7542 write_fp_dreg(s, rd, tcg_res); 7543 } 7544 7545 /* DUP (Element, Vector) 7546 * 7547 * 31 30 29 21 20 16 15 10 9 5 4 0 7548 * +---+---+-------------------+--------+-------------+------+------+ 7549 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | 7550 * +---+---+-------------------+--------+-------------+------+------+ 7551 * 7552 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7553 */ 7554 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, 7555 int imm5) 7556 { 7557 int size = ctz32(imm5); 7558 int index; 7559 7560 if (size > 3 || (size == 3 && !is_q)) { 7561 unallocated_encoding(s); 7562 return; 7563 } 7564 7565 if (!fp_access_check(s)) { 7566 return; 7567 } 7568 7569 index = imm5 >> (size + 1); 7570 tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd), 7571 vec_reg_offset(s, rn, index, size), 7572 is_q ? 16 : 8, vec_full_reg_size(s)); 7573 } 7574 7575 /* DUP (element, scalar) 7576 * 31 21 20 16 15 10 9 5 4 0 7577 * +-----------------------+--------+-------------+------+------+ 7578 * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | 7579 * +-----------------------+--------+-------------+------+------+ 7580 */ 7581 static void handle_simd_dupes(DisasContext *s, int rd, int rn, 7582 int imm5) 7583 { 7584 int size = ctz32(imm5); 7585 int index; 7586 TCGv_i64 tmp; 7587 7588 if (size > 3) { 7589 unallocated_encoding(s); 7590 return; 7591 } 7592 7593 if (!fp_access_check(s)) { 7594 return; 7595 } 7596 7597 index = imm5 >> (size + 1); 7598 7599 /* This instruction just extracts the specified element and 7600 * zero-extends it into the bottom of the destination register. 7601 */ 7602 tmp = tcg_temp_new_i64(); 7603 read_vec_element(s, tmp, rn, index, size); 7604 write_fp_dreg(s, rd, tmp); 7605 } 7606 7607 /* DUP (General) 7608 * 7609 * 31 30 29 21 20 16 15 10 9 5 4 0 7610 * +---+---+-------------------+--------+-------------+------+------+ 7611 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd | 7612 * +---+---+-------------------+--------+-------------+------+------+ 7613 * 7614 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7615 */ 7616 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn, 7617 int imm5) 7618 { 7619 int size = ctz32(imm5); 7620 uint32_t dofs, oprsz, maxsz; 7621 7622 if (size > 3 || ((size == 3) && !is_q)) { 7623 unallocated_encoding(s); 7624 return; 7625 } 7626 7627 if (!fp_access_check(s)) { 7628 return; 7629 } 7630 7631 dofs = vec_full_reg_offset(s, rd); 7632 oprsz = is_q ? 16 : 8; 7633 maxsz = vec_full_reg_size(s); 7634 7635 tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn)); 7636 } 7637 7638 /* INS (Element) 7639 * 7640 * 31 21 20 16 15 14 11 10 9 5 4 0 7641 * +-----------------------+--------+------------+---+------+------+ 7642 * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 7643 * +-----------------------+--------+------------+---+------+------+ 7644 * 7645 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7646 * index: encoded in imm5<4:size+1> 7647 */ 7648 static void handle_simd_inse(DisasContext *s, int rd, int rn, 7649 int imm4, int imm5) 7650 { 7651 int size = ctz32(imm5); 7652 int src_index, dst_index; 7653 TCGv_i64 tmp; 7654 7655 if (size > 3) { 7656 unallocated_encoding(s); 7657 return; 7658 } 7659 7660 if (!fp_access_check(s)) { 7661 return; 7662 } 7663 7664 dst_index = extract32(imm5, 1+size, 5); 7665 src_index = extract32(imm4, size, 4); 7666 7667 tmp = tcg_temp_new_i64(); 7668 7669 read_vec_element(s, tmp, rn, src_index, size); 7670 write_vec_element(s, tmp, rd, dst_index, size); 7671 7672 /* INS is considered a 128-bit write for SVE. */ 7673 clear_vec_high(s, true, rd); 7674 } 7675 7676 7677 /* INS (General) 7678 * 7679 * 31 21 20 16 15 10 9 5 4 0 7680 * +-----------------------+--------+-------------+------+------+ 7681 * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd | 7682 * +-----------------------+--------+-------------+------+------+ 7683 * 7684 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7685 * index: encoded in imm5<4:size+1> 7686 */ 7687 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5) 7688 { 7689 int size = ctz32(imm5); 7690 int idx; 7691 7692 if (size > 3) { 7693 unallocated_encoding(s); 7694 return; 7695 } 7696 7697 if (!fp_access_check(s)) { 7698 return; 7699 } 7700 7701 idx = extract32(imm5, 1 + size, 4 - size); 7702 write_vec_element(s, cpu_reg(s, rn), rd, idx, size); 7703 7704 /* INS is considered a 128-bit write for SVE. */ 7705 clear_vec_high(s, true, rd); 7706 } 7707 7708 /* 7709 * UMOV (General) 7710 * SMOV (General) 7711 * 7712 * 31 30 29 21 20 16 15 12 10 9 5 4 0 7713 * +---+---+-------------------+--------+-------------+------+------+ 7714 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd | 7715 * +---+---+-------------------+--------+-------------+------+------+ 7716 * 7717 * U: unsigned when set 7718 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7719 */ 7720 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, 7721 int rn, int rd, int imm5) 7722 { 7723 int size = ctz32(imm5); 7724 int element; 7725 TCGv_i64 tcg_rd; 7726 7727 /* Check for UnallocatedEncodings */ 7728 if (is_signed) { 7729 if (size > 2 || (size == 2 && !is_q)) { 7730 unallocated_encoding(s); 7731 return; 7732 } 7733 } else { 7734 if (size > 3 7735 || (size < 3 && is_q) 7736 || (size == 3 && !is_q)) { 7737 unallocated_encoding(s); 7738 return; 7739 } 7740 } 7741 7742 if (!fp_access_check(s)) { 7743 return; 7744 } 7745 7746 element = extract32(imm5, 1+size, 4); 7747 7748 tcg_rd = cpu_reg(s, rd); 7749 read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0)); 7750 if (is_signed && !is_q) { 7751 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7752 } 7753 } 7754 7755 /* AdvSIMD copy 7756 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 7757 * +---+---+----+-----------------+------+---+------+---+------+------+ 7758 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 7759 * +---+---+----+-----------------+------+---+------+---+------+------+ 7760 */ 7761 static void disas_simd_copy(DisasContext *s, uint32_t insn) 7762 { 7763 int rd = extract32(insn, 0, 5); 7764 int rn = extract32(insn, 5, 5); 7765 int imm4 = extract32(insn, 11, 4); 7766 int op = extract32(insn, 29, 1); 7767 int is_q = extract32(insn, 30, 1); 7768 int imm5 = extract32(insn, 16, 5); 7769 7770 if (op) { 7771 if (is_q) { 7772 /* INS (element) */ 7773 handle_simd_inse(s, rd, rn, imm4, imm5); 7774 } else { 7775 unallocated_encoding(s); 7776 } 7777 } else { 7778 switch (imm4) { 7779 case 0: 7780 /* DUP (element - vector) */ 7781 handle_simd_dupe(s, is_q, rd, rn, imm5); 7782 break; 7783 case 1: 7784 /* DUP (general) */ 7785 handle_simd_dupg(s, is_q, rd, rn, imm5); 7786 break; 7787 case 3: 7788 if (is_q) { 7789 /* INS (general) */ 7790 handle_simd_insg(s, rd, rn, imm5); 7791 } else { 7792 unallocated_encoding(s); 7793 } 7794 break; 7795 case 5: 7796 case 7: 7797 /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */ 7798 handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5); 7799 break; 7800 default: 7801 unallocated_encoding(s); 7802 break; 7803 } 7804 } 7805 } 7806 7807 /* AdvSIMD modified immediate 7808 * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0 7809 * +---+---+----+---------------------+-----+-------+----+---+-------+------+ 7810 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd | 7811 * +---+---+----+---------------------+-----+-------+----+---+-------+------+ 7812 * 7813 * There are a number of operations that can be carried out here: 7814 * MOVI - move (shifted) imm into register 7815 * MVNI - move inverted (shifted) imm into register 7816 * ORR - bitwise OR of (shifted) imm with register 7817 * BIC - bitwise clear of (shifted) imm with register 7818 * With ARMv8.2 we also have: 7819 * FMOV half-precision 7820 */ 7821 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) 7822 { 7823 int rd = extract32(insn, 0, 5); 7824 int cmode = extract32(insn, 12, 4); 7825 int o2 = extract32(insn, 11, 1); 7826 uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5); 7827 bool is_neg = extract32(insn, 29, 1); 7828 bool is_q = extract32(insn, 30, 1); 7829 uint64_t imm = 0; 7830 7831 if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) { 7832 /* Check for FMOV (vector, immediate) - half-precision */ 7833 if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) { 7834 unallocated_encoding(s); 7835 return; 7836 } 7837 } 7838 7839 if (!fp_access_check(s)) { 7840 return; 7841 } 7842 7843 if (cmode == 15 && o2 && !is_neg) { 7844 /* FMOV (vector, immediate) - half-precision */ 7845 imm = vfp_expand_imm(MO_16, abcdefgh); 7846 /* now duplicate across the lanes */ 7847 imm = dup_const(MO_16, imm); 7848 } else { 7849 imm = asimd_imm_const(abcdefgh, cmode, is_neg); 7850 } 7851 7852 if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) { 7853 /* MOVI or MVNI, with MVNI negation handled above. */ 7854 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8, 7855 vec_full_reg_size(s), imm); 7856 } else { 7857 /* ORR or BIC, with BIC negation to AND handled above. */ 7858 if (is_neg) { 7859 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64); 7860 } else { 7861 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64); 7862 } 7863 } 7864 } 7865 7866 /* AdvSIMD scalar copy 7867 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 7868 * +-----+----+-----------------+------+---+------+---+------+------+ 7869 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 7870 * +-----+----+-----------------+------+---+------+---+------+------+ 7871 */ 7872 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn) 7873 { 7874 int rd = extract32(insn, 0, 5); 7875 int rn = extract32(insn, 5, 5); 7876 int imm4 = extract32(insn, 11, 4); 7877 int imm5 = extract32(insn, 16, 5); 7878 int op = extract32(insn, 29, 1); 7879 7880 if (op != 0 || imm4 != 0) { 7881 unallocated_encoding(s); 7882 return; 7883 } 7884 7885 /* DUP (element, scalar) */ 7886 handle_simd_dupes(s, rd, rn, imm5); 7887 } 7888 7889 /* AdvSIMD scalar pairwise 7890 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 7891 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 7892 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | 7893 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 7894 */ 7895 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) 7896 { 7897 int u = extract32(insn, 29, 1); 7898 int size = extract32(insn, 22, 2); 7899 int opcode = extract32(insn, 12, 5); 7900 int rn = extract32(insn, 5, 5); 7901 int rd = extract32(insn, 0, 5); 7902 TCGv_ptr fpst; 7903 7904 /* For some ops (the FP ones), size[1] is part of the encoding. 7905 * For ADDP strictly it is not but size[1] is always 1 for valid 7906 * encodings. 7907 */ 7908 opcode |= (extract32(size, 1, 1) << 5); 7909 7910 switch (opcode) { 7911 case 0x3b: /* ADDP */ 7912 if (u || size != 3) { 7913 unallocated_encoding(s); 7914 return; 7915 } 7916 if (!fp_access_check(s)) { 7917 return; 7918 } 7919 7920 fpst = NULL; 7921 break; 7922 case 0xc: /* FMAXNMP */ 7923 case 0xd: /* FADDP */ 7924 case 0xf: /* FMAXP */ 7925 case 0x2c: /* FMINNMP */ 7926 case 0x2f: /* FMINP */ 7927 /* FP op, size[0] is 32 or 64 bit*/ 7928 if (!u) { 7929 if (!dc_isar_feature(aa64_fp16, s)) { 7930 unallocated_encoding(s); 7931 return; 7932 } else { 7933 size = MO_16; 7934 } 7935 } else { 7936 size = extract32(size, 0, 1) ? MO_64 : MO_32; 7937 } 7938 7939 if (!fp_access_check(s)) { 7940 return; 7941 } 7942 7943 fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 7944 break; 7945 default: 7946 unallocated_encoding(s); 7947 return; 7948 } 7949 7950 if (size == MO_64) { 7951 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 7952 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 7953 TCGv_i64 tcg_res = tcg_temp_new_i64(); 7954 7955 read_vec_element(s, tcg_op1, rn, 0, MO_64); 7956 read_vec_element(s, tcg_op2, rn, 1, MO_64); 7957 7958 switch (opcode) { 7959 case 0x3b: /* ADDP */ 7960 tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2); 7961 break; 7962 case 0xc: /* FMAXNMP */ 7963 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); 7964 break; 7965 case 0xd: /* FADDP */ 7966 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); 7967 break; 7968 case 0xf: /* FMAXP */ 7969 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); 7970 break; 7971 case 0x2c: /* FMINNMP */ 7972 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); 7973 break; 7974 case 0x2f: /* FMINP */ 7975 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); 7976 break; 7977 default: 7978 g_assert_not_reached(); 7979 } 7980 7981 write_fp_dreg(s, rd, tcg_res); 7982 } else { 7983 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 7984 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 7985 TCGv_i32 tcg_res = tcg_temp_new_i32(); 7986 7987 read_vec_element_i32(s, tcg_op1, rn, 0, size); 7988 read_vec_element_i32(s, tcg_op2, rn, 1, size); 7989 7990 if (size == MO_16) { 7991 switch (opcode) { 7992 case 0xc: /* FMAXNMP */ 7993 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); 7994 break; 7995 case 0xd: /* FADDP */ 7996 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); 7997 break; 7998 case 0xf: /* FMAXP */ 7999 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); 8000 break; 8001 case 0x2c: /* FMINNMP */ 8002 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); 8003 break; 8004 case 0x2f: /* FMINP */ 8005 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); 8006 break; 8007 default: 8008 g_assert_not_reached(); 8009 } 8010 } else { 8011 switch (opcode) { 8012 case 0xc: /* FMAXNMP */ 8013 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); 8014 break; 8015 case 0xd: /* FADDP */ 8016 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); 8017 break; 8018 case 0xf: /* FMAXP */ 8019 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); 8020 break; 8021 case 0x2c: /* FMINNMP */ 8022 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); 8023 break; 8024 case 0x2f: /* FMINP */ 8025 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); 8026 break; 8027 default: 8028 g_assert_not_reached(); 8029 } 8030 } 8031 8032 write_fp_sreg(s, rd, tcg_res); 8033 } 8034 } 8035 8036 /* 8037 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate) 8038 * 8039 * This code is handles the common shifting code and is used by both 8040 * the vector and scalar code. 8041 */ 8042 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src, 8043 TCGv_i64 tcg_rnd, bool accumulate, 8044 bool is_u, int size, int shift) 8045 { 8046 bool extended_result = false; 8047 bool round = tcg_rnd != NULL; 8048 int ext_lshift = 0; 8049 TCGv_i64 tcg_src_hi; 8050 8051 if (round && size == 3) { 8052 extended_result = true; 8053 ext_lshift = 64 - shift; 8054 tcg_src_hi = tcg_temp_new_i64(); 8055 } else if (shift == 64) { 8056 if (!accumulate && is_u) { 8057 /* result is zero */ 8058 tcg_gen_movi_i64(tcg_res, 0); 8059 return; 8060 } 8061 } 8062 8063 /* Deal with the rounding step */ 8064 if (round) { 8065 if (extended_result) { 8066 TCGv_i64 tcg_zero = tcg_constant_i64(0); 8067 if (!is_u) { 8068 /* take care of sign extending tcg_res */ 8069 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63); 8070 tcg_gen_add2_i64(tcg_src, tcg_src_hi, 8071 tcg_src, tcg_src_hi, 8072 tcg_rnd, tcg_zero); 8073 } else { 8074 tcg_gen_add2_i64(tcg_src, tcg_src_hi, 8075 tcg_src, tcg_zero, 8076 tcg_rnd, tcg_zero); 8077 } 8078 } else { 8079 tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd); 8080 } 8081 } 8082 8083 /* Now do the shift right */ 8084 if (round && extended_result) { 8085 /* extended case, >64 bit precision required */ 8086 if (ext_lshift == 0) { 8087 /* special case, only high bits matter */ 8088 tcg_gen_mov_i64(tcg_src, tcg_src_hi); 8089 } else { 8090 tcg_gen_shri_i64(tcg_src, tcg_src, shift); 8091 tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift); 8092 tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi); 8093 } 8094 } else { 8095 if (is_u) { 8096 if (shift == 64) { 8097 /* essentially shifting in 64 zeros */ 8098 tcg_gen_movi_i64(tcg_src, 0); 8099 } else { 8100 tcg_gen_shri_i64(tcg_src, tcg_src, shift); 8101 } 8102 } else { 8103 if (shift == 64) { 8104 /* effectively extending the sign-bit */ 8105 tcg_gen_sari_i64(tcg_src, tcg_src, 63); 8106 } else { 8107 tcg_gen_sari_i64(tcg_src, tcg_src, shift); 8108 } 8109 } 8110 } 8111 8112 if (accumulate) { 8113 tcg_gen_add_i64(tcg_res, tcg_res, tcg_src); 8114 } else { 8115 tcg_gen_mov_i64(tcg_res, tcg_src); 8116 } 8117 } 8118 8119 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ 8120 static void handle_scalar_simd_shri(DisasContext *s, 8121 bool is_u, int immh, int immb, 8122 int opcode, int rn, int rd) 8123 { 8124 const int size = 3; 8125 int immhb = immh << 3 | immb; 8126 int shift = 2 * (8 << size) - immhb; 8127 bool accumulate = false; 8128 bool round = false; 8129 bool insert = false; 8130 TCGv_i64 tcg_rn; 8131 TCGv_i64 tcg_rd; 8132 TCGv_i64 tcg_round; 8133 8134 if (!extract32(immh, 3, 1)) { 8135 unallocated_encoding(s); 8136 return; 8137 } 8138 8139 if (!fp_access_check(s)) { 8140 return; 8141 } 8142 8143 switch (opcode) { 8144 case 0x02: /* SSRA / USRA (accumulate) */ 8145 accumulate = true; 8146 break; 8147 case 0x04: /* SRSHR / URSHR (rounding) */ 8148 round = true; 8149 break; 8150 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 8151 accumulate = round = true; 8152 break; 8153 case 0x08: /* SRI */ 8154 insert = true; 8155 break; 8156 } 8157 8158 if (round) { 8159 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 8160 } else { 8161 tcg_round = NULL; 8162 } 8163 8164 tcg_rn = read_fp_dreg(s, rn); 8165 tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); 8166 8167 if (insert) { 8168 /* shift count same as element size is valid but does nothing; 8169 * special case to avoid potential shift by 64. 8170 */ 8171 int esize = 8 << size; 8172 if (shift != esize) { 8173 tcg_gen_shri_i64(tcg_rn, tcg_rn, shift); 8174 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift); 8175 } 8176 } else { 8177 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 8178 accumulate, is_u, size, shift); 8179 } 8180 8181 write_fp_dreg(s, rd, tcg_rd); 8182 } 8183 8184 /* SHL/SLI - Scalar shift left */ 8185 static void handle_scalar_simd_shli(DisasContext *s, bool insert, 8186 int immh, int immb, int opcode, 8187 int rn, int rd) 8188 { 8189 int size = 32 - clz32(immh) - 1; 8190 int immhb = immh << 3 | immb; 8191 int shift = immhb - (8 << size); 8192 TCGv_i64 tcg_rn; 8193 TCGv_i64 tcg_rd; 8194 8195 if (!extract32(immh, 3, 1)) { 8196 unallocated_encoding(s); 8197 return; 8198 } 8199 8200 if (!fp_access_check(s)) { 8201 return; 8202 } 8203 8204 tcg_rn = read_fp_dreg(s, rn); 8205 tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); 8206 8207 if (insert) { 8208 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift); 8209 } else { 8210 tcg_gen_shli_i64(tcg_rd, tcg_rn, shift); 8211 } 8212 8213 write_fp_dreg(s, rd, tcg_rd); 8214 } 8215 8216 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with 8217 * (signed/unsigned) narrowing */ 8218 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, 8219 bool is_u_shift, bool is_u_narrow, 8220 int immh, int immb, int opcode, 8221 int rn, int rd) 8222 { 8223 int immhb = immh << 3 | immb; 8224 int size = 32 - clz32(immh) - 1; 8225 int esize = 8 << size; 8226 int shift = (2 * esize) - immhb; 8227 int elements = is_scalar ? 1 : (64 / esize); 8228 bool round = extract32(opcode, 0, 1); 8229 MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN); 8230 TCGv_i64 tcg_rn, tcg_rd, tcg_round; 8231 TCGv_i32 tcg_rd_narrowed; 8232 TCGv_i64 tcg_final; 8233 8234 static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = { 8235 { gen_helper_neon_narrow_sat_s8, 8236 gen_helper_neon_unarrow_sat8 }, 8237 { gen_helper_neon_narrow_sat_s16, 8238 gen_helper_neon_unarrow_sat16 }, 8239 { gen_helper_neon_narrow_sat_s32, 8240 gen_helper_neon_unarrow_sat32 }, 8241 { NULL, NULL }, 8242 }; 8243 static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = { 8244 gen_helper_neon_narrow_sat_u8, 8245 gen_helper_neon_narrow_sat_u16, 8246 gen_helper_neon_narrow_sat_u32, 8247 NULL 8248 }; 8249 NeonGenNarrowEnvFn *narrowfn; 8250 8251 int i; 8252 8253 assert(size < 4); 8254 8255 if (extract32(immh, 3, 1)) { 8256 unallocated_encoding(s); 8257 return; 8258 } 8259 8260 if (!fp_access_check(s)) { 8261 return; 8262 } 8263 8264 if (is_u_shift) { 8265 narrowfn = unsigned_narrow_fns[size]; 8266 } else { 8267 narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0]; 8268 } 8269 8270 tcg_rn = tcg_temp_new_i64(); 8271 tcg_rd = tcg_temp_new_i64(); 8272 tcg_rd_narrowed = tcg_temp_new_i32(); 8273 tcg_final = tcg_temp_new_i64(); 8274 8275 if (round) { 8276 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 8277 } else { 8278 tcg_round = NULL; 8279 } 8280 8281 for (i = 0; i < elements; i++) { 8282 read_vec_element(s, tcg_rn, rn, i, ldop); 8283 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 8284 false, is_u_shift, size+1, shift); 8285 narrowfn(tcg_rd_narrowed, tcg_env, tcg_rd); 8286 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed); 8287 if (i == 0) { 8288 tcg_gen_mov_i64(tcg_final, tcg_rd); 8289 } else { 8290 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); 8291 } 8292 } 8293 8294 if (!is_q) { 8295 write_vec_element(s, tcg_final, rd, 0, MO_64); 8296 } else { 8297 write_vec_element(s, tcg_final, rd, 1, MO_64); 8298 } 8299 clear_vec_high(s, is_q, rd); 8300 } 8301 8302 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */ 8303 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, 8304 bool src_unsigned, bool dst_unsigned, 8305 int immh, int immb, int rn, int rd) 8306 { 8307 int immhb = immh << 3 | immb; 8308 int size = 32 - clz32(immh) - 1; 8309 int shift = immhb - (8 << size); 8310 int pass; 8311 8312 assert(immh != 0); 8313 assert(!(scalar && is_q)); 8314 8315 if (!scalar) { 8316 if (!is_q && extract32(immh, 3, 1)) { 8317 unallocated_encoding(s); 8318 return; 8319 } 8320 8321 /* Since we use the variable-shift helpers we must 8322 * replicate the shift count into each element of 8323 * the tcg_shift value. 8324 */ 8325 switch (size) { 8326 case 0: 8327 shift |= shift << 8; 8328 /* fall through */ 8329 case 1: 8330 shift |= shift << 16; 8331 break; 8332 case 2: 8333 case 3: 8334 break; 8335 default: 8336 g_assert_not_reached(); 8337 } 8338 } 8339 8340 if (!fp_access_check(s)) { 8341 return; 8342 } 8343 8344 if (size == 3) { 8345 TCGv_i64 tcg_shift = tcg_constant_i64(shift); 8346 static NeonGenTwo64OpEnvFn * const fns[2][2] = { 8347 { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 }, 8348 { NULL, gen_helper_neon_qshl_u64 }, 8349 }; 8350 NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned]; 8351 int maxpass = is_q ? 2 : 1; 8352 8353 for (pass = 0; pass < maxpass; pass++) { 8354 TCGv_i64 tcg_op = tcg_temp_new_i64(); 8355 8356 read_vec_element(s, tcg_op, rn, pass, MO_64); 8357 genfn(tcg_op, tcg_env, tcg_op, tcg_shift); 8358 write_vec_element(s, tcg_op, rd, pass, MO_64); 8359 } 8360 clear_vec_high(s, is_q, rd); 8361 } else { 8362 TCGv_i32 tcg_shift = tcg_constant_i32(shift); 8363 static NeonGenTwoOpEnvFn * const fns[2][2][3] = { 8364 { 8365 { gen_helper_neon_qshl_s8, 8366 gen_helper_neon_qshl_s16, 8367 gen_helper_neon_qshl_s32 }, 8368 { gen_helper_neon_qshlu_s8, 8369 gen_helper_neon_qshlu_s16, 8370 gen_helper_neon_qshlu_s32 } 8371 }, { 8372 { NULL, NULL, NULL }, 8373 { gen_helper_neon_qshl_u8, 8374 gen_helper_neon_qshl_u16, 8375 gen_helper_neon_qshl_u32 } 8376 } 8377 }; 8378 NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size]; 8379 MemOp memop = scalar ? size : MO_32; 8380 int maxpass = scalar ? 1 : is_q ? 4 : 2; 8381 8382 for (pass = 0; pass < maxpass; pass++) { 8383 TCGv_i32 tcg_op = tcg_temp_new_i32(); 8384 8385 read_vec_element_i32(s, tcg_op, rn, pass, memop); 8386 genfn(tcg_op, tcg_env, tcg_op, tcg_shift); 8387 if (scalar) { 8388 switch (size) { 8389 case 0: 8390 tcg_gen_ext8u_i32(tcg_op, tcg_op); 8391 break; 8392 case 1: 8393 tcg_gen_ext16u_i32(tcg_op, tcg_op); 8394 break; 8395 case 2: 8396 break; 8397 default: 8398 g_assert_not_reached(); 8399 } 8400 write_fp_sreg(s, rd, tcg_op); 8401 } else { 8402 write_vec_element_i32(s, tcg_op, rd, pass, MO_32); 8403 } 8404 } 8405 8406 if (!scalar) { 8407 clear_vec_high(s, is_q, rd); 8408 } 8409 } 8410 } 8411 8412 /* Common vector code for handling integer to FP conversion */ 8413 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, 8414 int elements, int is_signed, 8415 int fracbits, int size) 8416 { 8417 TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 8418 TCGv_i32 tcg_shift = NULL; 8419 8420 MemOp mop = size | (is_signed ? MO_SIGN : 0); 8421 int pass; 8422 8423 if (fracbits || size == MO_64) { 8424 tcg_shift = tcg_constant_i32(fracbits); 8425 } 8426 8427 if (size == MO_64) { 8428 TCGv_i64 tcg_int64 = tcg_temp_new_i64(); 8429 TCGv_i64 tcg_double = tcg_temp_new_i64(); 8430 8431 for (pass = 0; pass < elements; pass++) { 8432 read_vec_element(s, tcg_int64, rn, pass, mop); 8433 8434 if (is_signed) { 8435 gen_helper_vfp_sqtod(tcg_double, tcg_int64, 8436 tcg_shift, tcg_fpst); 8437 } else { 8438 gen_helper_vfp_uqtod(tcg_double, tcg_int64, 8439 tcg_shift, tcg_fpst); 8440 } 8441 if (elements == 1) { 8442 write_fp_dreg(s, rd, tcg_double); 8443 } else { 8444 write_vec_element(s, tcg_double, rd, pass, MO_64); 8445 } 8446 } 8447 } else { 8448 TCGv_i32 tcg_int32 = tcg_temp_new_i32(); 8449 TCGv_i32 tcg_float = tcg_temp_new_i32(); 8450 8451 for (pass = 0; pass < elements; pass++) { 8452 read_vec_element_i32(s, tcg_int32, rn, pass, mop); 8453 8454 switch (size) { 8455 case MO_32: 8456 if (fracbits) { 8457 if (is_signed) { 8458 gen_helper_vfp_sltos(tcg_float, tcg_int32, 8459 tcg_shift, tcg_fpst); 8460 } else { 8461 gen_helper_vfp_ultos(tcg_float, tcg_int32, 8462 tcg_shift, tcg_fpst); 8463 } 8464 } else { 8465 if (is_signed) { 8466 gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst); 8467 } else { 8468 gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst); 8469 } 8470 } 8471 break; 8472 case MO_16: 8473 if (fracbits) { 8474 if (is_signed) { 8475 gen_helper_vfp_sltoh(tcg_float, tcg_int32, 8476 tcg_shift, tcg_fpst); 8477 } else { 8478 gen_helper_vfp_ultoh(tcg_float, tcg_int32, 8479 tcg_shift, tcg_fpst); 8480 } 8481 } else { 8482 if (is_signed) { 8483 gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst); 8484 } else { 8485 gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst); 8486 } 8487 } 8488 break; 8489 default: 8490 g_assert_not_reached(); 8491 } 8492 8493 if (elements == 1) { 8494 write_fp_sreg(s, rd, tcg_float); 8495 } else { 8496 write_vec_element_i32(s, tcg_float, rd, pass, size); 8497 } 8498 } 8499 } 8500 8501 clear_vec_high(s, elements << size == 16, rd); 8502 } 8503 8504 /* UCVTF/SCVTF - Integer to FP conversion */ 8505 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, 8506 bool is_q, bool is_u, 8507 int immh, int immb, int opcode, 8508 int rn, int rd) 8509 { 8510 int size, elements, fracbits; 8511 int immhb = immh << 3 | immb; 8512 8513 if (immh & 8) { 8514 size = MO_64; 8515 if (!is_scalar && !is_q) { 8516 unallocated_encoding(s); 8517 return; 8518 } 8519 } else if (immh & 4) { 8520 size = MO_32; 8521 } else if (immh & 2) { 8522 size = MO_16; 8523 if (!dc_isar_feature(aa64_fp16, s)) { 8524 unallocated_encoding(s); 8525 return; 8526 } 8527 } else { 8528 /* immh == 0 would be a failure of the decode logic */ 8529 g_assert(immh == 1); 8530 unallocated_encoding(s); 8531 return; 8532 } 8533 8534 if (is_scalar) { 8535 elements = 1; 8536 } else { 8537 elements = (8 << is_q) >> size; 8538 } 8539 fracbits = (16 << size) - immhb; 8540 8541 if (!fp_access_check(s)) { 8542 return; 8543 } 8544 8545 handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size); 8546 } 8547 8548 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */ 8549 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, 8550 bool is_q, bool is_u, 8551 int immh, int immb, int rn, int rd) 8552 { 8553 int immhb = immh << 3 | immb; 8554 int pass, size, fracbits; 8555 TCGv_ptr tcg_fpstatus; 8556 TCGv_i32 tcg_rmode, tcg_shift; 8557 8558 if (immh & 0x8) { 8559 size = MO_64; 8560 if (!is_scalar && !is_q) { 8561 unallocated_encoding(s); 8562 return; 8563 } 8564 } else if (immh & 0x4) { 8565 size = MO_32; 8566 } else if (immh & 0x2) { 8567 size = MO_16; 8568 if (!dc_isar_feature(aa64_fp16, s)) { 8569 unallocated_encoding(s); 8570 return; 8571 } 8572 } else { 8573 /* Should have split out AdvSIMD modified immediate earlier. */ 8574 assert(immh == 1); 8575 unallocated_encoding(s); 8576 return; 8577 } 8578 8579 if (!fp_access_check(s)) { 8580 return; 8581 } 8582 8583 assert(!(is_scalar && is_q)); 8584 8585 tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 8586 tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus); 8587 fracbits = (16 << size) - immhb; 8588 tcg_shift = tcg_constant_i32(fracbits); 8589 8590 if (size == MO_64) { 8591 int maxpass = is_scalar ? 1 : 2; 8592 8593 for (pass = 0; pass < maxpass; pass++) { 8594 TCGv_i64 tcg_op = tcg_temp_new_i64(); 8595 8596 read_vec_element(s, tcg_op, rn, pass, MO_64); 8597 if (is_u) { 8598 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 8599 } else { 8600 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 8601 } 8602 write_vec_element(s, tcg_op, rd, pass, MO_64); 8603 } 8604 clear_vec_high(s, is_q, rd); 8605 } else { 8606 void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 8607 int maxpass = is_scalar ? 1 : ((8 << is_q) >> size); 8608 8609 switch (size) { 8610 case MO_16: 8611 if (is_u) { 8612 fn = gen_helper_vfp_touhh; 8613 } else { 8614 fn = gen_helper_vfp_toshh; 8615 } 8616 break; 8617 case MO_32: 8618 if (is_u) { 8619 fn = gen_helper_vfp_touls; 8620 } else { 8621 fn = gen_helper_vfp_tosls; 8622 } 8623 break; 8624 default: 8625 g_assert_not_reached(); 8626 } 8627 8628 for (pass = 0; pass < maxpass; pass++) { 8629 TCGv_i32 tcg_op = tcg_temp_new_i32(); 8630 8631 read_vec_element_i32(s, tcg_op, rn, pass, size); 8632 fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 8633 if (is_scalar) { 8634 write_fp_sreg(s, rd, tcg_op); 8635 } else { 8636 write_vec_element_i32(s, tcg_op, rd, pass, size); 8637 } 8638 } 8639 if (!is_scalar) { 8640 clear_vec_high(s, is_q, rd); 8641 } 8642 } 8643 8644 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 8645 } 8646 8647 /* AdvSIMD scalar shift by immediate 8648 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 8649 * +-----+---+-------------+------+------+--------+---+------+------+ 8650 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | 8651 * +-----+---+-------------+------+------+--------+---+------+------+ 8652 * 8653 * This is the scalar version so it works on a fixed sized registers 8654 */ 8655 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) 8656 { 8657 int rd = extract32(insn, 0, 5); 8658 int rn = extract32(insn, 5, 5); 8659 int opcode = extract32(insn, 11, 5); 8660 int immb = extract32(insn, 16, 3); 8661 int immh = extract32(insn, 19, 4); 8662 bool is_u = extract32(insn, 29, 1); 8663 8664 if (immh == 0) { 8665 unallocated_encoding(s); 8666 return; 8667 } 8668 8669 switch (opcode) { 8670 case 0x08: /* SRI */ 8671 if (!is_u) { 8672 unallocated_encoding(s); 8673 return; 8674 } 8675 /* fall through */ 8676 case 0x00: /* SSHR / USHR */ 8677 case 0x02: /* SSRA / USRA */ 8678 case 0x04: /* SRSHR / URSHR */ 8679 case 0x06: /* SRSRA / URSRA */ 8680 handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd); 8681 break; 8682 case 0x0a: /* SHL / SLI */ 8683 handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd); 8684 break; 8685 case 0x1c: /* SCVTF, UCVTF */ 8686 handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb, 8687 opcode, rn, rd); 8688 break; 8689 case 0x10: /* SQSHRUN, SQSHRUN2 */ 8690 case 0x11: /* SQRSHRUN, SQRSHRUN2 */ 8691 if (!is_u) { 8692 unallocated_encoding(s); 8693 return; 8694 } 8695 handle_vec_simd_sqshrn(s, true, false, false, true, 8696 immh, immb, opcode, rn, rd); 8697 break; 8698 case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */ 8699 case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */ 8700 handle_vec_simd_sqshrn(s, true, false, is_u, is_u, 8701 immh, immb, opcode, rn, rd); 8702 break; 8703 case 0xc: /* SQSHLU */ 8704 if (!is_u) { 8705 unallocated_encoding(s); 8706 return; 8707 } 8708 handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd); 8709 break; 8710 case 0xe: /* SQSHL, UQSHL */ 8711 handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd); 8712 break; 8713 case 0x1f: /* FCVTZS, FCVTZU */ 8714 handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd); 8715 break; 8716 default: 8717 unallocated_encoding(s); 8718 break; 8719 } 8720 } 8721 8722 /* AdvSIMD scalar three different 8723 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 8724 * +-----+---+-----------+------+---+------+--------+-----+------+------+ 8725 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | 8726 * +-----+---+-----------+------+---+------+--------+-----+------+------+ 8727 */ 8728 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) 8729 { 8730 bool is_u = extract32(insn, 29, 1); 8731 int size = extract32(insn, 22, 2); 8732 int opcode = extract32(insn, 12, 4); 8733 int rm = extract32(insn, 16, 5); 8734 int rn = extract32(insn, 5, 5); 8735 int rd = extract32(insn, 0, 5); 8736 8737 if (is_u) { 8738 unallocated_encoding(s); 8739 return; 8740 } 8741 8742 switch (opcode) { 8743 case 0x9: /* SQDMLAL, SQDMLAL2 */ 8744 case 0xb: /* SQDMLSL, SQDMLSL2 */ 8745 case 0xd: /* SQDMULL, SQDMULL2 */ 8746 if (size == 0 || size == 3) { 8747 unallocated_encoding(s); 8748 return; 8749 } 8750 break; 8751 default: 8752 unallocated_encoding(s); 8753 return; 8754 } 8755 8756 if (!fp_access_check(s)) { 8757 return; 8758 } 8759 8760 if (size == 2) { 8761 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 8762 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 8763 TCGv_i64 tcg_res = tcg_temp_new_i64(); 8764 8765 read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN); 8766 read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN); 8767 8768 tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2); 8769 gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env, tcg_res, tcg_res); 8770 8771 switch (opcode) { 8772 case 0xd: /* SQDMULL, SQDMULL2 */ 8773 break; 8774 case 0xb: /* SQDMLSL, SQDMLSL2 */ 8775 tcg_gen_neg_i64(tcg_res, tcg_res); 8776 /* fall through */ 8777 case 0x9: /* SQDMLAL, SQDMLAL2 */ 8778 read_vec_element(s, tcg_op1, rd, 0, MO_64); 8779 gen_helper_neon_addl_saturate_s64(tcg_res, tcg_env, 8780 tcg_res, tcg_op1); 8781 break; 8782 default: 8783 g_assert_not_reached(); 8784 } 8785 8786 write_fp_dreg(s, rd, tcg_res); 8787 } else { 8788 TCGv_i32 tcg_op1 = read_fp_hreg(s, rn); 8789 TCGv_i32 tcg_op2 = read_fp_hreg(s, rm); 8790 TCGv_i64 tcg_res = tcg_temp_new_i64(); 8791 8792 gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2); 8793 gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env, tcg_res, tcg_res); 8794 8795 switch (opcode) { 8796 case 0xd: /* SQDMULL, SQDMULL2 */ 8797 break; 8798 case 0xb: /* SQDMLSL, SQDMLSL2 */ 8799 gen_helper_neon_negl_u32(tcg_res, tcg_res); 8800 /* fall through */ 8801 case 0x9: /* SQDMLAL, SQDMLAL2 */ 8802 { 8803 TCGv_i64 tcg_op3 = tcg_temp_new_i64(); 8804 read_vec_element(s, tcg_op3, rd, 0, MO_32); 8805 gen_helper_neon_addl_saturate_s32(tcg_res, tcg_env, 8806 tcg_res, tcg_op3); 8807 break; 8808 } 8809 default: 8810 g_assert_not_reached(); 8811 } 8812 8813 tcg_gen_ext32u_i64(tcg_res, tcg_res); 8814 write_fp_dreg(s, rd, tcg_res); 8815 } 8816 } 8817 8818 static void handle_3same_64(DisasContext *s, int opcode, bool u, 8819 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm) 8820 { 8821 /* Handle 64x64->64 opcodes which are shared between the scalar 8822 * and vector 3-same groups. We cover every opcode where size == 3 8823 * is valid in either the three-reg-same (integer, not pairwise) 8824 * or scalar-three-reg-same groups. 8825 */ 8826 TCGCond cond; 8827 8828 switch (opcode) { 8829 case 0x1: /* SQADD */ 8830 if (u) { 8831 gen_helper_neon_qadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); 8832 } else { 8833 gen_helper_neon_qadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); 8834 } 8835 break; 8836 case 0x5: /* SQSUB */ 8837 if (u) { 8838 gen_helper_neon_qsub_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); 8839 } else { 8840 gen_helper_neon_qsub_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); 8841 } 8842 break; 8843 case 0x6: /* CMGT, CMHI */ 8844 cond = u ? TCG_COND_GTU : TCG_COND_GT; 8845 do_cmop: 8846 /* 64 bit integer comparison, result = test ? -1 : 0. */ 8847 tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_rm); 8848 break; 8849 case 0x7: /* CMGE, CMHS */ 8850 cond = u ? TCG_COND_GEU : TCG_COND_GE; 8851 goto do_cmop; 8852 case 0x11: /* CMTST, CMEQ */ 8853 if (u) { 8854 cond = TCG_COND_EQ; 8855 goto do_cmop; 8856 } 8857 gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm); 8858 break; 8859 case 0x8: /* SSHL, USHL */ 8860 if (u) { 8861 gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm); 8862 } else { 8863 gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm); 8864 } 8865 break; 8866 case 0x9: /* SQSHL, UQSHL */ 8867 if (u) { 8868 gen_helper_neon_qshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); 8869 } else { 8870 gen_helper_neon_qshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); 8871 } 8872 break; 8873 case 0xa: /* SRSHL, URSHL */ 8874 if (u) { 8875 gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm); 8876 } else { 8877 gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm); 8878 } 8879 break; 8880 case 0xb: /* SQRSHL, UQRSHL */ 8881 if (u) { 8882 gen_helper_neon_qrshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); 8883 } else { 8884 gen_helper_neon_qrshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); 8885 } 8886 break; 8887 case 0x10: /* ADD, SUB */ 8888 if (u) { 8889 tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm); 8890 } else { 8891 tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm); 8892 } 8893 break; 8894 default: 8895 g_assert_not_reached(); 8896 } 8897 } 8898 8899 /* Handle the 3-same-operands float operations; shared by the scalar 8900 * and vector encodings. The caller must filter out any encodings 8901 * not allocated for the encoding it is dealing with. 8902 */ 8903 static void handle_3same_float(DisasContext *s, int size, int elements, 8904 int fpopcode, int rd, int rn, int rm) 8905 { 8906 int pass; 8907 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 8908 8909 for (pass = 0; pass < elements; pass++) { 8910 if (size) { 8911 /* Double */ 8912 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 8913 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 8914 TCGv_i64 tcg_res = tcg_temp_new_i64(); 8915 8916 read_vec_element(s, tcg_op1, rn, pass, MO_64); 8917 read_vec_element(s, tcg_op2, rm, pass, MO_64); 8918 8919 switch (fpopcode) { 8920 case 0x39: /* FMLS */ 8921 /* As usual for ARM, separate negation for fused multiply-add */ 8922 gen_helper_vfp_negd(tcg_op1, tcg_op1); 8923 /* fall through */ 8924 case 0x19: /* FMLA */ 8925 read_vec_element(s, tcg_res, rd, pass, MO_64); 8926 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, 8927 tcg_res, fpst); 8928 break; 8929 case 0x18: /* FMAXNM */ 8930 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); 8931 break; 8932 case 0x1a: /* FADD */ 8933 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); 8934 break; 8935 case 0x1b: /* FMULX */ 8936 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst); 8937 break; 8938 case 0x1c: /* FCMEQ */ 8939 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8940 break; 8941 case 0x1e: /* FMAX */ 8942 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); 8943 break; 8944 case 0x1f: /* FRECPS */ 8945 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8946 break; 8947 case 0x38: /* FMINNM */ 8948 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); 8949 break; 8950 case 0x3a: /* FSUB */ 8951 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 8952 break; 8953 case 0x3e: /* FMIN */ 8954 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); 8955 break; 8956 case 0x3f: /* FRSQRTS */ 8957 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8958 break; 8959 case 0x5b: /* FMUL */ 8960 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 8961 break; 8962 case 0x5c: /* FCMGE */ 8963 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8964 break; 8965 case 0x5d: /* FACGE */ 8966 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8967 break; 8968 case 0x5f: /* FDIV */ 8969 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); 8970 break; 8971 case 0x7a: /* FABD */ 8972 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 8973 gen_helper_vfp_absd(tcg_res, tcg_res); 8974 break; 8975 case 0x7c: /* FCMGT */ 8976 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8977 break; 8978 case 0x7d: /* FACGT */ 8979 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8980 break; 8981 default: 8982 g_assert_not_reached(); 8983 } 8984 8985 write_vec_element(s, tcg_res, rd, pass, MO_64); 8986 } else { 8987 /* Single */ 8988 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 8989 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 8990 TCGv_i32 tcg_res = tcg_temp_new_i32(); 8991 8992 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); 8993 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); 8994 8995 switch (fpopcode) { 8996 case 0x39: /* FMLS */ 8997 /* As usual for ARM, separate negation for fused multiply-add */ 8998 gen_helper_vfp_negs(tcg_op1, tcg_op1); 8999 /* fall through */ 9000 case 0x19: /* FMLA */ 9001 read_vec_element_i32(s, tcg_res, rd, pass, MO_32); 9002 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, 9003 tcg_res, fpst); 9004 break; 9005 case 0x1a: /* FADD */ 9006 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); 9007 break; 9008 case 0x1b: /* FMULX */ 9009 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst); 9010 break; 9011 case 0x1c: /* FCMEQ */ 9012 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9013 break; 9014 case 0x1e: /* FMAX */ 9015 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); 9016 break; 9017 case 0x1f: /* FRECPS */ 9018 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9019 break; 9020 case 0x18: /* FMAXNM */ 9021 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); 9022 break; 9023 case 0x38: /* FMINNM */ 9024 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); 9025 break; 9026 case 0x3a: /* FSUB */ 9027 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 9028 break; 9029 case 0x3e: /* FMIN */ 9030 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); 9031 break; 9032 case 0x3f: /* FRSQRTS */ 9033 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9034 break; 9035 case 0x5b: /* FMUL */ 9036 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 9037 break; 9038 case 0x5c: /* FCMGE */ 9039 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9040 break; 9041 case 0x5d: /* FACGE */ 9042 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9043 break; 9044 case 0x5f: /* FDIV */ 9045 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); 9046 break; 9047 case 0x7a: /* FABD */ 9048 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 9049 gen_helper_vfp_abss(tcg_res, tcg_res); 9050 break; 9051 case 0x7c: /* FCMGT */ 9052 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9053 break; 9054 case 0x7d: /* FACGT */ 9055 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9056 break; 9057 default: 9058 g_assert_not_reached(); 9059 } 9060 9061 if (elements == 1) { 9062 /* scalar single so clear high part */ 9063 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 9064 9065 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res); 9066 write_vec_element(s, tcg_tmp, rd, pass, MO_64); 9067 } else { 9068 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 9069 } 9070 } 9071 } 9072 9073 clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd); 9074 } 9075 9076 /* AdvSIMD scalar three same 9077 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 9078 * +-----+---+-----------+------+---+------+--------+---+------+------+ 9079 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | 9080 * +-----+---+-----------+------+---+------+--------+---+------+------+ 9081 */ 9082 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) 9083 { 9084 int rd = extract32(insn, 0, 5); 9085 int rn = extract32(insn, 5, 5); 9086 int opcode = extract32(insn, 11, 5); 9087 int rm = extract32(insn, 16, 5); 9088 int size = extract32(insn, 22, 2); 9089 bool u = extract32(insn, 29, 1); 9090 TCGv_i64 tcg_rd; 9091 9092 if (opcode >= 0x18) { 9093 /* Floating point: U, size[1] and opcode indicate operation */ 9094 int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6); 9095 switch (fpopcode) { 9096 case 0x1b: /* FMULX */ 9097 case 0x1f: /* FRECPS */ 9098 case 0x3f: /* FRSQRTS */ 9099 case 0x5d: /* FACGE */ 9100 case 0x7d: /* FACGT */ 9101 case 0x1c: /* FCMEQ */ 9102 case 0x5c: /* FCMGE */ 9103 case 0x7c: /* FCMGT */ 9104 case 0x7a: /* FABD */ 9105 break; 9106 default: 9107 unallocated_encoding(s); 9108 return; 9109 } 9110 9111 if (!fp_access_check(s)) { 9112 return; 9113 } 9114 9115 handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm); 9116 return; 9117 } 9118 9119 switch (opcode) { 9120 case 0x1: /* SQADD, UQADD */ 9121 case 0x5: /* SQSUB, UQSUB */ 9122 case 0x9: /* SQSHL, UQSHL */ 9123 case 0xb: /* SQRSHL, UQRSHL */ 9124 break; 9125 case 0x8: /* SSHL, USHL */ 9126 case 0xa: /* SRSHL, URSHL */ 9127 case 0x6: /* CMGT, CMHI */ 9128 case 0x7: /* CMGE, CMHS */ 9129 case 0x11: /* CMTST, CMEQ */ 9130 case 0x10: /* ADD, SUB (vector) */ 9131 if (size != 3) { 9132 unallocated_encoding(s); 9133 return; 9134 } 9135 break; 9136 case 0x16: /* SQDMULH, SQRDMULH (vector) */ 9137 if (size != 1 && size != 2) { 9138 unallocated_encoding(s); 9139 return; 9140 } 9141 break; 9142 default: 9143 unallocated_encoding(s); 9144 return; 9145 } 9146 9147 if (!fp_access_check(s)) { 9148 return; 9149 } 9150 9151 tcg_rd = tcg_temp_new_i64(); 9152 9153 if (size == 3) { 9154 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 9155 TCGv_i64 tcg_rm = read_fp_dreg(s, rm); 9156 9157 handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm); 9158 } else { 9159 /* Do a single operation on the lowest element in the vector. 9160 * We use the standard Neon helpers and rely on 0 OP 0 == 0 with 9161 * no side effects for all these operations. 9162 * OPTME: special-purpose helpers would avoid doing some 9163 * unnecessary work in the helper for the 8 and 16 bit cases. 9164 */ 9165 NeonGenTwoOpEnvFn *genenvfn; 9166 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 9167 TCGv_i32 tcg_rm = tcg_temp_new_i32(); 9168 TCGv_i32 tcg_rd32 = tcg_temp_new_i32(); 9169 9170 read_vec_element_i32(s, tcg_rn, rn, 0, size); 9171 read_vec_element_i32(s, tcg_rm, rm, 0, size); 9172 9173 switch (opcode) { 9174 case 0x1: /* SQADD, UQADD */ 9175 { 9176 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9177 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, 9178 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, 9179 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, 9180 }; 9181 genenvfn = fns[size][u]; 9182 break; 9183 } 9184 case 0x5: /* SQSUB, UQSUB */ 9185 { 9186 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9187 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, 9188 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, 9189 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 }, 9190 }; 9191 genenvfn = fns[size][u]; 9192 break; 9193 } 9194 case 0x9: /* SQSHL, UQSHL */ 9195 { 9196 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9197 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, 9198 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, 9199 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, 9200 }; 9201 genenvfn = fns[size][u]; 9202 break; 9203 } 9204 case 0xb: /* SQRSHL, UQRSHL */ 9205 { 9206 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9207 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, 9208 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, 9209 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, 9210 }; 9211 genenvfn = fns[size][u]; 9212 break; 9213 } 9214 case 0x16: /* SQDMULH, SQRDMULH */ 9215 { 9216 static NeonGenTwoOpEnvFn * const fns[2][2] = { 9217 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, 9218 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, 9219 }; 9220 assert(size == 1 || size == 2); 9221 genenvfn = fns[size - 1][u]; 9222 break; 9223 } 9224 default: 9225 g_assert_not_reached(); 9226 } 9227 9228 genenvfn(tcg_rd32, tcg_env, tcg_rn, tcg_rm); 9229 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32); 9230 } 9231 9232 write_fp_dreg(s, rd, tcg_rd); 9233 } 9234 9235 /* AdvSIMD scalar three same FP16 9236 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0 9237 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+ 9238 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd | 9239 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+ 9240 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400 9241 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400 9242 */ 9243 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, 9244 uint32_t insn) 9245 { 9246 int rd = extract32(insn, 0, 5); 9247 int rn = extract32(insn, 5, 5); 9248 int opcode = extract32(insn, 11, 3); 9249 int rm = extract32(insn, 16, 5); 9250 bool u = extract32(insn, 29, 1); 9251 bool a = extract32(insn, 23, 1); 9252 int fpopcode = opcode | (a << 3) | (u << 4); 9253 TCGv_ptr fpst; 9254 TCGv_i32 tcg_op1; 9255 TCGv_i32 tcg_op2; 9256 TCGv_i32 tcg_res; 9257 9258 switch (fpopcode) { 9259 case 0x03: /* FMULX */ 9260 case 0x04: /* FCMEQ (reg) */ 9261 case 0x07: /* FRECPS */ 9262 case 0x0f: /* FRSQRTS */ 9263 case 0x14: /* FCMGE (reg) */ 9264 case 0x15: /* FACGE */ 9265 case 0x1a: /* FABD */ 9266 case 0x1c: /* FCMGT (reg) */ 9267 case 0x1d: /* FACGT */ 9268 break; 9269 default: 9270 unallocated_encoding(s); 9271 return; 9272 } 9273 9274 if (!dc_isar_feature(aa64_fp16, s)) { 9275 unallocated_encoding(s); 9276 } 9277 9278 if (!fp_access_check(s)) { 9279 return; 9280 } 9281 9282 fpst = fpstatus_ptr(FPST_FPCR_F16); 9283 9284 tcg_op1 = read_fp_hreg(s, rn); 9285 tcg_op2 = read_fp_hreg(s, rm); 9286 tcg_res = tcg_temp_new_i32(); 9287 9288 switch (fpopcode) { 9289 case 0x03: /* FMULX */ 9290 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst); 9291 break; 9292 case 0x04: /* FCMEQ (reg) */ 9293 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9294 break; 9295 case 0x07: /* FRECPS */ 9296 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9297 break; 9298 case 0x0f: /* FRSQRTS */ 9299 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9300 break; 9301 case 0x14: /* FCMGE (reg) */ 9302 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9303 break; 9304 case 0x15: /* FACGE */ 9305 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9306 break; 9307 case 0x1a: /* FABD */ 9308 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 9309 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff); 9310 break; 9311 case 0x1c: /* FCMGT (reg) */ 9312 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9313 break; 9314 case 0x1d: /* FACGT */ 9315 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9316 break; 9317 default: 9318 g_assert_not_reached(); 9319 } 9320 9321 write_fp_sreg(s, rd, tcg_res); 9322 } 9323 9324 /* AdvSIMD scalar three same extra 9325 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 9326 * +-----+---+-----------+------+---+------+---+--------+---+----+----+ 9327 * | 0 1 | U | 1 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd | 9328 * +-----+---+-----------+------+---+------+---+--------+---+----+----+ 9329 */ 9330 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s, 9331 uint32_t insn) 9332 { 9333 int rd = extract32(insn, 0, 5); 9334 int rn = extract32(insn, 5, 5); 9335 int opcode = extract32(insn, 11, 4); 9336 int rm = extract32(insn, 16, 5); 9337 int size = extract32(insn, 22, 2); 9338 bool u = extract32(insn, 29, 1); 9339 TCGv_i32 ele1, ele2, ele3; 9340 TCGv_i64 res; 9341 bool feature; 9342 9343 switch (u * 16 + opcode) { 9344 case 0x10: /* SQRDMLAH (vector) */ 9345 case 0x11: /* SQRDMLSH (vector) */ 9346 if (size != 1 && size != 2) { 9347 unallocated_encoding(s); 9348 return; 9349 } 9350 feature = dc_isar_feature(aa64_rdm, s); 9351 break; 9352 default: 9353 unallocated_encoding(s); 9354 return; 9355 } 9356 if (!feature) { 9357 unallocated_encoding(s); 9358 return; 9359 } 9360 if (!fp_access_check(s)) { 9361 return; 9362 } 9363 9364 /* Do a single operation on the lowest element in the vector. 9365 * We use the standard Neon helpers and rely on 0 OP 0 == 0 9366 * with no side effects for all these operations. 9367 * OPTME: special-purpose helpers would avoid doing some 9368 * unnecessary work in the helper for the 16 bit cases. 9369 */ 9370 ele1 = tcg_temp_new_i32(); 9371 ele2 = tcg_temp_new_i32(); 9372 ele3 = tcg_temp_new_i32(); 9373 9374 read_vec_element_i32(s, ele1, rn, 0, size); 9375 read_vec_element_i32(s, ele2, rm, 0, size); 9376 read_vec_element_i32(s, ele3, rd, 0, size); 9377 9378 switch (opcode) { 9379 case 0x0: /* SQRDMLAH */ 9380 if (size == 1) { 9381 gen_helper_neon_qrdmlah_s16(ele3, tcg_env, ele1, ele2, ele3); 9382 } else { 9383 gen_helper_neon_qrdmlah_s32(ele3, tcg_env, ele1, ele2, ele3); 9384 } 9385 break; 9386 case 0x1: /* SQRDMLSH */ 9387 if (size == 1) { 9388 gen_helper_neon_qrdmlsh_s16(ele3, tcg_env, ele1, ele2, ele3); 9389 } else { 9390 gen_helper_neon_qrdmlsh_s32(ele3, tcg_env, ele1, ele2, ele3); 9391 } 9392 break; 9393 default: 9394 g_assert_not_reached(); 9395 } 9396 9397 res = tcg_temp_new_i64(); 9398 tcg_gen_extu_i32_i64(res, ele3); 9399 write_fp_dreg(s, rd, res); 9400 } 9401 9402 static void handle_2misc_64(DisasContext *s, int opcode, bool u, 9403 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, 9404 TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus) 9405 { 9406 /* Handle 64->64 opcodes which are shared between the scalar and 9407 * vector 2-reg-misc groups. We cover every integer opcode where size == 3 9408 * is valid in either group and also the double-precision fp ops. 9409 * The caller only need provide tcg_rmode and tcg_fpstatus if the op 9410 * requires them. 9411 */ 9412 TCGCond cond; 9413 9414 switch (opcode) { 9415 case 0x4: /* CLS, CLZ */ 9416 if (u) { 9417 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 9418 } else { 9419 tcg_gen_clrsb_i64(tcg_rd, tcg_rn); 9420 } 9421 break; 9422 case 0x5: /* NOT */ 9423 /* This opcode is shared with CNT and RBIT but we have earlier 9424 * enforced that size == 3 if and only if this is the NOT insn. 9425 */ 9426 tcg_gen_not_i64(tcg_rd, tcg_rn); 9427 break; 9428 case 0x7: /* SQABS, SQNEG */ 9429 if (u) { 9430 gen_helper_neon_qneg_s64(tcg_rd, tcg_env, tcg_rn); 9431 } else { 9432 gen_helper_neon_qabs_s64(tcg_rd, tcg_env, tcg_rn); 9433 } 9434 break; 9435 case 0xa: /* CMLT */ 9436 cond = TCG_COND_LT; 9437 do_cmop: 9438 /* 64 bit integer comparison against zero, result is test ? -1 : 0. */ 9439 tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0)); 9440 break; 9441 case 0x8: /* CMGT, CMGE */ 9442 cond = u ? TCG_COND_GE : TCG_COND_GT; 9443 goto do_cmop; 9444 case 0x9: /* CMEQ, CMLE */ 9445 cond = u ? TCG_COND_LE : TCG_COND_EQ; 9446 goto do_cmop; 9447 case 0xb: /* ABS, NEG */ 9448 if (u) { 9449 tcg_gen_neg_i64(tcg_rd, tcg_rn); 9450 } else { 9451 tcg_gen_abs_i64(tcg_rd, tcg_rn); 9452 } 9453 break; 9454 case 0x2f: /* FABS */ 9455 gen_helper_vfp_absd(tcg_rd, tcg_rn); 9456 break; 9457 case 0x6f: /* FNEG */ 9458 gen_helper_vfp_negd(tcg_rd, tcg_rn); 9459 break; 9460 case 0x7f: /* FSQRT */ 9461 gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env); 9462 break; 9463 case 0x1a: /* FCVTNS */ 9464 case 0x1b: /* FCVTMS */ 9465 case 0x1c: /* FCVTAS */ 9466 case 0x3a: /* FCVTPS */ 9467 case 0x3b: /* FCVTZS */ 9468 gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 9469 break; 9470 case 0x5a: /* FCVTNU */ 9471 case 0x5b: /* FCVTMU */ 9472 case 0x5c: /* FCVTAU */ 9473 case 0x7a: /* FCVTPU */ 9474 case 0x7b: /* FCVTZU */ 9475 gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 9476 break; 9477 case 0x18: /* FRINTN */ 9478 case 0x19: /* FRINTM */ 9479 case 0x38: /* FRINTP */ 9480 case 0x39: /* FRINTZ */ 9481 case 0x58: /* FRINTA */ 9482 case 0x79: /* FRINTI */ 9483 gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus); 9484 break; 9485 case 0x59: /* FRINTX */ 9486 gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus); 9487 break; 9488 case 0x1e: /* FRINT32Z */ 9489 case 0x5e: /* FRINT32X */ 9490 gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus); 9491 break; 9492 case 0x1f: /* FRINT64Z */ 9493 case 0x5f: /* FRINT64X */ 9494 gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus); 9495 break; 9496 default: 9497 g_assert_not_reached(); 9498 } 9499 } 9500 9501 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, 9502 bool is_scalar, bool is_u, bool is_q, 9503 int size, int rn, int rd) 9504 { 9505 bool is_double = (size == MO_64); 9506 TCGv_ptr fpst; 9507 9508 if (!fp_access_check(s)) { 9509 return; 9510 } 9511 9512 fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 9513 9514 if (is_double) { 9515 TCGv_i64 tcg_op = tcg_temp_new_i64(); 9516 TCGv_i64 tcg_zero = tcg_constant_i64(0); 9517 TCGv_i64 tcg_res = tcg_temp_new_i64(); 9518 NeonGenTwoDoubleOpFn *genfn; 9519 bool swap = false; 9520 int pass; 9521 9522 switch (opcode) { 9523 case 0x2e: /* FCMLT (zero) */ 9524 swap = true; 9525 /* fallthrough */ 9526 case 0x2c: /* FCMGT (zero) */ 9527 genfn = gen_helper_neon_cgt_f64; 9528 break; 9529 case 0x2d: /* FCMEQ (zero) */ 9530 genfn = gen_helper_neon_ceq_f64; 9531 break; 9532 case 0x6d: /* FCMLE (zero) */ 9533 swap = true; 9534 /* fall through */ 9535 case 0x6c: /* FCMGE (zero) */ 9536 genfn = gen_helper_neon_cge_f64; 9537 break; 9538 default: 9539 g_assert_not_reached(); 9540 } 9541 9542 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 9543 read_vec_element(s, tcg_op, rn, pass, MO_64); 9544 if (swap) { 9545 genfn(tcg_res, tcg_zero, tcg_op, fpst); 9546 } else { 9547 genfn(tcg_res, tcg_op, tcg_zero, fpst); 9548 } 9549 write_vec_element(s, tcg_res, rd, pass, MO_64); 9550 } 9551 9552 clear_vec_high(s, !is_scalar, rd); 9553 } else { 9554 TCGv_i32 tcg_op = tcg_temp_new_i32(); 9555 TCGv_i32 tcg_zero = tcg_constant_i32(0); 9556 TCGv_i32 tcg_res = tcg_temp_new_i32(); 9557 NeonGenTwoSingleOpFn *genfn; 9558 bool swap = false; 9559 int pass, maxpasses; 9560 9561 if (size == MO_16) { 9562 switch (opcode) { 9563 case 0x2e: /* FCMLT (zero) */ 9564 swap = true; 9565 /* fall through */ 9566 case 0x2c: /* FCMGT (zero) */ 9567 genfn = gen_helper_advsimd_cgt_f16; 9568 break; 9569 case 0x2d: /* FCMEQ (zero) */ 9570 genfn = gen_helper_advsimd_ceq_f16; 9571 break; 9572 case 0x6d: /* FCMLE (zero) */ 9573 swap = true; 9574 /* fall through */ 9575 case 0x6c: /* FCMGE (zero) */ 9576 genfn = gen_helper_advsimd_cge_f16; 9577 break; 9578 default: 9579 g_assert_not_reached(); 9580 } 9581 } else { 9582 switch (opcode) { 9583 case 0x2e: /* FCMLT (zero) */ 9584 swap = true; 9585 /* fall through */ 9586 case 0x2c: /* FCMGT (zero) */ 9587 genfn = gen_helper_neon_cgt_f32; 9588 break; 9589 case 0x2d: /* FCMEQ (zero) */ 9590 genfn = gen_helper_neon_ceq_f32; 9591 break; 9592 case 0x6d: /* FCMLE (zero) */ 9593 swap = true; 9594 /* fall through */ 9595 case 0x6c: /* FCMGE (zero) */ 9596 genfn = gen_helper_neon_cge_f32; 9597 break; 9598 default: 9599 g_assert_not_reached(); 9600 } 9601 } 9602 9603 if (is_scalar) { 9604 maxpasses = 1; 9605 } else { 9606 int vector_size = 8 << is_q; 9607 maxpasses = vector_size >> size; 9608 } 9609 9610 for (pass = 0; pass < maxpasses; pass++) { 9611 read_vec_element_i32(s, tcg_op, rn, pass, size); 9612 if (swap) { 9613 genfn(tcg_res, tcg_zero, tcg_op, fpst); 9614 } else { 9615 genfn(tcg_res, tcg_op, tcg_zero, fpst); 9616 } 9617 if (is_scalar) { 9618 write_fp_sreg(s, rd, tcg_res); 9619 } else { 9620 write_vec_element_i32(s, tcg_res, rd, pass, size); 9621 } 9622 } 9623 9624 if (!is_scalar) { 9625 clear_vec_high(s, is_q, rd); 9626 } 9627 } 9628 } 9629 9630 static void handle_2misc_reciprocal(DisasContext *s, int opcode, 9631 bool is_scalar, bool is_u, bool is_q, 9632 int size, int rn, int rd) 9633 { 9634 bool is_double = (size == 3); 9635 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9636 9637 if (is_double) { 9638 TCGv_i64 tcg_op = tcg_temp_new_i64(); 9639 TCGv_i64 tcg_res = tcg_temp_new_i64(); 9640 int pass; 9641 9642 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 9643 read_vec_element(s, tcg_op, rn, pass, MO_64); 9644 switch (opcode) { 9645 case 0x3d: /* FRECPE */ 9646 gen_helper_recpe_f64(tcg_res, tcg_op, fpst); 9647 break; 9648 case 0x3f: /* FRECPX */ 9649 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst); 9650 break; 9651 case 0x7d: /* FRSQRTE */ 9652 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst); 9653 break; 9654 default: 9655 g_assert_not_reached(); 9656 } 9657 write_vec_element(s, tcg_res, rd, pass, MO_64); 9658 } 9659 clear_vec_high(s, !is_scalar, rd); 9660 } else { 9661 TCGv_i32 tcg_op = tcg_temp_new_i32(); 9662 TCGv_i32 tcg_res = tcg_temp_new_i32(); 9663 int pass, maxpasses; 9664 9665 if (is_scalar) { 9666 maxpasses = 1; 9667 } else { 9668 maxpasses = is_q ? 4 : 2; 9669 } 9670 9671 for (pass = 0; pass < maxpasses; pass++) { 9672 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 9673 9674 switch (opcode) { 9675 case 0x3c: /* URECPE */ 9676 gen_helper_recpe_u32(tcg_res, tcg_op); 9677 break; 9678 case 0x3d: /* FRECPE */ 9679 gen_helper_recpe_f32(tcg_res, tcg_op, fpst); 9680 break; 9681 case 0x3f: /* FRECPX */ 9682 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst); 9683 break; 9684 case 0x7d: /* FRSQRTE */ 9685 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst); 9686 break; 9687 default: 9688 g_assert_not_reached(); 9689 } 9690 9691 if (is_scalar) { 9692 write_fp_sreg(s, rd, tcg_res); 9693 } else { 9694 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 9695 } 9696 } 9697 if (!is_scalar) { 9698 clear_vec_high(s, is_q, rd); 9699 } 9700 } 9701 } 9702 9703 static void handle_2misc_narrow(DisasContext *s, bool scalar, 9704 int opcode, bool u, bool is_q, 9705 int size, int rn, int rd) 9706 { 9707 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element 9708 * in the source becomes a size element in the destination). 9709 */ 9710 int pass; 9711 TCGv_i32 tcg_res[2]; 9712 int destelt = is_q ? 2 : 0; 9713 int passes = scalar ? 1 : 2; 9714 9715 if (scalar) { 9716 tcg_res[1] = tcg_constant_i32(0); 9717 } 9718 9719 for (pass = 0; pass < passes; pass++) { 9720 TCGv_i64 tcg_op = tcg_temp_new_i64(); 9721 NeonGenNarrowFn *genfn = NULL; 9722 NeonGenNarrowEnvFn *genenvfn = NULL; 9723 9724 if (scalar) { 9725 read_vec_element(s, tcg_op, rn, pass, size + 1); 9726 } else { 9727 read_vec_element(s, tcg_op, rn, pass, MO_64); 9728 } 9729 tcg_res[pass] = tcg_temp_new_i32(); 9730 9731 switch (opcode) { 9732 case 0x12: /* XTN, SQXTUN */ 9733 { 9734 static NeonGenNarrowFn * const xtnfns[3] = { 9735 gen_helper_neon_narrow_u8, 9736 gen_helper_neon_narrow_u16, 9737 tcg_gen_extrl_i64_i32, 9738 }; 9739 static NeonGenNarrowEnvFn * const sqxtunfns[3] = { 9740 gen_helper_neon_unarrow_sat8, 9741 gen_helper_neon_unarrow_sat16, 9742 gen_helper_neon_unarrow_sat32, 9743 }; 9744 if (u) { 9745 genenvfn = sqxtunfns[size]; 9746 } else { 9747 genfn = xtnfns[size]; 9748 } 9749 break; 9750 } 9751 case 0x14: /* SQXTN, UQXTN */ 9752 { 9753 static NeonGenNarrowEnvFn * const fns[3][2] = { 9754 { gen_helper_neon_narrow_sat_s8, 9755 gen_helper_neon_narrow_sat_u8 }, 9756 { gen_helper_neon_narrow_sat_s16, 9757 gen_helper_neon_narrow_sat_u16 }, 9758 { gen_helper_neon_narrow_sat_s32, 9759 gen_helper_neon_narrow_sat_u32 }, 9760 }; 9761 genenvfn = fns[size][u]; 9762 break; 9763 } 9764 case 0x16: /* FCVTN, FCVTN2 */ 9765 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ 9766 if (size == 2) { 9767 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, tcg_env); 9768 } else { 9769 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 9770 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 9771 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9772 TCGv_i32 ahp = get_ahp_flag(); 9773 9774 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op); 9775 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 9776 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 9777 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16); 9778 } 9779 break; 9780 case 0x36: /* BFCVTN, BFCVTN2 */ 9781 { 9782 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9783 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst); 9784 } 9785 break; 9786 case 0x56: /* FCVTXN, FCVTXN2 */ 9787 /* 64 bit to 32 bit float conversion 9788 * with von Neumann rounding (round to odd) 9789 */ 9790 assert(size == 2); 9791 gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, tcg_env); 9792 break; 9793 default: 9794 g_assert_not_reached(); 9795 } 9796 9797 if (genfn) { 9798 genfn(tcg_res[pass], tcg_op); 9799 } else if (genenvfn) { 9800 genenvfn(tcg_res[pass], tcg_env, tcg_op); 9801 } 9802 } 9803 9804 for (pass = 0; pass < 2; pass++) { 9805 write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32); 9806 } 9807 clear_vec_high(s, is_q, rd); 9808 } 9809 9810 /* Remaining saturating accumulating ops */ 9811 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, 9812 bool is_q, int size, int rn, int rd) 9813 { 9814 bool is_double = (size == 3); 9815 9816 if (is_double) { 9817 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 9818 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 9819 int pass; 9820 9821 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 9822 read_vec_element(s, tcg_rn, rn, pass, MO_64); 9823 read_vec_element(s, tcg_rd, rd, pass, MO_64); 9824 9825 if (is_u) { /* USQADD */ 9826 gen_helper_neon_uqadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rd); 9827 } else { /* SUQADD */ 9828 gen_helper_neon_sqadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rd); 9829 } 9830 write_vec_element(s, tcg_rd, rd, pass, MO_64); 9831 } 9832 clear_vec_high(s, !is_scalar, rd); 9833 } else { 9834 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 9835 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 9836 int pass, maxpasses; 9837 9838 if (is_scalar) { 9839 maxpasses = 1; 9840 } else { 9841 maxpasses = is_q ? 4 : 2; 9842 } 9843 9844 for (pass = 0; pass < maxpasses; pass++) { 9845 if (is_scalar) { 9846 read_vec_element_i32(s, tcg_rn, rn, pass, size); 9847 read_vec_element_i32(s, tcg_rd, rd, pass, size); 9848 } else { 9849 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32); 9850 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32); 9851 } 9852 9853 if (is_u) { /* USQADD */ 9854 switch (size) { 9855 case 0: 9856 gen_helper_neon_uqadd_s8(tcg_rd, tcg_env, tcg_rn, tcg_rd); 9857 break; 9858 case 1: 9859 gen_helper_neon_uqadd_s16(tcg_rd, tcg_env, tcg_rn, tcg_rd); 9860 break; 9861 case 2: 9862 gen_helper_neon_uqadd_s32(tcg_rd, tcg_env, tcg_rn, tcg_rd); 9863 break; 9864 default: 9865 g_assert_not_reached(); 9866 } 9867 } else { /* SUQADD */ 9868 switch (size) { 9869 case 0: 9870 gen_helper_neon_sqadd_u8(tcg_rd, tcg_env, tcg_rn, tcg_rd); 9871 break; 9872 case 1: 9873 gen_helper_neon_sqadd_u16(tcg_rd, tcg_env, tcg_rn, tcg_rd); 9874 break; 9875 case 2: 9876 gen_helper_neon_sqadd_u32(tcg_rd, tcg_env, tcg_rn, tcg_rd); 9877 break; 9878 default: 9879 g_assert_not_reached(); 9880 } 9881 } 9882 9883 if (is_scalar) { 9884 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64); 9885 } 9886 write_vec_element_i32(s, tcg_rd, rd, pass, MO_32); 9887 } 9888 clear_vec_high(s, is_q, rd); 9889 } 9890 } 9891 9892 /* AdvSIMD scalar two reg misc 9893 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 9894 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 9895 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 9896 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 9897 */ 9898 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) 9899 { 9900 int rd = extract32(insn, 0, 5); 9901 int rn = extract32(insn, 5, 5); 9902 int opcode = extract32(insn, 12, 5); 9903 int size = extract32(insn, 22, 2); 9904 bool u = extract32(insn, 29, 1); 9905 bool is_fcvt = false; 9906 int rmode; 9907 TCGv_i32 tcg_rmode; 9908 TCGv_ptr tcg_fpstatus; 9909 9910 switch (opcode) { 9911 case 0x3: /* USQADD / SUQADD*/ 9912 if (!fp_access_check(s)) { 9913 return; 9914 } 9915 handle_2misc_satacc(s, true, u, false, size, rn, rd); 9916 return; 9917 case 0x7: /* SQABS / SQNEG */ 9918 break; 9919 case 0xa: /* CMLT */ 9920 if (u) { 9921 unallocated_encoding(s); 9922 return; 9923 } 9924 /* fall through */ 9925 case 0x8: /* CMGT, CMGE */ 9926 case 0x9: /* CMEQ, CMLE */ 9927 case 0xb: /* ABS, NEG */ 9928 if (size != 3) { 9929 unallocated_encoding(s); 9930 return; 9931 } 9932 break; 9933 case 0x12: /* SQXTUN */ 9934 if (!u) { 9935 unallocated_encoding(s); 9936 return; 9937 } 9938 /* fall through */ 9939 case 0x14: /* SQXTN, UQXTN */ 9940 if (size == 3) { 9941 unallocated_encoding(s); 9942 return; 9943 } 9944 if (!fp_access_check(s)) { 9945 return; 9946 } 9947 handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd); 9948 return; 9949 case 0xc ... 0xf: 9950 case 0x16 ... 0x1d: 9951 case 0x1f: 9952 /* Floating point: U, size[1] and opcode indicate operation; 9953 * size[0] indicates single or double precision. 9954 */ 9955 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 9956 size = extract32(size, 0, 1) ? 3 : 2; 9957 switch (opcode) { 9958 case 0x2c: /* FCMGT (zero) */ 9959 case 0x2d: /* FCMEQ (zero) */ 9960 case 0x2e: /* FCMLT (zero) */ 9961 case 0x6c: /* FCMGE (zero) */ 9962 case 0x6d: /* FCMLE (zero) */ 9963 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd); 9964 return; 9965 case 0x1d: /* SCVTF */ 9966 case 0x5d: /* UCVTF */ 9967 { 9968 bool is_signed = (opcode == 0x1d); 9969 if (!fp_access_check(s)) { 9970 return; 9971 } 9972 handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size); 9973 return; 9974 } 9975 case 0x3d: /* FRECPE */ 9976 case 0x3f: /* FRECPX */ 9977 case 0x7d: /* FRSQRTE */ 9978 if (!fp_access_check(s)) { 9979 return; 9980 } 9981 handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd); 9982 return; 9983 case 0x1a: /* FCVTNS */ 9984 case 0x1b: /* FCVTMS */ 9985 case 0x3a: /* FCVTPS */ 9986 case 0x3b: /* FCVTZS */ 9987 case 0x5a: /* FCVTNU */ 9988 case 0x5b: /* FCVTMU */ 9989 case 0x7a: /* FCVTPU */ 9990 case 0x7b: /* FCVTZU */ 9991 is_fcvt = true; 9992 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 9993 break; 9994 case 0x1c: /* FCVTAS */ 9995 case 0x5c: /* FCVTAU */ 9996 /* TIEAWAY doesn't fit in the usual rounding mode encoding */ 9997 is_fcvt = true; 9998 rmode = FPROUNDING_TIEAWAY; 9999 break; 10000 case 0x56: /* FCVTXN, FCVTXN2 */ 10001 if (size == 2) { 10002 unallocated_encoding(s); 10003 return; 10004 } 10005 if (!fp_access_check(s)) { 10006 return; 10007 } 10008 handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd); 10009 return; 10010 default: 10011 unallocated_encoding(s); 10012 return; 10013 } 10014 break; 10015 default: 10016 unallocated_encoding(s); 10017 return; 10018 } 10019 10020 if (!fp_access_check(s)) { 10021 return; 10022 } 10023 10024 if (is_fcvt) { 10025 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 10026 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 10027 } else { 10028 tcg_fpstatus = NULL; 10029 tcg_rmode = NULL; 10030 } 10031 10032 if (size == 3) { 10033 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 10034 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 10035 10036 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus); 10037 write_fp_dreg(s, rd, tcg_rd); 10038 } else { 10039 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 10040 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 10041 10042 read_vec_element_i32(s, tcg_rn, rn, 0, size); 10043 10044 switch (opcode) { 10045 case 0x7: /* SQABS, SQNEG */ 10046 { 10047 NeonGenOneOpEnvFn *genfn; 10048 static NeonGenOneOpEnvFn * const fns[3][2] = { 10049 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 10050 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 10051 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, 10052 }; 10053 genfn = fns[size][u]; 10054 genfn(tcg_rd, tcg_env, tcg_rn); 10055 break; 10056 } 10057 case 0x1a: /* FCVTNS */ 10058 case 0x1b: /* FCVTMS */ 10059 case 0x1c: /* FCVTAS */ 10060 case 0x3a: /* FCVTPS */ 10061 case 0x3b: /* FCVTZS */ 10062 gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0), 10063 tcg_fpstatus); 10064 break; 10065 case 0x5a: /* FCVTNU */ 10066 case 0x5b: /* FCVTMU */ 10067 case 0x5c: /* FCVTAU */ 10068 case 0x7a: /* FCVTPU */ 10069 case 0x7b: /* FCVTZU */ 10070 gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0), 10071 tcg_fpstatus); 10072 break; 10073 default: 10074 g_assert_not_reached(); 10075 } 10076 10077 write_fp_sreg(s, rd, tcg_rd); 10078 } 10079 10080 if (is_fcvt) { 10081 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 10082 } 10083 } 10084 10085 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */ 10086 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, 10087 int immh, int immb, int opcode, int rn, int rd) 10088 { 10089 int size = 32 - clz32(immh) - 1; 10090 int immhb = immh << 3 | immb; 10091 int shift = 2 * (8 << size) - immhb; 10092 GVecGen2iFn *gvec_fn; 10093 10094 if (extract32(immh, 3, 1) && !is_q) { 10095 unallocated_encoding(s); 10096 return; 10097 } 10098 tcg_debug_assert(size <= 3); 10099 10100 if (!fp_access_check(s)) { 10101 return; 10102 } 10103 10104 switch (opcode) { 10105 case 0x02: /* SSRA / USRA (accumulate) */ 10106 gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra; 10107 break; 10108 10109 case 0x08: /* SRI */ 10110 gvec_fn = gen_gvec_sri; 10111 break; 10112 10113 case 0x00: /* SSHR / USHR */ 10114 if (is_u) { 10115 if (shift == 8 << size) { 10116 /* Shift count the same size as element size produces zero. */ 10117 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd), 10118 is_q ? 16 : 8, vec_full_reg_size(s), 0); 10119 return; 10120 } 10121 gvec_fn = tcg_gen_gvec_shri; 10122 } else { 10123 /* Shift count the same size as element size produces all sign. */ 10124 if (shift == 8 << size) { 10125 shift -= 1; 10126 } 10127 gvec_fn = tcg_gen_gvec_sari; 10128 } 10129 break; 10130 10131 case 0x04: /* SRSHR / URSHR (rounding) */ 10132 gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr; 10133 break; 10134 10135 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 10136 gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra; 10137 break; 10138 10139 default: 10140 g_assert_not_reached(); 10141 } 10142 10143 gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size); 10144 } 10145 10146 /* SHL/SLI - Vector shift left */ 10147 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, 10148 int immh, int immb, int opcode, int rn, int rd) 10149 { 10150 int size = 32 - clz32(immh) - 1; 10151 int immhb = immh << 3 | immb; 10152 int shift = immhb - (8 << size); 10153 10154 /* Range of size is limited by decode: immh is a non-zero 4 bit field */ 10155 assert(size >= 0 && size <= 3); 10156 10157 if (extract32(immh, 3, 1) && !is_q) { 10158 unallocated_encoding(s); 10159 return; 10160 } 10161 10162 if (!fp_access_check(s)) { 10163 return; 10164 } 10165 10166 if (insert) { 10167 gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size); 10168 } else { 10169 gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size); 10170 } 10171 } 10172 10173 /* USHLL/SHLL - Vector shift left with widening */ 10174 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, 10175 int immh, int immb, int opcode, int rn, int rd) 10176 { 10177 int size = 32 - clz32(immh) - 1; 10178 int immhb = immh << 3 | immb; 10179 int shift = immhb - (8 << size); 10180 int dsize = 64; 10181 int esize = 8 << size; 10182 int elements = dsize/esize; 10183 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 10184 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 10185 int i; 10186 10187 if (size >= 3) { 10188 unallocated_encoding(s); 10189 return; 10190 } 10191 10192 if (!fp_access_check(s)) { 10193 return; 10194 } 10195 10196 /* For the LL variants the store is larger than the load, 10197 * so if rd == rn we would overwrite parts of our input. 10198 * So load everything right now and use shifts in the main loop. 10199 */ 10200 read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64); 10201 10202 for (i = 0; i < elements; i++) { 10203 tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize); 10204 ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0); 10205 tcg_gen_shli_i64(tcg_rd, tcg_rd, shift); 10206 write_vec_element(s, tcg_rd, rd, i, size + 1); 10207 } 10208 } 10209 10210 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */ 10211 static void handle_vec_simd_shrn(DisasContext *s, bool is_q, 10212 int immh, int immb, int opcode, int rn, int rd) 10213 { 10214 int immhb = immh << 3 | immb; 10215 int size = 32 - clz32(immh) - 1; 10216 int dsize = 64; 10217 int esize = 8 << size; 10218 int elements = dsize/esize; 10219 int shift = (2 * esize) - immhb; 10220 bool round = extract32(opcode, 0, 1); 10221 TCGv_i64 tcg_rn, tcg_rd, tcg_final; 10222 TCGv_i64 tcg_round; 10223 int i; 10224 10225 if (extract32(immh, 3, 1)) { 10226 unallocated_encoding(s); 10227 return; 10228 } 10229 10230 if (!fp_access_check(s)) { 10231 return; 10232 } 10233 10234 tcg_rn = tcg_temp_new_i64(); 10235 tcg_rd = tcg_temp_new_i64(); 10236 tcg_final = tcg_temp_new_i64(); 10237 read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64); 10238 10239 if (round) { 10240 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 10241 } else { 10242 tcg_round = NULL; 10243 } 10244 10245 for (i = 0; i < elements; i++) { 10246 read_vec_element(s, tcg_rn, rn, i, size+1); 10247 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 10248 false, true, size+1, shift); 10249 10250 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); 10251 } 10252 10253 if (!is_q) { 10254 write_vec_element(s, tcg_final, rd, 0, MO_64); 10255 } else { 10256 write_vec_element(s, tcg_final, rd, 1, MO_64); 10257 } 10258 10259 clear_vec_high(s, is_q, rd); 10260 } 10261 10262 10263 /* AdvSIMD shift by immediate 10264 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 10265 * +---+---+---+-------------+------+------+--------+---+------+------+ 10266 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | 10267 * +---+---+---+-------------+------+------+--------+---+------+------+ 10268 */ 10269 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) 10270 { 10271 int rd = extract32(insn, 0, 5); 10272 int rn = extract32(insn, 5, 5); 10273 int opcode = extract32(insn, 11, 5); 10274 int immb = extract32(insn, 16, 3); 10275 int immh = extract32(insn, 19, 4); 10276 bool is_u = extract32(insn, 29, 1); 10277 bool is_q = extract32(insn, 30, 1); 10278 10279 /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */ 10280 assert(immh != 0); 10281 10282 switch (opcode) { 10283 case 0x08: /* SRI */ 10284 if (!is_u) { 10285 unallocated_encoding(s); 10286 return; 10287 } 10288 /* fall through */ 10289 case 0x00: /* SSHR / USHR */ 10290 case 0x02: /* SSRA / USRA (accumulate) */ 10291 case 0x04: /* SRSHR / URSHR (rounding) */ 10292 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 10293 handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd); 10294 break; 10295 case 0x0a: /* SHL / SLI */ 10296 handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd); 10297 break; 10298 case 0x10: /* SHRN */ 10299 case 0x11: /* RSHRN / SQRSHRUN */ 10300 if (is_u) { 10301 handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb, 10302 opcode, rn, rd); 10303 } else { 10304 handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd); 10305 } 10306 break; 10307 case 0x12: /* SQSHRN / UQSHRN */ 10308 case 0x13: /* SQRSHRN / UQRSHRN */ 10309 handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, 10310 opcode, rn, rd); 10311 break; 10312 case 0x14: /* SSHLL / USHLL */ 10313 handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); 10314 break; 10315 case 0x1c: /* SCVTF / UCVTF */ 10316 handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, 10317 opcode, rn, rd); 10318 break; 10319 case 0xc: /* SQSHLU */ 10320 if (!is_u) { 10321 unallocated_encoding(s); 10322 return; 10323 } 10324 handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd); 10325 break; 10326 case 0xe: /* SQSHL, UQSHL */ 10327 handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd); 10328 break; 10329 case 0x1f: /* FCVTZS/ FCVTZU */ 10330 handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd); 10331 return; 10332 default: 10333 unallocated_encoding(s); 10334 return; 10335 } 10336 } 10337 10338 /* Generate code to do a "long" addition or subtraction, ie one done in 10339 * TCGv_i64 on vector lanes twice the width specified by size. 10340 */ 10341 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res, 10342 TCGv_i64 tcg_op1, TCGv_i64 tcg_op2) 10343 { 10344 static NeonGenTwo64OpFn * const fns[3][2] = { 10345 { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 }, 10346 { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 }, 10347 { tcg_gen_add_i64, tcg_gen_sub_i64 }, 10348 }; 10349 NeonGenTwo64OpFn *genfn; 10350 assert(size < 3); 10351 10352 genfn = fns[size][is_sub]; 10353 genfn(tcg_res, tcg_op1, tcg_op2); 10354 } 10355 10356 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, 10357 int opcode, int rd, int rn, int rm) 10358 { 10359 /* 3-reg-different widening insns: 64 x 64 -> 128 */ 10360 TCGv_i64 tcg_res[2]; 10361 int pass, accop; 10362 10363 tcg_res[0] = tcg_temp_new_i64(); 10364 tcg_res[1] = tcg_temp_new_i64(); 10365 10366 /* Does this op do an adding accumulate, a subtracting accumulate, 10367 * or no accumulate at all? 10368 */ 10369 switch (opcode) { 10370 case 5: 10371 case 8: 10372 case 9: 10373 accop = 1; 10374 break; 10375 case 10: 10376 case 11: 10377 accop = -1; 10378 break; 10379 default: 10380 accop = 0; 10381 break; 10382 } 10383 10384 if (accop != 0) { 10385 read_vec_element(s, tcg_res[0], rd, 0, MO_64); 10386 read_vec_element(s, tcg_res[1], rd, 1, MO_64); 10387 } 10388 10389 /* size == 2 means two 32x32->64 operations; this is worth special 10390 * casing because we can generally handle it inline. 10391 */ 10392 if (size == 2) { 10393 for (pass = 0; pass < 2; pass++) { 10394 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10395 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 10396 TCGv_i64 tcg_passres; 10397 MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN); 10398 10399 int elt = pass + is_q * 2; 10400 10401 read_vec_element(s, tcg_op1, rn, elt, memop); 10402 read_vec_element(s, tcg_op2, rm, elt, memop); 10403 10404 if (accop == 0) { 10405 tcg_passres = tcg_res[pass]; 10406 } else { 10407 tcg_passres = tcg_temp_new_i64(); 10408 } 10409 10410 switch (opcode) { 10411 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 10412 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2); 10413 break; 10414 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 10415 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2); 10416 break; 10417 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 10418 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 10419 { 10420 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64(); 10421 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64(); 10422 10423 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2); 10424 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1); 10425 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE, 10426 tcg_passres, 10427 tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2); 10428 break; 10429 } 10430 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 10431 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 10432 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ 10433 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); 10434 break; 10435 case 9: /* SQDMLAL, SQDMLAL2 */ 10436 case 11: /* SQDMLSL, SQDMLSL2 */ 10437 case 13: /* SQDMULL, SQDMULL2 */ 10438 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); 10439 gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env, 10440 tcg_passres, tcg_passres); 10441 break; 10442 default: 10443 g_assert_not_reached(); 10444 } 10445 10446 if (opcode == 9 || opcode == 11) { 10447 /* saturating accumulate ops */ 10448 if (accop < 0) { 10449 tcg_gen_neg_i64(tcg_passres, tcg_passres); 10450 } 10451 gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env, 10452 tcg_res[pass], tcg_passres); 10453 } else if (accop > 0) { 10454 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 10455 } else if (accop < 0) { 10456 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 10457 } 10458 } 10459 } else { 10460 /* size 0 or 1, generally helper functions */ 10461 for (pass = 0; pass < 2; pass++) { 10462 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 10463 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 10464 TCGv_i64 tcg_passres; 10465 int elt = pass + is_q * 2; 10466 10467 read_vec_element_i32(s, tcg_op1, rn, elt, MO_32); 10468 read_vec_element_i32(s, tcg_op2, rm, elt, MO_32); 10469 10470 if (accop == 0) { 10471 tcg_passres = tcg_res[pass]; 10472 } else { 10473 tcg_passres = tcg_temp_new_i64(); 10474 } 10475 10476 switch (opcode) { 10477 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 10478 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 10479 { 10480 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64(); 10481 static NeonGenWidenFn * const widenfns[2][2] = { 10482 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, 10483 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, 10484 }; 10485 NeonGenWidenFn *widenfn = widenfns[size][is_u]; 10486 10487 widenfn(tcg_op2_64, tcg_op2); 10488 widenfn(tcg_passres, tcg_op1); 10489 gen_neon_addl(size, (opcode == 2), tcg_passres, 10490 tcg_passres, tcg_op2_64); 10491 break; 10492 } 10493 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 10494 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 10495 if (size == 0) { 10496 if (is_u) { 10497 gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2); 10498 } else { 10499 gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2); 10500 } 10501 } else { 10502 if (is_u) { 10503 gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2); 10504 } else { 10505 gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2); 10506 } 10507 } 10508 break; 10509 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 10510 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 10511 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ 10512 if (size == 0) { 10513 if (is_u) { 10514 gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2); 10515 } else { 10516 gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2); 10517 } 10518 } else { 10519 if (is_u) { 10520 gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2); 10521 } else { 10522 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); 10523 } 10524 } 10525 break; 10526 case 9: /* SQDMLAL, SQDMLAL2 */ 10527 case 11: /* SQDMLSL, SQDMLSL2 */ 10528 case 13: /* SQDMULL, SQDMULL2 */ 10529 assert(size == 1); 10530 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); 10531 gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env, 10532 tcg_passres, tcg_passres); 10533 break; 10534 default: 10535 g_assert_not_reached(); 10536 } 10537 10538 if (accop != 0) { 10539 if (opcode == 9 || opcode == 11) { 10540 /* saturating accumulate ops */ 10541 if (accop < 0) { 10542 gen_helper_neon_negl_u32(tcg_passres, tcg_passres); 10543 } 10544 gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env, 10545 tcg_res[pass], 10546 tcg_passres); 10547 } else { 10548 gen_neon_addl(size, (accop < 0), tcg_res[pass], 10549 tcg_res[pass], tcg_passres); 10550 } 10551 } 10552 } 10553 } 10554 10555 write_vec_element(s, tcg_res[0], rd, 0, MO_64); 10556 write_vec_element(s, tcg_res[1], rd, 1, MO_64); 10557 } 10558 10559 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, 10560 int opcode, int rd, int rn, int rm) 10561 { 10562 TCGv_i64 tcg_res[2]; 10563 int part = is_q ? 2 : 0; 10564 int pass; 10565 10566 for (pass = 0; pass < 2; pass++) { 10567 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10568 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 10569 TCGv_i64 tcg_op2_wide = tcg_temp_new_i64(); 10570 static NeonGenWidenFn * const widenfns[3][2] = { 10571 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, 10572 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, 10573 { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 }, 10574 }; 10575 NeonGenWidenFn *widenfn = widenfns[size][is_u]; 10576 10577 read_vec_element(s, tcg_op1, rn, pass, MO_64); 10578 read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32); 10579 widenfn(tcg_op2_wide, tcg_op2); 10580 tcg_res[pass] = tcg_temp_new_i64(); 10581 gen_neon_addl(size, (opcode == 3), 10582 tcg_res[pass], tcg_op1, tcg_op2_wide); 10583 } 10584 10585 for (pass = 0; pass < 2; pass++) { 10586 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 10587 } 10588 } 10589 10590 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in) 10591 { 10592 tcg_gen_addi_i64(in, in, 1U << 31); 10593 tcg_gen_extrh_i64_i32(res, in); 10594 } 10595 10596 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size, 10597 int opcode, int rd, int rn, int rm) 10598 { 10599 TCGv_i32 tcg_res[2]; 10600 int part = is_q ? 2 : 0; 10601 int pass; 10602 10603 for (pass = 0; pass < 2; pass++) { 10604 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10605 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 10606 TCGv_i64 tcg_wideres = tcg_temp_new_i64(); 10607 static NeonGenNarrowFn * const narrowfns[3][2] = { 10608 { gen_helper_neon_narrow_high_u8, 10609 gen_helper_neon_narrow_round_high_u8 }, 10610 { gen_helper_neon_narrow_high_u16, 10611 gen_helper_neon_narrow_round_high_u16 }, 10612 { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 }, 10613 }; 10614 NeonGenNarrowFn *gennarrow = narrowfns[size][is_u]; 10615 10616 read_vec_element(s, tcg_op1, rn, pass, MO_64); 10617 read_vec_element(s, tcg_op2, rm, pass, MO_64); 10618 10619 gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2); 10620 10621 tcg_res[pass] = tcg_temp_new_i32(); 10622 gennarrow(tcg_res[pass], tcg_wideres); 10623 } 10624 10625 for (pass = 0; pass < 2; pass++) { 10626 write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32); 10627 } 10628 clear_vec_high(s, is_q, rd); 10629 } 10630 10631 /* AdvSIMD three different 10632 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 10633 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 10634 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | 10635 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 10636 */ 10637 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) 10638 { 10639 /* Instructions in this group fall into three basic classes 10640 * (in each case with the operation working on each element in 10641 * the input vectors): 10642 * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra 10643 * 128 bit input) 10644 * (2) wide 64 x 128 -> 128 10645 * (3) narrowing 128 x 128 -> 64 10646 * Here we do initial decode, catch unallocated cases and 10647 * dispatch to separate functions for each class. 10648 */ 10649 int is_q = extract32(insn, 30, 1); 10650 int is_u = extract32(insn, 29, 1); 10651 int size = extract32(insn, 22, 2); 10652 int opcode = extract32(insn, 12, 4); 10653 int rm = extract32(insn, 16, 5); 10654 int rn = extract32(insn, 5, 5); 10655 int rd = extract32(insn, 0, 5); 10656 10657 switch (opcode) { 10658 case 1: /* SADDW, SADDW2, UADDW, UADDW2 */ 10659 case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */ 10660 /* 64 x 128 -> 128 */ 10661 if (size == 3) { 10662 unallocated_encoding(s); 10663 return; 10664 } 10665 if (!fp_access_check(s)) { 10666 return; 10667 } 10668 handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm); 10669 break; 10670 case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */ 10671 case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */ 10672 /* 128 x 128 -> 64 */ 10673 if (size == 3) { 10674 unallocated_encoding(s); 10675 return; 10676 } 10677 if (!fp_access_check(s)) { 10678 return; 10679 } 10680 handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm); 10681 break; 10682 case 14: /* PMULL, PMULL2 */ 10683 if (is_u) { 10684 unallocated_encoding(s); 10685 return; 10686 } 10687 switch (size) { 10688 case 0: /* PMULL.P8 */ 10689 if (!fp_access_check(s)) { 10690 return; 10691 } 10692 /* The Q field specifies lo/hi half input for this insn. */ 10693 gen_gvec_op3_ool(s, true, rd, rn, rm, is_q, 10694 gen_helper_neon_pmull_h); 10695 break; 10696 10697 case 3: /* PMULL.P64 */ 10698 if (!dc_isar_feature(aa64_pmull, s)) { 10699 unallocated_encoding(s); 10700 return; 10701 } 10702 if (!fp_access_check(s)) { 10703 return; 10704 } 10705 /* The Q field specifies lo/hi half input for this insn. */ 10706 gen_gvec_op3_ool(s, true, rd, rn, rm, is_q, 10707 gen_helper_gvec_pmull_q); 10708 break; 10709 10710 default: 10711 unallocated_encoding(s); 10712 break; 10713 } 10714 return; 10715 case 9: /* SQDMLAL, SQDMLAL2 */ 10716 case 11: /* SQDMLSL, SQDMLSL2 */ 10717 case 13: /* SQDMULL, SQDMULL2 */ 10718 if (is_u || size == 0) { 10719 unallocated_encoding(s); 10720 return; 10721 } 10722 /* fall through */ 10723 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 10724 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 10725 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 10726 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 10727 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 10728 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 10729 case 12: /* SMULL, SMULL2, UMULL, UMULL2 */ 10730 /* 64 x 64 -> 128 */ 10731 if (size == 3) { 10732 unallocated_encoding(s); 10733 return; 10734 } 10735 if (!fp_access_check(s)) { 10736 return; 10737 } 10738 10739 handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm); 10740 break; 10741 default: 10742 /* opcode 15 not allocated */ 10743 unallocated_encoding(s); 10744 break; 10745 } 10746 } 10747 10748 /* Logic op (opcode == 3) subgroup of C3.6.16. */ 10749 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) 10750 { 10751 int rd = extract32(insn, 0, 5); 10752 int rn = extract32(insn, 5, 5); 10753 int rm = extract32(insn, 16, 5); 10754 int size = extract32(insn, 22, 2); 10755 bool is_u = extract32(insn, 29, 1); 10756 bool is_q = extract32(insn, 30, 1); 10757 10758 if (!fp_access_check(s)) { 10759 return; 10760 } 10761 10762 switch (size + 4 * is_u) { 10763 case 0: /* AND */ 10764 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0); 10765 return; 10766 case 1: /* BIC */ 10767 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0); 10768 return; 10769 case 2: /* ORR */ 10770 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0); 10771 return; 10772 case 3: /* ORN */ 10773 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0); 10774 return; 10775 case 4: /* EOR */ 10776 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0); 10777 return; 10778 10779 case 5: /* BSL bitwise select */ 10780 gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0); 10781 return; 10782 case 6: /* BIT, bitwise insert if true */ 10783 gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0); 10784 return; 10785 case 7: /* BIF, bitwise insert if false */ 10786 gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0); 10787 return; 10788 10789 default: 10790 g_assert_not_reached(); 10791 } 10792 } 10793 10794 /* Pairwise op subgroup of C3.6.16. 10795 * 10796 * This is called directly or via the handle_3same_float for float pairwise 10797 * operations where the opcode and size are calculated differently. 10798 */ 10799 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, 10800 int size, int rn, int rm, int rd) 10801 { 10802 TCGv_ptr fpst; 10803 int pass; 10804 10805 /* Floating point operations need fpst */ 10806 if (opcode >= 0x58) { 10807 fpst = fpstatus_ptr(FPST_FPCR); 10808 } else { 10809 fpst = NULL; 10810 } 10811 10812 if (!fp_access_check(s)) { 10813 return; 10814 } 10815 10816 /* These operations work on the concatenated rm:rn, with each pair of 10817 * adjacent elements being operated on to produce an element in the result. 10818 */ 10819 if (size == 3) { 10820 TCGv_i64 tcg_res[2]; 10821 10822 for (pass = 0; pass < 2; pass++) { 10823 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10824 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 10825 int passreg = (pass == 0) ? rn : rm; 10826 10827 read_vec_element(s, tcg_op1, passreg, 0, MO_64); 10828 read_vec_element(s, tcg_op2, passreg, 1, MO_64); 10829 tcg_res[pass] = tcg_temp_new_i64(); 10830 10831 switch (opcode) { 10832 case 0x17: /* ADDP */ 10833 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); 10834 break; 10835 case 0x58: /* FMAXNMP */ 10836 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10837 break; 10838 case 0x5a: /* FADDP */ 10839 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10840 break; 10841 case 0x5e: /* FMAXP */ 10842 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10843 break; 10844 case 0x78: /* FMINNMP */ 10845 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10846 break; 10847 case 0x7e: /* FMINP */ 10848 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10849 break; 10850 default: 10851 g_assert_not_reached(); 10852 } 10853 } 10854 10855 for (pass = 0; pass < 2; pass++) { 10856 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 10857 } 10858 } else { 10859 int maxpass = is_q ? 4 : 2; 10860 TCGv_i32 tcg_res[4]; 10861 10862 for (pass = 0; pass < maxpass; pass++) { 10863 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 10864 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 10865 NeonGenTwoOpFn *genfn = NULL; 10866 int passreg = pass < (maxpass / 2) ? rn : rm; 10867 int passelt = (is_q && (pass & 1)) ? 2 : 0; 10868 10869 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32); 10870 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32); 10871 tcg_res[pass] = tcg_temp_new_i32(); 10872 10873 switch (opcode) { 10874 case 0x17: /* ADDP */ 10875 { 10876 static NeonGenTwoOpFn * const fns[3] = { 10877 gen_helper_neon_padd_u8, 10878 gen_helper_neon_padd_u16, 10879 tcg_gen_add_i32, 10880 }; 10881 genfn = fns[size]; 10882 break; 10883 } 10884 case 0x14: /* SMAXP, UMAXP */ 10885 { 10886 static NeonGenTwoOpFn * const fns[3][2] = { 10887 { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 }, 10888 { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 }, 10889 { tcg_gen_smax_i32, tcg_gen_umax_i32 }, 10890 }; 10891 genfn = fns[size][u]; 10892 break; 10893 } 10894 case 0x15: /* SMINP, UMINP */ 10895 { 10896 static NeonGenTwoOpFn * const fns[3][2] = { 10897 { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 }, 10898 { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 }, 10899 { tcg_gen_smin_i32, tcg_gen_umin_i32 }, 10900 }; 10901 genfn = fns[size][u]; 10902 break; 10903 } 10904 /* The FP operations are all on single floats (32 bit) */ 10905 case 0x58: /* FMAXNMP */ 10906 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10907 break; 10908 case 0x5a: /* FADDP */ 10909 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10910 break; 10911 case 0x5e: /* FMAXP */ 10912 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10913 break; 10914 case 0x78: /* FMINNMP */ 10915 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10916 break; 10917 case 0x7e: /* FMINP */ 10918 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10919 break; 10920 default: 10921 g_assert_not_reached(); 10922 } 10923 10924 /* FP ops called directly, otherwise call now */ 10925 if (genfn) { 10926 genfn(tcg_res[pass], tcg_op1, tcg_op2); 10927 } 10928 } 10929 10930 for (pass = 0; pass < maxpass; pass++) { 10931 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); 10932 } 10933 clear_vec_high(s, is_q, rd); 10934 } 10935 } 10936 10937 /* Floating point op subgroup of C3.6.16. */ 10938 static void disas_simd_3same_float(DisasContext *s, uint32_t insn) 10939 { 10940 /* For floating point ops, the U, size[1] and opcode bits 10941 * together indicate the operation. size[0] indicates single 10942 * or double. 10943 */ 10944 int fpopcode = extract32(insn, 11, 5) 10945 | (extract32(insn, 23, 1) << 5) 10946 | (extract32(insn, 29, 1) << 6); 10947 int is_q = extract32(insn, 30, 1); 10948 int size = extract32(insn, 22, 1); 10949 int rm = extract32(insn, 16, 5); 10950 int rn = extract32(insn, 5, 5); 10951 int rd = extract32(insn, 0, 5); 10952 10953 int datasize = is_q ? 128 : 64; 10954 int esize = 32 << size; 10955 int elements = datasize / esize; 10956 10957 if (size == 1 && !is_q) { 10958 unallocated_encoding(s); 10959 return; 10960 } 10961 10962 switch (fpopcode) { 10963 case 0x58: /* FMAXNMP */ 10964 case 0x5a: /* FADDP */ 10965 case 0x5e: /* FMAXP */ 10966 case 0x78: /* FMINNMP */ 10967 case 0x7e: /* FMINP */ 10968 if (size && !is_q) { 10969 unallocated_encoding(s); 10970 return; 10971 } 10972 handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32, 10973 rn, rm, rd); 10974 return; 10975 case 0x1b: /* FMULX */ 10976 case 0x1f: /* FRECPS */ 10977 case 0x3f: /* FRSQRTS */ 10978 case 0x5d: /* FACGE */ 10979 case 0x7d: /* FACGT */ 10980 case 0x19: /* FMLA */ 10981 case 0x39: /* FMLS */ 10982 case 0x18: /* FMAXNM */ 10983 case 0x1a: /* FADD */ 10984 case 0x1c: /* FCMEQ */ 10985 case 0x1e: /* FMAX */ 10986 case 0x38: /* FMINNM */ 10987 case 0x3a: /* FSUB */ 10988 case 0x3e: /* FMIN */ 10989 case 0x5b: /* FMUL */ 10990 case 0x5c: /* FCMGE */ 10991 case 0x5f: /* FDIV */ 10992 case 0x7a: /* FABD */ 10993 case 0x7c: /* FCMGT */ 10994 if (!fp_access_check(s)) { 10995 return; 10996 } 10997 handle_3same_float(s, size, elements, fpopcode, rd, rn, rm); 10998 return; 10999 11000 case 0x1d: /* FMLAL */ 11001 case 0x3d: /* FMLSL */ 11002 case 0x59: /* FMLAL2 */ 11003 case 0x79: /* FMLSL2 */ 11004 if (size & 1 || !dc_isar_feature(aa64_fhm, s)) { 11005 unallocated_encoding(s); 11006 return; 11007 } 11008 if (fp_access_check(s)) { 11009 int is_s = extract32(insn, 23, 1); 11010 int is_2 = extract32(insn, 29, 1); 11011 int data = (is_2 << 1) | is_s; 11012 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 11013 vec_full_reg_offset(s, rn), 11014 vec_full_reg_offset(s, rm), tcg_env, 11015 is_q ? 16 : 8, vec_full_reg_size(s), 11016 data, gen_helper_gvec_fmlal_a64); 11017 } 11018 return; 11019 11020 default: 11021 unallocated_encoding(s); 11022 return; 11023 } 11024 } 11025 11026 /* Integer op subgroup of C3.6.16. */ 11027 static void disas_simd_3same_int(DisasContext *s, uint32_t insn) 11028 { 11029 int is_q = extract32(insn, 30, 1); 11030 int u = extract32(insn, 29, 1); 11031 int size = extract32(insn, 22, 2); 11032 int opcode = extract32(insn, 11, 5); 11033 int rm = extract32(insn, 16, 5); 11034 int rn = extract32(insn, 5, 5); 11035 int rd = extract32(insn, 0, 5); 11036 int pass; 11037 TCGCond cond; 11038 11039 switch (opcode) { 11040 case 0x13: /* MUL, PMUL */ 11041 if (u && size != 0) { 11042 unallocated_encoding(s); 11043 return; 11044 } 11045 /* fall through */ 11046 case 0x0: /* SHADD, UHADD */ 11047 case 0x2: /* SRHADD, URHADD */ 11048 case 0x4: /* SHSUB, UHSUB */ 11049 case 0xc: /* SMAX, UMAX */ 11050 case 0xd: /* SMIN, UMIN */ 11051 case 0xe: /* SABD, UABD */ 11052 case 0xf: /* SABA, UABA */ 11053 case 0x12: /* MLA, MLS */ 11054 if (size == 3) { 11055 unallocated_encoding(s); 11056 return; 11057 } 11058 break; 11059 case 0x16: /* SQDMULH, SQRDMULH */ 11060 if (size == 0 || size == 3) { 11061 unallocated_encoding(s); 11062 return; 11063 } 11064 break; 11065 default: 11066 if (size == 3 && !is_q) { 11067 unallocated_encoding(s); 11068 return; 11069 } 11070 break; 11071 } 11072 11073 if (!fp_access_check(s)) { 11074 return; 11075 } 11076 11077 switch (opcode) { 11078 case 0x01: /* SQADD, UQADD */ 11079 if (u) { 11080 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size); 11081 } else { 11082 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size); 11083 } 11084 return; 11085 case 0x05: /* SQSUB, UQSUB */ 11086 if (u) { 11087 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size); 11088 } else { 11089 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size); 11090 } 11091 return; 11092 case 0x08: /* SSHL, USHL */ 11093 if (u) { 11094 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size); 11095 } else { 11096 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size); 11097 } 11098 return; 11099 case 0x0c: /* SMAX, UMAX */ 11100 if (u) { 11101 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size); 11102 } else { 11103 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size); 11104 } 11105 return; 11106 case 0x0d: /* SMIN, UMIN */ 11107 if (u) { 11108 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size); 11109 } else { 11110 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size); 11111 } 11112 return; 11113 case 0xe: /* SABD, UABD */ 11114 if (u) { 11115 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size); 11116 } else { 11117 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size); 11118 } 11119 return; 11120 case 0xf: /* SABA, UABA */ 11121 if (u) { 11122 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size); 11123 } else { 11124 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size); 11125 } 11126 return; 11127 case 0x10: /* ADD, SUB */ 11128 if (u) { 11129 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size); 11130 } else { 11131 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size); 11132 } 11133 return; 11134 case 0x13: /* MUL, PMUL */ 11135 if (!u) { /* MUL */ 11136 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size); 11137 } else { /* PMUL */ 11138 gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b); 11139 } 11140 return; 11141 case 0x12: /* MLA, MLS */ 11142 if (u) { 11143 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size); 11144 } else { 11145 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size); 11146 } 11147 return; 11148 case 0x16: /* SQDMULH, SQRDMULH */ 11149 { 11150 static gen_helper_gvec_3_ptr * const fns[2][2] = { 11151 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h }, 11152 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s }, 11153 }; 11154 gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]); 11155 } 11156 return; 11157 case 0x11: 11158 if (!u) { /* CMTST */ 11159 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size); 11160 return; 11161 } 11162 /* else CMEQ */ 11163 cond = TCG_COND_EQ; 11164 goto do_gvec_cmp; 11165 case 0x06: /* CMGT, CMHI */ 11166 cond = u ? TCG_COND_GTU : TCG_COND_GT; 11167 goto do_gvec_cmp; 11168 case 0x07: /* CMGE, CMHS */ 11169 cond = u ? TCG_COND_GEU : TCG_COND_GE; 11170 do_gvec_cmp: 11171 tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd), 11172 vec_full_reg_offset(s, rn), 11173 vec_full_reg_offset(s, rm), 11174 is_q ? 16 : 8, vec_full_reg_size(s)); 11175 return; 11176 } 11177 11178 if (size == 3) { 11179 assert(is_q); 11180 for (pass = 0; pass < 2; pass++) { 11181 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 11182 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 11183 TCGv_i64 tcg_res = tcg_temp_new_i64(); 11184 11185 read_vec_element(s, tcg_op1, rn, pass, MO_64); 11186 read_vec_element(s, tcg_op2, rm, pass, MO_64); 11187 11188 handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2); 11189 11190 write_vec_element(s, tcg_res, rd, pass, MO_64); 11191 } 11192 } else { 11193 for (pass = 0; pass < (is_q ? 4 : 2); pass++) { 11194 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11195 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11196 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11197 NeonGenTwoOpFn *genfn = NULL; 11198 NeonGenTwoOpEnvFn *genenvfn = NULL; 11199 11200 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); 11201 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); 11202 11203 switch (opcode) { 11204 case 0x0: /* SHADD, UHADD */ 11205 { 11206 static NeonGenTwoOpFn * const fns[3][2] = { 11207 { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 }, 11208 { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 }, 11209 { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 }, 11210 }; 11211 genfn = fns[size][u]; 11212 break; 11213 } 11214 case 0x2: /* SRHADD, URHADD */ 11215 { 11216 static NeonGenTwoOpFn * const fns[3][2] = { 11217 { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 }, 11218 { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 }, 11219 { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 }, 11220 }; 11221 genfn = fns[size][u]; 11222 break; 11223 } 11224 case 0x4: /* SHSUB, UHSUB */ 11225 { 11226 static NeonGenTwoOpFn * const fns[3][2] = { 11227 { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 }, 11228 { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 }, 11229 { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 }, 11230 }; 11231 genfn = fns[size][u]; 11232 break; 11233 } 11234 case 0x9: /* SQSHL, UQSHL */ 11235 { 11236 static NeonGenTwoOpEnvFn * const fns[3][2] = { 11237 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, 11238 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, 11239 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, 11240 }; 11241 genenvfn = fns[size][u]; 11242 break; 11243 } 11244 case 0xa: /* SRSHL, URSHL */ 11245 { 11246 static NeonGenTwoOpFn * const fns[3][2] = { 11247 { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 }, 11248 { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 }, 11249 { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 }, 11250 }; 11251 genfn = fns[size][u]; 11252 break; 11253 } 11254 case 0xb: /* SQRSHL, UQRSHL */ 11255 { 11256 static NeonGenTwoOpEnvFn * const fns[3][2] = { 11257 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, 11258 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, 11259 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, 11260 }; 11261 genenvfn = fns[size][u]; 11262 break; 11263 } 11264 default: 11265 g_assert_not_reached(); 11266 } 11267 11268 if (genenvfn) { 11269 genenvfn(tcg_res, tcg_env, tcg_op1, tcg_op2); 11270 } else { 11271 genfn(tcg_res, tcg_op1, tcg_op2); 11272 } 11273 11274 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 11275 } 11276 } 11277 clear_vec_high(s, is_q, rd); 11278 } 11279 11280 /* AdvSIMD three same 11281 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 11282 * +---+---+---+-----------+------+---+------+--------+---+------+------+ 11283 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | 11284 * +---+---+---+-----------+------+---+------+--------+---+------+------+ 11285 */ 11286 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn) 11287 { 11288 int opcode = extract32(insn, 11, 5); 11289 11290 switch (opcode) { 11291 case 0x3: /* logic ops */ 11292 disas_simd_3same_logic(s, insn); 11293 break; 11294 case 0x17: /* ADDP */ 11295 case 0x14: /* SMAXP, UMAXP */ 11296 case 0x15: /* SMINP, UMINP */ 11297 { 11298 /* Pairwise operations */ 11299 int is_q = extract32(insn, 30, 1); 11300 int u = extract32(insn, 29, 1); 11301 int size = extract32(insn, 22, 2); 11302 int rm = extract32(insn, 16, 5); 11303 int rn = extract32(insn, 5, 5); 11304 int rd = extract32(insn, 0, 5); 11305 if (opcode == 0x17) { 11306 if (u || (size == 3 && !is_q)) { 11307 unallocated_encoding(s); 11308 return; 11309 } 11310 } else { 11311 if (size == 3) { 11312 unallocated_encoding(s); 11313 return; 11314 } 11315 } 11316 handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd); 11317 break; 11318 } 11319 case 0x18 ... 0x31: 11320 /* floating point ops, sz[1] and U are part of opcode */ 11321 disas_simd_3same_float(s, insn); 11322 break; 11323 default: 11324 disas_simd_3same_int(s, insn); 11325 break; 11326 } 11327 } 11328 11329 /* 11330 * Advanced SIMD three same (ARMv8.2 FP16 variants) 11331 * 11332 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0 11333 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+ 11334 * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd | 11335 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+ 11336 * 11337 * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE 11338 * (register), FACGE, FABD, FCMGT (register) and FACGT. 11339 * 11340 */ 11341 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) 11342 { 11343 int opcode = extract32(insn, 11, 3); 11344 int u = extract32(insn, 29, 1); 11345 int a = extract32(insn, 23, 1); 11346 int is_q = extract32(insn, 30, 1); 11347 int rm = extract32(insn, 16, 5); 11348 int rn = extract32(insn, 5, 5); 11349 int rd = extract32(insn, 0, 5); 11350 /* 11351 * For these floating point ops, the U, a and opcode bits 11352 * together indicate the operation. 11353 */ 11354 int fpopcode = opcode | (a << 3) | (u << 4); 11355 int datasize = is_q ? 128 : 64; 11356 int elements = datasize / 16; 11357 bool pairwise; 11358 TCGv_ptr fpst; 11359 int pass; 11360 11361 switch (fpopcode) { 11362 case 0x0: /* FMAXNM */ 11363 case 0x1: /* FMLA */ 11364 case 0x2: /* FADD */ 11365 case 0x3: /* FMULX */ 11366 case 0x4: /* FCMEQ */ 11367 case 0x6: /* FMAX */ 11368 case 0x7: /* FRECPS */ 11369 case 0x8: /* FMINNM */ 11370 case 0x9: /* FMLS */ 11371 case 0xa: /* FSUB */ 11372 case 0xe: /* FMIN */ 11373 case 0xf: /* FRSQRTS */ 11374 case 0x13: /* FMUL */ 11375 case 0x14: /* FCMGE */ 11376 case 0x15: /* FACGE */ 11377 case 0x17: /* FDIV */ 11378 case 0x1a: /* FABD */ 11379 case 0x1c: /* FCMGT */ 11380 case 0x1d: /* FACGT */ 11381 pairwise = false; 11382 break; 11383 case 0x10: /* FMAXNMP */ 11384 case 0x12: /* FADDP */ 11385 case 0x16: /* FMAXP */ 11386 case 0x18: /* FMINNMP */ 11387 case 0x1e: /* FMINP */ 11388 pairwise = true; 11389 break; 11390 default: 11391 unallocated_encoding(s); 11392 return; 11393 } 11394 11395 if (!dc_isar_feature(aa64_fp16, s)) { 11396 unallocated_encoding(s); 11397 return; 11398 } 11399 11400 if (!fp_access_check(s)) { 11401 return; 11402 } 11403 11404 fpst = fpstatus_ptr(FPST_FPCR_F16); 11405 11406 if (pairwise) { 11407 int maxpass = is_q ? 8 : 4; 11408 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11409 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11410 TCGv_i32 tcg_res[8]; 11411 11412 for (pass = 0; pass < maxpass; pass++) { 11413 int passreg = pass < (maxpass / 2) ? rn : rm; 11414 int passelt = (pass << 1) & (maxpass - 1); 11415 11416 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16); 11417 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16); 11418 tcg_res[pass] = tcg_temp_new_i32(); 11419 11420 switch (fpopcode) { 11421 case 0x10: /* FMAXNMP */ 11422 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2, 11423 fpst); 11424 break; 11425 case 0x12: /* FADDP */ 11426 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11427 break; 11428 case 0x16: /* FMAXP */ 11429 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11430 break; 11431 case 0x18: /* FMINNMP */ 11432 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2, 11433 fpst); 11434 break; 11435 case 0x1e: /* FMINP */ 11436 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11437 break; 11438 default: 11439 g_assert_not_reached(); 11440 } 11441 } 11442 11443 for (pass = 0; pass < maxpass; pass++) { 11444 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16); 11445 } 11446 } else { 11447 for (pass = 0; pass < elements; pass++) { 11448 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11449 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11450 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11451 11452 read_vec_element_i32(s, tcg_op1, rn, pass, MO_16); 11453 read_vec_element_i32(s, tcg_op2, rm, pass, MO_16); 11454 11455 switch (fpopcode) { 11456 case 0x0: /* FMAXNM */ 11457 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); 11458 break; 11459 case 0x1: /* FMLA */ 11460 read_vec_element_i32(s, tcg_res, rd, pass, MO_16); 11461 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, 11462 fpst); 11463 break; 11464 case 0x2: /* FADD */ 11465 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); 11466 break; 11467 case 0x3: /* FMULX */ 11468 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst); 11469 break; 11470 case 0x4: /* FCMEQ */ 11471 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11472 break; 11473 case 0x6: /* FMAX */ 11474 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); 11475 break; 11476 case 0x7: /* FRECPS */ 11477 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11478 break; 11479 case 0x8: /* FMINNM */ 11480 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); 11481 break; 11482 case 0x9: /* FMLS */ 11483 /* As usual for ARM, separate negation for fused multiply-add */ 11484 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); 11485 read_vec_element_i32(s, tcg_res, rd, pass, MO_16); 11486 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, 11487 fpst); 11488 break; 11489 case 0xa: /* FSUB */ 11490 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 11491 break; 11492 case 0xe: /* FMIN */ 11493 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); 11494 break; 11495 case 0xf: /* FRSQRTS */ 11496 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11497 break; 11498 case 0x13: /* FMUL */ 11499 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 11500 break; 11501 case 0x14: /* FCMGE */ 11502 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11503 break; 11504 case 0x15: /* FACGE */ 11505 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11506 break; 11507 case 0x17: /* FDIV */ 11508 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); 11509 break; 11510 case 0x1a: /* FABD */ 11511 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 11512 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff); 11513 break; 11514 case 0x1c: /* FCMGT */ 11515 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11516 break; 11517 case 0x1d: /* FACGT */ 11518 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11519 break; 11520 default: 11521 g_assert_not_reached(); 11522 } 11523 11524 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 11525 } 11526 } 11527 11528 clear_vec_high(s, is_q, rd); 11529 } 11530 11531 /* AdvSIMD three same extra 11532 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 11533 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ 11534 * | 0 | Q | U | 0 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd | 11535 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ 11536 */ 11537 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) 11538 { 11539 int rd = extract32(insn, 0, 5); 11540 int rn = extract32(insn, 5, 5); 11541 int opcode = extract32(insn, 11, 4); 11542 int rm = extract32(insn, 16, 5); 11543 int size = extract32(insn, 22, 2); 11544 bool u = extract32(insn, 29, 1); 11545 bool is_q = extract32(insn, 30, 1); 11546 bool feature; 11547 int rot; 11548 11549 switch (u * 16 + opcode) { 11550 case 0x10: /* SQRDMLAH (vector) */ 11551 case 0x11: /* SQRDMLSH (vector) */ 11552 if (size != 1 && size != 2) { 11553 unallocated_encoding(s); 11554 return; 11555 } 11556 feature = dc_isar_feature(aa64_rdm, s); 11557 break; 11558 case 0x02: /* SDOT (vector) */ 11559 case 0x12: /* UDOT (vector) */ 11560 if (size != MO_32) { 11561 unallocated_encoding(s); 11562 return; 11563 } 11564 feature = dc_isar_feature(aa64_dp, s); 11565 break; 11566 case 0x03: /* USDOT */ 11567 if (size != MO_32) { 11568 unallocated_encoding(s); 11569 return; 11570 } 11571 feature = dc_isar_feature(aa64_i8mm, s); 11572 break; 11573 case 0x04: /* SMMLA */ 11574 case 0x14: /* UMMLA */ 11575 case 0x05: /* USMMLA */ 11576 if (!is_q || size != MO_32) { 11577 unallocated_encoding(s); 11578 return; 11579 } 11580 feature = dc_isar_feature(aa64_i8mm, s); 11581 break; 11582 case 0x18: /* FCMLA, #0 */ 11583 case 0x19: /* FCMLA, #90 */ 11584 case 0x1a: /* FCMLA, #180 */ 11585 case 0x1b: /* FCMLA, #270 */ 11586 case 0x1c: /* FCADD, #90 */ 11587 case 0x1e: /* FCADD, #270 */ 11588 if (size == 0 11589 || (size == 1 && !dc_isar_feature(aa64_fp16, s)) 11590 || (size == 3 && !is_q)) { 11591 unallocated_encoding(s); 11592 return; 11593 } 11594 feature = dc_isar_feature(aa64_fcma, s); 11595 break; 11596 case 0x1d: /* BFMMLA */ 11597 if (size != MO_16 || !is_q) { 11598 unallocated_encoding(s); 11599 return; 11600 } 11601 feature = dc_isar_feature(aa64_bf16, s); 11602 break; 11603 case 0x1f: 11604 switch (size) { 11605 case 1: /* BFDOT */ 11606 case 3: /* BFMLAL{B,T} */ 11607 feature = dc_isar_feature(aa64_bf16, s); 11608 break; 11609 default: 11610 unallocated_encoding(s); 11611 return; 11612 } 11613 break; 11614 default: 11615 unallocated_encoding(s); 11616 return; 11617 } 11618 if (!feature) { 11619 unallocated_encoding(s); 11620 return; 11621 } 11622 if (!fp_access_check(s)) { 11623 return; 11624 } 11625 11626 switch (opcode) { 11627 case 0x0: /* SQRDMLAH (vector) */ 11628 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size); 11629 return; 11630 11631 case 0x1: /* SQRDMLSH (vector) */ 11632 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size); 11633 return; 11634 11635 case 0x2: /* SDOT / UDOT */ 11636 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, 11637 u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b); 11638 return; 11639 11640 case 0x3: /* USDOT */ 11641 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b); 11642 return; 11643 11644 case 0x04: /* SMMLA, UMMLA */ 11645 gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, 11646 u ? gen_helper_gvec_ummla_b 11647 : gen_helper_gvec_smmla_b); 11648 return; 11649 case 0x05: /* USMMLA */ 11650 gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b); 11651 return; 11652 11653 case 0x8: /* FCMLA, #0 */ 11654 case 0x9: /* FCMLA, #90 */ 11655 case 0xa: /* FCMLA, #180 */ 11656 case 0xb: /* FCMLA, #270 */ 11657 rot = extract32(opcode, 0, 2); 11658 switch (size) { 11659 case 1: 11660 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot, 11661 gen_helper_gvec_fcmlah); 11662 break; 11663 case 2: 11664 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot, 11665 gen_helper_gvec_fcmlas); 11666 break; 11667 case 3: 11668 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot, 11669 gen_helper_gvec_fcmlad); 11670 break; 11671 default: 11672 g_assert_not_reached(); 11673 } 11674 return; 11675 11676 case 0xc: /* FCADD, #90 */ 11677 case 0xe: /* FCADD, #270 */ 11678 rot = extract32(opcode, 1, 1); 11679 switch (size) { 11680 case 1: 11681 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, 11682 gen_helper_gvec_fcaddh); 11683 break; 11684 case 2: 11685 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, 11686 gen_helper_gvec_fcadds); 11687 break; 11688 case 3: 11689 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, 11690 gen_helper_gvec_fcaddd); 11691 break; 11692 default: 11693 g_assert_not_reached(); 11694 } 11695 return; 11696 11697 case 0xd: /* BFMMLA */ 11698 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla); 11699 return; 11700 case 0xf: 11701 switch (size) { 11702 case 1: /* BFDOT */ 11703 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot); 11704 break; 11705 case 3: /* BFMLAL{B,T} */ 11706 gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q, 11707 gen_helper_gvec_bfmlal); 11708 break; 11709 default: 11710 g_assert_not_reached(); 11711 } 11712 return; 11713 11714 default: 11715 g_assert_not_reached(); 11716 } 11717 } 11718 11719 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, 11720 int size, int rn, int rd) 11721 { 11722 /* Handle 2-reg-misc ops which are widening (so each size element 11723 * in the source becomes a 2*size element in the destination. 11724 * The only instruction like this is FCVTL. 11725 */ 11726 int pass; 11727 11728 if (size == 3) { 11729 /* 32 -> 64 bit fp conversion */ 11730 TCGv_i64 tcg_res[2]; 11731 int srcelt = is_q ? 2 : 0; 11732 11733 for (pass = 0; pass < 2; pass++) { 11734 TCGv_i32 tcg_op = tcg_temp_new_i32(); 11735 tcg_res[pass] = tcg_temp_new_i64(); 11736 11737 read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32); 11738 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env); 11739 } 11740 for (pass = 0; pass < 2; pass++) { 11741 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 11742 } 11743 } else { 11744 /* 16 -> 32 bit fp conversion */ 11745 int srcelt = is_q ? 4 : 0; 11746 TCGv_i32 tcg_res[4]; 11747 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 11748 TCGv_i32 ahp = get_ahp_flag(); 11749 11750 for (pass = 0; pass < 4; pass++) { 11751 tcg_res[pass] = tcg_temp_new_i32(); 11752 11753 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); 11754 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], 11755 fpst, ahp); 11756 } 11757 for (pass = 0; pass < 4; pass++) { 11758 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); 11759 } 11760 } 11761 } 11762 11763 static void handle_rev(DisasContext *s, int opcode, bool u, 11764 bool is_q, int size, int rn, int rd) 11765 { 11766 int op = (opcode << 1) | u; 11767 int opsz = op + size; 11768 int grp_size = 3 - opsz; 11769 int dsize = is_q ? 128 : 64; 11770 int i; 11771 11772 if (opsz >= 3) { 11773 unallocated_encoding(s); 11774 return; 11775 } 11776 11777 if (!fp_access_check(s)) { 11778 return; 11779 } 11780 11781 if (size == 0) { 11782 /* Special case bytes, use bswap op on each group of elements */ 11783 int groups = dsize / (8 << grp_size); 11784 11785 for (i = 0; i < groups; i++) { 11786 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 11787 11788 read_vec_element(s, tcg_tmp, rn, i, grp_size); 11789 switch (grp_size) { 11790 case MO_16: 11791 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 11792 break; 11793 case MO_32: 11794 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 11795 break; 11796 case MO_64: 11797 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp); 11798 break; 11799 default: 11800 g_assert_not_reached(); 11801 } 11802 write_vec_element(s, tcg_tmp, rd, i, grp_size); 11803 } 11804 clear_vec_high(s, is_q, rd); 11805 } else { 11806 int revmask = (1 << grp_size) - 1; 11807 int esize = 8 << size; 11808 int elements = dsize / esize; 11809 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 11810 TCGv_i64 tcg_rd[2]; 11811 11812 for (i = 0; i < 2; i++) { 11813 tcg_rd[i] = tcg_temp_new_i64(); 11814 tcg_gen_movi_i64(tcg_rd[i], 0); 11815 } 11816 11817 for (i = 0; i < elements; i++) { 11818 int e_rev = (i & 0xf) ^ revmask; 11819 int w = (e_rev * esize) / 64; 11820 int o = (e_rev * esize) % 64; 11821 11822 read_vec_element(s, tcg_rn, rn, i, size); 11823 tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize); 11824 } 11825 11826 for (i = 0; i < 2; i++) { 11827 write_vec_element(s, tcg_rd[i], rd, i, MO_64); 11828 } 11829 clear_vec_high(s, true, rd); 11830 } 11831 } 11832 11833 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, 11834 bool is_q, int size, int rn, int rd) 11835 { 11836 /* Implement the pairwise operations from 2-misc: 11837 * SADDLP, UADDLP, SADALP, UADALP. 11838 * These all add pairs of elements in the input to produce a 11839 * double-width result element in the output (possibly accumulating). 11840 */ 11841 bool accum = (opcode == 0x6); 11842 int maxpass = is_q ? 2 : 1; 11843 int pass; 11844 TCGv_i64 tcg_res[2]; 11845 11846 if (size == 2) { 11847 /* 32 + 32 -> 64 op */ 11848 MemOp memop = size + (u ? 0 : MO_SIGN); 11849 11850 for (pass = 0; pass < maxpass; pass++) { 11851 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 11852 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 11853 11854 tcg_res[pass] = tcg_temp_new_i64(); 11855 11856 read_vec_element(s, tcg_op1, rn, pass * 2, memop); 11857 read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop); 11858 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); 11859 if (accum) { 11860 read_vec_element(s, tcg_op1, rd, pass, MO_64); 11861 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1); 11862 } 11863 } 11864 } else { 11865 for (pass = 0; pass < maxpass; pass++) { 11866 TCGv_i64 tcg_op = tcg_temp_new_i64(); 11867 NeonGenOne64OpFn *genfn; 11868 static NeonGenOne64OpFn * const fns[2][2] = { 11869 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 }, 11870 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 }, 11871 }; 11872 11873 genfn = fns[size][u]; 11874 11875 tcg_res[pass] = tcg_temp_new_i64(); 11876 11877 read_vec_element(s, tcg_op, rn, pass, MO_64); 11878 genfn(tcg_res[pass], tcg_op); 11879 11880 if (accum) { 11881 read_vec_element(s, tcg_op, rd, pass, MO_64); 11882 if (size == 0) { 11883 gen_helper_neon_addl_u16(tcg_res[pass], 11884 tcg_res[pass], tcg_op); 11885 } else { 11886 gen_helper_neon_addl_u32(tcg_res[pass], 11887 tcg_res[pass], tcg_op); 11888 } 11889 } 11890 } 11891 } 11892 if (!is_q) { 11893 tcg_res[1] = tcg_constant_i64(0); 11894 } 11895 for (pass = 0; pass < 2; pass++) { 11896 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 11897 } 11898 } 11899 11900 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) 11901 { 11902 /* Implement SHLL and SHLL2 */ 11903 int pass; 11904 int part = is_q ? 2 : 0; 11905 TCGv_i64 tcg_res[2]; 11906 11907 for (pass = 0; pass < 2; pass++) { 11908 static NeonGenWidenFn * const widenfns[3] = { 11909 gen_helper_neon_widen_u8, 11910 gen_helper_neon_widen_u16, 11911 tcg_gen_extu_i32_i64, 11912 }; 11913 NeonGenWidenFn *widenfn = widenfns[size]; 11914 TCGv_i32 tcg_op = tcg_temp_new_i32(); 11915 11916 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32); 11917 tcg_res[pass] = tcg_temp_new_i64(); 11918 widenfn(tcg_res[pass], tcg_op); 11919 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size); 11920 } 11921 11922 for (pass = 0; pass < 2; pass++) { 11923 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 11924 } 11925 } 11926 11927 /* AdvSIMD two reg misc 11928 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 11929 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 11930 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 11931 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 11932 */ 11933 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) 11934 { 11935 int size = extract32(insn, 22, 2); 11936 int opcode = extract32(insn, 12, 5); 11937 bool u = extract32(insn, 29, 1); 11938 bool is_q = extract32(insn, 30, 1); 11939 int rn = extract32(insn, 5, 5); 11940 int rd = extract32(insn, 0, 5); 11941 bool need_fpstatus = false; 11942 int rmode = -1; 11943 TCGv_i32 tcg_rmode; 11944 TCGv_ptr tcg_fpstatus; 11945 11946 switch (opcode) { 11947 case 0x0: /* REV64, REV32 */ 11948 case 0x1: /* REV16 */ 11949 handle_rev(s, opcode, u, is_q, size, rn, rd); 11950 return; 11951 case 0x5: /* CNT, NOT, RBIT */ 11952 if (u && size == 0) { 11953 /* NOT */ 11954 break; 11955 } else if (u && size == 1) { 11956 /* RBIT */ 11957 break; 11958 } else if (!u && size == 0) { 11959 /* CNT */ 11960 break; 11961 } 11962 unallocated_encoding(s); 11963 return; 11964 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */ 11965 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */ 11966 if (size == 3) { 11967 unallocated_encoding(s); 11968 return; 11969 } 11970 if (!fp_access_check(s)) { 11971 return; 11972 } 11973 11974 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd); 11975 return; 11976 case 0x4: /* CLS, CLZ */ 11977 if (size == 3) { 11978 unallocated_encoding(s); 11979 return; 11980 } 11981 break; 11982 case 0x2: /* SADDLP, UADDLP */ 11983 case 0x6: /* SADALP, UADALP */ 11984 if (size == 3) { 11985 unallocated_encoding(s); 11986 return; 11987 } 11988 if (!fp_access_check(s)) { 11989 return; 11990 } 11991 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd); 11992 return; 11993 case 0x13: /* SHLL, SHLL2 */ 11994 if (u == 0 || size == 3) { 11995 unallocated_encoding(s); 11996 return; 11997 } 11998 if (!fp_access_check(s)) { 11999 return; 12000 } 12001 handle_shll(s, is_q, size, rn, rd); 12002 return; 12003 case 0xa: /* CMLT */ 12004 if (u == 1) { 12005 unallocated_encoding(s); 12006 return; 12007 } 12008 /* fall through */ 12009 case 0x8: /* CMGT, CMGE */ 12010 case 0x9: /* CMEQ, CMLE */ 12011 case 0xb: /* ABS, NEG */ 12012 if (size == 3 && !is_q) { 12013 unallocated_encoding(s); 12014 return; 12015 } 12016 break; 12017 case 0x3: /* SUQADD, USQADD */ 12018 if (size == 3 && !is_q) { 12019 unallocated_encoding(s); 12020 return; 12021 } 12022 if (!fp_access_check(s)) { 12023 return; 12024 } 12025 handle_2misc_satacc(s, false, u, is_q, size, rn, rd); 12026 return; 12027 case 0x7: /* SQABS, SQNEG */ 12028 if (size == 3 && !is_q) { 12029 unallocated_encoding(s); 12030 return; 12031 } 12032 break; 12033 case 0xc ... 0xf: 12034 case 0x16 ... 0x1f: 12035 { 12036 /* Floating point: U, size[1] and opcode indicate operation; 12037 * size[0] indicates single or double precision. 12038 */ 12039 int is_double = extract32(size, 0, 1); 12040 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 12041 size = is_double ? 3 : 2; 12042 switch (opcode) { 12043 case 0x2f: /* FABS */ 12044 case 0x6f: /* FNEG */ 12045 if (size == 3 && !is_q) { 12046 unallocated_encoding(s); 12047 return; 12048 } 12049 break; 12050 case 0x1d: /* SCVTF */ 12051 case 0x5d: /* UCVTF */ 12052 { 12053 bool is_signed = (opcode == 0x1d) ? true : false; 12054 int elements = is_double ? 2 : is_q ? 4 : 2; 12055 if (is_double && !is_q) { 12056 unallocated_encoding(s); 12057 return; 12058 } 12059 if (!fp_access_check(s)) { 12060 return; 12061 } 12062 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size); 12063 return; 12064 } 12065 case 0x2c: /* FCMGT (zero) */ 12066 case 0x2d: /* FCMEQ (zero) */ 12067 case 0x2e: /* FCMLT (zero) */ 12068 case 0x6c: /* FCMGE (zero) */ 12069 case 0x6d: /* FCMLE (zero) */ 12070 if (size == 3 && !is_q) { 12071 unallocated_encoding(s); 12072 return; 12073 } 12074 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd); 12075 return; 12076 case 0x7f: /* FSQRT */ 12077 if (size == 3 && !is_q) { 12078 unallocated_encoding(s); 12079 return; 12080 } 12081 break; 12082 case 0x1a: /* FCVTNS */ 12083 case 0x1b: /* FCVTMS */ 12084 case 0x3a: /* FCVTPS */ 12085 case 0x3b: /* FCVTZS */ 12086 case 0x5a: /* FCVTNU */ 12087 case 0x5b: /* FCVTMU */ 12088 case 0x7a: /* FCVTPU */ 12089 case 0x7b: /* FCVTZU */ 12090 need_fpstatus = true; 12091 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 12092 if (size == 3 && !is_q) { 12093 unallocated_encoding(s); 12094 return; 12095 } 12096 break; 12097 case 0x5c: /* FCVTAU */ 12098 case 0x1c: /* FCVTAS */ 12099 need_fpstatus = true; 12100 rmode = FPROUNDING_TIEAWAY; 12101 if (size == 3 && !is_q) { 12102 unallocated_encoding(s); 12103 return; 12104 } 12105 break; 12106 case 0x3c: /* URECPE */ 12107 if (size == 3) { 12108 unallocated_encoding(s); 12109 return; 12110 } 12111 /* fall through */ 12112 case 0x3d: /* FRECPE */ 12113 case 0x7d: /* FRSQRTE */ 12114 if (size == 3 && !is_q) { 12115 unallocated_encoding(s); 12116 return; 12117 } 12118 if (!fp_access_check(s)) { 12119 return; 12120 } 12121 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd); 12122 return; 12123 case 0x56: /* FCVTXN, FCVTXN2 */ 12124 if (size == 2) { 12125 unallocated_encoding(s); 12126 return; 12127 } 12128 /* fall through */ 12129 case 0x16: /* FCVTN, FCVTN2 */ 12130 /* handle_2misc_narrow does a 2*size -> size operation, but these 12131 * instructions encode the source size rather than dest size. 12132 */ 12133 if (!fp_access_check(s)) { 12134 return; 12135 } 12136 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 12137 return; 12138 case 0x36: /* BFCVTN, BFCVTN2 */ 12139 if (!dc_isar_feature(aa64_bf16, s) || size != 2) { 12140 unallocated_encoding(s); 12141 return; 12142 } 12143 if (!fp_access_check(s)) { 12144 return; 12145 } 12146 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 12147 return; 12148 case 0x17: /* FCVTL, FCVTL2 */ 12149 if (!fp_access_check(s)) { 12150 return; 12151 } 12152 handle_2misc_widening(s, opcode, is_q, size, rn, rd); 12153 return; 12154 case 0x18: /* FRINTN */ 12155 case 0x19: /* FRINTM */ 12156 case 0x38: /* FRINTP */ 12157 case 0x39: /* FRINTZ */ 12158 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 12159 /* fall through */ 12160 case 0x59: /* FRINTX */ 12161 case 0x79: /* FRINTI */ 12162 need_fpstatus = true; 12163 if (size == 3 && !is_q) { 12164 unallocated_encoding(s); 12165 return; 12166 } 12167 break; 12168 case 0x58: /* FRINTA */ 12169 rmode = FPROUNDING_TIEAWAY; 12170 need_fpstatus = true; 12171 if (size == 3 && !is_q) { 12172 unallocated_encoding(s); 12173 return; 12174 } 12175 break; 12176 case 0x7c: /* URSQRTE */ 12177 if (size == 3) { 12178 unallocated_encoding(s); 12179 return; 12180 } 12181 break; 12182 case 0x1e: /* FRINT32Z */ 12183 case 0x1f: /* FRINT64Z */ 12184 rmode = FPROUNDING_ZERO; 12185 /* fall through */ 12186 case 0x5e: /* FRINT32X */ 12187 case 0x5f: /* FRINT64X */ 12188 need_fpstatus = true; 12189 if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) { 12190 unallocated_encoding(s); 12191 return; 12192 } 12193 break; 12194 default: 12195 unallocated_encoding(s); 12196 return; 12197 } 12198 break; 12199 } 12200 default: 12201 unallocated_encoding(s); 12202 return; 12203 } 12204 12205 if (!fp_access_check(s)) { 12206 return; 12207 } 12208 12209 if (need_fpstatus || rmode >= 0) { 12210 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 12211 } else { 12212 tcg_fpstatus = NULL; 12213 } 12214 if (rmode >= 0) { 12215 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 12216 } else { 12217 tcg_rmode = NULL; 12218 } 12219 12220 switch (opcode) { 12221 case 0x5: 12222 if (u && size == 0) { /* NOT */ 12223 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0); 12224 return; 12225 } 12226 break; 12227 case 0x8: /* CMGT, CMGE */ 12228 if (u) { 12229 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size); 12230 } else { 12231 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size); 12232 } 12233 return; 12234 case 0x9: /* CMEQ, CMLE */ 12235 if (u) { 12236 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size); 12237 } else { 12238 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size); 12239 } 12240 return; 12241 case 0xa: /* CMLT */ 12242 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size); 12243 return; 12244 case 0xb: 12245 if (u) { /* ABS, NEG */ 12246 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size); 12247 } else { 12248 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size); 12249 } 12250 return; 12251 } 12252 12253 if (size == 3) { 12254 /* All 64-bit element operations can be shared with scalar 2misc */ 12255 int pass; 12256 12257 /* Coverity claims (size == 3 && !is_q) has been eliminated 12258 * from all paths leading to here. 12259 */ 12260 tcg_debug_assert(is_q); 12261 for (pass = 0; pass < 2; pass++) { 12262 TCGv_i64 tcg_op = tcg_temp_new_i64(); 12263 TCGv_i64 tcg_res = tcg_temp_new_i64(); 12264 12265 read_vec_element(s, tcg_op, rn, pass, MO_64); 12266 12267 handle_2misc_64(s, opcode, u, tcg_res, tcg_op, 12268 tcg_rmode, tcg_fpstatus); 12269 12270 write_vec_element(s, tcg_res, rd, pass, MO_64); 12271 } 12272 } else { 12273 int pass; 12274 12275 for (pass = 0; pass < (is_q ? 4 : 2); pass++) { 12276 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12277 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12278 12279 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 12280 12281 if (size == 2) { 12282 /* Special cases for 32 bit elements */ 12283 switch (opcode) { 12284 case 0x4: /* CLS */ 12285 if (u) { 12286 tcg_gen_clzi_i32(tcg_res, tcg_op, 32); 12287 } else { 12288 tcg_gen_clrsb_i32(tcg_res, tcg_op); 12289 } 12290 break; 12291 case 0x7: /* SQABS, SQNEG */ 12292 if (u) { 12293 gen_helper_neon_qneg_s32(tcg_res, tcg_env, tcg_op); 12294 } else { 12295 gen_helper_neon_qabs_s32(tcg_res, tcg_env, tcg_op); 12296 } 12297 break; 12298 case 0x2f: /* FABS */ 12299 gen_helper_vfp_abss(tcg_res, tcg_op); 12300 break; 12301 case 0x6f: /* FNEG */ 12302 gen_helper_vfp_negs(tcg_res, tcg_op); 12303 break; 12304 case 0x7f: /* FSQRT */ 12305 gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); 12306 break; 12307 case 0x1a: /* FCVTNS */ 12308 case 0x1b: /* FCVTMS */ 12309 case 0x1c: /* FCVTAS */ 12310 case 0x3a: /* FCVTPS */ 12311 case 0x3b: /* FCVTZS */ 12312 gen_helper_vfp_tosls(tcg_res, tcg_op, 12313 tcg_constant_i32(0), tcg_fpstatus); 12314 break; 12315 case 0x5a: /* FCVTNU */ 12316 case 0x5b: /* FCVTMU */ 12317 case 0x5c: /* FCVTAU */ 12318 case 0x7a: /* FCVTPU */ 12319 case 0x7b: /* FCVTZU */ 12320 gen_helper_vfp_touls(tcg_res, tcg_op, 12321 tcg_constant_i32(0), tcg_fpstatus); 12322 break; 12323 case 0x18: /* FRINTN */ 12324 case 0x19: /* FRINTM */ 12325 case 0x38: /* FRINTP */ 12326 case 0x39: /* FRINTZ */ 12327 case 0x58: /* FRINTA */ 12328 case 0x79: /* FRINTI */ 12329 gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus); 12330 break; 12331 case 0x59: /* FRINTX */ 12332 gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus); 12333 break; 12334 case 0x7c: /* URSQRTE */ 12335 gen_helper_rsqrte_u32(tcg_res, tcg_op); 12336 break; 12337 case 0x1e: /* FRINT32Z */ 12338 case 0x5e: /* FRINT32X */ 12339 gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus); 12340 break; 12341 case 0x1f: /* FRINT64Z */ 12342 case 0x5f: /* FRINT64X */ 12343 gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus); 12344 break; 12345 default: 12346 g_assert_not_reached(); 12347 } 12348 } else { 12349 /* Use helpers for 8 and 16 bit elements */ 12350 switch (opcode) { 12351 case 0x5: /* CNT, RBIT */ 12352 /* For these two insns size is part of the opcode specifier 12353 * (handled earlier); they always operate on byte elements. 12354 */ 12355 if (u) { 12356 gen_helper_neon_rbit_u8(tcg_res, tcg_op); 12357 } else { 12358 gen_helper_neon_cnt_u8(tcg_res, tcg_op); 12359 } 12360 break; 12361 case 0x7: /* SQABS, SQNEG */ 12362 { 12363 NeonGenOneOpEnvFn *genfn; 12364 static NeonGenOneOpEnvFn * const fns[2][2] = { 12365 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 12366 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 12367 }; 12368 genfn = fns[size][u]; 12369 genfn(tcg_res, tcg_env, tcg_op); 12370 break; 12371 } 12372 case 0x4: /* CLS, CLZ */ 12373 if (u) { 12374 if (size == 0) { 12375 gen_helper_neon_clz_u8(tcg_res, tcg_op); 12376 } else { 12377 gen_helper_neon_clz_u16(tcg_res, tcg_op); 12378 } 12379 } else { 12380 if (size == 0) { 12381 gen_helper_neon_cls_s8(tcg_res, tcg_op); 12382 } else { 12383 gen_helper_neon_cls_s16(tcg_res, tcg_op); 12384 } 12385 } 12386 break; 12387 default: 12388 g_assert_not_reached(); 12389 } 12390 } 12391 12392 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 12393 } 12394 } 12395 clear_vec_high(s, is_q, rd); 12396 12397 if (tcg_rmode) { 12398 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 12399 } 12400 } 12401 12402 /* AdvSIMD [scalar] two register miscellaneous (FP16) 12403 * 12404 * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0 12405 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 12406 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd | 12407 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 12408 * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00 12409 * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800 12410 * 12411 * This actually covers two groups where scalar access is governed by 12412 * bit 28. A bunch of the instructions (float to integral) only exist 12413 * in the vector form and are un-allocated for the scalar decode. Also 12414 * in the scalar decode Q is always 1. 12415 */ 12416 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) 12417 { 12418 int fpop, opcode, a, u; 12419 int rn, rd; 12420 bool is_q; 12421 bool is_scalar; 12422 bool only_in_vector = false; 12423 12424 int pass; 12425 TCGv_i32 tcg_rmode = NULL; 12426 TCGv_ptr tcg_fpstatus = NULL; 12427 bool need_fpst = true; 12428 int rmode = -1; 12429 12430 if (!dc_isar_feature(aa64_fp16, s)) { 12431 unallocated_encoding(s); 12432 return; 12433 } 12434 12435 rd = extract32(insn, 0, 5); 12436 rn = extract32(insn, 5, 5); 12437 12438 a = extract32(insn, 23, 1); 12439 u = extract32(insn, 29, 1); 12440 is_scalar = extract32(insn, 28, 1); 12441 is_q = extract32(insn, 30, 1); 12442 12443 opcode = extract32(insn, 12, 5); 12444 fpop = deposit32(opcode, 5, 1, a); 12445 fpop = deposit32(fpop, 6, 1, u); 12446 12447 switch (fpop) { 12448 case 0x1d: /* SCVTF */ 12449 case 0x5d: /* UCVTF */ 12450 { 12451 int elements; 12452 12453 if (is_scalar) { 12454 elements = 1; 12455 } else { 12456 elements = (is_q ? 8 : 4); 12457 } 12458 12459 if (!fp_access_check(s)) { 12460 return; 12461 } 12462 handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16); 12463 return; 12464 } 12465 break; 12466 case 0x2c: /* FCMGT (zero) */ 12467 case 0x2d: /* FCMEQ (zero) */ 12468 case 0x2e: /* FCMLT (zero) */ 12469 case 0x6c: /* FCMGE (zero) */ 12470 case 0x6d: /* FCMLE (zero) */ 12471 handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd); 12472 return; 12473 case 0x3d: /* FRECPE */ 12474 case 0x3f: /* FRECPX */ 12475 break; 12476 case 0x18: /* FRINTN */ 12477 only_in_vector = true; 12478 rmode = FPROUNDING_TIEEVEN; 12479 break; 12480 case 0x19: /* FRINTM */ 12481 only_in_vector = true; 12482 rmode = FPROUNDING_NEGINF; 12483 break; 12484 case 0x38: /* FRINTP */ 12485 only_in_vector = true; 12486 rmode = FPROUNDING_POSINF; 12487 break; 12488 case 0x39: /* FRINTZ */ 12489 only_in_vector = true; 12490 rmode = FPROUNDING_ZERO; 12491 break; 12492 case 0x58: /* FRINTA */ 12493 only_in_vector = true; 12494 rmode = FPROUNDING_TIEAWAY; 12495 break; 12496 case 0x59: /* FRINTX */ 12497 case 0x79: /* FRINTI */ 12498 only_in_vector = true; 12499 /* current rounding mode */ 12500 break; 12501 case 0x1a: /* FCVTNS */ 12502 rmode = FPROUNDING_TIEEVEN; 12503 break; 12504 case 0x1b: /* FCVTMS */ 12505 rmode = FPROUNDING_NEGINF; 12506 break; 12507 case 0x1c: /* FCVTAS */ 12508 rmode = FPROUNDING_TIEAWAY; 12509 break; 12510 case 0x3a: /* FCVTPS */ 12511 rmode = FPROUNDING_POSINF; 12512 break; 12513 case 0x3b: /* FCVTZS */ 12514 rmode = FPROUNDING_ZERO; 12515 break; 12516 case 0x5a: /* FCVTNU */ 12517 rmode = FPROUNDING_TIEEVEN; 12518 break; 12519 case 0x5b: /* FCVTMU */ 12520 rmode = FPROUNDING_NEGINF; 12521 break; 12522 case 0x5c: /* FCVTAU */ 12523 rmode = FPROUNDING_TIEAWAY; 12524 break; 12525 case 0x7a: /* FCVTPU */ 12526 rmode = FPROUNDING_POSINF; 12527 break; 12528 case 0x7b: /* FCVTZU */ 12529 rmode = FPROUNDING_ZERO; 12530 break; 12531 case 0x2f: /* FABS */ 12532 case 0x6f: /* FNEG */ 12533 need_fpst = false; 12534 break; 12535 case 0x7d: /* FRSQRTE */ 12536 case 0x7f: /* FSQRT (vector) */ 12537 break; 12538 default: 12539 unallocated_encoding(s); 12540 return; 12541 } 12542 12543 12544 /* Check additional constraints for the scalar encoding */ 12545 if (is_scalar) { 12546 if (!is_q) { 12547 unallocated_encoding(s); 12548 return; 12549 } 12550 /* FRINTxx is only in the vector form */ 12551 if (only_in_vector) { 12552 unallocated_encoding(s); 12553 return; 12554 } 12555 } 12556 12557 if (!fp_access_check(s)) { 12558 return; 12559 } 12560 12561 if (rmode >= 0 || need_fpst) { 12562 tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16); 12563 } 12564 12565 if (rmode >= 0) { 12566 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 12567 } 12568 12569 if (is_scalar) { 12570 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 12571 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12572 12573 switch (fpop) { 12574 case 0x1a: /* FCVTNS */ 12575 case 0x1b: /* FCVTMS */ 12576 case 0x1c: /* FCVTAS */ 12577 case 0x3a: /* FCVTPS */ 12578 case 0x3b: /* FCVTZS */ 12579 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 12580 break; 12581 case 0x3d: /* FRECPE */ 12582 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 12583 break; 12584 case 0x3f: /* FRECPX */ 12585 gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus); 12586 break; 12587 case 0x5a: /* FCVTNU */ 12588 case 0x5b: /* FCVTMU */ 12589 case 0x5c: /* FCVTAU */ 12590 case 0x7a: /* FCVTPU */ 12591 case 0x7b: /* FCVTZU */ 12592 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 12593 break; 12594 case 0x6f: /* FNEG */ 12595 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 12596 break; 12597 case 0x7d: /* FRSQRTE */ 12598 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 12599 break; 12600 default: 12601 g_assert_not_reached(); 12602 } 12603 12604 /* limit any sign extension going on */ 12605 tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff); 12606 write_fp_sreg(s, rd, tcg_res); 12607 } else { 12608 for (pass = 0; pass < (is_q ? 8 : 4); pass++) { 12609 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12610 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12611 12612 read_vec_element_i32(s, tcg_op, rn, pass, MO_16); 12613 12614 switch (fpop) { 12615 case 0x1a: /* FCVTNS */ 12616 case 0x1b: /* FCVTMS */ 12617 case 0x1c: /* FCVTAS */ 12618 case 0x3a: /* FCVTPS */ 12619 case 0x3b: /* FCVTZS */ 12620 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 12621 break; 12622 case 0x3d: /* FRECPE */ 12623 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 12624 break; 12625 case 0x5a: /* FCVTNU */ 12626 case 0x5b: /* FCVTMU */ 12627 case 0x5c: /* FCVTAU */ 12628 case 0x7a: /* FCVTPU */ 12629 case 0x7b: /* FCVTZU */ 12630 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 12631 break; 12632 case 0x18: /* FRINTN */ 12633 case 0x19: /* FRINTM */ 12634 case 0x38: /* FRINTP */ 12635 case 0x39: /* FRINTZ */ 12636 case 0x58: /* FRINTA */ 12637 case 0x79: /* FRINTI */ 12638 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus); 12639 break; 12640 case 0x59: /* FRINTX */ 12641 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus); 12642 break; 12643 case 0x2f: /* FABS */ 12644 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); 12645 break; 12646 case 0x6f: /* FNEG */ 12647 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 12648 break; 12649 case 0x7d: /* FRSQRTE */ 12650 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 12651 break; 12652 case 0x7f: /* FSQRT */ 12653 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus); 12654 break; 12655 default: 12656 g_assert_not_reached(); 12657 } 12658 12659 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 12660 } 12661 12662 clear_vec_high(s, is_q, rd); 12663 } 12664 12665 if (tcg_rmode) { 12666 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 12667 } 12668 } 12669 12670 /* AdvSIMD scalar x indexed element 12671 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 12672 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ 12673 * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | 12674 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ 12675 * AdvSIMD vector x indexed element 12676 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 12677 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ 12678 * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | 12679 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ 12680 */ 12681 static void disas_simd_indexed(DisasContext *s, uint32_t insn) 12682 { 12683 /* This encoding has two kinds of instruction: 12684 * normal, where we perform elt x idxelt => elt for each 12685 * element in the vector 12686 * long, where we perform elt x idxelt and generate a result of 12687 * double the width of the input element 12688 * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs). 12689 */ 12690 bool is_scalar = extract32(insn, 28, 1); 12691 bool is_q = extract32(insn, 30, 1); 12692 bool u = extract32(insn, 29, 1); 12693 int size = extract32(insn, 22, 2); 12694 int l = extract32(insn, 21, 1); 12695 int m = extract32(insn, 20, 1); 12696 /* Note that the Rm field here is only 4 bits, not 5 as it usually is */ 12697 int rm = extract32(insn, 16, 4); 12698 int opcode = extract32(insn, 12, 4); 12699 int h = extract32(insn, 11, 1); 12700 int rn = extract32(insn, 5, 5); 12701 int rd = extract32(insn, 0, 5); 12702 bool is_long = false; 12703 int is_fp = 0; 12704 bool is_fp16 = false; 12705 int index; 12706 TCGv_ptr fpst; 12707 12708 switch (16 * u + opcode) { 12709 case 0x08: /* MUL */ 12710 case 0x10: /* MLA */ 12711 case 0x14: /* MLS */ 12712 if (is_scalar) { 12713 unallocated_encoding(s); 12714 return; 12715 } 12716 break; 12717 case 0x02: /* SMLAL, SMLAL2 */ 12718 case 0x12: /* UMLAL, UMLAL2 */ 12719 case 0x06: /* SMLSL, SMLSL2 */ 12720 case 0x16: /* UMLSL, UMLSL2 */ 12721 case 0x0a: /* SMULL, SMULL2 */ 12722 case 0x1a: /* UMULL, UMULL2 */ 12723 if (is_scalar) { 12724 unallocated_encoding(s); 12725 return; 12726 } 12727 is_long = true; 12728 break; 12729 case 0x03: /* SQDMLAL, SQDMLAL2 */ 12730 case 0x07: /* SQDMLSL, SQDMLSL2 */ 12731 case 0x0b: /* SQDMULL, SQDMULL2 */ 12732 is_long = true; 12733 break; 12734 case 0x0c: /* SQDMULH */ 12735 case 0x0d: /* SQRDMULH */ 12736 break; 12737 case 0x01: /* FMLA */ 12738 case 0x05: /* FMLS */ 12739 case 0x09: /* FMUL */ 12740 case 0x19: /* FMULX */ 12741 is_fp = 1; 12742 break; 12743 case 0x1d: /* SQRDMLAH */ 12744 case 0x1f: /* SQRDMLSH */ 12745 if (!dc_isar_feature(aa64_rdm, s)) { 12746 unallocated_encoding(s); 12747 return; 12748 } 12749 break; 12750 case 0x0e: /* SDOT */ 12751 case 0x1e: /* UDOT */ 12752 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) { 12753 unallocated_encoding(s); 12754 return; 12755 } 12756 break; 12757 case 0x0f: 12758 switch (size) { 12759 case 0: /* SUDOT */ 12760 case 2: /* USDOT */ 12761 if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) { 12762 unallocated_encoding(s); 12763 return; 12764 } 12765 size = MO_32; 12766 break; 12767 case 1: /* BFDOT */ 12768 if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { 12769 unallocated_encoding(s); 12770 return; 12771 } 12772 size = MO_32; 12773 break; 12774 case 3: /* BFMLAL{B,T} */ 12775 if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { 12776 unallocated_encoding(s); 12777 return; 12778 } 12779 /* can't set is_fp without other incorrect size checks */ 12780 size = MO_16; 12781 break; 12782 default: 12783 unallocated_encoding(s); 12784 return; 12785 } 12786 break; 12787 case 0x11: /* FCMLA #0 */ 12788 case 0x13: /* FCMLA #90 */ 12789 case 0x15: /* FCMLA #180 */ 12790 case 0x17: /* FCMLA #270 */ 12791 if (is_scalar || !dc_isar_feature(aa64_fcma, s)) { 12792 unallocated_encoding(s); 12793 return; 12794 } 12795 is_fp = 2; 12796 break; 12797 case 0x00: /* FMLAL */ 12798 case 0x04: /* FMLSL */ 12799 case 0x18: /* FMLAL2 */ 12800 case 0x1c: /* FMLSL2 */ 12801 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) { 12802 unallocated_encoding(s); 12803 return; 12804 } 12805 size = MO_16; 12806 /* is_fp, but we pass tcg_env not fp_status. */ 12807 break; 12808 default: 12809 unallocated_encoding(s); 12810 return; 12811 } 12812 12813 switch (is_fp) { 12814 case 1: /* normal fp */ 12815 /* convert insn encoded size to MemOp size */ 12816 switch (size) { 12817 case 0: /* half-precision */ 12818 size = MO_16; 12819 is_fp16 = true; 12820 break; 12821 case MO_32: /* single precision */ 12822 case MO_64: /* double precision */ 12823 break; 12824 default: 12825 unallocated_encoding(s); 12826 return; 12827 } 12828 break; 12829 12830 case 2: /* complex fp */ 12831 /* Each indexable element is a complex pair. */ 12832 size += 1; 12833 switch (size) { 12834 case MO_32: 12835 if (h && !is_q) { 12836 unallocated_encoding(s); 12837 return; 12838 } 12839 is_fp16 = true; 12840 break; 12841 case MO_64: 12842 break; 12843 default: 12844 unallocated_encoding(s); 12845 return; 12846 } 12847 break; 12848 12849 default: /* integer */ 12850 switch (size) { 12851 case MO_8: 12852 case MO_64: 12853 unallocated_encoding(s); 12854 return; 12855 } 12856 break; 12857 } 12858 if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) { 12859 unallocated_encoding(s); 12860 return; 12861 } 12862 12863 /* Given MemOp size, adjust register and indexing. */ 12864 switch (size) { 12865 case MO_16: 12866 index = h << 2 | l << 1 | m; 12867 break; 12868 case MO_32: 12869 index = h << 1 | l; 12870 rm |= m << 4; 12871 break; 12872 case MO_64: 12873 if (l || !is_q) { 12874 unallocated_encoding(s); 12875 return; 12876 } 12877 index = h; 12878 rm |= m << 4; 12879 break; 12880 default: 12881 g_assert_not_reached(); 12882 } 12883 12884 if (!fp_access_check(s)) { 12885 return; 12886 } 12887 12888 if (is_fp) { 12889 fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 12890 } else { 12891 fpst = NULL; 12892 } 12893 12894 switch (16 * u + opcode) { 12895 case 0x0e: /* SDOT */ 12896 case 0x1e: /* UDOT */ 12897 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 12898 u ? gen_helper_gvec_udot_idx_b 12899 : gen_helper_gvec_sdot_idx_b); 12900 return; 12901 case 0x0f: 12902 switch (extract32(insn, 22, 2)) { 12903 case 0: /* SUDOT */ 12904 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 12905 gen_helper_gvec_sudot_idx_b); 12906 return; 12907 case 1: /* BFDOT */ 12908 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 12909 gen_helper_gvec_bfdot_idx); 12910 return; 12911 case 2: /* USDOT */ 12912 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 12913 gen_helper_gvec_usdot_idx_b); 12914 return; 12915 case 3: /* BFMLAL{B,T} */ 12916 gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q, 12917 gen_helper_gvec_bfmlal_idx); 12918 return; 12919 } 12920 g_assert_not_reached(); 12921 case 0x11: /* FCMLA #0 */ 12922 case 0x13: /* FCMLA #90 */ 12923 case 0x15: /* FCMLA #180 */ 12924 case 0x17: /* FCMLA #270 */ 12925 { 12926 int rot = extract32(insn, 13, 2); 12927 int data = (index << 2) | rot; 12928 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 12929 vec_full_reg_offset(s, rn), 12930 vec_full_reg_offset(s, rm), 12931 vec_full_reg_offset(s, rd), fpst, 12932 is_q ? 16 : 8, vec_full_reg_size(s), data, 12933 size == MO_64 12934 ? gen_helper_gvec_fcmlas_idx 12935 : gen_helper_gvec_fcmlah_idx); 12936 } 12937 return; 12938 12939 case 0x00: /* FMLAL */ 12940 case 0x04: /* FMLSL */ 12941 case 0x18: /* FMLAL2 */ 12942 case 0x1c: /* FMLSL2 */ 12943 { 12944 int is_s = extract32(opcode, 2, 1); 12945 int is_2 = u; 12946 int data = (index << 2) | (is_2 << 1) | is_s; 12947 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 12948 vec_full_reg_offset(s, rn), 12949 vec_full_reg_offset(s, rm), tcg_env, 12950 is_q ? 16 : 8, vec_full_reg_size(s), 12951 data, gen_helper_gvec_fmlal_idx_a64); 12952 } 12953 return; 12954 12955 case 0x08: /* MUL */ 12956 if (!is_long && !is_scalar) { 12957 static gen_helper_gvec_3 * const fns[3] = { 12958 gen_helper_gvec_mul_idx_h, 12959 gen_helper_gvec_mul_idx_s, 12960 gen_helper_gvec_mul_idx_d, 12961 }; 12962 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 12963 vec_full_reg_offset(s, rn), 12964 vec_full_reg_offset(s, rm), 12965 is_q ? 16 : 8, vec_full_reg_size(s), 12966 index, fns[size - 1]); 12967 return; 12968 } 12969 break; 12970 12971 case 0x10: /* MLA */ 12972 if (!is_long && !is_scalar) { 12973 static gen_helper_gvec_4 * const fns[3] = { 12974 gen_helper_gvec_mla_idx_h, 12975 gen_helper_gvec_mla_idx_s, 12976 gen_helper_gvec_mla_idx_d, 12977 }; 12978 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 12979 vec_full_reg_offset(s, rn), 12980 vec_full_reg_offset(s, rm), 12981 vec_full_reg_offset(s, rd), 12982 is_q ? 16 : 8, vec_full_reg_size(s), 12983 index, fns[size - 1]); 12984 return; 12985 } 12986 break; 12987 12988 case 0x14: /* MLS */ 12989 if (!is_long && !is_scalar) { 12990 static gen_helper_gvec_4 * const fns[3] = { 12991 gen_helper_gvec_mls_idx_h, 12992 gen_helper_gvec_mls_idx_s, 12993 gen_helper_gvec_mls_idx_d, 12994 }; 12995 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 12996 vec_full_reg_offset(s, rn), 12997 vec_full_reg_offset(s, rm), 12998 vec_full_reg_offset(s, rd), 12999 is_q ? 16 : 8, vec_full_reg_size(s), 13000 index, fns[size - 1]); 13001 return; 13002 } 13003 break; 13004 } 13005 13006 if (size == 3) { 13007 TCGv_i64 tcg_idx = tcg_temp_new_i64(); 13008 int pass; 13009 13010 assert(is_fp && is_q && !is_long); 13011 13012 read_vec_element(s, tcg_idx, rm, index, MO_64); 13013 13014 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 13015 TCGv_i64 tcg_op = tcg_temp_new_i64(); 13016 TCGv_i64 tcg_res = tcg_temp_new_i64(); 13017 13018 read_vec_element(s, tcg_op, rn, pass, MO_64); 13019 13020 switch (16 * u + opcode) { 13021 case 0x05: /* FMLS */ 13022 /* As usual for ARM, separate negation for fused multiply-add */ 13023 gen_helper_vfp_negd(tcg_op, tcg_op); 13024 /* fall through */ 13025 case 0x01: /* FMLA */ 13026 read_vec_element(s, tcg_res, rd, pass, MO_64); 13027 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst); 13028 break; 13029 case 0x09: /* FMUL */ 13030 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst); 13031 break; 13032 case 0x19: /* FMULX */ 13033 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst); 13034 break; 13035 default: 13036 g_assert_not_reached(); 13037 } 13038 13039 write_vec_element(s, tcg_res, rd, pass, MO_64); 13040 } 13041 13042 clear_vec_high(s, !is_scalar, rd); 13043 } else if (!is_long) { 13044 /* 32 bit floating point, or 16 or 32 bit integer. 13045 * For the 16 bit scalar case we use the usual Neon helpers and 13046 * rely on the fact that 0 op 0 == 0 with no side effects. 13047 */ 13048 TCGv_i32 tcg_idx = tcg_temp_new_i32(); 13049 int pass, maxpasses; 13050 13051 if (is_scalar) { 13052 maxpasses = 1; 13053 } else { 13054 maxpasses = is_q ? 4 : 2; 13055 } 13056 13057 read_vec_element_i32(s, tcg_idx, rm, index, size); 13058 13059 if (size == 1 && !is_scalar) { 13060 /* The simplest way to handle the 16x16 indexed ops is to duplicate 13061 * the index into both halves of the 32 bit tcg_idx and then use 13062 * the usual Neon helpers. 13063 */ 13064 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); 13065 } 13066 13067 for (pass = 0; pass < maxpasses; pass++) { 13068 TCGv_i32 tcg_op = tcg_temp_new_i32(); 13069 TCGv_i32 tcg_res = tcg_temp_new_i32(); 13070 13071 read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32); 13072 13073 switch (16 * u + opcode) { 13074 case 0x08: /* MUL */ 13075 case 0x10: /* MLA */ 13076 case 0x14: /* MLS */ 13077 { 13078 static NeonGenTwoOpFn * const fns[2][2] = { 13079 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, 13080 { tcg_gen_add_i32, tcg_gen_sub_i32 }, 13081 }; 13082 NeonGenTwoOpFn *genfn; 13083 bool is_sub = opcode == 0x4; 13084 13085 if (size == 1) { 13086 gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx); 13087 } else { 13088 tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx); 13089 } 13090 if (opcode == 0x8) { 13091 break; 13092 } 13093 read_vec_element_i32(s, tcg_op, rd, pass, MO_32); 13094 genfn = fns[size - 1][is_sub]; 13095 genfn(tcg_res, tcg_op, tcg_res); 13096 break; 13097 } 13098 case 0x05: /* FMLS */ 13099 case 0x01: /* FMLA */ 13100 read_vec_element_i32(s, tcg_res, rd, pass, 13101 is_scalar ? size : MO_32); 13102 switch (size) { 13103 case 1: 13104 if (opcode == 0x5) { 13105 /* As usual for ARM, separate negation for fused 13106 * multiply-add */ 13107 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000); 13108 } 13109 if (is_scalar) { 13110 gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx, 13111 tcg_res, fpst); 13112 } else { 13113 gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx, 13114 tcg_res, fpst); 13115 } 13116 break; 13117 case 2: 13118 if (opcode == 0x5) { 13119 /* As usual for ARM, separate negation for 13120 * fused multiply-add */ 13121 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000); 13122 } 13123 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, 13124 tcg_res, fpst); 13125 break; 13126 default: 13127 g_assert_not_reached(); 13128 } 13129 break; 13130 case 0x09: /* FMUL */ 13131 switch (size) { 13132 case 1: 13133 if (is_scalar) { 13134 gen_helper_advsimd_mulh(tcg_res, tcg_op, 13135 tcg_idx, fpst); 13136 } else { 13137 gen_helper_advsimd_mul2h(tcg_res, tcg_op, 13138 tcg_idx, fpst); 13139 } 13140 break; 13141 case 2: 13142 gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst); 13143 break; 13144 default: 13145 g_assert_not_reached(); 13146 } 13147 break; 13148 case 0x19: /* FMULX */ 13149 switch (size) { 13150 case 1: 13151 if (is_scalar) { 13152 gen_helper_advsimd_mulxh(tcg_res, tcg_op, 13153 tcg_idx, fpst); 13154 } else { 13155 gen_helper_advsimd_mulx2h(tcg_res, tcg_op, 13156 tcg_idx, fpst); 13157 } 13158 break; 13159 case 2: 13160 gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst); 13161 break; 13162 default: 13163 g_assert_not_reached(); 13164 } 13165 break; 13166 case 0x0c: /* SQDMULH */ 13167 if (size == 1) { 13168 gen_helper_neon_qdmulh_s16(tcg_res, tcg_env, 13169 tcg_op, tcg_idx); 13170 } else { 13171 gen_helper_neon_qdmulh_s32(tcg_res, tcg_env, 13172 tcg_op, tcg_idx); 13173 } 13174 break; 13175 case 0x0d: /* SQRDMULH */ 13176 if (size == 1) { 13177 gen_helper_neon_qrdmulh_s16(tcg_res, tcg_env, 13178 tcg_op, tcg_idx); 13179 } else { 13180 gen_helper_neon_qrdmulh_s32(tcg_res, tcg_env, 13181 tcg_op, tcg_idx); 13182 } 13183 break; 13184 case 0x1d: /* SQRDMLAH */ 13185 read_vec_element_i32(s, tcg_res, rd, pass, 13186 is_scalar ? size : MO_32); 13187 if (size == 1) { 13188 gen_helper_neon_qrdmlah_s16(tcg_res, tcg_env, 13189 tcg_op, tcg_idx, tcg_res); 13190 } else { 13191 gen_helper_neon_qrdmlah_s32(tcg_res, tcg_env, 13192 tcg_op, tcg_idx, tcg_res); 13193 } 13194 break; 13195 case 0x1f: /* SQRDMLSH */ 13196 read_vec_element_i32(s, tcg_res, rd, pass, 13197 is_scalar ? size : MO_32); 13198 if (size == 1) { 13199 gen_helper_neon_qrdmlsh_s16(tcg_res, tcg_env, 13200 tcg_op, tcg_idx, tcg_res); 13201 } else { 13202 gen_helper_neon_qrdmlsh_s32(tcg_res, tcg_env, 13203 tcg_op, tcg_idx, tcg_res); 13204 } 13205 break; 13206 default: 13207 g_assert_not_reached(); 13208 } 13209 13210 if (is_scalar) { 13211 write_fp_sreg(s, rd, tcg_res); 13212 } else { 13213 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 13214 } 13215 } 13216 13217 clear_vec_high(s, is_q, rd); 13218 } else { 13219 /* long ops: 16x16->32 or 32x32->64 */ 13220 TCGv_i64 tcg_res[2]; 13221 int pass; 13222 bool satop = extract32(opcode, 0, 1); 13223 MemOp memop = MO_32; 13224 13225 if (satop || !u) { 13226 memop |= MO_SIGN; 13227 } 13228 13229 if (size == 2) { 13230 TCGv_i64 tcg_idx = tcg_temp_new_i64(); 13231 13232 read_vec_element(s, tcg_idx, rm, index, memop); 13233 13234 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 13235 TCGv_i64 tcg_op = tcg_temp_new_i64(); 13236 TCGv_i64 tcg_passres; 13237 int passelt; 13238 13239 if (is_scalar) { 13240 passelt = 0; 13241 } else { 13242 passelt = pass + (is_q * 2); 13243 } 13244 13245 read_vec_element(s, tcg_op, rn, passelt, memop); 13246 13247 tcg_res[pass] = tcg_temp_new_i64(); 13248 13249 if (opcode == 0xa || opcode == 0xb) { 13250 /* Non-accumulating ops */ 13251 tcg_passres = tcg_res[pass]; 13252 } else { 13253 tcg_passres = tcg_temp_new_i64(); 13254 } 13255 13256 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx); 13257 13258 if (satop) { 13259 /* saturating, doubling */ 13260 gen_helper_neon_addl_saturate_s64(tcg_passres, tcg_env, 13261 tcg_passres, tcg_passres); 13262 } 13263 13264 if (opcode == 0xa || opcode == 0xb) { 13265 continue; 13266 } 13267 13268 /* Accumulating op: handle accumulate step */ 13269 read_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13270 13271 switch (opcode) { 13272 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 13273 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 13274 break; 13275 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 13276 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 13277 break; 13278 case 0x7: /* SQDMLSL, SQDMLSL2 */ 13279 tcg_gen_neg_i64(tcg_passres, tcg_passres); 13280 /* fall through */ 13281 case 0x3: /* SQDMLAL, SQDMLAL2 */ 13282 gen_helper_neon_addl_saturate_s64(tcg_res[pass], tcg_env, 13283 tcg_res[pass], 13284 tcg_passres); 13285 break; 13286 default: 13287 g_assert_not_reached(); 13288 } 13289 } 13290 13291 clear_vec_high(s, !is_scalar, rd); 13292 } else { 13293 TCGv_i32 tcg_idx = tcg_temp_new_i32(); 13294 13295 assert(size == 1); 13296 read_vec_element_i32(s, tcg_idx, rm, index, size); 13297 13298 if (!is_scalar) { 13299 /* The simplest way to handle the 16x16 indexed ops is to 13300 * duplicate the index into both halves of the 32 bit tcg_idx 13301 * and then use the usual Neon helpers. 13302 */ 13303 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); 13304 } 13305 13306 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 13307 TCGv_i32 tcg_op = tcg_temp_new_i32(); 13308 TCGv_i64 tcg_passres; 13309 13310 if (is_scalar) { 13311 read_vec_element_i32(s, tcg_op, rn, pass, size); 13312 } else { 13313 read_vec_element_i32(s, tcg_op, rn, 13314 pass + (is_q * 2), MO_32); 13315 } 13316 13317 tcg_res[pass] = tcg_temp_new_i64(); 13318 13319 if (opcode == 0xa || opcode == 0xb) { 13320 /* Non-accumulating ops */ 13321 tcg_passres = tcg_res[pass]; 13322 } else { 13323 tcg_passres = tcg_temp_new_i64(); 13324 } 13325 13326 if (memop & MO_SIGN) { 13327 gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx); 13328 } else { 13329 gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx); 13330 } 13331 if (satop) { 13332 gen_helper_neon_addl_saturate_s32(tcg_passres, tcg_env, 13333 tcg_passres, tcg_passres); 13334 } 13335 13336 if (opcode == 0xa || opcode == 0xb) { 13337 continue; 13338 } 13339 13340 /* Accumulating op: handle accumulate step */ 13341 read_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13342 13343 switch (opcode) { 13344 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 13345 gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass], 13346 tcg_passres); 13347 break; 13348 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 13349 gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass], 13350 tcg_passres); 13351 break; 13352 case 0x7: /* SQDMLSL, SQDMLSL2 */ 13353 gen_helper_neon_negl_u32(tcg_passres, tcg_passres); 13354 /* fall through */ 13355 case 0x3: /* SQDMLAL, SQDMLAL2 */ 13356 gen_helper_neon_addl_saturate_s32(tcg_res[pass], tcg_env, 13357 tcg_res[pass], 13358 tcg_passres); 13359 break; 13360 default: 13361 g_assert_not_reached(); 13362 } 13363 } 13364 13365 if (is_scalar) { 13366 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]); 13367 } 13368 } 13369 13370 if (is_scalar) { 13371 tcg_res[1] = tcg_constant_i64(0); 13372 } 13373 13374 for (pass = 0; pass < 2; pass++) { 13375 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13376 } 13377 } 13378 } 13379 13380 /* Crypto AES 13381 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 13382 * +-----------------+------+-----------+--------+-----+------+------+ 13383 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | 13384 * +-----------------+------+-----------+--------+-----+------+------+ 13385 */ 13386 static void disas_crypto_aes(DisasContext *s, uint32_t insn) 13387 { 13388 int size = extract32(insn, 22, 2); 13389 int opcode = extract32(insn, 12, 5); 13390 int rn = extract32(insn, 5, 5); 13391 int rd = extract32(insn, 0, 5); 13392 gen_helper_gvec_2 *genfn2 = NULL; 13393 gen_helper_gvec_3 *genfn3 = NULL; 13394 13395 if (!dc_isar_feature(aa64_aes, s) || size != 0) { 13396 unallocated_encoding(s); 13397 return; 13398 } 13399 13400 switch (opcode) { 13401 case 0x4: /* AESE */ 13402 genfn3 = gen_helper_crypto_aese; 13403 break; 13404 case 0x6: /* AESMC */ 13405 genfn2 = gen_helper_crypto_aesmc; 13406 break; 13407 case 0x5: /* AESD */ 13408 genfn3 = gen_helper_crypto_aesd; 13409 break; 13410 case 0x7: /* AESIMC */ 13411 genfn2 = gen_helper_crypto_aesimc; 13412 break; 13413 default: 13414 unallocated_encoding(s); 13415 return; 13416 } 13417 13418 if (!fp_access_check(s)) { 13419 return; 13420 } 13421 if (genfn2) { 13422 gen_gvec_op2_ool(s, true, rd, rn, 0, genfn2); 13423 } else { 13424 gen_gvec_op3_ool(s, true, rd, rd, rn, 0, genfn3); 13425 } 13426 } 13427 13428 /* Crypto three-reg SHA 13429 * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 13430 * +-----------------+------+---+------+---+--------+-----+------+------+ 13431 * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd | 13432 * +-----------------+------+---+------+---+--------+-----+------+------+ 13433 */ 13434 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) 13435 { 13436 int size = extract32(insn, 22, 2); 13437 int opcode = extract32(insn, 12, 3); 13438 int rm = extract32(insn, 16, 5); 13439 int rn = extract32(insn, 5, 5); 13440 int rd = extract32(insn, 0, 5); 13441 gen_helper_gvec_3 *genfn; 13442 bool feature; 13443 13444 if (size != 0) { 13445 unallocated_encoding(s); 13446 return; 13447 } 13448 13449 switch (opcode) { 13450 case 0: /* SHA1C */ 13451 genfn = gen_helper_crypto_sha1c; 13452 feature = dc_isar_feature(aa64_sha1, s); 13453 break; 13454 case 1: /* SHA1P */ 13455 genfn = gen_helper_crypto_sha1p; 13456 feature = dc_isar_feature(aa64_sha1, s); 13457 break; 13458 case 2: /* SHA1M */ 13459 genfn = gen_helper_crypto_sha1m; 13460 feature = dc_isar_feature(aa64_sha1, s); 13461 break; 13462 case 3: /* SHA1SU0 */ 13463 genfn = gen_helper_crypto_sha1su0; 13464 feature = dc_isar_feature(aa64_sha1, s); 13465 break; 13466 case 4: /* SHA256H */ 13467 genfn = gen_helper_crypto_sha256h; 13468 feature = dc_isar_feature(aa64_sha256, s); 13469 break; 13470 case 5: /* SHA256H2 */ 13471 genfn = gen_helper_crypto_sha256h2; 13472 feature = dc_isar_feature(aa64_sha256, s); 13473 break; 13474 case 6: /* SHA256SU1 */ 13475 genfn = gen_helper_crypto_sha256su1; 13476 feature = dc_isar_feature(aa64_sha256, s); 13477 break; 13478 default: 13479 unallocated_encoding(s); 13480 return; 13481 } 13482 13483 if (!feature) { 13484 unallocated_encoding(s); 13485 return; 13486 } 13487 13488 if (!fp_access_check(s)) { 13489 return; 13490 } 13491 gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn); 13492 } 13493 13494 /* Crypto two-reg SHA 13495 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 13496 * +-----------------+------+-----------+--------+-----+------+------+ 13497 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | 13498 * +-----------------+------+-----------+--------+-----+------+------+ 13499 */ 13500 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) 13501 { 13502 int size = extract32(insn, 22, 2); 13503 int opcode = extract32(insn, 12, 5); 13504 int rn = extract32(insn, 5, 5); 13505 int rd = extract32(insn, 0, 5); 13506 gen_helper_gvec_2 *genfn; 13507 bool feature; 13508 13509 if (size != 0) { 13510 unallocated_encoding(s); 13511 return; 13512 } 13513 13514 switch (opcode) { 13515 case 0: /* SHA1H */ 13516 feature = dc_isar_feature(aa64_sha1, s); 13517 genfn = gen_helper_crypto_sha1h; 13518 break; 13519 case 1: /* SHA1SU1 */ 13520 feature = dc_isar_feature(aa64_sha1, s); 13521 genfn = gen_helper_crypto_sha1su1; 13522 break; 13523 case 2: /* SHA256SU0 */ 13524 feature = dc_isar_feature(aa64_sha256, s); 13525 genfn = gen_helper_crypto_sha256su0; 13526 break; 13527 default: 13528 unallocated_encoding(s); 13529 return; 13530 } 13531 13532 if (!feature) { 13533 unallocated_encoding(s); 13534 return; 13535 } 13536 13537 if (!fp_access_check(s)) { 13538 return; 13539 } 13540 gen_gvec_op2_ool(s, true, rd, rn, 0, genfn); 13541 } 13542 13543 static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 13544 { 13545 tcg_gen_rotli_i64(d, m, 1); 13546 tcg_gen_xor_i64(d, d, n); 13547 } 13548 13549 static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m) 13550 { 13551 tcg_gen_rotli_vec(vece, d, m, 1); 13552 tcg_gen_xor_vec(vece, d, d, n); 13553 } 13554 13555 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 13556 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 13557 { 13558 static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 }; 13559 static const GVecGen3 op = { 13560 .fni8 = gen_rax1_i64, 13561 .fniv = gen_rax1_vec, 13562 .opt_opc = vecop_list, 13563 .fno = gen_helper_crypto_rax1, 13564 .vece = MO_64, 13565 }; 13566 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op); 13567 } 13568 13569 /* Crypto three-reg SHA512 13570 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0 13571 * +-----------------------+------+---+---+-----+--------+------+------+ 13572 * | 1 1 0 0 1 1 1 0 0 1 1 | Rm | 1 | O | 0 0 | opcode | Rn | Rd | 13573 * +-----------------------+------+---+---+-----+--------+------+------+ 13574 */ 13575 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) 13576 { 13577 int opcode = extract32(insn, 10, 2); 13578 int o = extract32(insn, 14, 1); 13579 int rm = extract32(insn, 16, 5); 13580 int rn = extract32(insn, 5, 5); 13581 int rd = extract32(insn, 0, 5); 13582 bool feature; 13583 gen_helper_gvec_3 *oolfn = NULL; 13584 GVecGen3Fn *gvecfn = NULL; 13585 13586 if (o == 0) { 13587 switch (opcode) { 13588 case 0: /* SHA512H */ 13589 feature = dc_isar_feature(aa64_sha512, s); 13590 oolfn = gen_helper_crypto_sha512h; 13591 break; 13592 case 1: /* SHA512H2 */ 13593 feature = dc_isar_feature(aa64_sha512, s); 13594 oolfn = gen_helper_crypto_sha512h2; 13595 break; 13596 case 2: /* SHA512SU1 */ 13597 feature = dc_isar_feature(aa64_sha512, s); 13598 oolfn = gen_helper_crypto_sha512su1; 13599 break; 13600 case 3: /* RAX1 */ 13601 feature = dc_isar_feature(aa64_sha3, s); 13602 gvecfn = gen_gvec_rax1; 13603 break; 13604 default: 13605 g_assert_not_reached(); 13606 } 13607 } else { 13608 switch (opcode) { 13609 case 0: /* SM3PARTW1 */ 13610 feature = dc_isar_feature(aa64_sm3, s); 13611 oolfn = gen_helper_crypto_sm3partw1; 13612 break; 13613 case 1: /* SM3PARTW2 */ 13614 feature = dc_isar_feature(aa64_sm3, s); 13615 oolfn = gen_helper_crypto_sm3partw2; 13616 break; 13617 case 2: /* SM4EKEY */ 13618 feature = dc_isar_feature(aa64_sm4, s); 13619 oolfn = gen_helper_crypto_sm4ekey; 13620 break; 13621 default: 13622 unallocated_encoding(s); 13623 return; 13624 } 13625 } 13626 13627 if (!feature) { 13628 unallocated_encoding(s); 13629 return; 13630 } 13631 13632 if (!fp_access_check(s)) { 13633 return; 13634 } 13635 13636 if (oolfn) { 13637 gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn); 13638 } else { 13639 gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64); 13640 } 13641 } 13642 13643 /* Crypto two-reg SHA512 13644 * 31 12 11 10 9 5 4 0 13645 * +-----------------------------------------+--------+------+------+ 13646 * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode | Rn | Rd | 13647 * +-----------------------------------------+--------+------+------+ 13648 */ 13649 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) 13650 { 13651 int opcode = extract32(insn, 10, 2); 13652 int rn = extract32(insn, 5, 5); 13653 int rd = extract32(insn, 0, 5); 13654 bool feature; 13655 13656 switch (opcode) { 13657 case 0: /* SHA512SU0 */ 13658 feature = dc_isar_feature(aa64_sha512, s); 13659 break; 13660 case 1: /* SM4E */ 13661 feature = dc_isar_feature(aa64_sm4, s); 13662 break; 13663 default: 13664 unallocated_encoding(s); 13665 return; 13666 } 13667 13668 if (!feature) { 13669 unallocated_encoding(s); 13670 return; 13671 } 13672 13673 if (!fp_access_check(s)) { 13674 return; 13675 } 13676 13677 switch (opcode) { 13678 case 0: /* SHA512SU0 */ 13679 gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0); 13680 break; 13681 case 1: /* SM4E */ 13682 gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e); 13683 break; 13684 default: 13685 g_assert_not_reached(); 13686 } 13687 } 13688 13689 /* Crypto four-register 13690 * 31 23 22 21 20 16 15 14 10 9 5 4 0 13691 * +-------------------+-----+------+---+------+------+------+ 13692 * | 1 1 0 0 1 1 1 0 0 | Op0 | Rm | 0 | Ra | Rn | Rd | 13693 * +-------------------+-----+------+---+------+------+------+ 13694 */ 13695 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn) 13696 { 13697 int op0 = extract32(insn, 21, 2); 13698 int rm = extract32(insn, 16, 5); 13699 int ra = extract32(insn, 10, 5); 13700 int rn = extract32(insn, 5, 5); 13701 int rd = extract32(insn, 0, 5); 13702 bool feature; 13703 13704 switch (op0) { 13705 case 0: /* EOR3 */ 13706 case 1: /* BCAX */ 13707 feature = dc_isar_feature(aa64_sha3, s); 13708 break; 13709 case 2: /* SM3SS1 */ 13710 feature = dc_isar_feature(aa64_sm3, s); 13711 break; 13712 default: 13713 unallocated_encoding(s); 13714 return; 13715 } 13716 13717 if (!feature) { 13718 unallocated_encoding(s); 13719 return; 13720 } 13721 13722 if (!fp_access_check(s)) { 13723 return; 13724 } 13725 13726 if (op0 < 2) { 13727 TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2]; 13728 int pass; 13729 13730 tcg_op1 = tcg_temp_new_i64(); 13731 tcg_op2 = tcg_temp_new_i64(); 13732 tcg_op3 = tcg_temp_new_i64(); 13733 tcg_res[0] = tcg_temp_new_i64(); 13734 tcg_res[1] = tcg_temp_new_i64(); 13735 13736 for (pass = 0; pass < 2; pass++) { 13737 read_vec_element(s, tcg_op1, rn, pass, MO_64); 13738 read_vec_element(s, tcg_op2, rm, pass, MO_64); 13739 read_vec_element(s, tcg_op3, ra, pass, MO_64); 13740 13741 if (op0 == 0) { 13742 /* EOR3 */ 13743 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3); 13744 } else { 13745 /* BCAX */ 13746 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3); 13747 } 13748 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); 13749 } 13750 write_vec_element(s, tcg_res[0], rd, 0, MO_64); 13751 write_vec_element(s, tcg_res[1], rd, 1, MO_64); 13752 } else { 13753 TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero; 13754 13755 tcg_op1 = tcg_temp_new_i32(); 13756 tcg_op2 = tcg_temp_new_i32(); 13757 tcg_op3 = tcg_temp_new_i32(); 13758 tcg_res = tcg_temp_new_i32(); 13759 tcg_zero = tcg_constant_i32(0); 13760 13761 read_vec_element_i32(s, tcg_op1, rn, 3, MO_32); 13762 read_vec_element_i32(s, tcg_op2, rm, 3, MO_32); 13763 read_vec_element_i32(s, tcg_op3, ra, 3, MO_32); 13764 13765 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); 13766 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); 13767 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); 13768 tcg_gen_rotri_i32(tcg_res, tcg_res, 25); 13769 13770 write_vec_element_i32(s, tcg_zero, rd, 0, MO_32); 13771 write_vec_element_i32(s, tcg_zero, rd, 1, MO_32); 13772 write_vec_element_i32(s, tcg_zero, rd, 2, MO_32); 13773 write_vec_element_i32(s, tcg_res, rd, 3, MO_32); 13774 } 13775 } 13776 13777 /* Crypto XAR 13778 * 31 21 20 16 15 10 9 5 4 0 13779 * +-----------------------+------+--------+------+------+ 13780 * | 1 1 0 0 1 1 1 0 1 0 0 | Rm | imm6 | Rn | Rd | 13781 * +-----------------------+------+--------+------+------+ 13782 */ 13783 static void disas_crypto_xar(DisasContext *s, uint32_t insn) 13784 { 13785 int rm = extract32(insn, 16, 5); 13786 int imm6 = extract32(insn, 10, 6); 13787 int rn = extract32(insn, 5, 5); 13788 int rd = extract32(insn, 0, 5); 13789 13790 if (!dc_isar_feature(aa64_sha3, s)) { 13791 unallocated_encoding(s); 13792 return; 13793 } 13794 13795 if (!fp_access_check(s)) { 13796 return; 13797 } 13798 13799 gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd), 13800 vec_full_reg_offset(s, rn), 13801 vec_full_reg_offset(s, rm), imm6, 16, 13802 vec_full_reg_size(s)); 13803 } 13804 13805 /* Crypto three-reg imm2 13806 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0 13807 * +-----------------------+------+-----+------+--------+------+------+ 13808 * | 1 1 0 0 1 1 1 0 0 1 0 | Rm | 1 0 | imm2 | opcode | Rn | Rd | 13809 * +-----------------------+------+-----+------+--------+------+------+ 13810 */ 13811 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) 13812 { 13813 static gen_helper_gvec_3 * const fns[4] = { 13814 gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b, 13815 gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b, 13816 }; 13817 int opcode = extract32(insn, 10, 2); 13818 int imm2 = extract32(insn, 12, 2); 13819 int rm = extract32(insn, 16, 5); 13820 int rn = extract32(insn, 5, 5); 13821 int rd = extract32(insn, 0, 5); 13822 13823 if (!dc_isar_feature(aa64_sm3, s)) { 13824 unallocated_encoding(s); 13825 return; 13826 } 13827 13828 if (!fp_access_check(s)) { 13829 return; 13830 } 13831 13832 gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]); 13833 } 13834 13835 /* C3.6 Data processing - SIMD, inc Crypto 13836 * 13837 * As the decode gets a little complex we are using a table based 13838 * approach for this part of the decode. 13839 */ 13840 static const AArch64DecodeTable data_proc_simd[] = { 13841 /* pattern , mask , fn */ 13842 { 0x0e200400, 0x9f200400, disas_simd_three_reg_same }, 13843 { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra }, 13844 { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff }, 13845 { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, 13846 { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes }, 13847 { 0x0e000400, 0x9fe08400, disas_simd_copy }, 13848 { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */ 13849 /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */ 13850 { 0x0f000400, 0x9ff80400, disas_simd_mod_imm }, 13851 { 0x0f000400, 0x9f800400, disas_simd_shift_imm }, 13852 { 0x0e000000, 0xbf208c00, disas_simd_tb }, 13853 { 0x0e000800, 0xbf208c00, disas_simd_zip_trn }, 13854 { 0x2e000000, 0xbf208400, disas_simd_ext }, 13855 { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same }, 13856 { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra }, 13857 { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff }, 13858 { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, 13859 { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise }, 13860 { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy }, 13861 { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */ 13862 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm }, 13863 { 0x4e280800, 0xff3e0c00, disas_crypto_aes }, 13864 { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha }, 13865 { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha }, 13866 { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 }, 13867 { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 }, 13868 { 0xce000000, 0xff808000, disas_crypto_four_reg }, 13869 { 0xce800000, 0xffe00000, disas_crypto_xar }, 13870 { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 }, 13871 { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 }, 13872 { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 }, 13873 { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 }, 13874 { 0x00000000, 0x00000000, NULL } 13875 }; 13876 13877 static void disas_data_proc_simd(DisasContext *s, uint32_t insn) 13878 { 13879 /* Note that this is called with all non-FP cases from 13880 * table C3-6 so it must UNDEF for entries not specifically 13881 * allocated to instructions in that table. 13882 */ 13883 AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn); 13884 if (fn) { 13885 fn(s, insn); 13886 } else { 13887 unallocated_encoding(s); 13888 } 13889 } 13890 13891 /* C3.6 Data processing - SIMD and floating point */ 13892 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn) 13893 { 13894 if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) { 13895 disas_data_proc_fp(s, insn); 13896 } else { 13897 /* SIMD, including crypto */ 13898 disas_data_proc_simd(s, insn); 13899 } 13900 } 13901 13902 static bool trans_OK(DisasContext *s, arg_OK *a) 13903 { 13904 return true; 13905 } 13906 13907 static bool trans_FAIL(DisasContext *s, arg_OK *a) 13908 { 13909 s->is_nonstreaming = true; 13910 return true; 13911 } 13912 13913 /** 13914 * is_guarded_page: 13915 * @env: The cpu environment 13916 * @s: The DisasContext 13917 * 13918 * Return true if the page is guarded. 13919 */ 13920 static bool is_guarded_page(CPUARMState *env, DisasContext *s) 13921 { 13922 uint64_t addr = s->base.pc_first; 13923 #ifdef CONFIG_USER_ONLY 13924 return page_get_flags(addr) & PAGE_BTI; 13925 #else 13926 CPUTLBEntryFull *full; 13927 void *host; 13928 int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx); 13929 int flags; 13930 13931 /* 13932 * We test this immediately after reading an insn, which means 13933 * that the TLB entry must be present and valid, and thus this 13934 * access will never raise an exception. 13935 */ 13936 flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx, 13937 false, &host, &full, 0); 13938 assert(!(flags & TLB_INVALID_MASK)); 13939 13940 return full->extra.arm.guarded; 13941 #endif 13942 } 13943 13944 /** 13945 * btype_destination_ok: 13946 * @insn: The instruction at the branch destination 13947 * @bt: SCTLR_ELx.BT 13948 * @btype: PSTATE.BTYPE, and is non-zero 13949 * 13950 * On a guarded page, there are a limited number of insns 13951 * that may be present at the branch target: 13952 * - branch target identifiers, 13953 * - paciasp, pacibsp, 13954 * - BRK insn 13955 * - HLT insn 13956 * Anything else causes a Branch Target Exception. 13957 * 13958 * Return true if the branch is compatible, false to raise BTITRAP. 13959 */ 13960 static bool btype_destination_ok(uint32_t insn, bool bt, int btype) 13961 { 13962 if ((insn & 0xfffff01fu) == 0xd503201fu) { 13963 /* HINT space */ 13964 switch (extract32(insn, 5, 7)) { 13965 case 0b011001: /* PACIASP */ 13966 case 0b011011: /* PACIBSP */ 13967 /* 13968 * If SCTLR_ELx.BT, then PACI*SP are not compatible 13969 * with btype == 3. Otherwise all btype are ok. 13970 */ 13971 return !bt || btype != 3; 13972 case 0b100000: /* BTI */ 13973 /* Not compatible with any btype. */ 13974 return false; 13975 case 0b100010: /* BTI c */ 13976 /* Not compatible with btype == 3 */ 13977 return btype != 3; 13978 case 0b100100: /* BTI j */ 13979 /* Not compatible with btype == 2 */ 13980 return btype != 2; 13981 case 0b100110: /* BTI jc */ 13982 /* Compatible with any btype. */ 13983 return true; 13984 } 13985 } else { 13986 switch (insn & 0xffe0001fu) { 13987 case 0xd4200000u: /* BRK */ 13988 case 0xd4400000u: /* HLT */ 13989 /* Give priority to the breakpoint exception. */ 13990 return true; 13991 } 13992 } 13993 return false; 13994 } 13995 13996 /* C3.1 A64 instruction index by encoding */ 13997 static void disas_a64_legacy(DisasContext *s, uint32_t insn) 13998 { 13999 switch (extract32(insn, 25, 4)) { 14000 case 0x5: 14001 case 0xd: /* Data processing - register */ 14002 disas_data_proc_reg(s, insn); 14003 break; 14004 case 0x7: 14005 case 0xf: /* Data processing - SIMD and floating point */ 14006 disas_data_proc_simd_fp(s, insn); 14007 break; 14008 default: 14009 unallocated_encoding(s); 14010 break; 14011 } 14012 } 14013 14014 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, 14015 CPUState *cpu) 14016 { 14017 DisasContext *dc = container_of(dcbase, DisasContext, base); 14018 CPUARMState *env = cpu_env(cpu); 14019 ARMCPU *arm_cpu = env_archcpu(env); 14020 CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb); 14021 int bound, core_mmu_idx; 14022 14023 dc->isar = &arm_cpu->isar; 14024 dc->condjmp = 0; 14025 dc->pc_save = dc->base.pc_first; 14026 dc->aarch64 = true; 14027 dc->thumb = false; 14028 dc->sctlr_b = 0; 14029 dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE; 14030 dc->condexec_mask = 0; 14031 dc->condexec_cond = 0; 14032 core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX); 14033 dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx); 14034 dc->tbii = EX_TBFLAG_A64(tb_flags, TBII); 14035 dc->tbid = EX_TBFLAG_A64(tb_flags, TBID); 14036 dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA); 14037 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); 14038 #if !defined(CONFIG_USER_ONLY) 14039 dc->user = (dc->current_el == 0); 14040 #endif 14041 dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); 14042 dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); 14043 dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL); 14044 dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE); 14045 dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC); 14046 dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET); 14047 dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL); 14048 dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL); 14049 dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16; 14050 dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16; 14051 dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE); 14052 dc->bt = EX_TBFLAG_A64(tb_flags, BT); 14053 dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE); 14054 dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV); 14055 dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA); 14056 dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0); 14057 dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE); 14058 dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE); 14059 dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM); 14060 dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA); 14061 dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING); 14062 dc->naa = EX_TBFLAG_A64(tb_flags, NAA); 14063 dc->nv = EX_TBFLAG_A64(tb_flags, NV); 14064 dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1); 14065 dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2); 14066 dc->vec_len = 0; 14067 dc->vec_stride = 0; 14068 dc->cp_regs = arm_cpu->cp_regs; 14069 dc->features = env->features; 14070 dc->dcz_blocksize = arm_cpu->dcz_blocksize; 14071 dc->gm_blocksize = arm_cpu->gm_blocksize; 14072 14073 #ifdef CONFIG_USER_ONLY 14074 /* In sve_probe_page, we assume TBI is enabled. */ 14075 tcg_debug_assert(dc->tbid & 1); 14076 #endif 14077 14078 dc->lse2 = dc_isar_feature(aa64_lse2, dc); 14079 14080 /* Single step state. The code-generation logic here is: 14081 * SS_ACTIVE == 0: 14082 * generate code with no special handling for single-stepping (except 14083 * that anything that can make us go to SS_ACTIVE == 1 must end the TB; 14084 * this happens anyway because those changes are all system register or 14085 * PSTATE writes). 14086 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending) 14087 * emit code for one insn 14088 * emit code to clear PSTATE.SS 14089 * emit code to generate software step exception for completed step 14090 * end TB (as usual for having generated an exception) 14091 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending) 14092 * emit code to generate a software step exception 14093 * end the TB 14094 */ 14095 dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE); 14096 dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS); 14097 dc->is_ldex = false; 14098 14099 /* Bound the number of insns to execute to those left on the page. */ 14100 bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4; 14101 14102 /* If architectural single step active, limit to 1. */ 14103 if (dc->ss_active) { 14104 bound = 1; 14105 } 14106 dc->base.max_insns = MIN(dc->base.max_insns, bound); 14107 } 14108 14109 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) 14110 { 14111 } 14112 14113 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) 14114 { 14115 DisasContext *dc = container_of(dcbase, DisasContext, base); 14116 target_ulong pc_arg = dc->base.pc_next; 14117 14118 if (tb_cflags(dcbase->tb) & CF_PCREL) { 14119 pc_arg &= ~TARGET_PAGE_MASK; 14120 } 14121 tcg_gen_insn_start(pc_arg, 0, 0); 14122 dc->insn_start = tcg_last_op(); 14123 } 14124 14125 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) 14126 { 14127 DisasContext *s = container_of(dcbase, DisasContext, base); 14128 CPUARMState *env = cpu_env(cpu); 14129 uint64_t pc = s->base.pc_next; 14130 uint32_t insn; 14131 14132 /* Singlestep exceptions have the highest priority. */ 14133 if (s->ss_active && !s->pstate_ss) { 14134 /* Singlestep state is Active-pending. 14135 * If we're in this state at the start of a TB then either 14136 * a) we just took an exception to an EL which is being debugged 14137 * and this is the first insn in the exception handler 14138 * b) debug exceptions were masked and we just unmasked them 14139 * without changing EL (eg by clearing PSTATE.D) 14140 * In either case we're going to take a swstep exception in the 14141 * "did not step an insn" case, and so the syndrome ISV and EX 14142 * bits should be zero. 14143 */ 14144 assert(s->base.num_insns == 1); 14145 gen_swstep_exception(s, 0, 0); 14146 s->base.is_jmp = DISAS_NORETURN; 14147 s->base.pc_next = pc + 4; 14148 return; 14149 } 14150 14151 if (pc & 3) { 14152 /* 14153 * PC alignment fault. This has priority over the instruction abort 14154 * that we would receive from a translation fault via arm_ldl_code. 14155 * This should only be possible after an indirect branch, at the 14156 * start of the TB. 14157 */ 14158 assert(s->base.num_insns == 1); 14159 gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc)); 14160 s->base.is_jmp = DISAS_NORETURN; 14161 s->base.pc_next = QEMU_ALIGN_UP(pc, 4); 14162 return; 14163 } 14164 14165 s->pc_curr = pc; 14166 insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b); 14167 s->insn = insn; 14168 s->base.pc_next = pc + 4; 14169 14170 s->fp_access_checked = false; 14171 s->sve_access_checked = false; 14172 14173 if (s->pstate_il) { 14174 /* 14175 * Illegal execution state. This has priority over BTI 14176 * exceptions, but comes after instruction abort exceptions. 14177 */ 14178 gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate()); 14179 return; 14180 } 14181 14182 if (dc_isar_feature(aa64_bti, s)) { 14183 if (s->base.num_insns == 1) { 14184 /* 14185 * At the first insn of the TB, compute s->guarded_page. 14186 * We delayed computing this until successfully reading 14187 * the first insn of the TB, above. This (mostly) ensures 14188 * that the softmmu tlb entry has been populated, and the 14189 * page table GP bit is available. 14190 * 14191 * Note that we need to compute this even if btype == 0, 14192 * because this value is used for BR instructions later 14193 * where ENV is not available. 14194 */ 14195 s->guarded_page = is_guarded_page(env, s); 14196 14197 /* First insn can have btype set to non-zero. */ 14198 tcg_debug_assert(s->btype >= 0); 14199 14200 /* 14201 * Note that the Branch Target Exception has fairly high 14202 * priority -- below debugging exceptions but above most 14203 * everything else. This allows us to handle this now 14204 * instead of waiting until the insn is otherwise decoded. 14205 */ 14206 if (s->btype != 0 14207 && s->guarded_page 14208 && !btype_destination_ok(insn, s->bt, s->btype)) { 14209 gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype)); 14210 return; 14211 } 14212 } else { 14213 /* Not the first insn: btype must be 0. */ 14214 tcg_debug_assert(s->btype == 0); 14215 } 14216 } 14217 14218 s->is_nonstreaming = false; 14219 if (s->sme_trap_nonstreaming) { 14220 disas_sme_fa64(s, insn); 14221 } 14222 14223 if (!disas_a64(s, insn) && 14224 !disas_sme(s, insn) && 14225 !disas_sve(s, insn)) { 14226 disas_a64_legacy(s, insn); 14227 } 14228 14229 /* 14230 * After execution of most insns, btype is reset to 0. 14231 * Note that we set btype == -1 when the insn sets btype. 14232 */ 14233 if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) { 14234 reset_btype(s); 14235 } 14236 } 14237 14238 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) 14239 { 14240 DisasContext *dc = container_of(dcbase, DisasContext, base); 14241 14242 if (unlikely(dc->ss_active)) { 14243 /* Note that this means single stepping WFI doesn't halt the CPU. 14244 * For conditional branch insns this is harmless unreachable code as 14245 * gen_goto_tb() has already handled emitting the debug exception 14246 * (and thus a tb-jump is not possible when singlestepping). 14247 */ 14248 switch (dc->base.is_jmp) { 14249 default: 14250 gen_a64_update_pc(dc, 4); 14251 /* fall through */ 14252 case DISAS_EXIT: 14253 case DISAS_JUMP: 14254 gen_step_complete_exception(dc); 14255 break; 14256 case DISAS_NORETURN: 14257 break; 14258 } 14259 } else { 14260 switch (dc->base.is_jmp) { 14261 case DISAS_NEXT: 14262 case DISAS_TOO_MANY: 14263 gen_goto_tb(dc, 1, 4); 14264 break; 14265 default: 14266 case DISAS_UPDATE_EXIT: 14267 gen_a64_update_pc(dc, 4); 14268 /* fall through */ 14269 case DISAS_EXIT: 14270 tcg_gen_exit_tb(NULL, 0); 14271 break; 14272 case DISAS_UPDATE_NOCHAIN: 14273 gen_a64_update_pc(dc, 4); 14274 /* fall through */ 14275 case DISAS_JUMP: 14276 tcg_gen_lookup_and_goto_ptr(); 14277 break; 14278 case DISAS_NORETURN: 14279 case DISAS_SWI: 14280 break; 14281 case DISAS_WFE: 14282 gen_a64_update_pc(dc, 4); 14283 gen_helper_wfe(tcg_env); 14284 break; 14285 case DISAS_YIELD: 14286 gen_a64_update_pc(dc, 4); 14287 gen_helper_yield(tcg_env); 14288 break; 14289 case DISAS_WFI: 14290 /* 14291 * This is a special case because we don't want to just halt 14292 * the CPU if trying to debug across a WFI. 14293 */ 14294 gen_a64_update_pc(dc, 4); 14295 gen_helper_wfi(tcg_env, tcg_constant_i32(4)); 14296 /* 14297 * The helper doesn't necessarily throw an exception, but we 14298 * must go back to the main loop to check for interrupts anyway. 14299 */ 14300 tcg_gen_exit_tb(NULL, 0); 14301 break; 14302 } 14303 } 14304 } 14305 14306 static void aarch64_tr_disas_log(const DisasContextBase *dcbase, 14307 CPUState *cpu, FILE *logfile) 14308 { 14309 DisasContext *dc = container_of(dcbase, DisasContext, base); 14310 14311 fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first)); 14312 target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size); 14313 } 14314 14315 const TranslatorOps aarch64_translator_ops = { 14316 .init_disas_context = aarch64_tr_init_disas_context, 14317 .tb_start = aarch64_tr_tb_start, 14318 .insn_start = aarch64_tr_insn_start, 14319 .translate_insn = aarch64_tr_translate_insn, 14320 .tb_stop = aarch64_tr_tb_stop, 14321 .disas_log = aarch64_tr_disas_log, 14322 }; 14323